mirror of
https://github.com/fosrl/pangolin.git
synced 2026-06-15 20:07:35 +00:00
Add uptime tracking
This commit is contained in:
@@ -1159,3 +1159,19 @@ export type RoundTripMessageTracker = InferSelectModel<
|
||||
typeof roundTripMessageTracker
|
||||
>;
|
||||
export type Network = InferSelectModel<typeof networks>;
|
||||
|
||||
/**
 * Status-change log for monitored entities (PostgreSQL schema).
 *
 * A row is inserted whenever a site or health check changes status; rows are
 * removed together with their organization (`onDelete: "cascade"`).
 */
export const statusHistory = pgTable(
    "statusHistory",
    {
        id: serial("id").primaryKey(),
        // Discriminator for `entityId`: "site" | "healthCheck"
        entityType: varchar("entityType").notNull(),
        // siteId or targetHealthCheckId, depending on `entityType`
        entityId: integer("entityId").notNull(),
        orgId: varchar("orgId")
            .notNull()
            .references(() => orgs.orgId, { onDelete: "cascade" }),
        // "online"/"offline" for sites; "healthy"/"unhealthy"/"unknown" for health checks
        status: varchar("status").notNull(),
        // Unix epoch seconds at which the status was recorded
        timestamp: integer("timestamp").notNull(),
    },
    (t) => [
        // Serves per-entity history lookups ordered by time
        index("idx_statusHistory_entity").on(
            t.entityType,
            t.entityId,
            t.timestamp
        ),
        // Serves organization-wide scans ordered by time
        index("idx_statusHistory_org_timestamp").on(t.orgId, t.timestamp),
    ]
);

/** Row shape produced by selecting from the `statusHistory` table. */
export type StatusHistory = InferSelectModel<typeof statusHistory>;
|
||||
|
||||
@@ -1181,6 +1181,20 @@ export const deviceWebAuthCodes = sqliteTable("deviceWebAuthCodes", {
|
||||
})
|
||||
});
|
||||
|
||||
/**
 * Status-change log for monitored entities (SQLite schema).
 *
 * A row is inserted whenever a site or health check changes status; rows are
 * removed together with their organization (`onDelete: "cascade"`).
 */
export const statusHistory = sqliteTable(
    "statusHistory",
    {
        id: integer("id").primaryKey({ autoIncrement: true }),
        // Discriminator for `entityId`: "site" | "healthCheck"
        entityType: text("entityType").notNull(),
        // siteId or targetHealthCheckId, depending on `entityType`
        entityId: integer("entityId").notNull(),
        orgId: text("orgId")
            .notNull()
            .references(() => orgs.orgId, { onDelete: "cascade" }),
        // "online"/"offline" for sites; "healthy"/"unhealthy"/"unknown" for health checks
        status: text("status").notNull(),
        // Unix epoch seconds at which the status was recorded
        timestamp: integer("timestamp").notNull(),
    },
    (t) => [
        // Serves per-entity history lookups ordered by time
        index("idx_statusHistory_entity").on(
            t.entityType,
            t.entityId,
            t.timestamp
        ),
        // Serves organization-wide scans ordered by time
        index("idx_statusHistory_org_timestamp").on(t.orgId, t.timestamp),
    ]
);
|
||||
|
||||
export const roundTripMessageTracker = sqliteTable("roundTripMessageTracker", {
|
||||
messageId: integer("messageId").primaryKey({ autoIncrement: true }),
|
||||
wsClientId: text("clientId"),
|
||||
@@ -1258,3 +1272,4 @@ export type DeviceWebAuthCode = InferSelectModel<typeof deviceWebAuthCodes>;
|
||||
/** Row shape produced by selecting from the `roundTripMessageTracker` table. */
export type RoundTripMessageTracker = InferSelectModel<typeof roundTripMessageTracker>;
|
||||
/** Row shape produced by selecting from the `statusHistory` table. */
export type StatusHistory = InferSelectModel<typeof statusHistory>;
|
||||
|
||||
@@ -285,6 +285,20 @@ authenticated.get(
|
||||
site.listContainers
|
||||
);
|
||||
|
||||
// Uptime history for a single site. The handler only runs after
// verifySiteAccess and the getSite action check have passed.
authenticated.get(
    "/site/:siteId/status-history",
    verifySiteAccess,
    verifyUserHasAction(ActionsEnum.getSite),
    site.getSiteStatusHistory
);

// Uptime history for the health check attached to a target. Note that the
// only route parameter exposed here is `targetId`.
authenticated.get(
    "/target/:targetId/health-check/status-history",
    verifyTargetAccess,
    verifyUserHasAction(ActionsEnum.getTarget),
    site.getHealthCheckStatusHistory
);
|
||||
|
||||
// Site Resource endpoints
|
||||
authenticated.put(
|
||||
"/org/:orgId/site-resource",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { db, newts, sites, targetHealthCheck, targets } from "@server/db";
|
||||
import { db, newts, sites, targetHealthCheck, targets, statusHistory } from "@server/db";
|
||||
import {
|
||||
hasActiveConnections,
|
||||
} from "#dynamic/routers/ws";
|
||||
@@ -77,6 +77,14 @@ export const startNewtOfflineChecker = (): void => {
|
||||
.set({ online: false })
|
||||
.where(eq(sites.siteId, staleSite.siteId));
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: staleSite.siteId,
|
||||
orgId: staleSite.orgId,
|
||||
status: "offline",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
const healthChecksOnSite = await db
|
||||
.select()
|
||||
.from(targetHealthCheck)
|
||||
@@ -147,6 +155,14 @@ export const startNewtOfflineChecker = (): void => {
|
||||
.set({ online: false })
|
||||
.where(eq(sites.siteId, site.siteId));
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: site.siteId,
|
||||
orgId: site.orgId,
|
||||
status: "offline",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
await fireSiteOfflineAlert(site.orgId, site.siteId, site.name);
|
||||
} else if (
|
||||
lastBandwidthUpdate >= wireguardOfflineThreshold &&
|
||||
@@ -161,6 +177,14 @@ export const startNewtOfflineChecker = (): void => {
|
||||
.set({ online: true })
|
||||
.where(eq(sites.siteId, site.siteId));
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: site.siteId,
|
||||
orgId: site.orgId,
|
||||
status: "online",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { db } from "@server/db";
|
||||
import { sites, clients, olms } from "@server/db";
|
||||
import { sites, clients, olms, statusHistory } from "@server/db";
|
||||
import { and, eq, inArray } from "drizzle-orm";
|
||||
import logger from "@server/logger";
|
||||
import { fireSiteOnlineAlert } from "#dynamic/lib/alerts";
|
||||
@@ -147,6 +147,13 @@ async function flushSitePingsToDb(): Promise<void> {
|
||||
}, "flushSitePingsToDb");
|
||||
|
||||
for (const site of newlyOnlineSites) {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: site.siteId,
|
||||
orgId: site.orgId,
|
||||
status: "online",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
|
||||
}
|
||||
} catch (error) {
|
||||
|
||||
305
server/routers/site/getStatusHistory.ts
Normal file
305
server/routers/site/getStatusHistory.ts
Normal file
@@ -0,0 +1,305 @@
|
||||
import { Request, Response, NextFunction } from "express";
import { z } from "zod";
import { db, statusHistory, targetHealthCheck } from "@server/db";
import { and, asc, desc, eq, gte, lt } from "drizzle-orm";
import response from "@server/lib/response";
import HttpCode from "@server/types/HttpCode";
import createHttpError from "http-errors";
import logger from "@server/logger";
import { fromError } from "zod-validation-error";
|
||||
|
||||
/**
 * Route parameters for the site status-history endpoint.
 *
 * `siteId` arrives as a string and is converted with `parseInt`. The result is
 * piped through a number schema so that non-numeric input (which `parseInt`
 * turns into NaN) is rejected with a validation error instead of reaching the
 * database query.
 */
const siteParamsSchema = z.object({
    siteId: z
        .string()
        .transform((v) => parseInt(v, 10))
        .pipe(z.number().int().positive()),
});
|
||||
|
||||
/**
 * Route parameters identifying a health check directly by its id.
 *
 * `targetHealthCheckId` arrives as a string and is converted with `parseInt`.
 * The result is piped through a number schema so that non-numeric input
 * (which `parseInt` turns into NaN) is rejected with a validation error
 * instead of reaching the database query.
 */
const healthCheckParamsSchema = z.object({
    targetHealthCheckId: z
        .string()
        .transform((v) => parseInt(v, 10))
        .pipe(z.number().int().positive()),
});
|
||||
|
||||
/**
 * Query-string parameters shared by the status-history endpoints.
 *
 * `days` is optional; a missing or empty value falls back to 90. After
 * conversion the value must be an integer between 1 and 365 inclusive.
 */
const querySchema = z
    .object({
        days: z
            .string()
            .optional()
            .transform((raw) => {
                if (!raw) {
                    return 90;
                }
                return parseInt(raw, 10);
            }),
    })
    .pipe(z.object({ days: z.number().int().min(1).max(365) }));
|
||||
|
||||
/** One 24-hour slice of the requested history window. */
export interface DayBucket {
    /** UTC calendar date ("YYYY-MM-DD") on which this 24-hour slice starts. */
    date: string;
    /** Share of the slice spent outside a down status, 0-100, rounded to two decimals. */
    uptimePercent: number;
    /** Seconds of the slice spent in a down status. */
    totalDowntimeSeconds: number;
    /** Down intervals clipped to the slice; `start` and `end` are unix epoch seconds. */
    downtimeWindows: { start: number; end: number | null; status: string }[];
    /** "good" at >= 99% uptime, "degraded" at >= 50%, otherwise "bad"; "no_data" when no status is known for the slice. */
    status: "good" | "degraded" | "bad" | "no_data";
}

/** Payload returned by the status-history endpoints. */
export interface StatusHistoryResponse {
    entityType: string;
    entityId: number;
    /** One bucket per 24-hour slice, oldest first. */
    days: DayBucket[];
    /** Uptime over the whole requested window, 0-100, rounded to two decimals. */
    overallUptimePercent: number;
    /** Sum of the per-slice downtime, in seconds. */
    totalDowntimeSeconds: number;
}

/** Length of one history slice in seconds. */
const SECONDS_PER_DAY = 86400;

/** Statuses that count against uptime. */
function isDownStatus(status: string): boolean {
    return (
        status === "offline" || status === "unhealthy" || status === "unknown"
    );
}

/**
 * Appends the interval [start, end) to `windows` when `status` is a down
 * status and the interval is non-empty. Returns the seconds of downtime added.
 */
function recordDowntime(
    windows: DayBucket["downtimeWindows"],
    start: number,
    end: number,
    status: string | null
): number {
    if (status === null || !isDownStatus(status) || end <= start) {
        return 0;
    }
    windows.push({ start, end, status });
    return end - start;
}

/**
 * Folds a list of status events into `days` consecutive 24-hour buckets that
 * end at `nowSec`.
 *
 * Buckets are rolling 24-hour slices counted back from `nowSec`, not calendar
 * days; each bucket is labelled with the UTC date of its first second.
 *
 * @param events  Status events for a single entity. Events older than the
 *                window are used only to establish the status at the start of
 *                the window. Order does not matter; the list is sorted here.
 * @param days    Number of 24-hour slices to produce.
 * @param nowSec  End of the window in unix epoch seconds. Defaults to the
 *                current time.
 * @returns The buckets (oldest first) and the total downtime in seconds.
 */
function computeBuckets(
    events: readonly { entityType: string; entityId: number; orgId: string; status: string; timestamp: number; id: number }[],
    days: number,
    nowSec: number = Math.floor(Date.now() / 1000)
): { buckets: DayBucket[]; totalDowntime: number } {
    // Sort a copy so the single forward pass below is valid for any input order.
    const ordered = [...events].sort(
        (a, b) => a.timestamp - b.timestamp || a.id - b.id
    );

    const buckets: DayBucket[] = [];
    let totalDowntime = 0;
    let cursor = 0;
    // Last status seen so far; carried from one slice into the next.
    let knownStatus: string | null = null;

    for (let d = 0; d < days; d++) {
        const dayStartSec = nowSec - (days - d) * SECONDS_PER_DAY;
        const dayEndSec = dayStartSec + SECONDS_PER_DAY;
        const closeAtSec = Math.min(dayEndSec, nowSec);

        // Consume events that precede this slice; they only set its initial status.
        for (
            let evt = ordered[cursor];
            evt !== undefined && evt.timestamp < dayStartSec;
            evt = ordered[++cursor]
        ) {
            knownStatus = evt.status;
        }

        const statusAtDayStart = knownStatus;
        const windows: DayBucket["downtimeWindows"] = [];
        let dayDowntime = 0;
        let eventsInDay = 0;
        let windowStart = dayStartSec;
        let windowStatus = statusAtDayStart;

        for (
            let evt = ordered[cursor];
            evt !== undefined && evt.timestamp < dayEndSec;
            evt = ordered[++cursor]
        ) {
            eventsInDay++;
            // A repeated report of the current status is not a transition.
            // Keeping the open window untouched preserves the downtime that
            // has already accumulated since the window started.
            if (evt.status === windowStatus) {
                continue;
            }
            dayDowntime += recordDowntime(
                windows,
                windowStart,
                evt.timestamp,
                windowStatus
            );
            windowStart = evt.timestamp;
            windowStatus = evt.status;
        }

        // Close the window that is still open at the end of the slice.
        dayDowntime += recordDowntime(
            windows,
            windowStart,
            closeAtSec,
            windowStatus
        );
        knownStatus = windowStatus;
        totalDowntime += dayDowntime;

        const effectiveDayLength = Math.max(0, closeAtSec - dayStartSec);
        const uptimePct =
            effectiveDayLength > 0
                ? Math.max(
                      0,
                      ((effectiveDayLength - dayDowntime) /
                          effectiveDayLength) *
                          100
                  )
                : 100;

        let status: DayBucket["status"] = "no_data";
        if (statusAtDayStart !== null || eventsInDay > 0) {
            if (uptimePct >= 99) status = "good";
            else if (uptimePct >= 50) status = "degraded";
            else status = "bad";
        }

        buckets.push({
            date: new Date(dayStartSec * 1000).toISOString().slice(0, 10),
            uptimePercent: Math.round(uptimePct * 100) / 100,
            totalDowntimeSeconds: dayDowntime,
            downtimeWindows: windows,
            status,
        });
    }

    return { buckets, totalDowntime };
}
|
||||
|
||||
/**
 * Route parameters for endpoints that identify a health check through its
 * parent target (`/target/:targetId/...`).
 */
const targetParamsSchema = z.object({
    targetId: z
        .string()
        .transform((v) => parseInt(v, 10))
        .pipe(z.number().int().positive()),
});

/** Outcome of interpreting the route parameters of a health-check request. */
type HealthCheckLookup =
    | { kind: "healthCheck"; targetHealthCheckId: number }
    | { kind: "target"; targetId: number }
    | { kind: "invalid"; message: string };

/**
 * Interprets route parameters for the health-check history endpoint.
 *
 * A `targetHealthCheckId` parameter is used directly when a route provides
 * one; otherwise the `targetId` parameter is accepted and must be resolved to
 * a health check by the caller.
 */
function parseHealthCheckParams(params: unknown): HealthCheckLookup {
    const direct = healthCheckParamsSchema.safeParse(params);
    if (
        direct.success &&
        Number.isInteger(direct.data.targetHealthCheckId)
    ) {
        return {
            kind: "healthCheck",
            targetHealthCheckId: direct.data.targetHealthCheckId,
        };
    }

    const viaTarget = targetParamsSchema.safeParse(params);
    if (viaTarget.success) {
        return { kind: "target", targetId: viaTarget.data.targetId };
    }
    return { kind: "invalid", message: fromError(viaTarget.error).toString() };
}

/**
 * Loads the status events of one entity and folds them into the response
 * payload for the last `days` 24-hour slices.
 */
async function loadStatusHistory(
    entityType: string,
    entityId: number,
    days: number
): Promise<StatusHistoryResponse> {
    const nowSec = Math.floor(Date.now() / 1000);
    const startSec = nowSec - days * 86400;

    // The most recent event before the window tells us which status the
    // entity was in when the window opened. Without it, an entity that went
    // down before the window and never changed since would look like it had
    // no data (and therefore no downtime).
    const [baseline] = await db
        .select()
        .from(statusHistory)
        .where(
            and(
                eq(statusHistory.entityType, entityType),
                eq(statusHistory.entityId, entityId),
                lt(statusHistory.timestamp, startSec)
            )
        )
        .orderBy(desc(statusHistory.timestamp))
        .limit(1);

    const windowEvents = await db
        .select()
        .from(statusHistory)
        .where(
            and(
                eq(statusHistory.entityType, entityType),
                eq(statusHistory.entityId, entityId),
                gte(statusHistory.timestamp, startSec)
            )
        )
        .orderBy(asc(statusHistory.timestamp));

    // Keep the list in ascending timestamp order: baseline first.
    const events = baseline ? [baseline, ...windowEvents] : windowEvents;

    const { buckets, totalDowntime } = computeBuckets(events, days);
    const totalWindow = days * 86400;
    const overallUptime =
        totalWindow > 0
            ? Math.max(0, ((totalWindow - totalDowntime) / totalWindow) * 100)
            : 100;

    return {
        entityType,
        entityId,
        days: buckets,
        overallUptimePercent: Math.round(overallUptime * 100) / 100,
        totalDowntimeSeconds: totalDowntime,
    };
}

/** Sends the standard success envelope for a status-history payload. */
function sendStatusHistory(res: Response, data: StatusHistoryResponse) {
    return response<StatusHistoryResponse>(res, {
        data,
        success: true,
        error: false,
        message: "Status history retrieved successfully",
        status: HttpCode.OK,
    });
}

/**
 * GET handler returning the uptime history of a site.
 *
 * Reads `siteId` from the route parameters and the optional `days` query
 * parameter. Responds with 400 when either fails validation and with 500 on
 * any unexpected error.
 */
export async function getSiteStatusHistory(
    req: Request,
    res: Response,
    next: NextFunction
): Promise<any> {
    try {
        const parsedParams = siteParamsSchema.safeParse(req.params);
        if (!parsedParams.success) {
            return next(
                createHttpError(
                    HttpCode.BAD_REQUEST,
                    fromError(parsedParams.error).toString()
                )
            );
        }
        const parsedQuery = querySchema.safeParse(req.query);
        if (!parsedQuery.success) {
            return next(
                createHttpError(
                    HttpCode.BAD_REQUEST,
                    fromError(parsedQuery.error).toString()
                )
            );
        }

        const data = await loadStatusHistory(
            "site",
            parsedParams.data.siteId,
            parsedQuery.data.days
        );
        return sendStatusHistory(res, data);
    } catch (error) {
        logger.error(error);
        return next(
            createHttpError(
                HttpCode.INTERNAL_SERVER_ERROR,
                "An error occurred"
            )
        );
    }
}

/**
 * GET handler returning the uptime history of a target's health check.
 *
 * History rows are keyed by `targetHealthCheckId`, while the route this
 * handler is mounted on exposes `targetId`. The handler therefore resolves
 * the health check that belongs to the target before loading its history.
 * Responds with 400 on invalid parameters, 404 when the target has no health
 * check, and 500 on any unexpected error.
 */
export async function getHealthCheckStatusHistory(
    req: Request,
    res: Response,
    next: NextFunction
): Promise<any> {
    try {
        const lookup = parseHealthCheckParams(req.params);
        if (lookup.kind === "invalid") {
            return next(
                createHttpError(HttpCode.BAD_REQUEST, lookup.message)
            );
        }
        const parsedQuery = querySchema.safeParse(req.query);
        if (!parsedQuery.success) {
            return next(
                createHttpError(
                    HttpCode.BAD_REQUEST,
                    fromError(parsedQuery.error).toString()
                )
            );
        }

        let entityId: number;
        if (lookup.kind === "healthCheck") {
            entityId = lookup.targetHealthCheckId;
        } else {
            const [healthCheck] = await db
                .select({
                    targetHealthCheckId: targetHealthCheck.targetHealthCheckId,
                })
                .from(targetHealthCheck)
                .where(eq(targetHealthCheck.targetId, lookup.targetId))
                .limit(1);
            if (!healthCheck) {
                return next(
                    createHttpError(
                        HttpCode.NOT_FOUND,
                        `No health check found for target ${lookup.targetId}`
                    )
                );
            }
            entityId = healthCheck.targetHealthCheckId;
        }

        const data = await loadStatusHistory(
            "healthCheck",
            entityId,
            parsedQuery.data.days
        );
        return sendStatusHistory(res, data);
    } catch (error) {
        logger.error(error);
        return next(
            createHttpError(
                HttpCode.INTERNAL_SERVER_ERROR,
                "An error occurred"
            )
        );
    }
}
|
||||
@@ -1,4 +1,5 @@
|
||||
export * from "./getSite";
|
||||
export * from "./getStatusHistory";
|
||||
export * from "./createSite";
|
||||
export * from "./deleteSite";
|
||||
export * from "./updateSite";
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { db, targets, resources, sites, targetHealthCheck } from "@server/db";
|
||||
import { db, targets, resources, sites, targetHealthCheck, statusHistory } from "@server/db";
|
||||
import { MessageHandler } from "@server/routers/ws";
|
||||
import { Newt } from "@server/db";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
@@ -137,6 +137,15 @@ export const handleHealthcheckStatusMessage: MessageHandler = async (
|
||||
.where(eq(targetHealthCheck.targetId, targetIdNum))
|
||||
.execute();
|
||||
|
||||
// Log the state change to status history
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "healthCheck",
|
||||
entityId: targetCheck.targetHealthCheckId,
|
||||
orgId: targetCheck.orgId || targetCheck.resourceOrgId,
|
||||
status: healthStatus.status,
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
// because we are checking above if there was a change we can fire the alert here because it changed
|
||||
if (healthStatus.status === "unhealthy") {
|
||||
await fireHealthCheckHealthyAlert(
|
||||
|
||||
Reference in New Issue
Block a user