Add uptime tracking

This commit is contained in:
Owen
2026-04-16 18:25:25 -07:00
parent d6c15c8b81
commit c1782a2650
14 changed files with 794 additions and 4 deletions

View File

@@ -1159,3 +1159,19 @@ export type RoundTripMessageTracker = InferSelectModel<
typeof roundTripMessageTracker
>;
export type Network = InferSelectModel<typeof networks>;
// Log of recorded status events for sites and health checks (PostgreSQL schema).
// Rows are read back per entity over a time range to build uptime history.
export const statusHistory = pgTable("statusHistory", {
id: serial("id").primaryKey(),
// Kind of entity the row describes: "site" | "healthCheck"
entityType: varchar("entityType").notNull(),
// siteId when entityType is "site", targetHealthCheckId when it is "healthCheck"
entityId: integer("entityId").notNull(),
// Owning org; history rows are deleted together with the org (cascade)
orgId: varchar("orgId")
.notNull()
.references(() => orgs.orgId, { onDelete: "cascade" }),
// "online"/"offline" for sites; "healthy"/"unhealthy"/"unknown" for health checks
status: varchar("status").notNull(),
// Unix epoch seconds (writers store Math.floor(Date.now() / 1000))
// NOTE(review): a 32-bit integer column holds epoch seconds only until 2038 — confirm this is acceptable
timestamp: integer("timestamp").notNull(),
}, (table) => [
// Serves the per-entity, time-ordered lookups done by the status history endpoints
index("idx_statusHistory_entity").on(table.entityType, table.entityId, table.timestamp),
// Presumably for org-wide time range queries; no such query is visible in this change
index("idx_statusHistory_org_timestamp").on(table.orgId, table.timestamp),
]);
// Row shape returned when selecting from statusHistory
export type StatusHistory = InferSelectModel<typeof statusHistory>;

View File

@@ -1181,6 +1181,20 @@ export const deviceWebAuthCodes = sqliteTable("deviceWebAuthCodes", {
})
});
// Log of recorded status events for sites and health checks (SQLite schema).
// Rows are read back per entity over a time range to build uptime history.
export const statusHistory = sqliteTable("statusHistory", {
id: integer("id").primaryKey({ autoIncrement: true }),
entityType: text("entityType").notNull(), // "site" | "healthCheck"
entityId: integer("entityId").notNull(), // siteId or targetHealthCheckId
// Owning org; history rows are deleted together with the org (cascade)
orgId: text("orgId")
.notNull()
.references(() => orgs.orgId, { onDelete: "cascade" }),
status: text("status").notNull(), // "online"/"offline" for sites; "healthy"/"unhealthy"/"unknown" for healthChecks
timestamp: integer("timestamp").notNull(), // unix epoch seconds
}, (table) => [
// Serves the per-entity, time-ordered lookups done by the status history endpoints
index("idx_statusHistory_entity").on(table.entityType, table.entityId, table.timestamp),
// Presumably for org-wide time range queries; no such query is visible in this change
index("idx_statusHistory_org_timestamp").on(table.orgId, table.timestamp),
]);
export const roundTripMessageTracker = sqliteTable("roundTripMessageTracker", {
messageId: integer("messageId").primaryKey({ autoIncrement: true }),
wsClientId: text("clientId"),
@@ -1258,3 +1272,4 @@ export type DeviceWebAuthCode = InferSelectModel<typeof deviceWebAuthCodes>;
export type RoundTripMessageTracker = InferSelectModel<
typeof roundTripMessageTracker
>;
export type StatusHistory = InferSelectModel<typeof statusHistory>;

View File

@@ -285,6 +285,20 @@ authenticated.get(
site.listContainers
);
// Uptime history for a site. Access is checked against :siteId and the
// getSite action before the handler runs.
authenticated.get(
"/site/:siteId/status-history",
verifySiteAccess,
verifyUserHasAction(ActionsEnum.getSite),
site.getSiteStatusHistory
);
// Uptime history for the health check of a target. Access is checked against
// :targetId and the getTarget action before the handler runs.
authenticated.get(
"/target/:targetId/health-check/status-history",
verifyTargetAccess,
verifyUserHasAction(ActionsEnum.getTarget),
site.getHealthCheckStatusHistory
);
// Site Resource endpoints
authenticated.put(
"/org/:orgId/site-resource",

View File

@@ -1,4 +1,4 @@
import { db, newts, sites, targetHealthCheck, targets } from "@server/db";
import { db, newts, sites, targetHealthCheck, targets, statusHistory } from "@server/db";
import {
hasActiveConnections,
} from "#dynamic/routers/ws";
@@ -77,6 +77,14 @@ export const startNewtOfflineChecker = (): void => {
.set({ online: false })
.where(eq(sites.siteId, staleSite.siteId));
await db.insert(statusHistory).values({
entityType: "site",
entityId: staleSite.siteId,
orgId: staleSite.orgId,
status: "offline",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
const healthChecksOnSite = await db
.select()
.from(targetHealthCheck)
@@ -147,6 +155,14 @@ export const startNewtOfflineChecker = (): void => {
.set({ online: false })
.where(eq(sites.siteId, site.siteId));
await db.insert(statusHistory).values({
entityType: "site",
entityId: site.siteId,
orgId: site.orgId,
status: "offline",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
await fireSiteOfflineAlert(site.orgId, site.siteId, site.name);
} else if (
lastBandwidthUpdate >= wireguardOfflineThreshold &&
@@ -161,6 +177,14 @@ export const startNewtOfflineChecker = (): void => {
.set({ online: true })
.where(eq(sites.siteId, site.siteId));
await db.insert(statusHistory).values({
entityType: "site",
entityId: site.siteId,
orgId: site.orgId,
status: "online",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
}
}

View File

@@ -1,5 +1,5 @@
import { db } from "@server/db";
import { sites, clients, olms } from "@server/db";
import { sites, clients, olms, statusHistory } from "@server/db";
import { and, eq, inArray } from "drizzle-orm";
import logger from "@server/logger";
import { fireSiteOnlineAlert } from "#dynamic/lib/alerts";
@@ -147,6 +147,13 @@ async function flushSitePingsToDb(): Promise<void> {
}, "flushSitePingsToDb");
for (const site of newlyOnlineSites) {
await db.insert(statusHistory).values({
entityType: "site",
entityId: site.siteId,
orgId: site.orgId,
status: "online",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
}
} catch (error) {

View File

@@ -0,0 +1,305 @@
import { Request, Response, NextFunction } from "express";
import { z } from "zod";
import { db, statusHistory, targetHealthCheck } from "@server/db";
import { and, eq, gte, lt, asc, desc } from "drizzle-orm";
import response from "@server/lib/response";
import HttpCode from "@server/types/HttpCode";
import createHttpError from "http-errors";
import logger from "@server/logger";
import { fromError } from "zod-validation-error";
/**
 * Route params for the site history endpoint.
 *
 * The id arrives as a string. It is parsed as base-10 and then validated, so a
 * non-numeric value (which `parseInt` turns into `NaN`) is rejected as a
 * validation error instead of being passed on to the database query.
 */
const siteParamsSchema = z.object({
    siteId: z
        .string()
        .transform((v) => parseInt(v, 10))
        .pipe(z.number().int().positive()),
});

/**
 * Route params keyed by `targetHealthCheckId`, parsed and validated the same
 * way as `siteParamsSchema`.
 *
 * NOTE(review): the route registered for this endpoint declares `:targetId`,
 * not `:targetHealthCheckId` — confirm which param the handler is meant to read.
 */
const healthCheckParamsSchema = z.object({
    targetHealthCheckId: z
        .string()
        .transform((v) => parseInt(v, 10))
        .pipe(z.number().int().positive()),
});

/** Number of days of history returned when the `days` query param is omitted. */
const DEFAULT_HISTORY_DAYS = 90;

/**
 * Query string for both history endpoints.
 *
 * `days` is optional; a missing or empty value falls back to the default.
 * After parsing it must be an integer between 1 and 365 inclusive, so
 * non-numeric input (parsed to `NaN`) fails validation.
 */
const querySchema = z
    .object({
        days: z
            .string()
            .optional()
            .transform((v) => (v ? parseInt(v, 10) : DEFAULT_HISTORY_DAYS)),
    })
    .pipe(
        z.object({
            days: z.number().int().min(1).max(365),
        })
    );
/** Uptime summary for one 24-hour bucket of the requested window. */
export interface DayBucket {
    date: string; // ISO date "YYYY-MM-DD" (UTC) of the bucket's start
    uptimePercent: number; // 0-100, rounded to two decimals
    totalDowntimeSeconds: number;
    downtimeWindows: { start: number; end: number | null; status: string }[];
    status: "good" | "degraded" | "bad" | "no_data";
}

/** Payload returned by the status history endpoints. */
export interface StatusHistoryResponse {
    entityType: string;
    entityId: number;
    days: DayBucket[];
    overallUptimePercent: number;
    totalDowntimeSeconds: number;
}

/** Length of one bucket in seconds (24 hours). */
const STATUS_BUCKET_SECONDS = 86400;

/** Statuses that are counted as downtime. */
const DOWNTIME_STATUSES: ReadonlySet<string> = new Set([
    "offline",
    "unhealthy",
    "unknown",
]);

/**
 * Splits the last `days` * 24h (ending at `nowSec`) into 24-hour buckets and
 * computes downtime for each one from a list of status events.
 *
 * Buckets are rolling 24-hour windows counted back from `nowSec`; they are not
 * aligned to calendar days. Each bucket's `date` is the UTC date of its start.
 *
 * An event sets the entity's status from its timestamp until the next event
 * with a different status. A repeated event carrying the same status does not
 * start a new window, so duplicate reports cannot shorten an outage. Events
 * older than the window are used only to determine the status at its start.
 * Before the first known event the status is unknown: that time is not counted
 * as downtime, and buckets with no known status are marked "no_data".
 *
 * @param events Status events for a single entity, in any order.
 * @param days Number of 24-hour buckets to produce.
 * @param nowSec End of the window in unix epoch seconds; defaults to the current time.
 * @returns The buckets, oldest first, and the downtime summed over all of them.
 */
function computeBuckets(
    events: { entityType: string; entityId: number; orgId: string; status: string; timestamp: number; id: number }[],
    days: number,
    nowSec: number = Math.floor(Date.now() / 1000)
): { buckets: DayBucket[]; totalDowntime: number } {
    // Work on a sorted copy so callers need not pre-sort; id breaks timestamp ties.
    const ordered = [...events].sort(
        (a, b) => a.timestamp - b.timestamp || a.id - b.id
    );

    const buckets: DayBucket[] = [];
    let totalDowntime = 0;
    // Index of the next unconsumed event. Buckets are contiguous, so a single
    // forward pass over `ordered` serves every bucket.
    let cursor = 0;
    // Status in effect at the end of everything consumed so far.
    let carriedStatus: string | null = null;

    for (let d = 0; d < days; d++) {
        const dayStartSec = nowSec - (days - d) * STATUS_BUCKET_SECONDS;
        const dayEndSec = dayStartSec + STATUS_BUCKET_SECONDS;

        // Events before the bucket only establish the status at its start.
        for (
            let evt = ordered[cursor];
            evt !== undefined && evt.timestamp < dayStartSec;
            evt = ordered[++cursor]
        ) {
            carriedStatus = evt.status;
        }

        const statusAtStart = carriedStatus;
        const windows: DayBucket["downtimeWindows"] = [];
        let dayDowntime = 0;
        let windowStart = dayStartSec;
        let windowStatus = statusAtStart;
        let eventsInBucket = 0;

        for (
            let evt = ordered[cursor];
            evt !== undefined && evt.timestamp < dayEndSec;
            evt = ordered[++cursor]
        ) {
            eventsInBucket++;
            // Same status reported again: the current window simply continues.
            if (evt.status === windowStatus) continue;

            if (
                windowStatus !== null &&
                DOWNTIME_STATUSES.has(windowStatus) &&
                evt.timestamp > windowStart
            ) {
                dayDowntime += evt.timestamp - windowStart;
                windows.push({
                    start: windowStart,
                    end: evt.timestamp,
                    status: windowStatus,
                });
            }
            windowStart = evt.timestamp;
            windowStatus = evt.status;
        }

        // Close the final window at the end of the bucket (or now, if earlier).
        const bucketEndSec = Math.min(dayEndSec, nowSec);
        if (
            windowStatus !== null &&
            DOWNTIME_STATUSES.has(windowStatus) &&
            bucketEndSec > windowStart
        ) {
            dayDowntime += bucketEndSec - windowStart;
            windows.push({
                start: windowStart,
                end: bucketEndSec,
                status: windowStatus,
            });
        }
        carriedStatus = windowStatus;
        totalDowntime += dayDowntime;

        const effectiveDayLength = Math.max(0, bucketEndSec - dayStartSec);
        const uptimePct =
            effectiveDayLength > 0
                ? Math.max(
                      0,
                      ((effectiveDayLength - dayDowntime) / effectiveDayLength) * 100
                  )
                : 100;

        let status: DayBucket["status"] = "no_data";
        if (statusAtStart !== null || eventsInBucket > 0) {
            if (uptimePct >= 99) status = "good";
            else if (uptimePct >= 50) status = "degraded";
            else status = "bad";
        }

        buckets.push({
            date: new Date(dayStartSec * 1000).toISOString().slice(0, 10),
            uptimePercent: Math.round(uptimePct * 100) / 100,
            totalDowntimeSeconds: dayDowntime,
            downtimeWindows: windows,
            status,
        });
    }

    return { buckets, totalDowntime };
}
/**
 * Route params accepted by the health-check history endpoint.
 *
 * The registered route exposes `:targetId`, so that is the param normally
 * present. `targetHealthCheckId` is still accepted for callers that mount the
 * handler under a route carrying the health check id directly. Ids are parsed
 * as base-10 and must be positive integers; at least one must be supplied.
 */
const healthCheckRouteParamsSchema = z
    .object({
        targetId: z.string().optional(),
        targetHealthCheckId: z.string().optional(),
    })
    .transform((p) => ({
        targetId:
            p.targetId === undefined ? undefined : parseInt(p.targetId, 10),
        targetHealthCheckId:
            p.targetHealthCheckId === undefined
                ? undefined
                : parseInt(p.targetHealthCheckId, 10),
    }))
    .pipe(
        z
            .object({
                targetId: z.number().int().positive().optional(),
                targetHealthCheckId: z.number().int().positive().optional(),
            })
            .refine(
                (p) =>
                    p.targetId !== undefined ||
                    p.targetHealthCheckId !== undefined,
                { message: "targetId or targetHealthCheckId is required" }
            )
    );

/**
 * Resolves the health check id that history rows are keyed by.
 *
 * `targetId` takes precedence because it is the param the route's access
 * middleware checks; it is mapped to the health check row for that target.
 *
 * @returns The health check id, or undefined when the target has no health check row.
 */
async function resolveHealthCheckId(params: {
    targetId?: number;
    targetHealthCheckId?: number;
}): Promise<number | undefined> {
    if (params.targetId === undefined) {
        return params.targetHealthCheckId;
    }
    const [row] = await db
        .select({ targetHealthCheckId: targetHealthCheck.targetHealthCheckId })
        .from(targetHealthCheck)
        .where(eq(targetHealthCheck.targetId, params.targetId))
        .limit(1);
    return row?.targetHealthCheckId;
}

/**
 * Loads the status events needed for a `days`-long window and summarizes them.
 *
 * Besides the events inside the window, the most recent event before the
 * window is loaded too. Without it the status at the start of the window is
 * unknown, and an outage that began earlier would be reported as "no data".
 */
async function loadStatusHistory(
    entityType: string,
    entityId: number,
    days: number
): Promise<StatusHistoryResponse> {
    const nowSec = Math.floor(Date.now() / 1000);
    const startSec = nowSec - days * 86400;

    const baselineRows = await db
        .select()
        .from(statusHistory)
        .where(
            and(
                eq(statusHistory.entityType, entityType),
                eq(statusHistory.entityId, entityId),
                lt(statusHistory.timestamp, startSec)
            )
        )
        .orderBy(desc(statusHistory.timestamp))
        .limit(1);

    const windowEvents = await db
        .select()
        .from(statusHistory)
        .where(
            and(
                eq(statusHistory.entityType, entityType),
                eq(statusHistory.entityId, entityId),
                gte(statusHistory.timestamp, startSec)
            )
        )
        .orderBy(asc(statusHistory.timestamp));

    // Baseline first keeps the combined list in ascending timestamp order.
    const events = [...baselineRows, ...windowEvents];
    const { buckets, totalDowntime } = computeBuckets(events, days);

    const totalWindow = days * 86400;
    const overallUptime =
        totalWindow > 0
            ? Math.max(0, ((totalWindow - totalDowntime) / totalWindow) * 100)
            : 100;

    return {
        entityType,
        entityId,
        days: buckets,
        overallUptimePercent: Math.round(overallUptime * 100) / 100,
        totalDowntimeSeconds: totalDowntime,
    };
}

/**
 * Validates the `days` query param, loads the history and writes the response.
 * Shared by both history handlers; errors thrown here are handled by the caller.
 */
async function sendStatusHistory(
    req: Request,
    res: Response,
    next: NextFunction,
    entityType: string,
    entityId: number
): Promise<any> {
    const parsedQuery = querySchema.safeParse(req.query);
    if (!parsedQuery.success) {
        return next(
            createHttpError(
                HttpCode.BAD_REQUEST,
                fromError(parsedQuery.error).toString()
            )
        );
    }

    const data = await loadStatusHistory(
        entityType,
        entityId,
        parsedQuery.data.days
    );

    return response<StatusHistoryResponse>(res, {
        data,
        success: true,
        error: false,
        message: "Status history retrieved successfully",
        status: HttpCode.OK,
    });
}

/**
 * GET handler returning per-day uptime history for a site.
 *
 * Reads `siteId` from the route params and the optional `days` query param.
 * Responds 400 on invalid input and 500 on unexpected errors.
 */
export async function getSiteStatusHistory(
    req: Request,
    res: Response,
    next: NextFunction
): Promise<any> {
    try {
        const parsedParams = siteParamsSchema.safeParse(req.params);
        if (!parsedParams.success) {
            return next(
                createHttpError(
                    HttpCode.BAD_REQUEST,
                    fromError(parsedParams.error).toString()
                )
            );
        }

        const { siteId } = parsedParams.data;
        // parseInt yields NaN for non-numeric input; never query with it.
        if (!Number.isInteger(siteId)) {
            return next(
                createHttpError(HttpCode.BAD_REQUEST, "Invalid siteId")
            );
        }

        return await sendStatusHistory(req, res, next, "site", siteId);
    } catch (error) {
        logger.error(error);
        return next(
            createHttpError(
                HttpCode.INTERNAL_SERVER_ERROR,
                "An error occurred"
            )
        );
    }
}

/**
 * GET handler returning per-day uptime history for a target's health check.
 *
 * Reads `targetId` (or `targetHealthCheckId`) from the route params and the
 * optional `days` query param. Responds 400 on invalid input, 404 when the
 * target has no health check, and 500 on unexpected errors.
 */
export async function getHealthCheckStatusHistory(
    req: Request,
    res: Response,
    next: NextFunction
): Promise<any> {
    try {
        const parsedParams = healthCheckRouteParamsSchema.safeParse(req.params);
        if (!parsedParams.success) {
            return next(
                createHttpError(
                    HttpCode.BAD_REQUEST,
                    fromError(parsedParams.error).toString()
                )
            );
        }

        const healthCheckId = await resolveHealthCheckId(parsedParams.data);
        if (healthCheckId === undefined) {
            return next(
                createHttpError(
                    HttpCode.NOT_FOUND,
                    "Health check not found for target"
                )
            );
        }

        return await sendStatusHistory(
            req,
            res,
            next,
            "healthCheck",
            healthCheckId
        );
    } catch (error) {
        logger.error(error);
        return next(
            createHttpError(
                HttpCode.INTERNAL_SERVER_ERROR,
                "An error occurred"
            )
        );
    }
}

View File

@@ -1,4 +1,5 @@
export * from "./getSite";
export * from "./getStatusHistory";
export * from "./createSite";
export * from "./deleteSite";
export * from "./updateSite";

View File

@@ -1,4 +1,4 @@
import { db, targets, resources, sites, targetHealthCheck } from "@server/db";
import { db, targets, resources, sites, targetHealthCheck, statusHistory } from "@server/db";
import { MessageHandler } from "@server/routers/ws";
import { Newt } from "@server/db";
import { eq, and } from "drizzle-orm";
@@ -137,6 +137,15 @@ export const handleHealthcheckStatusMessage: MessageHandler = async (
.where(eq(targetHealthCheck.targetId, targetIdNum))
.execute();
// Log the state change to status history
await db.insert(statusHistory).values({
entityType: "healthCheck",
entityId: targetCheck.targetHealthCheckId,
orgId: targetCheck.orgId || targetCheck.resourceOrgId,
status: healthStatus.status,
timestamp: Math.floor(Date.now() / 1000),
}).execute();
// because we are checking above if there was a change we can fire the alert here because it changed
if (healthStatus.status === "unhealthy") {
await fireHealthCheckHealthyAlert(