diff --git a/server/db/pg/schema/schema.ts b/server/db/pg/schema/schema.ts index b61cfcf19..30dfef353 100644 --- a/server/db/pg/schema/schema.ts +++ b/server/db/pg/schema/schema.ts @@ -157,7 +157,8 @@ export const resources = pgTable("resources", { maintenanceTitle: text("maintenanceTitle"), maintenanceMessage: text("maintenanceMessage"), maintenanceEstimatedTime: text("maintenanceEstimatedTime"), - postAuthPath: text("postAuthPath") + postAuthPath: text("postAuthPath"), + health: varchar("health") // "healthy", "unhealthy" }); export const targets = pgTable("targets", { diff --git a/server/db/sqlite/schema/schema.ts b/server/db/sqlite/schema/schema.ts index c5600b756..0f0844d2f 100644 --- a/server/db/sqlite/schema/schema.ts +++ b/server/db/sqlite/schema/schema.ts @@ -178,7 +178,8 @@ export const resources = sqliteTable("resources", { maintenanceTitle: text("maintenanceTitle"), maintenanceMessage: text("maintenanceMessage"), maintenanceEstimatedTime: text("maintenanceEstimatedTime"), - postAuthPath: text("postAuthPath") + postAuthPath: text("postAuthPath"), + health: text("health") // "healthy", "unhealthy" }); export const targets = sqliteTable("targets", { diff --git a/server/private/lib/alerts/events/healthCheckEvents.ts b/server/private/lib/alerts/events/healthCheckEvents.ts index f6ab56ceb..470788ad5 100644 --- a/server/private/lib/alerts/events/healthCheckEvents.ts +++ b/server/private/lib/alerts/events/healthCheckEvents.ts @@ -13,6 +13,18 @@ import logger from "@server/logger"; import { processAlerts } from "../processAlerts"; +import { + db, + statusHistory, + targetHealthCheck, + targets, + resources +} from "@server/db"; +import { eq } from "drizzle-orm"; +import { + fireResourceHealthyAlert, + fireResourceUnhealthyAlert +} from "./resourceEvents"; // --------------------------------------------------------------------------- // Public API @@ -33,9 +45,20 @@ export async function fireHealthCheckHealthyAlert( orgId: string, healthCheckId: number, healthCheckName?: string | null, + healthCheckTargetId?: number | null, extra?: Record ): Promise { try { + await db.insert(statusHistory).values({ + entityType: "health_check", + entityId: healthCheckId, + orgId: orgId, + status: "healthy", + timestamp: Math.floor(Date.now() / 1000) + }); + + await handleResource(orgId, healthCheckTargetId); + await processAlerts({ eventType: "health_check_healthy", orgId, @@ -78,9 +101,20 @@ export async function fireHealthCheckUnhealthyAlert( orgId: string, healthCheckId: number, healthCheckName?: string | null, + healthCheckTargetId?: number | null, extra?: Record ): Promise { try { + await db.insert(statusHistory).values({ + entityType: "health_check", + entityId: healthCheckId, + orgId: orgId, + status: "unhealthy", + timestamp: Math.floor(Date.now() / 1000) + }); + + await handleResource(orgId, healthCheckTargetId); + await processAlerts({ eventType: "health_check_unhealthy", orgId, @@ -107,3 +141,63 @@ export async function fireHealthCheckUnhealthyAlert( ); } } + +async function handleResource(orgId: string, healthCheckTargetId?: number | null) { + if (!healthCheckTargetId) { + return; + } + // we have resources lets get them + const [target] = await db + .select() + .from(targets) + .where(eq(targets.targetId, healthCheckTargetId)) + .limit(1); + + if (!target) { + return; + } + const [resource] = await db + .select() + .from(resources) + .where(eq(resources.resourceId, target.resourceId)) + .limit(1); + + if (!resource) { + return; + } + const otherTargets = await db + .select({ hcHealth: targetHealthCheck.hcHealth }) + .from(targets) + .where(eq(targets.resourceId, resource.resourceId)); + + let health = "healthy"; + const allHealthy = otherTargets.every((t) => t.hcHealth === "healthy"); + if (!allHealthy) { + logger.debug( + `Not marking resource ${resource.resourceId} as healthy because not all targets are healthy` + ); + health = "unhealthy"; + } + + if (health != resource.health) { + // it changed + await db + .update(resources) + .set({ health }) + .where(eq(resources.resourceId, resource.resourceId)); + + if (health === "unhealthy") { + await fireResourceUnhealthyAlert( + orgId, + resource.resourceId, + resource.name + ); + } else if (health === "healthy") { + await fireResourceHealthyAlert( + orgId, + resource.resourceId, + resource.name + ); + } + } +} diff --git a/server/private/lib/alerts/events/resourceEvents.ts b/server/private/lib/alerts/events/resourceEvents.ts index 280b1d0c9..9fd351047 100644 --- a/server/private/lib/alerts/events/resourceEvents.ts +++ b/server/private/lib/alerts/events/resourceEvents.ts @@ -13,6 +13,7 @@ import logger from "@server/logger"; import { processAlerts } from "../processAlerts"; +import { db, statusHistory } from "@server/db"; // --------------------------------------------------------------------------- // Public API @@ -36,6 +37,14 @@ export async function fireResourceHealthyAlert( extra?: Record ): Promise { try { + await db.insert(statusHistory).values({ + entityType: "resource", + entityId: resourceId, + orgId: orgId, + status: "healthy", + timestamp: Math.floor(Date.now() / 1000) + }); + await processAlerts({ eventType: "resource_healthy", orgId, @@ -81,6 +90,14 @@ export async function fireResourceUnhealthyAlert( extra?: Record ): Promise { try { + await db.insert(statusHistory).values({ + entityType: "resource", + entityId: resourceId, + orgId: orgId, + status: "unhealthy", + timestamp: Math.floor(Date.now() / 1000) + }); + await processAlerts({ eventType: "resource_unhealthy", orgId, diff --git a/server/private/lib/alerts/events/siteEvents.ts b/server/private/lib/alerts/events/siteEvents.ts index d8531a5b7..66c813ab9 100644 --- a/server/private/lib/alerts/events/siteEvents.ts +++ b/server/private/lib/alerts/events/siteEvents.ts @@ -13,6 +13,9 @@ import logger from "@server/logger"; import { processAlerts } from "../processAlerts"; +import { db, sites, statusHistory, targetHealthCheck } from "@server/db"; +import { and, eq, inArray } from "drizzle-orm"; +import { fireHealthCheckUnhealthyAlert } from "./healthCheckEvents"; // --------------------------------------------------------------------------- // Public API @@ -36,6 +39,14 @@ export async function fireSiteOnlineAlert( extra?: Record ): Promise { try { + await db.insert(statusHistory).values({ + entityType: "site", + entityId: siteId, + orgId: orgId, + status: "online", + timestamp: Math.floor(Date.now() / 1000) + }); + await processAlerts({ eventType: "site_online", orgId, @@ -81,6 +92,37 @@ export async function fireSiteOfflineAlert( extra?: Record ): Promise { try { + await db.insert(statusHistory).values({ + entityType: "site", + entityId: siteId, + orgId: orgId, + status: "offline", + timestamp: Math.floor(Date.now() / 1000) + }); + + const unhealthyHealthChecks = await db + .update(targetHealthCheck) + .set({ hcHealth: "unhealthy" }) + .where( + and( + eq(targetHealthCheck.orgId, orgId), + eq(targetHealthCheck.siteId, siteId) + ) + ) + .returning(); + + for (const healthCheck of unhealthyHealthChecks) { + logger.info( + `Marking health check ${healthCheck.targetHealthCheckId} unhealthy due to site ${siteId} being marked offline` + ); + + await fireHealthCheckUnhealthyAlert( + healthCheck.orgId, + healthCheck.targetHealthCheckId, + healthCheck.name + ); + } + await processAlerts({ eventType: "site_offline", orgId, diff --git a/server/private/routers/alertEvents/triggerHealthCheckAlert.ts b/server/private/routers/alertEvents/triggerHealthCheckAlert.ts index 94202b0b2..0590c2bcd 100644 --- a/server/private/routers/alertEvents/triggerHealthCheckAlert.ts +++ b/server/private/routers/alertEvents/triggerHealthCheckAlert.ts @@ -91,14 +91,6 @@ export async function triggerHealthCheckAlert( ); } - await db.insert(statusHistory).values({ - entityType: "healthCheck", - entityId: healthCheckId, - orgId, - status: eventType === "health_check_healthy" ? "healthy" : "unhealthy", - timestamp: Math.floor(Date.now() / 1000) - }); - if (eventType === "health_check_healthy") { await fireHealthCheckHealthyAlert( orgId, diff --git a/server/private/routers/alertEvents/triggerResourceAlert.ts b/server/private/routers/alertEvents/triggerResourceAlert.ts index 61b81d900..42e63b288 100644 --- a/server/private/routers/alertEvents/triggerResourceAlert.ts +++ b/server/private/routers/alertEvents/triggerResourceAlert.ts @@ -89,16 +89,6 @@ export async function triggerResourceAlert( ); } - if (eventType === "resource_healthy" || eventType === "resource_unhealthy") { - await db.insert(statusHistory).values({ - entityType: "resource", - entityId: resourceId, - orgId, - status: eventType === "resource_healthy" ? "healthy" : "unhealthy", - timestamp: Math.floor(Date.now() / 1000) - }); - } - if (eventType === "resource_healthy") { await fireResourceHealthyAlert( orgId, @@ -132,4 +122,4 @@ export async function triggerResourceAlert( createHttpError(HttpCode.INTERNAL_SERVER_ERROR, "An error occurred") ); } -} \ No newline at end of file +} diff --git a/server/private/routers/alertEvents/triggerSiteAlert.ts b/server/private/routers/alertEvents/triggerSiteAlert.ts index 084fbc758..a7fa0cafc 100644 --- a/server/private/routers/alertEvents/triggerSiteAlert.ts +++ b/server/private/routers/alertEvents/triggerSiteAlert.ts @@ -83,14 +83,6 @@ export async function triggerSiteAlert( ); } - await db.insert(statusHistory).values({ - entityType: "site", - entityId: siteId, - orgId, - status: eventType === "site_online" ? "online" : "offline", - timestamp: Math.floor(Date.now() / 1000) - }); - if (eventType === "site_online") { await fireSiteOnlineAlert(orgId, siteId, site.name ?? undefined); } else { @@ -110,4 +102,4 @@ export async function triggerSiteAlert( createHttpError(HttpCode.INTERNAL_SERVER_ERROR, "An error occurred") ); } -} \ No newline at end of file +} diff --git a/server/routers/newt/handleNewtDisconnectingMessage.ts b/server/routers/newt/handleNewtDisconnectingMessage.ts index 302738f19..82af2e6a3 100644 --- a/server/routers/newt/handleNewtDisconnectingMessage.ts +++ b/server/routers/newt/handleNewtDisconnectingMessage.ts @@ -1,5 +1,12 @@ import { MessageHandler } from "@server/routers/ws"; -import { db, Newt, sites } from "@server/db"; +import { + db, + Newt, + sites, + statusHistory, + targetHealthCheck, + targets +} from "@server/db"; import { eq } from "drizzle-orm"; import logger from "@server/logger"; import { fireSiteOfflineAlert } from "@server/lib/alerts"; diff --git a/server/routers/newt/offlineChecker.ts b/server/routers/newt/offlineChecker.ts index 426d80323..fe6253581 100644 --- a/server/routers/newt/offlineChecker.ts +++ b/server/routers/newt/offlineChecker.ts @@ -1,8 +1,13 @@ -import { db, newts, sites, targetHealthCheck, targets, statusHistory } from "@server/db"; import { - hasActiveConnections, -} from "#dynamic/routers/ws"; -import { eq, lt, isNull, and, or, ne, not } from "drizzle-orm"; + db, + newts, + sites, + targetHealthCheck, + targets, + statusHistory +} from "@server/db"; +import { hasActiveConnections } from "#dynamic/routers/ws"; +import { eq, lt, isNull, and, or, ne, not, inArray } from "drizzle-orm"; import logger from "@server/logger"; import { fireSiteOfflineAlert, fireSiteOnlineAlert } from "#dynamic/lib/alerts"; @@ -77,43 +82,11 @@ export const startNewtOfflineChecker = (): void => { .set({ online: false }) .where(eq(sites.siteId, staleSite.siteId)); - await db.insert(statusHistory).values({ - entityType: "site", - entityId: staleSite.siteId, - orgId: staleSite.orgId, - status: "offline", - timestamp: Math.floor(Date.now() / 1000), - }).execute(); - - const healthChecksOnSite = await db - .select() - .from(targetHealthCheck) - .innerJoin( - targets, - eq(targets.targetId, targetHealthCheck.targetId) - ) - .innerJoin(sites, eq(sites.siteId, targets.siteId)) - .where(eq(sites.siteId, staleSite.siteId)); - - for (const healthCheck of healthChecksOnSite) { - logger.info( - `Marking health check ${healthCheck.targetHealthCheck.targetHealthCheckId} offline due to site ${staleSite.siteId} being marked offline` - ); - await db - .update(targetHealthCheck) - .set({ hcHealth: "unknown" }) - .where( - eq( - targetHealthCheck.targetHealthCheckId, - healthCheck.targetHealthCheck - .targetHealthCheckId - ) - ); - - // TODO: should we be firing an alert here when the health check goes to unknown? - } - - await fireSiteOfflineAlert(staleSite.orgId, staleSite.siteId, staleSite.name); + await fireSiteOfflineAlert( + staleSite.orgId, + staleSite.siteId, + staleSite.name + ); } // this part only effects self hosted. Its not efficient but we dont expect people to have very many wireguard sites @@ -155,15 +128,11 @@ export const startNewtOfflineChecker = (): void => { .set({ online: false }) .where(eq(sites.siteId, site.siteId)); - await db.insert(statusHistory).values({ - entityType: "site", - entityId: site.siteId, - orgId: site.orgId, - status: "offline", - timestamp: Math.floor(Date.now() / 1000), - }).execute(); - - await fireSiteOfflineAlert(site.orgId, site.siteId, site.name); + await fireSiteOfflineAlert( + site.orgId, + site.siteId, + site.name + ); } else if ( lastBandwidthUpdate >= wireguardOfflineThreshold && !site.online @@ -177,15 +146,11 @@ export const startNewtOfflineChecker = (): void => { .set({ online: true }) .where(eq(sites.siteId, site.siteId)); - await db.insert(statusHistory).values({ - entityType: "site", - entityId: site.siteId, - orgId: site.orgId, - status: "online", - timestamp: Math.floor(Date.now() / 1000), - }).execute(); - - await fireSiteOnlineAlert(site.orgId, site.siteId, site.name); + await fireSiteOnlineAlert( + site.orgId, + site.siteId, + site.name + ); } } } catch (error) { diff --git a/server/routers/newt/pingAccumulator.ts b/server/routers/newt/pingAccumulator.ts index 9b2f04c8e..70d8afa9f 100644 --- a/server/routers/newt/pingAccumulator.ts +++ b/server/routers/newt/pingAccumulator.ts @@ -1,5 +1,5 @@ import { db } from "@server/db"; -import { sites, clients, olms, statusHistory } from "@server/db"; +import { sites, clients, olms } from "@server/db"; import { and, eq, inArray } from "drizzle-orm"; import logger from "@server/logger"; import { fireSiteOnlineAlert } from "#dynamic/lib/alerts"; @@ -147,13 +147,6 @@ async function flushSitePingsToDb(): Promise { }, "flushSitePingsToDb"); for (const site of newlyOnlineSites) { - await db.insert(statusHistory).values({ - entityType: "site", - entityId: site.siteId, - orgId: site.orgId, - status: "online", - timestamp: Math.floor(Date.now() / 1000), - }).execute(); await fireSiteOnlineAlert(site.orgId, site.siteId, site.name); } } catch (error) { diff --git a/server/routers/target/handleHealthcheckStatusMessage.ts b/server/routers/target/handleHealthcheckStatusMessage.ts index 2f5a167c8..c6f2863f8 100644 --- a/server/routers/target/handleHealthcheckStatusMessage.ts +++ b/server/routers/target/handleHealthcheckStatusMessage.ts @@ -94,26 +94,13 @@ export const handleHealthcheckStatusMessage: MessageHandler = async ( const [targetCheck] = await db .select({ - targetId: targets.targetId, - siteId: targets.siteId, + targetId: targetHealthCheck.targetId, orgId: targetHealthCheck.orgId, targetHealthCheckId: targetHealthCheck.targetHealthCheckId, - resourceOrgId: resources.orgId, - resourceId: resources.resourceId, - resourceName: resources.name, name: targetHealthCheck.name, hcHealth: targetHealthCheck.hcHealth }) .from(targetHealthCheck) - .innerJoin(sites, eq(targetHealthCheck.siteId, sites.siteId)) - .innerJoin( - targets, - eq(targetHealthCheck.targetId, targets.targetId) - ) - .innerJoin( - resources, - eq(targets.resourceId, resources.resourceId) - ) .where( and( eq(targetHealthCheck.targetHealthCheckId, targetIdNum), @@ -147,92 +134,22 @@ export const handleHealthcheckStatusMessage: MessageHandler = async ( | "healthy" | "unhealthy" }) - .where(eq(targetHealthCheck.targetId, targetCheck.targetId)); - - const orgId = targetCheck.orgId || targetCheck.resourceOrgId; // for backwards compatibility, check both orgId fields because the target health checks dont have the orgId - if (!orgId) { - logger.warn( - `No org ID found for target ${targetId}, skipping status history logging` - ); - continue; - } - - // Log the state change to status history - await db.insert(statusHistory).values({ - entityType: "healthCheck", - entityId: targetCheck.targetHealthCheckId, - orgId: orgId, - status: healthStatus.status, - timestamp: Math.floor(Date.now() / 1000) - }); - - if (targetCheck.resourceId) { - // Log the state change to status history for the resource as well - // so we can show the resource status along with the site - - // if the status is healthy we should check if ALL of the targets on the resource are currently healthy and if not then dont mark the resource as healthy yet, we want to wait until all targets are healthy to mark the resource as healthy - let status = healthStatus.status; - if (healthStatus.status === "healthy") { - const otherTargets = await db - .select({ hcHealth: targetHealthCheck.hcHealth }) - .from(targets) - .innerJoin( - targetHealthCheck, - eq(targets.targetId, targetHealthCheck.targetId) - ) - .where( - and( - eq(targets.resourceId, targetCheck.resourceId), - ne(targets.targetId, targetCheck.targetId) // only check the other targets, not the one we just updated - ) - ); - - const allHealthy = otherTargets.every( - (t) => t.hcHealth === "healthy" - ); - if (!allHealthy) { - logger.debug( - `Not marking resource ${targetCheck.resourceId} as healthy because not all targets are healthy` - ); - status = "unhealthy"; - } - } - - await db.insert(statusHistory).values({ - entityType: "resource", - entityId: targetCheck.resourceId, - orgId: orgId, - status: status, - timestamp: Math.floor(Date.now() / 1000) - }); - - if (status === "unhealthy") { - await fireResourceUnhealthyAlert( - orgId, - targetCheck.resourceId, - targetCheck.resourceName - ); - } else if (status === "healthy") { - await fireResourceHealthyAlert( - orgId, - targetCheck.resourceId, - targetCheck.resourceName - ); - } - } + .where(eq(targetHealthCheck.targetHealthCheckId, targetCheck.targetHealthCheckId)); // because we are checking above if there was a change we can fire the alert here because it changed if (healthStatus.status === "unhealthy") { await fireHealthCheckUnhealthyAlert( - orgId, + targetCheck.orgId, targetCheck.targetHealthCheckId, - targetCheck.name ?? undefined + targetCheck.name ?? undefined, + targetCheck.targetId ); } else if (healthStatus.status === "healthy") { await fireHealthCheckHealthyAlert( - orgId, + targetCheck.orgId, targetCheck.targetHealthCheckId, - targetCheck.name ?? undefined + targetCheck.name ?? undefined, + targetCheck.targetId ); }