From 5dd19edb5661f74c3edc905c8c677ae54e82a26c Mon Sep 17 00:00:00 2001
From: Owen
Date: Mon, 1 Jun 2026 14:05:19 -0700
Subject: [PATCH] Hold the hp error message until after 18 tries

---
 server/routers/olm/error.ts                   |  2 +-
 .../routers/olm/handleOlmRegisterMessage.ts   | 62 ++++++++++++++++++-
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/server/routers/olm/error.ts b/server/routers/olm/error.ts
index 15047b94c..d9058a3f6 100644
--- a/server/routers/olm/error.ts
+++ b/server/routers/olm/error.ts
@@ -93,7 +93,7 @@ export const OlmErrorCodes = {
     },
     HOLEPUNCH_MISSING: {
         code: "HOLEPUNCH_MISSING",
-        message: `Hole punch information is missing. Please ensure your client can reach the server on UDP port ${udpPort} and try registering again.`
+        message: `Unable to coordinate client P2P connection. Please ensure your client can reach the server on UDP port ${udpPort} and try registering again.`
     }
 } as const;
 
diff --git a/server/routers/olm/handleOlmRegisterMessage.ts b/server/routers/olm/handleOlmRegisterMessage.ts
index 747770bff..3b0e1637a 100644
--- a/server/routers/olm/handleOlmRegisterMessage.ts
+++ b/server/routers/olm/handleOlmRegisterMessage.ts
@@ -20,6 +20,18 @@ import { handleFingerprintInsertion } from "./fingerprintingUtils";
 import { build } from "@server/build";
 import { canCompress } from "@server/lib/clientVersionChecks";
 import config from "@server/lib/config";
+import cache from "#dynamic/lib/cache";
+
+// Count of stale-hole-punch registers within one chain at which
+// HOLEPUNCH_MISSING is reported to the client.
+const HOLEPUNCH_STALE_CHAIN_THRESHOLD = 18;
+// TTL, in seconds, passed to the cache on every write of a chain counter.
+const HOLEPUNCH_STALE_CHAIN_TTL_SECONDS = 1800;
+
+/** Builds the cache key of the counter kept per olm and register chain. */
+function getHolePunchChainCounterKey(olmId: string, chainId: string): string {
+    return `olm:register:stale_holepunch:${olmId}:${chainId}`;
+}
 
 export const handleOlmRegisterMessage: MessageHandler = async (context) => {
     logger.info("[handleOlmRegisterMessage] Handling register olm message");
@@ -327,7 +339,55 @@ export const handleOlmRegisterMessage: MessageHandler = async (context) => {
             `[handleOlmRegisterMessage] Client last hole punch is too old and we have sites to send; skipping this register. The client is failing to hole punch and identify its network address with the server. Can the client reach the server on UDP port ${config.getRawConfig().gerbil.clients_start_port}?`,
             { orgId: client.orgId, clientId: client.clientId }
         );
-        sendOlmError(OlmErrorCodes.HOLEPUNCH_MISSING, olm.olmId);
+
+        // Registers without a chainId cannot be counted per chain, so
+        // HOLEPUNCH_MISSING is never sent for them.
+        const hasChainId =
+            chainId !== undefined && chainId !== null && String(chainId) !== "";
+        if (!hasChainId) {
+            logger.debug(
+                "[handleOlmRegisterMessage] Skipping HOLEPUNCH_MISSING because chainId is missing",
+                {
+                    orgId: client.orgId,
+                    clientId: client.clientId,
+                    olmId: olm.olmId
+                }
+            );
+            return;
+        }
+
+        // Bump the per-chain counter only here, inside the stale branch: this is
+        // the only place the count is read, so registers that pass the hole punch
+        // check never pay for a cache read and write.
+        const cacheKey = getHolePunchChainCounterKey(
+            olm.olmId,
+            String(chainId)
+        );
+        const existingCount = (await cache.get(cacheKey)) ?? 0;
+        const staleHolePunchChainCount = existingCount + 1;
+        await cache.set(
+            cacheKey,
+            staleHolePunchChainCount,
+            HOLEPUNCH_STALE_CHAIN_TTL_SECONDS
+        );
+
+        // Only the attempt whose count equals the threshold reports the error;
+        // every other attempt is just logged.
+        if (staleHolePunchChainCount === HOLEPUNCH_STALE_CHAIN_THRESHOLD) {
+            sendOlmError(OlmErrorCodes.HOLEPUNCH_MISSING, olm.olmId);
+        } else {
+            logger.debug(
+                "[handleOlmRegisterMessage] Suppressing HOLEPUNCH_MISSING until chain threshold is met",
+                {
+                    orgId: client.orgId,
+                    clientId: client.clientId,
+                    olmId: olm.olmId,
+                    chainId,
+                    staleHolePunchChainCount,
+                    threshold: HOLEPUNCH_STALE_CHAIN_THRESHOLD
+                }
+            );
+        }
         return;
     }
 