Compare commits


8 Commits
1.17.0 ... dev

Author        SHA1        Message                                                              Date
miloschwartz  840684aeba  dont show wildcard in domain picker                                  2026-04-09 17:54:25 -04:00
miloschwartz  f57012eb90  dont show international domain warning when capital letter present  2026-04-09 17:06:04 -04:00
miloschwartz  34387d9859  simplify wildcard domain on non pangolin-dns                         2026-04-09 17:04:28 -04:00
miloschwartz  80f5914fdd  add pluto                                                            2026-04-09 16:15:19 -04:00
miloschwartz  eaa70da4dd  add pluto                                                            2026-04-09 16:14:46 -04:00
Owen          466f137590  Fix migration by testing for orphans                                 2026-04-09 10:29:51 -04:00
Owen          28ef5238c9  Add CODEOWNERS                                                       2026-04-07 11:36:02 -04:00
Owen          d948d2ec33  Try to prevent deadlocks                                             2026-04-03 22:55:04 -04:00
10 changed files with 171 additions and 128 deletions

.github/CODEOWNERS vendored Normal file
View File

@@ -0,0 +1 @@
* @oschwartz10612 @miloschwartz
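For context, CODEOWNERS uses gitignore-style patterns and the last matching rule wins; a hedged sketch with hypothetical paths (the actual repo layout is not shown here):

    # everything defaults to both maintainers
    * @oschwartz10612 @miloschwartz
    # hypothetical: route DB migration changes to a single reviewer
    server/migrations/** @oschwartz10612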

View File

@@ -2116,6 +2116,7 @@
"domainPickerFreeProvidedDomain": "Free Provided Domain",
"domainPickerVerified": "Verified",
"domainPickerUnverified": "Unverified",
"domainPickerManual": "Manual",
"domainPickerInvalidSubdomainStructure": "This subdomain contains invalid characters or structure. It will be sanitized automatically when you save.",
"domainPickerError": "Error",
"domainPickerErrorLoadDomains": "Failed to load organization domains",

View File

@@ -1,6 +1,6 @@
import { db } from "@server/db";
import { sites, clients, olms } from "@server/db";
-import { eq, inArray } from "drizzle-orm";
+import { inArray } from "drizzle-orm";
import logger from "@server/logger";
/**
@@ -21,7 +21,7 @@ import logger from "@server/logger";
*/
const FLUSH_INTERVAL_MS = 10_000; // Flush every 10 seconds
-const MAX_RETRIES = 2;
+const MAX_RETRIES = 5;
const BASE_DELAY_MS = 50;
// ── Site (newt) pings ──────────────────────────────────────────────────
@@ -36,6 +36,14 @@ const pendingOlmArchiveResets: Set<string> = new Set();
let flushTimer: NodeJS.Timeout | null = null;
/**
* Guard that prevents two flush cycles from running concurrently.
* setInterval does not await async callbacks, so without this a slow flush
* (e.g. due to DB latency) would overlap with the next scheduled cycle and
* the two concurrent bulk UPDATEs would deadlock each other.
*/
let isFlushing = false;
// ── Public API ─────────────────────────────────────────────────────────
/**
@@ -72,6 +80,12 @@ export function recordClientPing(
/**
* Flush all accumulated site pings to the database.
*
* Each batch of up to BATCH_SIZE rows is written with a **single** UPDATE
* statement. We use the maximum timestamp across the batch so that `lastPing`
* reflects the most recent ping seen for any site in the group. This avoids
* the multi-statement transaction that previously created additional
* row-lock ordering hazards.
*/
async function flushSitePingsToDb(): Promise<void> {
if (pendingSitePings.size === 0) {
@@ -83,55 +97,35 @@ async function flushSitePingsToDb(): Promise<void> {
const pingsToFlush = new Map(pendingSitePings);
pendingSitePings.clear();
-// Sort by siteId for consistent lock ordering (prevents deadlocks)
-const sortedEntries = Array.from(pingsToFlush.entries()).sort(
-([a], [b]) => a - b
-);
+const entries = Array.from(pingsToFlush.entries());
const BATCH_SIZE = 50;
-for (let i = 0; i < sortedEntries.length; i += BATCH_SIZE) {
-const batch = sortedEntries.slice(i, i + BATCH_SIZE);
+for (let i = 0; i < entries.length; i += BATCH_SIZE) {
+const batch = entries.slice(i, i + BATCH_SIZE);
+// Use the latest timestamp in the batch so that `lastPing` always
+// moves forward. Using a single timestamp for the whole batch means
+// we only ever need one UPDATE statement (no transaction).
+const maxTimestamp = Math.max(...batch.map(([, ts]) => ts));
+const siteIds = batch.map(([id]) => id);
try {
await withRetry(async () => {
-// Group by timestamp for efficient bulk updates
-const byTimestamp = new Map<number, number[]>();
-for (const [siteId, timestamp] of batch) {
-const group = byTimestamp.get(timestamp) || [];
-group.push(siteId);
-byTimestamp.set(timestamp, group);
-}
-if (byTimestamp.size === 1) {
-const [timestamp, siteIds] = Array.from(
-byTimestamp.entries()
-)[0];
-await db
-.update(sites)
-.set({
-online: true,
-lastPing: timestamp
-})
-.where(inArray(sites.siteId, siteIds));
-} else {
-await db.transaction(async (tx) => {
-for (const [timestamp, siteIds] of byTimestamp) {
-await tx
-.update(sites)
-.set({
-online: true,
-lastPing: timestamp
-})
-.where(inArray(sites.siteId, siteIds));
-}
-});
-}
+await db
+.update(sites)
+.set({
+online: true,
+lastPing: maxTimestamp
+})
+.where(inArray(sites.siteId, siteIds));
}, "flushSitePingsToDb");
} catch (error) {
logger.error(
`Failed to flush site ping batch (${batch.length} sites), re-queuing for next cycle`,
{ error }
);
// Re-queue only if the preserved timestamp is newer than any
// update that may have landed since we snapshotted.
for (const [siteId, timestamp] of batch) {
const existing = pendingSitePings.get(siteId);
if (!existing || existing < timestamp) {
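The batching logic above distills to a small pure function; a minimal sketch, assuming pendingSitePings is a Map of numeric siteId to epoch-millis timestamp as in the diff:

    function toBatches(
        pending: Map<number, number>,
        batchSize = 50
    ): { siteIds: number[]; maxTimestamp: number }[] {
        const entries = Array.from(pending.entries());
        const batches: { siteIds: number[]; maxTimestamp: number }[] = [];
        for (let i = 0; i < entries.length; i += batchSize) {
            const batch = entries.slice(i, i + batchSize);
            batches.push({
                // All rows in a batch share the newest timestamp seen,
                // so lastPing only ever moves forward.
                siteIds: batch.map(([id]) => id),
                maxTimestamp: Math.max(...batch.map(([, ts]) => ts))
            });
        }
        return batches;
    }

Each element then maps to exactly one UPDATE ... WHERE siteId IN (...), which is what removes the multi-statement transaction and its row-lock ordering hazard.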
@@ -144,6 +138,8 @@ async function flushSitePingsToDb(): Promise<void> {
/**
* Flush all accumulated client (OLM) pings to the database.
*
* Same single-UPDATE-per-batch approach as `flushSitePingsToDb`.
*/
async function flushClientPingsToDb(): Promise<void> {
if (pendingClientPings.size === 0 && pendingOlmArchiveResets.size === 0) {
@@ -159,51 +155,25 @@ async function flushClientPingsToDb(): Promise<void> {
// ── Flush client pings ─────────────────────────────────────────────
if (pingsToFlush.size > 0) {
-const sortedEntries = Array.from(pingsToFlush.entries()).sort(
-([a], [b]) => a - b
-);
+const entries = Array.from(pingsToFlush.entries());
const BATCH_SIZE = 50;
-for (let i = 0; i < sortedEntries.length; i += BATCH_SIZE) {
-const batch = sortedEntries.slice(i, i + BATCH_SIZE);
+for (let i = 0; i < entries.length; i += BATCH_SIZE) {
+const batch = entries.slice(i, i + BATCH_SIZE);
+const maxTimestamp = Math.max(...batch.map(([, ts]) => ts));
+const clientIds = batch.map(([id]) => id);
try {
await withRetry(async () => {
-const byTimestamp = new Map<number, number[]>();
-for (const [clientId, timestamp] of batch) {
-const group = byTimestamp.get(timestamp) || [];
-group.push(clientId);
-byTimestamp.set(timestamp, group);
-}
-if (byTimestamp.size === 1) {
-const [timestamp, clientIds] = Array.from(
-byTimestamp.entries()
-)[0];
-await db
-.update(clients)
-.set({
-lastPing: timestamp,
-online: true,
-archived: false
-})
-.where(inArray(clients.clientId, clientIds));
-} else {
-await db.transaction(async (tx) => {
-for (const [timestamp, clientIds] of byTimestamp) {
-await tx
-.update(clients)
-.set({
-lastPing: timestamp,
-online: true,
-archived: false
-})
-.where(
-inArray(clients.clientId, clientIds)
-);
-}
-});
-}
+await db
+.update(clients)
+.set({
+lastPing: maxTimestamp,
+online: true,
+archived: false
+})
+.where(inArray(clients.clientId, clientIds));
}, "flushClientPingsToDb");
} catch (error) {
logger.error(
@@ -260,7 +230,12 @@ export async function flushPingsToDb(): Promise<void> {
/**
* Simple retry wrapper with exponential backoff for transient errors
-* (connection timeouts, unexpected disconnects).
+* (deadlocks, connection timeouts, unexpected disconnects).
*
* PostgreSQL deadlocks (40P01) are always safe to retry: the database
* guarantees exactly one winner per deadlock pair, so the loser just needs
* to try again. MAX_RETRIES is intentionally higher than typical connection
* retry budgets to give deadlock victims enough chances to succeed.
*/
async function withRetry<T>(
operation: () => Promise<T>,
@@ -277,7 +252,8 @@ async function withRetry<T>(
const jitter = Math.random() * baseDelay;
const delay = baseDelay + jitter;
logger.warn(
-`Transient DB error in ${context}, retrying attempt ${attempt}/${MAX_RETRIES} after ${delay.toFixed(0)}ms`
+`Transient DB error in ${context}, retrying attempt ${attempt}/${MAX_RETRIES} after ${delay.toFixed(0)}ms`,
+{ code: error?.code ?? error?.cause?.code }
);
await new Promise((resolve) => setTimeout(resolve, delay));
continue;
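Only fragments of withRetry appear in this hunk; a plausible self-contained sketch of the retry loop it describes (the exact control flow is an assumption, the constants mirror the diff):

    const MAX_RETRIES = 5;
    const BASE_DELAY_MS = 50;

    async function withRetry<T>(
        operation: () => Promise<T>,
        context: string
    ): Promise<T> {
        for (let attempt = 1; ; attempt++) {
            try {
                return await operation();
            } catch (error: any) {
                // Give up on non-transient errors or once the budget is spent.
                if (!isTransientError(error) || attempt > MAX_RETRIES) throw error;
                // Exponential backoff (50ms, 100ms, 200ms, ...) plus full jitter,
                // so colliding deadlock victims don't retry in lockstep.
                const baseDelay = BASE_DELAY_MS * 2 ** (attempt - 1);
                const delay = baseDelay + Math.random() * baseDelay;
                await new Promise((resolve) => setTimeout(resolve, delay));
            }
        }
    }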
@@ -288,14 +264,14 @@ async function withRetry<T>(
}
/**
-* Detect transient connection errors that are safe to retry.
+* Detect transient errors that are safe to retry.
*/
function isTransientError(error: any): boolean {
if (!error) return false;
const message = (error.message || "").toLowerCase();
const causeMessage = (error.cause?.message || "").toLowerCase();
-const code = error.code || "";
+const code = error.code || error.cause?.code || "";
// Connection timeout / terminated
if (
@@ -308,12 +284,17 @@ function isTransientError(error: any): boolean {
return true;
}
-// PostgreSQL deadlock
+// PostgreSQL deadlock detected — always safe to retry (one winner guaranteed)
if (code === "40P01" || message.includes("deadlock")) {
return true;
}
-// ECONNRESET, ECONNREFUSED, EPIPE
+// PostgreSQL serialization failure
+if (code === "40001") {
+return true;
+}
+// ECONNRESET, ECONNREFUSED, EPIPE, ETIMEDOUT
if (
code === "ECONNRESET" ||
code === "ECONNREFUSED" ||
@@ -337,12 +318,26 @@ export function startPingAccumulator(): void {
}
flushTimer = setInterval(async () => {
// Skip this tick if the previous flush is still in progress.
// setInterval does not await async callbacks, so without this guard
// two flush cycles can run concurrently and deadlock each other on
// overlapping bulk UPDATE statements.
if (isFlushing) {
logger.debug(
"Ping accumulator: previous flush still in progress, skipping cycle"
);
return;
}
isFlushing = true;
try {
await flushPingsToDb();
} catch (error) {
logger.error("Unhandled error in ping accumulator flush", {
error
});
} finally {
isFlushing = false;
}
}, FLUSH_INTERVAL_MS);
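The boolean guard keeps the fixed cadence of setInterval. A common alternative sketch (not what this change does) is a self-rescheduling timeout, which makes overlap impossible by construction at the cost of some drift between cycles:

    let stopped = false;
    let timer: NodeJS.Timeout | null = null;

    async function flushLoop(): Promise<void> {
        try {
            await flushPingsToDb();
        } catch (error) {
            logger.error("Unhandled error in ping accumulator flush", { error });
        } finally {
            // Schedule the next cycle only after this one fully settles,
            // so two flushes can never run concurrently.
            if (!stopped) timer = setTimeout(flushLoop, FLUSH_INTERVAL_MS);
        }
    }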
@@ -364,7 +359,22 @@ export async function stopPingAccumulator(): Promise<void> {
flushTimer = null;
}
-// Final flush to persist any remaining pings
+// Final flush to persist any remaining pings.
+// Wait for any in-progress flush to finish first so we don't race.
if (isFlushing) {
logger.debug(
"Ping accumulator: waiting for in-progress flush before stopping…"
);
await new Promise<void>((resolve) => {
const poll = setInterval(() => {
if (!isFlushing) {
clearInterval(poll);
resolve();
}
}, 50);
});
}
try {
await flushPingsToDb();
} catch (error) {
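The 50ms poll works, but a hedged alternative is to retain the in-flight flush as a promise handle and await it directly, avoiding the polling loop entirely:

    let inFlightFlush: Promise<void> | null = null;

    function runFlush(): void {
        if (inFlightFlush) return; // same re-entrancy guard, as a handle
        inFlightFlush = flushPingsToDb().finally(() => {
            inFlightFlush = null;
        });
    }

    async function stop(): Promise<void> {
        if (inFlightFlush) await inFlightFlush; // no polling needed
        await flushPingsToDb(); // final flush for anything accumulated since
    }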
@@ -379,4 +389,4 @@ export async function stopPingAccumulator(): Promise<void> {
*/
export function getPendingPingCount(): number {
return pendingSitePings.size + pendingClientPings.size;
-}
\ No newline at end of file
+}

View File

@@ -235,7 +235,9 @@ export default async function migration() {
for (const row of existingUserInviteRoles) {
await db.execute(sql`
INSERT INTO "userInviteRoles" ("inviteId", "roleId")
-VALUES (${row.inviteId}, ${row.roleId})
+SELECT ${row.inviteId}, ${row.roleId}
+WHERE EXISTS (SELECT 1 FROM "userInvites" WHERE "inviteId" = ${row.inviteId})
+AND EXISTS (SELECT 1 FROM "roles" WHERE "roleId" = ${row.roleId})
ON CONFLICT DO NOTHING
`);
}
@@ -258,7 +260,10 @@ export default async function migration() {
for (const row of existingUserOrgRoles) {
await db.execute(sql`
INSERT INTO "userOrgRoles" ("userId", "orgId", "roleId")
-VALUES (${row.userId}, ${row.orgId}, ${row.roleId})
+SELECT ${row.userId}, ${row.orgId}, ${row.roleId}
+WHERE EXISTS (SELECT 1 FROM "user" WHERE "id" = ${row.userId})
+AND EXISTS (SELECT 1 FROM "orgs" WHERE "orgId" = ${row.orgId})
+AND EXISTS (SELECT 1 FROM "roles" WHERE "roleId" = ${row.roleId})
ON CONFLICT DO NOTHING
`);
}
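The INSERT ... SELECT ... WHERE EXISTS shape generalizes to any backfill that must skip orphaned rows; a hedged sketch with hypothetical parent/child tables (not from this codebase):

    // The row is inserted only when every parent it references still exists,
    // and the statement stays idempotent via ON CONFLICT DO NOTHING.
    await db.execute(sql`
        INSERT INTO "childTable" ("parentId", "value")
        SELECT ${parentId}, ${value}
        WHERE EXISTS (SELECT 1 FROM "parentTable" WHERE "id" = ${parentId})
        ON CONFLICT DO NOTHING
    `);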

View File

@@ -145,7 +145,7 @@ export default async function migration() {
).run();
db.prepare(
-`INSERT INTO '__new_userOrgs'("userId", "orgId", "isOwner", "autoProvisioned", "pamUsername") SELECT "userId", "orgId", "isOwner", "autoProvisioned", "pamUsername" FROM 'userOrgs';`
+`INSERT INTO '__new_userOrgs'("userId", "orgId", "isOwner", "autoProvisioned", "pamUsername") SELECT "userId", "orgId", "isOwner", "autoProvisioned", "pamUsername" FROM 'userOrgs' WHERE EXISTS (SELECT 1 FROM 'user' WHERE id = userOrgs.userId) AND EXISTS (SELECT 1 FROM 'orgs' WHERE orgId = userOrgs.orgId);`
).run();
db.prepare(`DROP TABLE 'userOrgs';`).run();
db.prepare(
@@ -246,12 +246,15 @@ export default async function migration() {
// Re-insert the preserved invite role assignments into the new userInviteRoles table
if (existingUserInviteRoles.length > 0) {
const insertUserInviteRole = db.prepare(
-`INSERT OR IGNORE INTO 'userInviteRoles' ("inviteId", "roleId") VALUES (?, ?)`
+`INSERT OR IGNORE INTO 'userInviteRoles' ("inviteId", "roleId")
+SELECT ?, ?
+WHERE EXISTS (SELECT 1 FROM 'userInvites' WHERE inviteId = ?)
+AND EXISTS (SELECT 1 FROM 'roles' WHERE roleId = ?)`
);
const insertAll = db.transaction(() => {
for (const row of existingUserInviteRoles) {
-insertUserInviteRole.run(row.inviteId, row.roleId);
+insertUserInviteRole.run(row.inviteId, row.roleId, row.inviteId, row.roleId);
}
});
@@ -265,12 +268,16 @@ export default async function migration() {
// Re-insert the preserved role assignments into the new userOrgRoles table
if (existingUserOrgRoles.length > 0) {
const insertUserOrgRole = db.prepare(
-`INSERT OR IGNORE INTO 'userOrgRoles' ("userId", "orgId", "roleId") VALUES (?, ?, ?)`
+`INSERT OR IGNORE INTO 'userOrgRoles' ("userId", "orgId", "roleId")
+SELECT ?, ?, ?
+WHERE EXISTS (SELECT 1 FROM 'user' WHERE id = ?)
+AND EXISTS (SELECT 1 FROM 'orgs' WHERE orgId = ?)
+AND EXISTS (SELECT 1 FROM 'roles' WHERE roleId = ?)`
);
const insertAll = db.transaction(() => {
for (const row of existingUserOrgRoles) {
-insertUserOrgRole.run(row.userId, row.orgId, row.roleId);
+insertUserOrgRole.run(row.userId, row.orgId, row.roleId, row.userId, row.orgId, row.roleId);
}
});
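Each value is bound twice above because anonymous `?` placeholders are purely positional. If the driver is better-sqlite3 (an assumption based on the prepare/run/transaction API seen here), named parameters avoid the duplication:

    const insertUserInviteRole = db.prepare(
        `INSERT OR IGNORE INTO 'userInviteRoles' ("inviteId", "roleId")
         SELECT @inviteId, @roleId
         WHERE EXISTS (SELECT 1 FROM 'userInvites' WHERE inviteId = @inviteId)
         AND EXISTS (SELECT 1 FROM 'roles' WHERE roleId = @roleId)`
    );
    // Each named parameter is bound once and referenced as often as needed.
    insertUserInviteRole.run({ inviteId: row.inviteId, roleId: row.roleId });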

View File

@@ -10,6 +10,7 @@ import { authCookieHeader } from "@app/lib/api/cookies";
import { GetDNSRecordsResponse } from "@server/routers/domain";
import DNSRecordsTable from "@app/components/DNSRecordTable";
import DomainCertForm from "@app/components/DomainCertForm";
import { build } from "@server/build";
interface DomainSettingsPageProps {
params: Promise<{ domainId: string; orgId: string }>;
@@ -65,12 +66,14 @@ export default async function DomainSettingsPage({
)}
</div>
<div className="space-y-6">
-<DomainInfoCard
-failed={domain.failed}
-verified={domain.verified}
-type={domain.type}
-errorMessage={domain.errorMessage}
-/>
+{build != "oss" && env.flags.usePangolinDns ? (
+<DomainInfoCard
+failed={domain.failed}
+verified={domain.verified}
+type={domain.type}
+errorMessage={domain.errorMessage}
+/>
+) : null}
<DNSRecordsTable records={dnsRecords} type={domain.type} />

View File

@@ -154,7 +154,7 @@ export default function CreateDomainForm({
const punycodePreview = useMemo(() => {
if (!baseDomain) return "";
-const punycode = toPunycode(baseDomain);
+const punycode = toPunycode(baseDomain.toLowerCase());
return punycode !== baseDomain.toLowerCase() ? punycode : "";
}, [baseDomain]);
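The lowercase fix matters because a raw punycode encoder, unlike the WHATWG URL parser, does not case-fold its input, so an uppercase letter makes the encoded form differ from the lowercased comparison and a spurious warning appears. A hedged sketch of the intended behavior using the URL parser (the project's toPunycode helper may differ):

    function punycodePreview(domain: string): string {
        // new URL(...) lowercases and IDNA-encodes the hostname in one step.
        const ascii = new URL(`http://${domain.toLowerCase()}`).hostname;
        return ascii !== domain.toLowerCase() ? ascii : "";
    }

    punycodePreview("MÜNCHEN.example"); // "xn--mnchen-3ya.example"
    punycodePreview("Example.com");     // "" (plain ASCII needs no preview)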
@@ -239,21 +239,24 @@ export default function CreateDomainForm({
className="space-y-4"
id="create-domain-form"
>
<FormField
control={form.control}
name="type"
render={({ field }) => (
<FormItem>
<StrategySelect
options={domainOptions}
defaultValue={field.value}
onChange={field.onChange}
cols={1}
/>
<FormMessage />
</FormItem>
)}
/>
{build != "oss" && env.flags.usePangolinDns ? (
<FormField
control={form.control}
name="type"
render={({ field }) => (
<FormItem>
<StrategySelect
options={domainOptions}
defaultValue={field.value}
onChange={field.onChange}
cols={1}
/>
<FormMessage />
</FormItem>
)}
/>
) : null}
<FormField
control={form.control}
name="baseDomain"

View File

@@ -509,9 +509,11 @@ export default function DomainPicker({
<span className="truncate">
{selectedBaseDomain.domain}
</span>
-{selectedBaseDomain.verified && (
-<CheckCircle2 className="h-3 w-3 text-green-500 shrink-0" />
-)}
+{selectedBaseDomain.verified &&
+selectedBaseDomain.domainType !==
+"wildcard" && (
+<CheckCircle2 className="h-3 w-3 text-green-500 shrink-0" />
+)}
</div>
) : (
t("domainPickerSelectBaseDomain")
@@ -574,14 +576,23 @@ export default function DomainPicker({
}
</span>
<span className="text-xs text-muted-foreground">
-{orgDomain.type.toUpperCase()}{" "}
-{" "}
-{orgDomain.verified
-? t(
-"domainPickerVerified"
-)
-: t(
-"domainPickerUnverified"
-)}
+{orgDomain.type ===
+"wildcard"
+? t(
+"domainPickerManual"
+)
+: (
+<>
+{orgDomain.type.toUpperCase()}{" "}
+{" "}
+{orgDomain.verified
+? t(
+"domainPickerVerified"
+)
+: t(
+"domainPickerUnverified"
+)}
+</>
+)}
</span>
</div>

View File

@@ -333,7 +333,8 @@ export default function PendingSitesTable({
"jupiter",
"saturn",
"uranus",
"neptune"
"neptune",
"pluto"
].includes(originalRow.exitNodeName.toLowerCase());
if (isCloudNode) {

View File

@@ -342,7 +342,8 @@ export default function SitesTable({
"jupiter",
"saturn",
"uranus",
"neptune"
"neptune",
"pluto"
].includes(originalRow.exitNodeName.toLowerCase());
if (isCloudNode) {
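Since the same planet list is duplicated in PendingSitesTable and SitesTable, a hedged refactoring sketch would hoist it into a shared constant; only the names visible in this diff are listed, and the earlier entries stay elided:

    // Hypothetical shared module; node names not shown in the diff are omitted.
    export const CLOUD_EXIT_NODE_NAMES: Set<string> = new Set([
        /* ...earlier node names... */
        "jupiter",
        "saturn",
        "uranus",
        "neptune",
        "pluto"
    ]);

    const isCloudNode = CLOUD_EXIT_NODE_NAMES.has(
        originalRow.exitNodeName.toLowerCase()
    );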