security: fix auth bypass, branch injection, silent failures, hardcoded IPs + ensure-checkout repo_dir fix

Security (4 issues):
1. Remove the 172.* Docker-bridge auth bypass — previously any container on the
   bridge inherited tag:admin (incl. /unlock, /progress/reset). Bridge callers now
   need a Tailscale identity or a bearer token. (kua-mcp-core is unaffected — it
   reaches the engine via docker exec localhost.)
2. Validate request-supplied source_branch/target_branch on /release (400 on bad
   input) before they reach the shell in release().
3. Check .ok on previously-ignored runOnServer results: post_deploy hook
   (→partial), no-health-url docker compose ps (→unhealthy); add a catch to
   rollback() so a failed rollback records failure instead of hanging 'running'.
4. Replace hardcoded bruno/gal Tailscale IP map with runtime resolution via the
   tailscaled LocalAPI over the mounted socket (cached per host).

Regression fix (ensure-checkout):
- ensureCheckout now probes/clones the GIT ROOT (registry repo_dir), not deploy_dir.
  They differ for sub-monorepo apps (coder-core: repo_dir=/root/apps/coder-core,
  deploy_dir=.../services/production). Probing deploy_dir/.git falsely reported
  MISSING and broke coder-core deploys (e33b1e9 regression). The 18 normal apps,
  where repo_dir == deploy_dir, are unchanged.
This commit is contained in:
Kavi 2026-05-26 18:50:16 -04:00
parent e33b1e96cb
commit d75a145d90
1 changed files with 150 additions and 35 deletions

177
server.js
View File

@ -21,6 +21,17 @@ function validateMessage(msg) {
return msg; return msg;
} }
// Branch names taken from a request body end up interpolated into a shell command
// inside release(), so they are checked against a conservative allow-list first.
// Registry-derived defaults are trusted and skip this.
const SAFE_BRANCH_RE = /^[A-Za-z0-9._/-]{1,200}$/;

/**
 * Accept or reject a caller-supplied git branch name.
 *
 * A name passes only when it is a string of 1-200 characters drawn from
 * letters, digits, '.', '_', '/' and '-', and additionally contains no '..',
 * does not begin with '-', and contains no '@{'.
 *
 * @param {*} name - Candidate branch name.
 * @param {string} label - Field name used in the error text (e.g. 'source_branch').
 * @returns {string} The same `name`, unchanged, when it is acceptable.
 * @throws {Error} When `name` fails any of the checks above.
 */
function validateBranchName(name, label) {
  const acceptable =
    typeof name === 'string' &&
    SAFE_BRANCH_RE.test(name) &&
    !name.includes('..') &&
    !name.startsWith('-') &&
    !name.includes('@{');

  if (acceptable) {
    return name;
  }

  throw new Error(`Invalid ${label}: ${JSON.stringify(name)} — must match ${SAFE_BRANCH_RE}, with no '..', leading '-', or '@{'`);
}
// --- Configuration --- // --- Configuration ---
const DATA_DIR = path.join(process.cwd(), 'data'); const DATA_DIR = path.join(process.cwd(), 'data');
const LOG_DIR = path.join(process.cwd(), 'logs'); const LOG_DIR = path.join(process.cwd(), 'logs');
@ -217,7 +228,11 @@ fastify.addHook('onRequest', async (request, reply) => {
// Webhook endpoint uses its own auth (HMAC signature verification inside the handler) // Webhook endpoint uses its own auth (HMAC signature verification inside the handler)
if (request.url === '/webhook/forgejo') return; if (request.url === '/webhook/forgejo') return;
const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip) || request.ip.startsWith('172.'); // Genuine loopback only. The Docker-bridge "172.*" shortcut was removed: the
// service binds 0.0.0.0, so ANY container on the bridge inherited tag:admin
// (including /unlock and /progress/reset). Bridge callers now go through the
// normal Tailscale-Whois-or-bearer-token path like everyone else.
const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip);
if (isLocalhost) { if (isLocalhost) {
request.identity = { stableId: 'local', hostname: HOSTNAME, tags: ['tag:admin'], user: 'local' }; request.identity = { stableId: 'local', hostname: HOSTNAME, tags: ['tag:admin'], user: 'local' };
return; return;
@ -260,17 +275,53 @@ function isLocal(server) {
return host === HOSTNAME; return host === HOSTNAME;
} }
function tailscaleIpForServer(server) { // Resolve a server's Tailscale IPv4 at runtime via the tailscaled LocalAPI over
const host = server.includes('@') ? server.split('@')[1] : server; // the mounted socket (the same mechanism tailscaleWhois uses). The kua-deploy
const ips = { // container has the socket but NOT the `tailscale` CLI, so we query
bruno: '100.74.17.6', // /localapi/v0/status and match the host by HostName/DNSName rather than shelling
gal: '100.122.129.114', // out. Cached per host for the process lifetime; falls back to '' (TAILSCALE_IP
}; // left unset, prior behavior for unknown hosts) if resolution fails.
return ips[host] || ''; const _tailscaleIpCache = new Map();
async function tailscaleStatusLookup(host) {
return new Promise((resolve) => {
const timeout = setTimeout(() => resolve(''), 2000);
const req = http.request({
socketPath: TAILSCALE_SOCKET,
path: '/localapi/v0/status',
method: 'GET',
headers: { Host: 'local-tailscaled.sock' }, // anti-DNS-rebind guard the LocalAPI requires
}, (res) => {
let data = '';
res.on('data', (chunk) => { data += chunk; });
res.on('end', () => {
clearTimeout(timeout);
try {
const status = JSON.parse(data);
const all = [status.Self, ...Object.values(status.Peer || {})].filter(Boolean);
const want = host.toLowerCase();
const match = all.find(p =>
(p.HostName || '').toLowerCase() === want ||
(p.DNSName || '').toLowerCase().startsWith(want + '.'));
const ip = (match?.TailscaleIPs || []).find(a => /^\d+\.\d+\.\d+\.\d+$/.test(a)) || '';
resolve(ip);
} catch { resolve(''); }
});
});
req.on('error', () => { clearTimeout(timeout); resolve(''); });
req.end();
});
} }
function composeEnvPrefix(server) { async function tailscaleIpForServer(server) {
const tailscaleIp = tailscaleIpForServer(server); const host = server.includes('@') ? server.split('@')[1] : server;
if (_tailscaleIpCache.has(host)) return _tailscaleIpCache.get(host);
const ip = await tailscaleStatusLookup(host);
_tailscaleIpCache.set(host, ip);
return ip;
}
async function composeEnvPrefix(server) {
const tailscaleIp = await tailscaleIpForServer(server);
return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : ''; return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : '';
} }
@ -332,7 +383,7 @@ async function recreateService({
'-v', `${deployDir}:${deployDir}`, '-v', `${deployDir}:${deployDir}`,
'-w', deployDir, '-w', deployDir,
]; ];
const tailscaleIp = tailscaleIpForServer(server); const tailscaleIp = await tailscaleIpForServer(server);
if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`); if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`);
if (envFileWritten) runArgs.push('--env-file', envFilePath); if (envFileWritten) runArgs.push('--env-file', envFilePath);
runArgs.push('docker:cli'); runArgs.push('docker:cli');
@ -406,36 +457,42 @@ async function runOnServer(server, cmd, opts = {}) {
// Clone source is NOT derived from the app name — origins are heterogeneous (Forgejo :2222, // Clone source is NOT derived from the app name — origins are heterogeneous (Forgejo :2222,
// scp-style, and at least one GitHub repo whose name differs from the app) — so it MUST come // scp-style, and at least one GitHub repo whose name differs from the app) — so it MUST come
// from the registry. The caller still performs its own branch/tag checkout afterwards. // from the registry. The caller still performs its own branch/tag checkout afterwards.
async function ensureCheckout(server, deployDir, repoUrl) { // repoDir is the GIT ROOT (registry `repo_dir`), which is NOT always the deploy_dir:
const probe = await runOnServer(server, `test -e ${deployDir}/.git && echo REPO || echo MISSING`); // sub-monorepo apps like coder-core have repo_dir=/root/apps/coder-core but
// deploy_dir=/root/apps/coder-core/services/production (compose lives in a subdir).
// Probing/cloning must target the git root — probing deploy_dir/.git would falsely
// report MISSING for those apps (the bug that broke coder-core deploys 2026-05-26).
// For the 18 normal apps repo_dir == deploy_dir, so behavior is unchanged.
async function ensureCheckout(server, repoDir, repoUrl) {
const probe = await runOnServer(server, `test -e ${repoDir}/.git && echo REPO || echo MISSING`);
if (probe.stdout.trim() === 'REPO') { if (probe.stdout.trim() === 'REPO') {
// Already a checkout — leave branch/tag selection to the caller. Optionally assert origin. // Already a checkout — leave branch/tag selection to the caller. Optionally assert origin.
if (repoUrl) { if (repoUrl) {
const originRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`); const originRes = await runOnServer(server, `git -C ${repoDir} config --get remote.origin.url || true`);
const actual = originRes.stdout.trim(); const actual = originRes.stdout.trim();
if (actual && actual !== repoUrl) { if (actual && actual !== repoUrl) {
throw new Error(`ensure-checkout: ${deployDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`); throw new Error(`ensure-checkout: ${repoDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`);
} }
} }
return { cloned: false }; return { cloned: false };
} }
if (!repoUrl) { if (!repoUrl) {
throw new Error(`ensure-checkout: ${deployDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`); throw new Error(`ensure-checkout: ${repoDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`);
} }
// Refuse to clobber a non-empty, non-repo directory. // Refuse to clobber a non-empty, non-repo directory.
const dirState = await runOnServer(server, `if [ -e ${deployDir} ] && [ -n "$(ls -A ${deployDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`); const dirState = await runOnServer(server, `if [ -e ${repoDir} ] && [ -n "$(ls -A ${repoDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`);
if (dirState.stdout.trim() === 'NONEMPTY') { if (dirState.stdout.trim() === 'NONEMPTY') {
throw new Error(`ensure-checkout: ${deployDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`); throw new Error(`ensure-checkout: ${repoDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`);
} }
const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${deployDir}`, { timeout: 180000 }); const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${repoDir}`, { timeout: 180000 });
if (!cloneRes.ok) { if (!cloneRes.ok) {
throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${deployDir} failed: ${cloneRes.stderr}`); throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${repoDir} failed: ${cloneRes.stderr}`);
} }
// Verify the clone landed and origin matches what we asked for. // Verify the clone landed and origin matches what we asked for.
const verifyRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`); const verifyRes = await runOnServer(server, `git -C ${repoDir} config --get remote.origin.url || true`);
const landed = verifyRes.stdout.trim(); const landed = verifyRes.stdout.trim();
if (landed !== repoUrl) { if (landed !== repoUrl) {
throw new Error(`ensure-checkout: cloned ${deployDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`); throw new Error(`ensure-checkout: cloned ${repoDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`);
} }
return { cloned: true }; return { cloned: true };
} }
@ -620,7 +677,10 @@ async function deploy(appName, opts = {}) {
// registry repo_url, so a first-time API deploy doesn't die at the `cd` below. No-op // registry repo_url, so a first-time API deploy doesn't die at the `cd` below. No-op
// for existing checkouts. Inside the per-app lock acquired above. // for existing checkouts. Inside the per-app lock acquired above.
try { try {
const ec = await ensureCheckout(server, deployDir, app.repo_url); // Probe/clone the GIT ROOT (repo_dir), not deploy_dir — they differ for
// sub-monorepo apps (coder-core). git fetch/checkout below run from deploy_dir
// and git walks up to the root, so only the ensure-checkout probe needs repo_dir.
const ec = await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url);
if (ec.cloned) steps[steps.length - 1].cloned = true; if (ec.cloned) steps[steps.length - 1].cloned = true;
} catch (err) { } catch (err) {
steps[steps.length - 1] = { step: 'git_pull', status: 'failed', error: err.message }; steps[steps.length - 1] = { step: 'git_pull', status: 'failed', error: err.message };
@ -660,7 +720,7 @@ ${detail}`);
const kvPrefix = prod.vault const kvPrefix = prod.vault
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
: ''; : '';
const envPrefix = composeEnvPrefix(server); const envPrefix = await composeEnvPrefix(server);
const buildCmd = `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`; const buildCmd = `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`;
const buildRes = await runOnServer(server, buildCmd, { timeout: 600000 }); const buildRes = await runOnServer(server, buildCmd, { timeout: 600000 });
if (!buildRes.ok) { if (!buildRes.ok) {
@ -894,20 +954,37 @@ ${detail}`);
await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
} }
} else { } else {
// No health URL — check containers // No health URL — fall back to confirming containers are listable. runOnServer
// returns {ok:false} on failure (it does not throw); if we can't even run
// `docker compose ps` we cannot claim health, so mark it failed (mirrors the
// health_url failure path: finalResult='unhealthy') instead of silent 'done'.
const psRes = await runOnServer(server, `cd ${deployDir} && docker compose ps --format json`); const psRes = await runOnServer(server, `cd ${deployDir} && docker compose ps --format json`);
if (!psRes.ok) {
steps[steps.length - 1] = { step: 'health', status: 'failed', error: psRes.stderr?.slice(-300) || psRes.error, note: 'no health URL; docker compose ps failed' };
finalResult = 'unhealthy';
await markProgressPhase(appName, 'health_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult });
} else {
steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' }; steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' };
await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
} }
}
// Step 7: Post-deploy hooks // Step 7: Post-deploy hooks
if (prod.post_deploy) { if (prod.post_deploy) {
steps.push({ step: 'post_deploy', status: 'running' }); steps.push({ step: 'post_deploy', status: 'running' });
await markProgressPhase(appName, 'post_deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); await markProgressPhase(appName, 'post_deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
await runOnServer(server, prod.post_deploy, { timeout: 30000 }); // runOnServer returns {ok:false} on failure (it does not throw) — check it,
// else a failing post-deploy hook was silently reported as success.
const postRes = await runOnServer(server, prod.post_deploy, { timeout: 30000 });
if (!postRes.ok) {
steps[steps.length - 1] = { step: 'post_deploy', status: 'failed', error: postRes.stderr?.slice(-500) || postRes.error };
finalResult = 'partial';
await markProgressPhase(appName, 'post_deploy_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult });
} else {
steps[steps.length - 1] = { step: 'post_deploy', status: 'done' }; steps[steps.length - 1] = { step: 'post_deploy', status: 'done' };
await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
} }
}
// Get tag // Get tag
const tagRes = await runOnServer(server, `cd ${deployDir} && git describe --tags --abbrev=0 2>/dev/null || echo "untagged"`); const tagRes = await runOnServer(server, `cd ${deployDir} && git describe --tags --abbrev=0 2>/dev/null || echo "untagged"`);
@ -1018,9 +1095,9 @@ async function rollback(appName, opts = {}) {
rolled_back_from: current?.tag || current?.commit || 'unknown', rolled_back_from: current?.tag || current?.commit || 'unknown',
}); });
// ensure-checkout (TUBE step 1): rollback shares deploy()'s `cd ${deployDir}` assumption, // ensure-checkout (TUBE step 1): probe/clone the git root (repo_dir), not deploy_dir
// so the deploy_dir must exist here too. No-op for existing checkouts. // (they differ for sub-monorepo apps like coder-core). No-op for existing checkouts.
await ensureCheckout(server, deployDir, app.repo_url); await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url);
// Checkout the rollback target on production (--tags so an explicit to_ref tag resolves). // Checkout the rollback target on production (--tags so an explicit to_ref tag resolves).
const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune --tags ${remote} && git checkout ${tag}`, { timeout: 60000 }); const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune --tags ${remote} && git checkout ${tag}`, { timeout: 60000 });
@ -1032,7 +1109,8 @@ async function rollback(appName, opts = {}) {
const kvPrefix = prod.vault const kvPrefix = prod.vault
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
: ''; : '';
const buildRes = await runOnServer(server, `cd ${deployDir} && ${composeEnvPrefix(server)}${kvPrefix} docker compose build`, { timeout: 600000 }); const envPrefix = await composeEnvPrefix(server);
const buildRes = await runOnServer(server, `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`, { timeout: 600000 });
if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`); if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`);
// Recreate all services for the rollback target. // Recreate all services for the rollback target.
const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean); const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean);
@ -1085,6 +1163,37 @@ async function rollback(appName, opts = {}) {
return { app: appName, ...entry }; return { app: appName, ...entry };
} catch (err) {
// The checkout/build/recreate steps above return {ok:false}/throw on failure;
// without this catch a failed rollback left progress stuck in 'running' and was
// never recorded. Mirror deploy()'s catch: record the failure and return a
// {result:'failed'} object (the rollback route's contract) rather than 500ing.
const entry = {
result: 'failed',
action: 'rollback',
error: err.message,
rolled_back_to: tag,
rolled_back_from: current?.tag || current?.commit || 'unknown',
server,
triggered_by: 'api',
};
await writeProgress(appName, {
action: 'rollback',
triggered_by: 'api',
status: 'failed',
phase: 'rollback_failed',
current_step: 'rollback',
result: 'failed',
error: err.message,
rolled_back_to: tag,
rolled_back_from: current?.tag || current?.commit || 'unknown',
server,
finished_at: Math.floor(Date.now() / 1000),
});
recordDeploy(appName, entry);
await audit({ action: 'rollback_failed', app: appName, error: err.message });
return { app: appName, ...entry };
} finally { } finally {
releaseLock(appName); releaseLock(appName);
} }
@ -1292,8 +1401,14 @@ fastify.get('/api/v1/apps/:app/deploys', async (request) => {
// --- Actions --- // --- Actions ---
// Release (merge main→production, tag, push — triggers webhook deploy) // Release (merge main→production, tag, push — triggers webhook deploy)
fastify.post('/api/v1/apps/:app/release', async (request) => { fastify.post('/api/v1/apps/:app/release', async (request, reply) => {
const { message, source_branch, target_branch } = request.body || {}; const { message, source_branch, target_branch } = request.body || {};
try {
if (source_branch !== undefined) validateBranchName(source_branch, 'source_branch');
if (target_branch !== undefined) validateBranchName(target_branch, 'target_branch');
} catch (err) {
return reply.code(400).send({ ok: false, error: err.message });
}
return await release(request.params.app, message || 'Release to production', { source_branch, target_branch }); return await release(request.params.app, message || 'Release to production', { source_branch, target_branch });
}); });