From d75a145d903bb9d6b00ed676bb1ff707332cbcfe Mon Sep 17 00:00:00 2001 From: Kavi Date: Tue, 26 May 2026 18:50:16 -0400 Subject: [PATCH] security: fix auth bypass, branch injection, silent failures, hardcoded IPs + ensure-checkout repo_dir fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security (4 issues): 1. Remove the 172.* Docker-bridge auth bypass — any bridge container inherited tag:admin (incl /unlock, /progress/reset). Bridge callers now need Tailscale identity or bearer token. (kua-mcp-core unaffected — reaches engine via docker exec localhost.) 2. Validate request-supplied source_branch/target_branch on /release (400 on bad input) before they reach the shell in release(). 3. Check .ok on previously-ignored runOnServer results: post_deploy hook (→partial), no-health-url docker compose ps (→unhealthy); add a catch to rollback() so a failed rollback records failure instead of hanging 'running'. 4. Replace hardcoded bruno/gal Tailscale IP map with runtime resolution via the tailscaled LocalAPI over the mounted socket (cached per host). Regression fix (ensure-checkout): - ensureCheckout now probes/clones the GIT ROOT (registry repo_dir), not deploy_dir. They differ for sub-monorepo apps (coder-core: repo_dir=/root/apps/coder-core, deploy_dir=.../services/production). Probing deploy_dir/.git falsely reported MISSING and broke coder-core deploys (e33b1e9 regression). 18 normal apps where repo_dir==deploy_dir are unchanged. --- server.js | 185 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 150 insertions(+), 35 deletions(-) diff --git a/server.js b/server.js index 74851d3..9508812 100644 --- a/server.js +++ b/server.js @@ -21,6 +21,17 @@ function validateMessage(msg) { return msg; } +// Validate a request-supplied git branch name before it is interpolated into a +// shell command in release(). Registry-derived defaults are trusted and skip this. +const SAFE_BRANCH_RE = /^[A-Za-z0-9._/-]{1,200}$/; +function validateBranchName(name, label) { + if (typeof name !== 'string' || !SAFE_BRANCH_RE.test(name) || + name.includes('..') || name.startsWith('-') || name.includes('@{')) { + throw new Error(`Invalid ${label}: ${JSON.stringify(name)} — must match ${SAFE_BRANCH_RE}, with no '..', leading '-', or '@{'`); + } + return name; +} + // --- Configuration --- const DATA_DIR = path.join(process.cwd(), 'data'); const LOG_DIR = path.join(process.cwd(), 'logs'); @@ -217,7 +228,11 @@ fastify.addHook('onRequest', async (request, reply) => { // Webhook endpoint uses its own auth (HMAC signature verification inside the handler) if (request.url === '/webhook/forgejo') return; - const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip) || request.ip.startsWith('172.'); + // Genuine loopback only. The Docker-bridge "172.*" shortcut was removed: the + // service binds 0.0.0.0, so ANY container on the bridge inherited tag:admin + // (including /unlock and /progress/reset). Bridge callers now go through the + // normal Tailscale-Whois-or-bearer-token path like everyone else. + const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip); if (isLocalhost) { request.identity = { stableId: 'local', hostname: HOSTNAME, tags: ['tag:admin'], user: 'local' }; return; @@ -260,17 +275,53 @@ function isLocal(server) { return host === HOSTNAME; } -function tailscaleIpForServer(server) { - const host = server.includes('@') ? server.split('@')[1] : server; - const ips = { - bruno: '100.74.17.6', - gal: '100.122.129.114', - }; - return ips[host] || ''; +// Resolve a server's Tailscale IPv4 at runtime via the tailscaled LocalAPI over +// the mounted socket (the same mechanism tailscaleWhois uses). The kua-deploy +// container has the socket but NOT the `tailscale` CLI, so we query +// /localapi/v0/status and match the host by HostName/DNSName rather than shelling +// out. Cached per host for the process lifetime; falls back to '' (TAILSCALE_IP +// left unset, prior behavior for unknown hosts) if resolution fails. +const _tailscaleIpCache = new Map(); +async function tailscaleStatusLookup(host) { + return new Promise((resolve) => { + const timeout = setTimeout(() => resolve(''), 2000); + const req = http.request({ + socketPath: TAILSCALE_SOCKET, + path: '/localapi/v0/status', + method: 'GET', + headers: { Host: 'local-tailscaled.sock' }, // anti-DNS-rebind guard the LocalAPI requires + }, (res) => { + let data = ''; + res.on('data', (chunk) => { data += chunk; }); + res.on('end', () => { + clearTimeout(timeout); + try { + const status = JSON.parse(data); + const all = [status.Self, ...Object.values(status.Peer || {})].filter(Boolean); + const want = host.toLowerCase(); + const match = all.find(p => + (p.HostName || '').toLowerCase() === want || + (p.DNSName || '').toLowerCase().startsWith(want + '.')); + const ip = (match?.TailscaleIPs || []).find(a => /^\d+\.\d+\.\d+\.\d+$/.test(a)) || ''; + resolve(ip); + } catch { resolve(''); } + }); + }); + req.on('error', () => { clearTimeout(timeout); resolve(''); }); + req.end(); + }); } -function composeEnvPrefix(server) { - const tailscaleIp = tailscaleIpForServer(server); +async function tailscaleIpForServer(server) { + const host = server.includes('@') ? server.split('@')[1] : server; + if (_tailscaleIpCache.has(host)) return _tailscaleIpCache.get(host); + const ip = await tailscaleStatusLookup(host); + _tailscaleIpCache.set(host, ip); + return ip; +} + +async function composeEnvPrefix(server) { + const tailscaleIp = await tailscaleIpForServer(server); return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : ''; } @@ -332,7 +383,7 @@ async function recreateService({ '-v', `${deployDir}:${deployDir}`, '-w', deployDir, ]; - const tailscaleIp = tailscaleIpForServer(server); + const tailscaleIp = await tailscaleIpForServer(server); if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`); if (envFileWritten) runArgs.push('--env-file', envFilePath); runArgs.push('docker:cli'); @@ -406,36 +457,42 @@ async function runOnServer(server, cmd, opts = {}) { // Clone source is NOT derived from the app name — origins are heterogeneous (Forgejo :2222, // scp-style, and at least one GitHub repo whose name differs from the app) — so it MUST come // from the registry. The caller still performs its own branch/tag checkout afterwards. -async function ensureCheckout(server, deployDir, repoUrl) { - const probe = await runOnServer(server, `test -e ${deployDir}/.git && echo REPO || echo MISSING`); +// repoDir is the GIT ROOT (registry `repo_dir`), which is NOT always the deploy_dir: +// sub-monorepo apps like coder-core have repo_dir=/root/apps/coder-core but +// deploy_dir=/root/apps/coder-core/services/production (compose lives in a subdir). +// Probing/cloning must target the git root — probing deploy_dir/.git would falsely +// report MISSING for those apps (the bug that broke coder-core deploys 2026-05-26). +// For the 18 normal apps repo_dir == deploy_dir, so behavior is unchanged. +async function ensureCheckout(server, repoDir, repoUrl) { + const probe = await runOnServer(server, `test -e ${repoDir}/.git && echo REPO || echo MISSING`); if (probe.stdout.trim() === 'REPO') { // Already a checkout — leave branch/tag selection to the caller. Optionally assert origin. if (repoUrl) { - const originRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`); + const originRes = await runOnServer(server, `git -C ${repoDir} config --get remote.origin.url || true`); const actual = originRes.stdout.trim(); if (actual && actual !== repoUrl) { - throw new Error(`ensure-checkout: ${deployDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`); + throw new Error(`ensure-checkout: ${repoDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`); } } return { cloned: false }; } if (!repoUrl) { - throw new Error(`ensure-checkout: ${deployDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`); + throw new Error(`ensure-checkout: ${repoDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`); } // Refuse to clobber a non-empty, non-repo directory. - const dirState = await runOnServer(server, `if [ -e ${deployDir} ] && [ -n "$(ls -A ${deployDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`); + const dirState = await runOnServer(server, `if [ -e ${repoDir} ] && [ -n "$(ls -A ${repoDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`); if (dirState.stdout.trim() === 'NONEMPTY') { - throw new Error(`ensure-checkout: ${deployDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`); + throw new Error(`ensure-checkout: ${repoDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`); } - const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${deployDir}`, { timeout: 180000 }); + const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${repoDir}`, { timeout: 180000 }); if (!cloneRes.ok) { - throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${deployDir} failed: ${cloneRes.stderr}`); + throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${repoDir} failed: ${cloneRes.stderr}`); } // Verify the clone landed and origin matches what we asked for. - const verifyRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`); + const verifyRes = await runOnServer(server, `git -C ${repoDir} config --get remote.origin.url || true`); const landed = verifyRes.stdout.trim(); if (landed !== repoUrl) { - throw new Error(`ensure-checkout: cloned ${deployDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`); + throw new Error(`ensure-checkout: cloned ${repoDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`); } return { cloned: true }; } @@ -620,7 +677,10 @@ async function deploy(appName, opts = {}) { // registry repo_url, so a first-time API deploy doesn't die at the `cd` below. No-op // for existing checkouts. Inside the per-app lock acquired above. try { - const ec = await ensureCheckout(server, deployDir, app.repo_url); + // Probe/clone the GIT ROOT (repo_dir), not deploy_dir — they differ for + // sub-monorepo apps (coder-core). git fetch/checkout below run from deploy_dir + // and git walks up to the root, so only the ensure-checkout probe needs repo_dir. + const ec = await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url); if (ec.cloned) steps[steps.length - 1].cloned = true; } catch (err) { steps[steps.length - 1] = { step: 'git_pull', status: 'failed', error: err.message }; @@ -660,7 +720,7 @@ ${detail}`); const kvPrefix = prod.vault ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` : ''; - const envPrefix = composeEnvPrefix(server); + const envPrefix = await composeEnvPrefix(server); const buildCmd = `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`; const buildRes = await runOnServer(server, buildCmd, { timeout: 600000 }); if (!buildRes.ok) { @@ -894,19 +954,36 @@ ${detail}`); await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); } } else { - // No health URL — check containers + // No health URL — fall back to confirming containers are listable. runOnServer + // returns {ok:false} on failure (it does not throw); if we can't even run + // `docker compose ps` we cannot claim health, so mark it failed (mirrors the + // health_url failure path: finalResult='unhealthy') instead of silent 'done'. const psRes = await runOnServer(server, `cd ${deployDir} && docker compose ps --format json`); - steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' }; - await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); + if (!psRes.ok) { + steps[steps.length - 1] = { step: 'health', status: 'failed', error: psRes.stderr?.slice(-300) || psRes.error, note: 'no health URL; docker compose ps failed' }; + finalResult = 'unhealthy'; + await markProgressPhase(appName, 'health_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult }); + } else { + steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' }; + await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); + } } // Step 7: Post-deploy hooks if (prod.post_deploy) { steps.push({ step: 'post_deploy', status: 'running' }); await markProgressPhase(appName, 'post_deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); - await runOnServer(server, prod.post_deploy, { timeout: 30000 }); - steps[steps.length - 1] = { step: 'post_deploy', status: 'done' }; - await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); + // runOnServer returns {ok:false} on failure (it does not throw) — check it, + // else a failing post-deploy hook was silently reported as success. + const postRes = await runOnServer(server, prod.post_deploy, { timeout: 30000 }); + if (!postRes.ok) { + steps[steps.length - 1] = { step: 'post_deploy', status: 'failed', error: postRes.stderr?.slice(-500) || postRes.error }; + finalResult = 'partial'; + await markProgressPhase(appName, 'post_deploy_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult }); + } else { + steps[steps.length - 1] = { step: 'post_deploy', status: 'done' }; + await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); + } } // Get tag @@ -1018,9 +1095,9 @@ async function rollback(appName, opts = {}) { rolled_back_from: current?.tag || current?.commit || 'unknown', }); - // ensure-checkout (TUBE step 1): rollback shares deploy()'s `cd ${deployDir}` assumption, - // so the deploy_dir must exist here too. No-op for existing checkouts. - await ensureCheckout(server, deployDir, app.repo_url); + // ensure-checkout (TUBE step 1): probe/clone the git root (repo_dir), not deploy_dir + // (they differ for sub-monorepo apps like coder-core). No-op for existing checkouts. + await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url); // Checkout the rollback target on production (--tags so an explicit to_ref tag resolves). const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune --tags ${remote} && git checkout ${tag}`, { timeout: 60000 }); @@ -1032,7 +1109,8 @@ async function rollback(appName, opts = {}) { const kvPrefix = prod.vault ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` : ''; - const buildRes = await runOnServer(server, `cd ${deployDir} && ${composeEnvPrefix(server)}${kvPrefix} docker compose build`, { timeout: 600000 }); + const envPrefix = await composeEnvPrefix(server); + const buildRes = await runOnServer(server, `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`, { timeout: 600000 }); if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`); // Recreate all services for the rollback target. const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean); @@ -1085,6 +1163,37 @@ async function rollback(appName, opts = {}) { return { app: appName, ...entry }; + } catch (err) { + // The checkout/build/recreate steps above return {ok:false}/throw on failure; + // without this catch a failed rollback left progress stuck in 'running' and was + // never recorded. Mirror deploy()'s catch: record the failure and return a + // {result:'failed'} object (the rollback route's contract) rather than 500ing. + const entry = { + result: 'failed', + action: 'rollback', + error: err.message, + rolled_back_to: tag, + rolled_back_from: current?.tag || current?.commit || 'unknown', + server, + triggered_by: 'api', + }; + await writeProgress(appName, { + action: 'rollback', + triggered_by: 'api', + status: 'failed', + phase: 'rollback_failed', + current_step: 'rollback', + result: 'failed', + error: err.message, + rolled_back_to: tag, + rolled_back_from: current?.tag || current?.commit || 'unknown', + server, + finished_at: Math.floor(Date.now() / 1000), + }); + recordDeploy(appName, entry); + await audit({ action: 'rollback_failed', app: appName, error: err.message }); + return { app: appName, ...entry }; + } finally { releaseLock(appName); } @@ -1292,8 +1401,14 @@ fastify.get('/api/v1/apps/:app/deploys', async (request) => { // --- Actions --- // Release (merge main→production, tag, push — triggers webhook deploy) -fastify.post('/api/v1/apps/:app/release', async (request) => { +fastify.post('/api/v1/apps/:app/release', async (request, reply) => { const { message, source_branch, target_branch } = request.body || {}; + try { + if (source_branch !== undefined) validateBranchName(source_branch, 'source_branch'); + if (target_branch !== undefined) validateBranchName(target_branch, 'target_branch'); + } catch (err) { + return reply.code(400).send({ ok: false, error: err.message }); + } return await release(request.params.app, message || 'Release to production', { source_branch, target_branch }); });