security: fix auth bypass, branch injection, silent failures, hardcoded IPs + ensure-checkout repo_dir fix
Security (4 issues):
1. Remove the 172.* Docker-bridge auth bypass — any bridge container inherited
tag:admin (incl /unlock, /progress/reset). Bridge callers now need Tailscale
identity or bearer token. (kua-mcp-core unaffected — reaches engine via
docker exec localhost.)
2. Validate request-supplied source_branch/target_branch on /release (400 on bad
input) before they reach the shell in release().
3. Check .ok on previously-ignored runOnServer results: post_deploy hook
(→partial), no-health-url docker compose ps (→unhealthy); add a catch to
rollback() so a failed rollback records failure instead of hanging 'running'.
4. Replace hardcoded bruno/gal Tailscale IP map with runtime resolution via the
tailscaled LocalAPI over the mounted socket (cached per host).
Regression fix (ensure-checkout):
- ensureCheckout now probes/clones the GIT ROOT (registry repo_dir), not deploy_dir.
They differ for sub-monorepo apps (coder-core: repo_dir=/root/apps/coder-core,
deploy_dir=.../services/production). Probing deploy_dir/.git falsely reported
MISSING and broke coder-core deploys (e33b1e9 regression). 18 normal apps where
repo_dir==deploy_dir are unchanged.
This commit is contained in:
parent
e33b1e96cb
commit
d75a145d90
177
server.js
177
server.js
|
|
@ -21,6 +21,17 @@ function validateMessage(msg) {
|
|||
return msg;
|
||||
}
|
||||
|
||||
// Validate a request-supplied git branch name before it is interpolated into a
|
||||
// shell command in release(). Registry-derived defaults are trusted and skip this.
|
||||
const SAFE_BRANCH_RE = /^[A-Za-z0-9._/-]{1,200}$/;
|
||||
function validateBranchName(name, label) {
|
||||
if (typeof name !== 'string' || !SAFE_BRANCH_RE.test(name) ||
|
||||
name.includes('..') || name.startsWith('-') || name.includes('@{')) {
|
||||
throw new Error(`Invalid ${label}: ${JSON.stringify(name)} — must match ${SAFE_BRANCH_RE}, with no '..', leading '-', or '@{'`);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
// --- Configuration ---
|
||||
const DATA_DIR = path.join(process.cwd(), 'data');
|
||||
const LOG_DIR = path.join(process.cwd(), 'logs');
|
||||
|
|
@ -217,7 +228,11 @@ fastify.addHook('onRequest', async (request, reply) => {
|
|||
// Webhook endpoint uses its own auth (HMAC signature verification inside the handler)
|
||||
if (request.url === '/webhook/forgejo') return;
|
||||
|
||||
const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip) || request.ip.startsWith('172.');
|
||||
// Genuine loopback only. The Docker-bridge "172.*" shortcut was removed: the
|
||||
// service binds 0.0.0.0, so ANY container on the bridge inherited tag:admin
|
||||
// (including /unlock and /progress/reset). Bridge callers now go through the
|
||||
// normal Tailscale-Whois-or-bearer-token path like everyone else.
|
||||
const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip);
|
||||
if (isLocalhost) {
|
||||
request.identity = { stableId: 'local', hostname: HOSTNAME, tags: ['tag:admin'], user: 'local' };
|
||||
return;
|
||||
|
|
@ -260,17 +275,53 @@ function isLocal(server) {
|
|||
return host === HOSTNAME;
|
||||
}
|
||||
|
||||
function tailscaleIpForServer(server) {
|
||||
const host = server.includes('@') ? server.split('@')[1] : server;
|
||||
const ips = {
|
||||
bruno: '100.74.17.6',
|
||||
gal: '100.122.129.114',
|
||||
};
|
||||
return ips[host] || '';
|
||||
// Resolve a server's Tailscale IPv4 at runtime via the tailscaled LocalAPI over
|
||||
// the mounted socket (the same mechanism tailscaleWhois uses). The kua-deploy
|
||||
// container has the socket but NOT the `tailscale` CLI, so we query
|
||||
// /localapi/v0/status and match the host by HostName/DNSName rather than shelling
|
||||
// out. Cached per host for the process lifetime; falls back to '' (TAILSCALE_IP
|
||||
// left unset, prior behavior for unknown hosts) if resolution fails.
|
||||
const _tailscaleIpCache = new Map();
|
||||
async function tailscaleStatusLookup(host) {
|
||||
return new Promise((resolve) => {
|
||||
const timeout = setTimeout(() => resolve(''), 2000);
|
||||
const req = http.request({
|
||||
socketPath: TAILSCALE_SOCKET,
|
||||
path: '/localapi/v0/status',
|
||||
method: 'GET',
|
||||
headers: { Host: 'local-tailscaled.sock' }, // anti-DNS-rebind guard the LocalAPI requires
|
||||
}, (res) => {
|
||||
let data = '';
|
||||
res.on('data', (chunk) => { data += chunk; });
|
||||
res.on('end', () => {
|
||||
clearTimeout(timeout);
|
||||
try {
|
||||
const status = JSON.parse(data);
|
||||
const all = [status.Self, ...Object.values(status.Peer || {})].filter(Boolean);
|
||||
const want = host.toLowerCase();
|
||||
const match = all.find(p =>
|
||||
(p.HostName || '').toLowerCase() === want ||
|
||||
(p.DNSName || '').toLowerCase().startsWith(want + '.'));
|
||||
const ip = (match?.TailscaleIPs || []).find(a => /^\d+\.\d+\.\d+\.\d+$/.test(a)) || '';
|
||||
resolve(ip);
|
||||
} catch { resolve(''); }
|
||||
});
|
||||
});
|
||||
req.on('error', () => { clearTimeout(timeout); resolve(''); });
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
function composeEnvPrefix(server) {
|
||||
const tailscaleIp = tailscaleIpForServer(server);
|
||||
async function tailscaleIpForServer(server) {
|
||||
const host = server.includes('@') ? server.split('@')[1] : server;
|
||||
if (_tailscaleIpCache.has(host)) return _tailscaleIpCache.get(host);
|
||||
const ip = await tailscaleStatusLookup(host);
|
||||
_tailscaleIpCache.set(host, ip);
|
||||
return ip;
|
||||
}
|
||||
|
||||
async function composeEnvPrefix(server) {
|
||||
const tailscaleIp = await tailscaleIpForServer(server);
|
||||
return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : '';
|
||||
}
|
||||
|
||||
|
|
@ -332,7 +383,7 @@ async function recreateService({
|
|||
'-v', `${deployDir}:${deployDir}`,
|
||||
'-w', deployDir,
|
||||
];
|
||||
const tailscaleIp = tailscaleIpForServer(server);
|
||||
const tailscaleIp = await tailscaleIpForServer(server);
|
||||
if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`);
|
||||
if (envFileWritten) runArgs.push('--env-file', envFilePath);
|
||||
runArgs.push('docker:cli');
|
||||
|
|
@ -406,36 +457,42 @@ async function runOnServer(server, cmd, opts = {}) {
|
|||
// Clone source is NOT derived from the app name — origins are heterogeneous (Forgejo :2222,
|
||||
// scp-style, and at least one GitHub repo whose name differs from the app) — so it MUST come
|
||||
// from the registry. The caller still performs its own branch/tag checkout afterwards.
|
||||
async function ensureCheckout(server, deployDir, repoUrl) {
|
||||
const probe = await runOnServer(server, `test -e ${deployDir}/.git && echo REPO || echo MISSING`);
|
||||
// repoDir is the GIT ROOT (registry `repo_dir`), which is NOT always the deploy_dir:
|
||||
// sub-monorepo apps like coder-core have repo_dir=/root/apps/coder-core but
|
||||
// deploy_dir=/root/apps/coder-core/services/production (compose lives in a subdir).
|
||||
// Probing/cloning must target the git root — probing deploy_dir/.git would falsely
|
||||
// report MISSING for those apps (the bug that broke coder-core deploys 2026-05-26).
|
||||
// For the 18 normal apps repo_dir == deploy_dir, so behavior is unchanged.
|
||||
async function ensureCheckout(server, repoDir, repoUrl) {
|
||||
const probe = await runOnServer(server, `test -e ${repoDir}/.git && echo REPO || echo MISSING`);
|
||||
if (probe.stdout.trim() === 'REPO') {
|
||||
// Already a checkout — leave branch/tag selection to the caller. Optionally assert origin.
|
||||
if (repoUrl) {
|
||||
const originRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`);
|
||||
const originRes = await runOnServer(server, `git -C ${repoDir} config --get remote.origin.url || true`);
|
||||
const actual = originRes.stdout.trim();
|
||||
if (actual && actual !== repoUrl) {
|
||||
throw new Error(`ensure-checkout: ${deployDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`);
|
||||
throw new Error(`ensure-checkout: ${repoDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`);
|
||||
}
|
||||
}
|
||||
return { cloned: false };
|
||||
}
|
||||
if (!repoUrl) {
|
||||
throw new Error(`ensure-checkout: ${deployDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`);
|
||||
throw new Error(`ensure-checkout: ${repoDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`);
|
||||
}
|
||||
// Refuse to clobber a non-empty, non-repo directory.
|
||||
const dirState = await runOnServer(server, `if [ -e ${deployDir} ] && [ -n "$(ls -A ${deployDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`);
|
||||
const dirState = await runOnServer(server, `if [ -e ${repoDir} ] && [ -n "$(ls -A ${repoDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`);
|
||||
if (dirState.stdout.trim() === 'NONEMPTY') {
|
||||
throw new Error(`ensure-checkout: ${deployDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`);
|
||||
throw new Error(`ensure-checkout: ${repoDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`);
|
||||
}
|
||||
const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${deployDir}`, { timeout: 180000 });
|
||||
const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${repoDir}`, { timeout: 180000 });
|
||||
if (!cloneRes.ok) {
|
||||
throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${deployDir} failed: ${cloneRes.stderr}`);
|
||||
throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${repoDir} failed: ${cloneRes.stderr}`);
|
||||
}
|
||||
// Verify the clone landed and origin matches what we asked for.
|
||||
const verifyRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`);
|
||||
const verifyRes = await runOnServer(server, `git -C ${repoDir} config --get remote.origin.url || true`);
|
||||
const landed = verifyRes.stdout.trim();
|
||||
if (landed !== repoUrl) {
|
||||
throw new Error(`ensure-checkout: cloned ${deployDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`);
|
||||
throw new Error(`ensure-checkout: cloned ${repoDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`);
|
||||
}
|
||||
return { cloned: true };
|
||||
}
|
||||
|
|
@ -620,7 +677,10 @@ async function deploy(appName, opts = {}) {
|
|||
// registry repo_url, so a first-time API deploy doesn't die at the `cd` below. No-op
|
||||
// for existing checkouts. Inside the per-app lock acquired above.
|
||||
try {
|
||||
const ec = await ensureCheckout(server, deployDir, app.repo_url);
|
||||
// Probe/clone the GIT ROOT (repo_dir), not deploy_dir — they differ for
|
||||
// sub-monorepo apps (coder-core). git fetch/checkout below run from deploy_dir
|
||||
// and git walks up to the root, so only the ensure-checkout probe needs repo_dir.
|
||||
const ec = await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url);
|
||||
if (ec.cloned) steps[steps.length - 1].cloned = true;
|
||||
} catch (err) {
|
||||
steps[steps.length - 1] = { step: 'git_pull', status: 'failed', error: err.message };
|
||||
|
|
@ -660,7 +720,7 @@ ${detail}`);
|
|||
const kvPrefix = prod.vault
|
||||
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
|
||||
: '';
|
||||
const envPrefix = composeEnvPrefix(server);
|
||||
const envPrefix = await composeEnvPrefix(server);
|
||||
const buildCmd = `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`;
|
||||
const buildRes = await runOnServer(server, buildCmd, { timeout: 600000 });
|
||||
if (!buildRes.ok) {
|
||||
|
|
@ -894,20 +954,37 @@ ${detail}`);
|
|||
await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
|
||||
}
|
||||
} else {
|
||||
// No health URL — check containers
|
||||
// No health URL — fall back to confirming containers are listable. runOnServer
|
||||
// returns {ok:false} on failure (it does not throw); if we can't even run
|
||||
// `docker compose ps` we cannot claim health, so mark it failed (mirrors the
|
||||
// health_url failure path: finalResult='unhealthy') instead of silent 'done'.
|
||||
const psRes = await runOnServer(server, `cd ${deployDir} && docker compose ps --format json`);
|
||||
if (!psRes.ok) {
|
||||
steps[steps.length - 1] = { step: 'health', status: 'failed', error: psRes.stderr?.slice(-300) || psRes.error, note: 'no health URL; docker compose ps failed' };
|
||||
finalResult = 'unhealthy';
|
||||
await markProgressPhase(appName, 'health_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult });
|
||||
} else {
|
||||
steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' };
|
||||
await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
|
||||
}
|
||||
}
|
||||
|
||||
// Step 7: Post-deploy hooks
|
||||
if (prod.post_deploy) {
|
||||
steps.push({ step: 'post_deploy', status: 'running' });
|
||||
await markProgressPhase(appName, 'post_deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
|
||||
await runOnServer(server, prod.post_deploy, { timeout: 30000 });
|
||||
// runOnServer returns {ok:false} on failure (it does not throw) — check it,
|
||||
// else a failing post-deploy hook was silently reported as success.
|
||||
const postRes = await runOnServer(server, prod.post_deploy, { timeout: 30000 });
|
||||
if (!postRes.ok) {
|
||||
steps[steps.length - 1] = { step: 'post_deploy', status: 'failed', error: postRes.stderr?.slice(-500) || postRes.error };
|
||||
finalResult = 'partial';
|
||||
await markProgressPhase(appName, 'post_deploy_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult });
|
||||
} else {
|
||||
steps[steps.length - 1] = { step: 'post_deploy', status: 'done' };
|
||||
await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
|
||||
}
|
||||
}
|
||||
|
||||
// Get tag
|
||||
const tagRes = await runOnServer(server, `cd ${deployDir} && git describe --tags --abbrev=0 2>/dev/null || echo "untagged"`);
|
||||
|
|
@ -1018,9 +1095,9 @@ async function rollback(appName, opts = {}) {
|
|||
rolled_back_from: current?.tag || current?.commit || 'unknown',
|
||||
});
|
||||
|
||||
// ensure-checkout (TUBE step 1): rollback shares deploy()'s `cd ${deployDir}` assumption,
|
||||
// so the deploy_dir must exist here too. No-op for existing checkouts.
|
||||
await ensureCheckout(server, deployDir, app.repo_url);
|
||||
// ensure-checkout (TUBE step 1): probe/clone the git root (repo_dir), not deploy_dir
|
||||
// (they differ for sub-monorepo apps like coder-core). No-op for existing checkouts.
|
||||
await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url);
|
||||
|
||||
// Checkout the rollback target on production (--tags so an explicit to_ref tag resolves).
|
||||
const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune --tags ${remote} && git checkout ${tag}`, { timeout: 60000 });
|
||||
|
|
@ -1032,7 +1109,8 @@ async function rollback(appName, opts = {}) {
|
|||
const kvPrefix = prod.vault
|
||||
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
|
||||
: '';
|
||||
const buildRes = await runOnServer(server, `cd ${deployDir} && ${composeEnvPrefix(server)}${kvPrefix} docker compose build`, { timeout: 600000 });
|
||||
const envPrefix = await composeEnvPrefix(server);
|
||||
const buildRes = await runOnServer(server, `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`, { timeout: 600000 });
|
||||
if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`);
|
||||
// Recreate all services for the rollback target.
|
||||
const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean);
|
||||
|
|
@ -1085,6 +1163,37 @@ async function rollback(appName, opts = {}) {
|
|||
|
||||
return { app: appName, ...entry };
|
||||
|
||||
} catch (err) {
|
||||
// The checkout/build/recreate steps above return {ok:false}/throw on failure;
|
||||
// without this catch a failed rollback left progress stuck in 'running' and was
|
||||
// never recorded. Mirror deploy()'s catch: record the failure and return a
|
||||
// {result:'failed'} object (the rollback route's contract) rather than 500ing.
|
||||
const entry = {
|
||||
result: 'failed',
|
||||
action: 'rollback',
|
||||
error: err.message,
|
||||
rolled_back_to: tag,
|
||||
rolled_back_from: current?.tag || current?.commit || 'unknown',
|
||||
server,
|
||||
triggered_by: 'api',
|
||||
};
|
||||
await writeProgress(appName, {
|
||||
action: 'rollback',
|
||||
triggered_by: 'api',
|
||||
status: 'failed',
|
||||
phase: 'rollback_failed',
|
||||
current_step: 'rollback',
|
||||
result: 'failed',
|
||||
error: err.message,
|
||||
rolled_back_to: tag,
|
||||
rolled_back_from: current?.tag || current?.commit || 'unknown',
|
||||
server,
|
||||
finished_at: Math.floor(Date.now() / 1000),
|
||||
});
|
||||
recordDeploy(appName, entry);
|
||||
await audit({ action: 'rollback_failed', app: appName, error: err.message });
|
||||
return { app: appName, ...entry };
|
||||
|
||||
} finally {
|
||||
releaseLock(appName);
|
||||
}
|
||||
|
|
@ -1292,8 +1401,14 @@ fastify.get('/api/v1/apps/:app/deploys', async (request) => {
|
|||
// --- Actions ---
|
||||
|
||||
// Release (merge main→production, tag, push — triggers webhook deploy)
|
||||
fastify.post('/api/v1/apps/:app/release', async (request) => {
|
||||
fastify.post('/api/v1/apps/:app/release', async (request, reply) => {
|
||||
const { message, source_branch, target_branch } = request.body || {};
|
||||
try {
|
||||
if (source_branch !== undefined) validateBranchName(source_branch, 'source_branch');
|
||||
if (target_branch !== undefined) validateBranchName(target_branch, 'target_branch');
|
||||
} catch (err) {
|
||||
return reply.code(400).send({ ok: false, error: err.message });
|
||||
}
|
||||
return await release(request.params.app, message || 'Release to production', { source_branch, target_branch });
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue