Compare commits

..

2 Commits

Author SHA1 Message Date
Kavi d5508249f2 test: SHA normalization and registry-loader unit tests
Regression coverage for:
- Bug #3: sha256: prefix comparison — tests the stripSha helper and the
  imageMatch logic as used in verifyStatelessRecreated()
- Bug #7 / #6: registry file must exist and have all required fields;
  no app may use deploy_mode=webhook (webhook path retired)
  Run: REGISTRY_FILE=<path> node --test test/registry-loader.test.js

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-23 04:34:30 -04:00
Kavi 6a583a8572 fix(deploy): SHA normalization + kua-vault wrap in verifyStatelessRecreated; retire webhook mandatory check; add /api/v1/apps/registry endpoint
- verifyStatelessRecreated(): strip sha256: prefix before comparing image SHAs
  (same fix already applied to runtime-status in 2551af4, completeSelfRecreate
  in 06852c2; now consistent across all three verify paths)
- verifyStatelessRecreated(): add kua-vault wrap on compose images/ps calls
  (mirrors 2551af4 runtime-status fix; env interpolation was causing empty
  output for vault-injected apps, making the verify always fail with
  'no running container')
- start(): downgrade missing WEBHOOK_SECRET from fatal error to warning; the
  Forgejo webhook path is retired in favour of the admin API — handler stays
  but the startup guard no longer blocks kua-deploy from booting
- Add GET /api/v1/apps/registry so kua-mcp-core can fetch the full app list
  over HTTP without depending on a filesystem path inside its container

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-23 04:29:26 -04:00
4 changed files with 152 additions and 425 deletions

View File

@ -5,11 +5,8 @@
# - kua-services: reach kua-vault, kua-db, kua-mcp-core, etc. # - kua-services: reach kua-vault, kua-db, kua-mcp-core, etc.
# - production_proxy: reach forgejo (git operations) + Caddy edge labels # - production_proxy: reach forgejo (git operations) + Caddy edge labels
# #
# Registry: ENGINE-OWNED at /app/data/registry.json (on the kua-deploy-data volume), # Registry: deploy-registry.json is bind-mounted from coder-core's checkout
# mutated only via the authenticated API (PUT/PATCH/DELETE /api/v1/apps/:app). The old # during this transition. Future cleanup can migrate it into this repo.
# git bind-mount was removed 2026-05-26 — git is no longer in the registry path. On a
# fresh volume the engine seeds from KUA_REGISTRY_SEED (default /app/deploy-registry.json)
# if present; the cutover pre-seeds /app/data/registry.json from the live registry first.
services: services:
kua-deploy: kua-deploy:
build: build:
@ -34,6 +31,7 @@ services:
- kua-deploy-data:/app/data - kua-deploy-data:/app/data
- /root/.ssh:/root/.ssh:ro - /root/.ssh:/root/.ssh:ro
- /root/apps:/root/apps - /root/apps:/root/apps
- /root/apps/coder-core/services/kua-deploy/deploy-registry.json:/app/deploy-registry.json:ro
- /usr/local/bin/kua-vault:/usr/local/bin/kua-vault:ro - /usr/local/bin/kua-vault:/usr/local/bin/kua-vault:ro
- /root/.config/kua-vault:/root/.config/kua-vault:ro - /root/.config/kua-vault:/root/.config/kua-vault:ro
networks: networks:

455
server.js
View File

@ -21,31 +21,12 @@ function validateMessage(msg) {
return msg; return msg;
} }
// Guard for request-supplied git branch names. Per the original note, these are
// interpolated into a shell command in release(), so only a conservative
// character set is accepted. Registry-derived defaults are trusted and skip this.
const SAFE_BRANCH_RE = /^[A-Za-z0-9._/-]{1,200}$/;

/**
 * Return `name` unchanged when it is an acceptable branch name, otherwise throw.
 *
 * @param {*} name - candidate branch name (must be a string matching SAFE_BRANCH_RE).
 * @param {string} label - field name used in the error message.
 * @returns {string} the validated name.
 * @throws {Error} when the name is not a string, does not match SAFE_BRANCH_RE,
 *   contains '..' or '@{', or starts with '-'.
 */
function validateBranchName(name, label) {
  const acceptable = typeof name === 'string'
    && SAFE_BRANCH_RE.test(name)
    && !name.includes('..')
    && !name.startsWith('-')
    && !name.includes('@{');
  if (acceptable) {
    return name;
  }
  throw new Error(`Invalid ${label}: ${JSON.stringify(name)} — must match ${SAFE_BRANCH_RE}, with no '..', leading '-', or '@{'`);
}
// --- Configuration --- // --- Configuration ---
const DATA_DIR = path.join(process.cwd(), 'data'); const DATA_DIR = path.join(process.cwd(), 'data');
// Audit log moved under DATA_DIR (the only mounted/persistent volume) — the old const LOG_DIR = path.join(process.cwd(), 'logs');
// /app/logs path was never mounted, so the audit trail was lost on every restart. const AUDIT_LOG_FILE = path.join(LOG_DIR, 'audit.log');
const LOG_DIR = DATA_DIR;
const AUDIT_LOG_FILE = path.join(DATA_DIR, 'audit.log');
const DEPLOY_HISTORY_FILE = path.join(DATA_DIR, 'deploys.json'); const DEPLOY_HISTORY_FILE = path.join(DATA_DIR, 'deploys.json');
// The registry is ENGINE-OWNED runtime state on the persistent volume — NOT a const REGISTRY_FILE = path.join(process.cwd(), 'deploy-registry.json');
// git-committed bind-mount. Mutated only via the authenticated API below. On first
// boot it seeds from the legacy bind-mount (if still present) or a seed file, then
// owns the file thereafter. See REGISTRY_SEED.
const REGISTRY_FILE = path.join(DATA_DIR, 'registry.json');
const REGISTRY_SEED = process.env.KUA_REGISTRY_SEED || path.join(process.cwd(), 'deploy-registry.json');
const REGISTRY_EVENTS_FILE = path.join(DATA_DIR, 'registry-events.jsonl');
const ADMIN_TOKEN = process.env.KUA_DEPLOY_ADMIN_TOKEN; const ADMIN_TOKEN = process.env.KUA_DEPLOY_ADMIN_TOKEN;
const TAILSCALE_SOCKET = '/var/run/tailscale/tailscaled.sock'; const TAILSCALE_SOCKET = '/var/run/tailscale/tailscaled.sock';
const HOSTNAME = process.env.HOSTNAME || 'gal'; const HOSTNAME = process.env.HOSTNAME || 'gal';
@ -112,106 +93,14 @@ function getDeployId(app) {
return deployLocks.get(app)?.deployId ?? null; return deployLocks.get(app)?.deployId ?? null;
} }
// --- Load Registry (engine-owned) --- // --- Load Registry ---
let registry = { apps: {} }; let registry = { apps: {} };
// Seed-once: if the engine-owned file is missing, import from the legacy seed
// (the old git bind-mount) so we don't lose the existing apps on cutover. NEVER
// overwrites an existing engine-owned file.
/**
 * Create REGISTRY_FILE from REGISTRY_SEED (or an empty registry) when it does
 * not exist yet.
 *
 * @returns {Promise<boolean>} false when REGISTRY_FILE already exists, true
 *   when it was created by this call.
 * @throws any fs.access error other than ENOENT, and any mkdir/writeFile error.
 */
async function seedRegistryIfMissing() {
  try {
    await fs.access(REGISTRY_FILE);
    return false; // already engine-owned
  } catch (err) {
    // Only a genuinely missing file may be seeded. Any other failure (EACCES,
    // EIO, ...) means the file may exist, and writing over it would break the
    // never-overwrite rule above.
    if (err.code !== 'ENOENT') throw err;
  }

  let seed;
  let adoptedSeed = true;
  try {
    seed = await fs.readFile(REGISTRY_SEED, 'utf-8');
    JSON.parse(seed); // validate it parses before adopting
  } catch (err) {
    fastify.log.warn(`No registry seed at ${REGISTRY_SEED} (${err.message}); starting with empty registry`);
    seed = JSON.stringify({ apps: {} }, null, 2) + '\n';
    adoptedSeed = false;
  }

  await fs.mkdir(DATA_DIR, { recursive: true });
  await fs.writeFile(REGISTRY_FILE, seed, 'utf-8');

  // Log what actually happened: claiming "seeded from <seed>" after the seed
  // was rejected would be misleading.
  if (adoptedSeed) {
    fastify.log.info(`Registry seeded from ${REGISTRY_SEED} -> ${REGISTRY_FILE}`);
  } else {
    fastify.log.info(`Registry initialized empty at ${REGISTRY_FILE}`);
  }
  return true;
}
async function loadRegistry() { async function loadRegistry() {
await seedRegistryIfMissing();
const data = await fs.readFile(REGISTRY_FILE, 'utf-8'); const data = await fs.readFile(REGISTRY_FILE, 'utf-8');
const parsed = JSON.parse(data); registry = JSON.parse(data);
if (!parsed || typeof parsed !== 'object' || typeof parsed.apps !== 'object') {
throw new Error('registry.json malformed: missing "apps" object');
}
registry = parsed;
fastify.log.info(`Registry loaded: ${Object.keys(registry.apps).length} apps`); fastify.log.info(`Registry loaded: ${Object.keys(registry.apps).length} apps`);
} }
// Atomic write of the in-memory registry to the engine-owned file (temp + rename).
/**
 * Serialize `registry` to a uniquely named temp file next to REGISTRY_FILE and
 * rename it into place.
 *
 * @returns {Promise<void>}
 * @throws the original writeFile/rename error after the temp file is cleaned up.
 */
async function writeRegistry() {
  await fs.mkdir(DATA_DIR, { recursive: true });
  const tmp = `${REGISTRY_FILE}.tmp.${process.pid}.${Date.now()}`;
  try {
    await fs.writeFile(tmp, JSON.stringify(registry, null, 2) + '\n', 'utf-8');
    await fs.rename(tmp, REGISTRY_FILE);
  } catch (err) {
    // Best-effort cleanup so failed writes do not pile up *.tmp.* files on the
    // data volume; the original error is what the caller needs to see.
    await fs.unlink(tmp).catch(() => {});
    throw err;
  }
}
// --- Registry mutation lock (engine is single-process; serialize writers) ---
// A boolean flag is the whole lock: waiters poll it every 25 ms until it clears.
let registryMutating = false;

/**
 * Run `fn` while holding the registry mutation flag.
 *
 * @param {Function} fn - callback to run; its (awaited) result is returned.
 * @returns {Promise<*>} whatever `fn` resolves to.
 * @throws whatever `fn` throws; the flag is cleared either way.
 */
async function withRegistryLock(fn) {
  const pause = () => new Promise((wake) => setTimeout(wake, 25));
  for (;;) {
    if (!registryMutating) break;
    await pause();
  }
  registryMutating = true;
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    registryMutating = false;
  }
}
// Append-only hash-chained audit of registry mutations.
// `null` means "not recovered yet"; after the first call it holds the hash of
// the most recently appended record ('' when there is no usable history).
let lastRegistryEventHash = null;

/**
 * Append one mutation record to REGISTRY_EVENTS_FILE as a JSON line.
 *
 * The record is `ev` plus `ts` (current ISO time) and `prev_hash`; `event_hash`
 * is the SHA-256 hex digest of that record's JSON before `event_hash` is added.
 *
 * @param {object} ev - event fields to record.
 * @returns {Promise<object>} the record that was written.
 */
async function appendRegistryEvent(ev) {
  if (lastRegistryEventHash === null) {
    // Best-effort recovery of the chain head from the last line of the file;
    // any read or parse problem restarts the chain from ''.
    let recovered = '';
    try {
      const raw = await fs.readFile(REGISTRY_EVENTS_FILE, 'utf-8');
      const entries = raw.trim().split('\n').filter(Boolean);
      if (entries.length) {
        recovered = JSON.parse(entries[entries.length - 1]).event_hash || '';
      }
    } catch {
      recovered = '';
    }
    lastRegistryEventHash = recovered;
  }

  const record = {
    ...ev,
    ts: new Date().toISOString(),
    prev_hash: lastRegistryEventHash,
  };
  const digest = crypto.createHash('sha256');
  digest.update(JSON.stringify(record));
  record.event_hash = digest.digest('hex');

  await fs.mkdir(DATA_DIR, { recursive: true });
  await fs.appendFile(REGISTRY_EVENTS_FILE, JSON.stringify(record) + '\n', 'utf-8');
  lastRegistryEventHash = record.event_hash;
  return record;
}
// Validate a registry entry coming from the API (request input — privileged but typed).
// Allowed production servers: KUA_REGISTRY_SERVERS (comma-separated) or the built-in default list.
const REGISTRY_SERVERS = new Set((process.env.KUA_REGISTRY_SERVERS || 'bruno,gal,genesis,eva').split(',').map(s => s.trim()));

/**
 * Collect validation problems for a registry entry.
 *
 * @param {string} name - app name (route parameter).
 * @param {*} entry - candidate registry entry (request body).
 * @returns {string[]} human-readable error messages; empty when the entry is valid.
 */
function validateEntry(name, entry) {
  const problems = [];

  const nameOk = /^[a-z0-9][a-z0-9._-]*$/.test(name);
  if (!nameOk) {
    problems.push(`app name "${name}" must be lowercase [a-z0-9._-]`);
  }
  if (!entry || typeof entry !== 'object') {
    problems.push('entry must be an object');
    return problems;
  }

  // Required top-level string fields.
  ['repo_dir', 'source_branch', 'deploy_branch', 'deploy_mode'].forEach((field) => {
    const value = entry[field];
    if (typeof value !== 'string' || !value) {
      problems.push(`missing/invalid "${field}"`);
    }
  });

  if (entry.deploy_mode) {
    const knownModes = ['direct', 'webhook', 'script'];
    if (!knownModes.includes(entry.deploy_mode)) {
      problems.push(`deploy_mode "${entry.deploy_mode}" not direct|webhook|script`);
    }
  }

  // Branch names get the same check as request-supplied branches.
  for (const field of ['source_branch', 'deploy_branch']) {
    if (!entry[field]) continue;
    try {
      validateBranchName(entry[field], field);
    } catch (e) {
      problems.push(e.message);
    }
  }

  if (entry.repo_url && typeof entry.repo_url !== 'string') {
    problems.push('repo_url must be a string');
  }

  const prod = entry.production;
  if (!prod || typeof prod !== 'object') {
    problems.push('missing "production" object');
    return problems;
  }
  if (!REGISTRY_SERVERS.has(prod.server)) {
    problems.push(`production.server "${prod.server}" not in allowed set {${[...REGISTRY_SERVERS].join(',')}}`);
  }
  const deployDirOk = typeof prod.deploy_dir === 'string' && prod.deploy_dir.startsWith('/');
  if (!deployDirOk) {
    problems.push('production.deploy_dir must be an absolute path');
  }
  if ('has_migrations' in prod && typeof prod.has_migrations !== 'boolean') {
    problems.push('production.has_migrations must be boolean');
  }
  if (prod.stateful_services && !Array.isArray(prod.stateful_services)) {
    problems.push('production.stateful_services must be an array');
  }
  return problems;
}
/**
 * Compare two registry entries while ignoring object key order.
 *
 * Keys are sorted recursively before serializing. Passing a top-level key list
 * as the JSON.stringify replacer is not equivalent: an array replacer filters
 * keys at every nesting level, so nested fields (e.g. inside `production`)
 * would be dropped and changes to them would go undetected.
 *
 * @param {object|null|undefined} before - existing entry, if any.
 * @param {object|null|undefined} after - proposed entry, if any.
 * @returns {{changed: boolean, before: (object|null), after: (object|null)}}
 */
function normalizedDiff(before, after) {
  // Rebuild a value with every object's keys in sorted order (arrays keep order).
  const canonicalize = (value) => {
    if (Array.isArray(value)) return value.map(canonicalize);
    if (value && typeof value === 'object') {
      return Object.fromEntries(
        Object.keys(value).sort().map((key) => [key, canonicalize(value[key])]),
      );
    }
    return value;
  };
  const serialize = (value) => (value ? JSON.stringify(canonicalize(value)) : null);

  return {
    changed: serialize(before) !== serialize(after),
    before: before || null,
    after: after || null,
  };
}
function getApp(name) { function getApp(name) {
return registry.apps[name] || null; return registry.apps[name] || null;
} }
@ -328,11 +217,7 @@ fastify.addHook('onRequest', async (request, reply) => {
// Webhook endpoint uses its own auth (HMAC signature verification inside the handler) // Webhook endpoint uses its own auth (HMAC signature verification inside the handler)
if (request.url === '/webhook/forgejo') return; if (request.url === '/webhook/forgejo') return;
// Genuine loopback only. The Docker-bridge "172.*" shortcut was removed: the const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip) || request.ip.startsWith('172.');
// service binds 0.0.0.0, so ANY container on the bridge inherited tag:admin
// (including /unlock and /progress/reset). Bridge callers now go through the
// normal Tailscale-Whois-or-bearer-token path like everyone else.
const isLocalhost = ['127.0.0.1', '::1', '::ffff:127.0.0.1'].includes(request.ip);
if (isLocalhost) { if (isLocalhost) {
request.identity = { stableId: 'local', hostname: HOSTNAME, tags: ['tag:admin'], user: 'local' }; request.identity = { stableId: 'local', hostname: HOSTNAME, tags: ['tag:admin'], user: 'local' };
return; return;
@ -375,53 +260,17 @@ function isLocal(server) {
return host === HOSTNAME; return host === HOSTNAME;
} }
// Resolve a server's Tailscale IPv4 at runtime via the tailscaled LocalAPI over function tailscaleIpForServer(server) {
// the mounted socket (the same mechanism tailscaleWhois uses). The kua-deploy
// container has the socket but NOT the `tailscale` CLI, so we query
// /localapi/v0/status and match the host by HostName/DNSName rather than shelling
// out. Cached per host for the process lifetime; falls back to '' (TAILSCALE_IP
// left unset, prior behavior for unknown hosts) if resolution fails.
const _tailscaleIpCache = new Map();
/**
 * Look up a host's Tailscale IPv4 address by querying /localapi/v0/status over
 * the tailscaled unix socket (TAILSCALE_SOCKET).
 *
 * The host is matched case-insensitively against each node's HostName, or
 * against the first label of its DNSName. Never rejects: any failure (socket
 * error, bad JSON, no match, 2 s timeout) resolves to ''.
 *
 * @param {string} host - bare host name to look up.
 * @returns {Promise<string>} dotted-quad IPv4 address, or '' when unresolved.
 */
async function tailscaleStatusLookup(host) {
  return new Promise((resolve) => {
    const finish = (ip) => {
      clearTimeout(timer);
      resolve(ip);
    };
    const req = http.request({
      socketPath: TAILSCALE_SOCKET,
      path: '/localapi/v0/status',
      method: 'GET',
      headers: { Host: 'local-tailscaled.sock' }, // anti-DNS-rebind guard the LocalAPI requires
    }, (res) => {
      let data = '';
      // Decode as a stream so a multi-byte character split across chunks is
      // not corrupted by per-chunk Buffer-to-string conversion.
      res.setEncoding('utf8');
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', () => finish(''));
      res.on('end', () => {
        try {
          const status = JSON.parse(data);
          const all = [status.Self, ...Object.values(status.Peer || {})].filter(Boolean);
          const want = host.toLowerCase();
          const match = all.find((p) =>
            (p.HostName || '').toLowerCase() === want ||
            (p.DNSName || '').toLowerCase().startsWith(want + '.'));
          const ip = (match?.TailscaleIPs || []).find((a) => /^\d+\.\d+\.\d+\.\d+$/.test(a)) || '';
          finish(ip);
        } catch {
          finish('');
        }
      });
    });
    // On timeout, tear the request down instead of leaving the socket open.
    // destroy() may emit 'error'; the handler below turns that into a no-op
    // second resolve.
    const timer = setTimeout(() => {
      req.destroy();
      resolve('');
    }, 2000);
    req.on('error', () => finish(''));
    req.end();
  });
}
async function tailscaleIpForServer(server) {
const host = server.includes('@') ? server.split('@')[1] : server; const host = server.includes('@') ? server.split('@')[1] : server;
if (_tailscaleIpCache.has(host)) return _tailscaleIpCache.get(host); const ips = {
const ip = await tailscaleStatusLookup(host); bruno: '100.74.17.6',
_tailscaleIpCache.set(host, ip); gal: '100.122.129.114',
return ip; };
return ips[host] || '';
} }
async function composeEnvPrefix(server) { function composeEnvPrefix(server) {
const tailscaleIp = await tailscaleIpForServer(server); const tailscaleIp = tailscaleIpForServer(server);
return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : ''; return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : '';
} }
@ -483,7 +332,7 @@ async function recreateService({
'-v', `${deployDir}:${deployDir}`, '-v', `${deployDir}:${deployDir}`,
'-w', deployDir, '-w', deployDir,
]; ];
const tailscaleIp = await tailscaleIpForServer(server); const tailscaleIp = tailscaleIpForServer(server);
if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`); if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`);
if (envFileWritten) runArgs.push('--env-file', envFilePath); if (envFileWritten) runArgs.push('--env-file', envFilePath);
runArgs.push('docker:cli'); runArgs.push('docker:cli');
@ -548,55 +397,6 @@ async function runOnServer(server, cmd, opts = {}) {
} }
} }
// ensureCheckout — guarantee the app's git root is a usable checkout before deploy/rollback.
// A registered app whose directory was never cloned would otherwise fail at the first
// `cd ... && git fetch`; this makes a first-time API deploy self-heal by cloning from the
// registry-declared `repo_url`. It is a NO-OP for existing checkouts, so apps without a
// `repo_url` keep working untouched. Intended to run inside the caller's per-app lock.
// The clone source is NOT derived from the app name — it MUST come from the registry.
// The caller still performs its own branch/tag checkout afterwards.
// repoDir is the GIT ROOT (registry `repo_dir`), which is not always the deploy_dir:
// sub-monorepo apps keep their compose files in a subdirectory of the checkout, so
// probing deploy_dir/.git would falsely report MISSING for them.
/**
 * @param {string} server - target passed through to runOnServer.
 * @param {string} repoDir - git root directory on the server.
 * @param {string} [repoUrl] - registry `repo_url`; required only when a clone is needed.
 * @returns {Promise<{cloned: boolean}>} whether this call performed a clone.
 * @throws {Error} on origin mismatch, missing repo_url, a non-empty non-repo
 *   directory, clone failure, or a post-clone origin mismatch.
 */
async function ensureCheckout(server, repoDir, repoUrl) {
  // repoDir/repoUrl come from the API-mutable registry, where repo_url is only
  // type-checked. Single-quote them so spaces or shell metacharacters cannot
  // alter the command that is run.
  const shq = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;
  const dirArg = shq(repoDir);
  const gitDirArg = shq(`${repoDir}/.git`);

  const probe = await runOnServer(server, `test -e ${gitDirArg} && echo REPO || echo MISSING`);
  if (probe.stdout.trim() === 'REPO') {
    // Already a checkout — leave branch/tag selection to the caller. Optionally assert origin.
    if (repoUrl) {
      const originRes = await runOnServer(server, `git -C ${dirArg} config --get remote.origin.url || true`);
      const actual = originRes.stdout.trim();
      if (actual && actual !== repoUrl) {
        throw new Error(`ensure-checkout: ${repoDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`);
      }
    }
    return { cloned: false };
  }

  if (!repoUrl) {
    throw new Error(`ensure-checkout: ${repoDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`);
  }

  // Refuse to clobber a non-empty, non-repo directory.
  const dirState = await runOnServer(server, `if [ -e ${dirArg} ] && [ -n "$(ls -A ${dirArg} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`);
  if (dirState.stdout.trim() === 'NONEMPTY') {
    throw new Error(`ensure-checkout: ${repoDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`);
  }

  // `--` stops option parsing so a repo_url beginning with '-' cannot be read as a git flag.
  const cloneRes = await runOnServer(server, `git clone -- ${shq(repoUrl)} ${dirArg}`, { timeout: 180000 });
  if (!cloneRes.ok) {
    throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${repoDir} failed: ${cloneRes.stderr}`);
  }

  // Verify the clone landed and origin matches what we asked for.
  const verifyRes = await runOnServer(server, `git -C ${dirArg} config --get remote.origin.url || true`);
  const landed = verifyRes.stdout.trim();
  if (landed !== repoUrl) {
    throw new Error(`ensure-checkout: cloned ${repoDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`);
  }
  return { cloned: true };
}
// --- kua-db integration --- // --- kua-db integration ---
async function kuaDbSafeCheck(app) { async function kuaDbSafeCheck(app) {
try { try {
@ -773,19 +573,6 @@ async function deploy(appName, opts = {}) {
// Step 2: Git pull on production server // Step 2: Git pull on production server
steps.push({ step: 'git_pull', status: 'running' }); steps.push({ step: 'git_pull', status: 'running' });
await markProgressPhase(appName, 'git_pull', { action, triggered_by: opts.triggered_by || 'api', steps }); await markProgressPhase(appName, 'git_pull', { action, triggered_by: opts.triggered_by || 'api', steps });
// ensure-checkout (TUBE step 1): self-heal a missing deploy_dir by cloning from the
// registry repo_url, so a first-time API deploy doesn't die at the `cd` below. No-op
// for existing checkouts. Inside the per-app lock acquired above.
try {
// Probe/clone the GIT ROOT (repo_dir), not deploy_dir — they differ for
// sub-monorepo apps (coder-core). git fetch/checkout below run from deploy_dir
// and git walks up to the root, so only the ensure-checkout probe needs repo_dir.
const ec = await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url);
if (ec.cloned) steps[steps.length - 1].cloned = true;
} catch (err) {
steps[steps.length - 1] = { step: 'git_pull', status: 'failed', error: err.message };
throw err;
}
const fetchCmd = `cd ${deployDir} && git fetch --prune ${remote}`; const fetchCmd = `cd ${deployDir} && git fetch --prune ${remote}`;
const fetchRes = await runOnServer(server, fetchCmd, { timeout: 60000 }); const fetchRes = await runOnServer(server, fetchCmd, { timeout: 60000 });
if (!fetchRes.ok) { if (!fetchRes.ok) {
@ -820,7 +607,7 @@ ${detail}`);
const kvPrefix = prod.vault const kvPrefix = prod.vault
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
: ''; : '';
const envPrefix = await composeEnvPrefix(server); const envPrefix = composeEnvPrefix(server);
const buildCmd = `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`; const buildCmd = `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`;
const buildRes = await runOnServer(server, buildCmd, { timeout: 600000 }); const buildRes = await runOnServer(server, buildCmd, { timeout: 600000 });
if (!buildRes.ok) { if (!buildRes.ok) {
@ -887,15 +674,6 @@ ${detail}`);
const svcRes = await runOnServer(server, `cd ${deployDir} && docker compose config --services`); const svcRes = await runOnServer(server, `cd ${deployDir} && docker compose config --services`);
const allServices = svcRes.stdout.split('\n').filter(Boolean); const allServices = svcRes.stdout.split('\n').filter(Boolean);
// Fail-loud (TUBE step 1): if compose resolved NO services, the recreate+verify
// block below is skipped entirely and the deploy would silently report `done`
// having recreated nothing (a false-success path). Refuse it.
if (allServices.length === 0) {
steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: 'docker compose config returned no services — nothing to recreate' };
await markProgressPhase(appName, 'deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
throw new Error('deploy: docker compose config returned no services — refusing to report success without recreating anything');
}
let stateful = prod.stateful_services || []; let stateful = prod.stateful_services || [];
if (stateful.length === 0) { if (stateful.length === 0) {
// Auto-detect stateful services from image names so db/redis are never force-recreated // Auto-detect stateful services from image names so db/redis are never force-recreated
@ -1054,36 +832,19 @@ ${detail}`);
await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
} }
} else { } else {
// No health URL — fall back to confirming containers are listable. runOnServer // No health URL — check containers
// returns {ok:false} on failure (it does not throw); if we can't even run
// `docker compose ps` we cannot claim health, so mark it failed (mirrors the
// health_url failure path: finalResult='unhealthy') instead of silent 'done'.
const psRes = await runOnServer(server, `cd ${deployDir} && docker compose ps --format json`); const psRes = await runOnServer(server, `cd ${deployDir} && docker compose ps --format json`);
if (!psRes.ok) { steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' };
steps[steps.length - 1] = { step: 'health', status: 'failed', error: psRes.stderr?.slice(-300) || psRes.error, note: 'no health URL; docker compose ps failed' }; await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
finalResult = 'unhealthy';
await markProgressPhase(appName, 'health_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult });
} else {
steps[steps.length - 1] = { step: 'health', status: 'done', note: 'no health URL configured' };
await markProgressPhase(appName, 'health_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
}
} }
// Step 7: Post-deploy hooks // Step 7: Post-deploy hooks
if (prod.post_deploy) { if (prod.post_deploy) {
steps.push({ step: 'post_deploy', status: 'running' }); steps.push({ step: 'post_deploy', status: 'running' });
await markProgressPhase(appName, 'post_deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit }); await markProgressPhase(appName, 'post_deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
// runOnServer returns {ok:false} on failure (it does not throw) — check it, await runOnServer(server, prod.post_deploy, { timeout: 30000 });
// else a failing post-deploy hook was silently reported as success. steps[steps.length - 1] = { step: 'post_deploy', status: 'done' };
const postRes = await runOnServer(server, prod.post_deploy, { timeout: 30000 }); await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
if (!postRes.ok) {
steps[steps.length - 1] = { step: 'post_deploy', status: 'failed', error: postRes.stderr?.slice(-500) || postRes.error };
finalResult = 'partial';
await markProgressPhase(appName, 'post_deploy_failed', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit, result: finalResult });
} else {
steps[steps.length - 1] = { step: 'post_deploy', status: 'done' };
await markProgressPhase(appName, 'post_deploy_done', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
}
} }
// Get tag // Get tag
@ -1150,7 +911,7 @@ ${detail}`);
// ROLLBACK ENGINE // ROLLBACK ENGINE
// ============================================================================= // =============================================================================
async function rollback(appName, opts = {}) { async function rollback(appName) {
const app = getApp(appName); const app = getApp(appName);
if (!app) throw new Error(`Unknown app: ${appName}`); if (!app) throw new Error(`Unknown app: ${appName}`);
@ -1159,25 +920,13 @@ async function rollback(appName, opts = {}) {
const deployDir = prod.deploy_dir; const deployDir = prod.deploy_dir;
const remote = app.git_remote || 'origin'; const remote = app.git_remote || 'origin';
// Determine the rollback target. An explicit to_ref (tag/commit/branch, from MCP or admin) // Find the previous successful deploy
// overrides the default "previous successful tagged deploy" behavior — this preserves the
// MCP deploy.rollback contract (arbitrary to_ref) now that it routes through this endpoint
// instead of the legacy systemd path. Validate to_ref to keep it out of shell-injection
// range (it interpolates into `git checkout` below).
const history = deployHistory[appName] || []; const history = deployHistory[appName] || [];
const current = history[0]; const current = history[0];
let tag; const previous = history.find((d, i) => i > 0 && d.result === 'success' && d.tag && d.tag !== 'untagged');
if (opts.to_ref) {
if (!/^[A-Za-z0-9._/-]+$/.test(opts.to_ref)) { if (!previous) {
throw new Error(`Invalid to_ref ${JSON.stringify(opts.to_ref)} — must match [A-Za-z0-9._/-]+`); return { app: appName, result: 'no_rollback_target', message: 'No previous successful deploy with a tag found' };
}
tag = opts.to_ref;
} else {
const previous = history.find((d, i) => i > 0 && d.result === 'success' && d.tag && d.tag !== 'untagged');
if (!previous) {
return { app: appName, result: 'no_rollback_target', message: 'No previous successful deploy with a tag found (pass to_ref to roll back to a specific tag/commit)' };
}
tag = previous.tag;
} }
if (!acquireLock(appName)) { if (!acquireLock(appName)) {
@ -1185,6 +934,7 @@ async function rollback(appName, opts = {}) {
} }
try { try {
const tag = previous.tag;
await writeProgress(appName, { await writeProgress(appName, {
action: 'rollback', action: 'rollback',
triggered_by: 'api', triggered_by: 'api',
@ -1195,12 +945,8 @@ async function rollback(appName, opts = {}) {
rolled_back_from: current?.tag || current?.commit || 'unknown', rolled_back_from: current?.tag || current?.commit || 'unknown',
}); });
// ensure-checkout (TUBE step 1): probe/clone the git root (repo_dir), not deploy_dir // Checkout the previous tag on production
// (they differ for sub-monorepo apps like coder-core). No-op for existing checkouts. const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune ${remote} && git checkout ${tag}`, { timeout: 60000 });
await ensureCheckout(server, app.repo_dir || deployDir, app.repo_url);
// Checkout the rollback target on production (--tags so an explicit to_ref tag resolves).
const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune --tags ${remote} && git checkout ${tag}`, { timeout: 60000 });
if (!checkoutRes.ok) throw new Error(`Checkout ${tag} failed: ${checkoutRes.stderr}`); if (!checkoutRes.ok) throw new Error(`Checkout ${tag} failed: ${checkoutRes.stderr}`);
// Rebuild + recreate via transient-container pattern (consistent with deploy()). // Rebuild + recreate via transient-container pattern (consistent with deploy()).
@ -1209,8 +955,7 @@ async function rollback(appName, opts = {}) {
const kvPrefix = prod.vault const kvPrefix = prod.vault
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
: ''; : '';
const envPrefix = await composeEnvPrefix(server); const buildRes = await runOnServer(server, `cd ${deployDir} && ${composeEnvPrefix(server)}${kvPrefix} docker compose build`, { timeout: 600000 });
const buildRes = await runOnServer(server, `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose build`, { timeout: 600000 });
if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`); if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`);
// Recreate all services for the rollback target. // Recreate all services for the rollback target.
const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean); const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean);
@ -1263,37 +1008,6 @@ async function rollback(appName, opts = {}) {
return { app: appName, ...entry }; return { app: appName, ...entry };
} catch (err) {
// The checkout/build/recreate steps above return {ok:false}/throw on failure;
// without this catch a failed rollback left progress stuck in 'running' and was
// never recorded. Mirror deploy()'s catch: record the failure and return a
// {result:'failed'} object (the rollback route's contract) rather than 500ing.
const entry = {
result: 'failed',
action: 'rollback',
error: err.message,
rolled_back_to: tag,
rolled_back_from: current?.tag || current?.commit || 'unknown',
server,
triggered_by: 'api',
};
await writeProgress(appName, {
action: 'rollback',
triggered_by: 'api',
status: 'failed',
phase: 'rollback_failed',
current_step: 'rollback',
result: 'failed',
error: err.message,
rolled_back_to: tag,
rolled_back_from: current?.tag || current?.commit || 'unknown',
server,
finished_at: Math.floor(Date.now() / 1000),
});
recordDeploy(appName, entry);
await audit({ action: 'rollback_failed', app: appName, error: err.message });
return { app: appName, ...entry };
} finally { } finally {
releaseLock(appName); releaseLock(appName);
} }
@ -1455,97 +1169,6 @@ fastify.get('/api/v1/apps/registry', async () => {
return registry; return registry;
}); });
// POST /api/v1/registry/reload — re-read the deploy registry from disk without
// restarting the engine, so a newly registered app does not need a container
// restart to become visible. If loadRegistry() throws, the handler answers 500
// and reports the app count it had before the attempt. Auth is handled by the
// global onRequest hook.
fastify.post('/api/v1/registry/reload', async (request, reply) => {
  const countApps = () => Object.keys(registry.apps || {}).length;

  const before = countApps();
  try {
    await loadRegistry();
  } catch (err) {
    const failure = {
      ok: false,
      error: `registry reload failed (kept last-good ${before} apps): ${err.message}`,
      apps: before,
    };
    return reply.code(500).send(failure);
  }

  const after = countApps();
  fastify.log.info({ before, after }, 'registry reloaded via /api/v1/registry/reload');
  return { ok: true, apps: after, before };
});
// Export the full registry and write a timestamped snapshot to the data volume.
// Git can consume these as generated audit artifacts; it is NOT the source of truth.
//
// Responses:
//   200 { ok: true, apps: <count>, snapshot: <path written>, registry }
//   500 { ok: false, error } when the snapshot cannot be written
fastify.get('/api/v1/registry/export', async (request, reply) => {
  try {
    // Second-resolution UTC stamp such as "2026-05-23T043430". Colons are dropped to
    // keep the file name portable. Keeping the seconds matters: a minute-resolution
    // stamp makes two exports within the same minute overwrite each other.
    const ts = new Date().toISOString().slice(0, 19).replace(/:/g, '');
    const snap = path.join(DATA_DIR, `registry-snapshot-${ts}.json`);
    await fs.mkdir(DATA_DIR, { recursive: true });
    await fs.writeFile(snap, JSON.stringify(registry, null, 2) + '\n', 'utf-8');
    // `|| {}` mirrors the reload route so a registry without `apps` reports 0 instead of throwing.
    return { ok: true, apps: Object.keys(registry.apps || {}).length, snapshot: snap, registry };
  } catch (err) {
    return reply.code(500).send({ ok: false, error: err.message });
  }
});
// --- Registry mutations (engine-owned) — authenticated via the global onRequest hook ---
// Upsert a full app entry. ?dry_run=1 (or ?dry_run=true) validates and returns a diff
// without writing.
//
// Responses:
//   400 { ok: false, errors } when validateEntry() reports problems or the name is reserved
//   200 { ok: true, dry_run: true, op, app, diff } for a dry run
//   200 { ok: true, op, app, apps: <count> } after a successful write
fastify.put('/api/v1/apps/:app', async (request, reply) => {
  const name = request.params.app;
  const entry = request.body;
  const errs = validateEntry(name, entry);
  // "registry" would collide with the GET /api/v1/apps/registry route.
  if (name === 'registry') errs.push('"registry" is a reserved name');
  if (errs.length) return reply.code(400).send({ ok: false, errors: errs });

  const before = registry.apps[name] || null;
  const op = before ? 'update' : 'create';
  const diff = normalizedDiff(before, entry);
  if (request.query.dry_run === '1' || request.query.dry_run === 'true') {
    return { ok: true, dry_run: true, op, app: name, diff };
  }

  await withRegistryLock(async () => {
    // Snapshot inside the lock so a failed persist can be undone without clobbering
    // a change made by another writer that held the lock before us.
    const previous = registry.apps[name];
    registry.apps[name] = entry;
    try {
      await writeRegistry();
    } catch (err) {
      // Keep memory and disk in agreement: if the write failed, the entry must not
      // stay visible in memory (and later get persisted without an audit event).
      if (previous === undefined) delete registry.apps[name];
      else registry.apps[name] = previous;
      throw err;
    }
    await appendRegistryEvent({ op, app: name, actor: request.identity?.user || 'unknown', before, after: entry });
  });
  return { ok: true, op, app: name, apps: Object.keys(registry.apps).length };
});
// Deep-merge a partial update onto an existing entry. ?dry_run=1 (or ?dry_run=true)
// validates the merged entry and returns a diff without writing.
//
// Responses:
//   404 { ok: false, error } when the app is not registered
//   400 { ok: false, errors } when the body is not a JSON object or the merged entry is invalid
//   200 { ok: true, dry_run: true, op: 'patch', app, diff } for a dry run
//   200 { ok: true, op: 'patch', app } after a successful write
fastify.patch('/api/v1/apps/:app', async (request, reply) => {
  const name = request.params.app;
  const before = registry.apps[name];
  if (!before) return reply.code(404).send({ ok: false, error: `Unknown app: ${name}` });

  // A missing body is treated as an empty patch. Anything that is not a plain JSON
  // object is rejected: Object.entries() on an array or string would otherwise merge
  // index keys ("0", "1", ...) into the entry.
  const patch = request.body || {};
  if (typeof patch !== 'object' || Array.isArray(patch)) {
    return reply.code(400).send({ ok: false, errors: ['request body must be a JSON object'] });
  }

  // Recursively merge b onto a copy of a. Only object-onto-object pairs recurse;
  // arrays and scalars in the patch replace the existing value wholesale.
  const deepMerge = (a, b) => {
    const out = Array.isArray(a) ? [...a] : { ...a };
    for (const [k, v] of Object.entries(b || {})) {
      const bothObjects = v && typeof v === 'object' && !Array.isArray(v)
        && a && typeof a[k] === 'object' && !Array.isArray(a[k]);
      out[k] = bothObjects ? deepMerge(a[k], v) : v;
    }
    return out;
  };

  const merged = deepMerge(before, patch);
  const errs = validateEntry(name, merged);
  if (errs.length) return reply.code(400).send({ ok: false, errors: errs });
  if (request.query.dry_run === '1' || request.query.dry_run === 'true') {
    return { ok: true, dry_run: true, op: 'patch', app: name, diff: normalizedDiff(before, merged) };
  }

  await withRegistryLock(async () => {
    // Snapshot inside the lock so a failed persist can be undone precisely.
    const previous = registry.apps[name];
    registry.apps[name] = merged;
    try {
      await writeRegistry();
    } catch (err) {
      // Keep memory and disk in agreement when the write fails.
      if (previous === undefined) delete registry.apps[name];
      else registry.apps[name] = previous;
      throw err;
    }
    await appendRegistryEvent({ op: 'patch', app: name, actor: request.identity?.user || 'unknown', before, after: merged });
  });
  return { ok: true, op: 'patch', app: name };
});
// Remove an app from the registry.
//
// Responses:
//   404 { ok: false, error } when the app is not registered
//   200 { ok: true, op: 'delete', app, apps: <remaining count> } after a successful write
fastify.delete('/api/v1/apps/:app', async (request, reply) => {
  const name = request.params.app;
  const before = registry.apps[name];
  if (!before) return reply.code(404).send({ ok: false, error: `Unknown app: ${name}` });

  await withRegistryLock(async () => {
    // Snapshot inside the lock so a failed persist can be undone precisely.
    const previous = registry.apps[name];
    delete registry.apps[name];
    try {
      await writeRegistry();
    } catch (err) {
      // The deletion never reached disk; put the entry back so memory and disk agree.
      if (previous !== undefined) registry.apps[name] = previous;
      throw err;
    }
    await appendRegistryEvent({ op: 'delete', app: name, actor: request.identity?.user || 'unknown', before, after: null });
  });
  return { ok: true, op: 'delete', app: name, apps: Object.keys(registry.apps).length };
});
// List all apps // List all apps
fastify.get('/api/v1/apps', async () => { fastify.get('/api/v1/apps', async () => {
const results = []; const results = [];
@ -1575,14 +1198,8 @@ fastify.get('/api/v1/apps/:app/deploys', async (request) => {
// --- Actions --- // --- Actions ---
// Release (merge main→production, tag, push — triggers webhook deploy) // Release (merge main→production, tag, push — triggers webhook deploy)
fastify.post('/api/v1/apps/:app/release', async (request, reply) => { fastify.post('/api/v1/apps/:app/release', async (request) => {
const { message, source_branch, target_branch } = request.body || {}; const { message, source_branch, target_branch } = request.body || {};
try {
if (source_branch !== undefined) validateBranchName(source_branch, 'source_branch');
if (target_branch !== undefined) validateBranchName(target_branch, 'target_branch');
} catch (err) {
return reply.code(400).send({ ok: false, error: err.message });
}
return await release(request.params.app, message || 'Release to production', { source_branch, target_branch }); return await release(request.params.app, message || 'Release to production', { source_branch, target_branch });
}); });
@ -1647,11 +1264,9 @@ fastify.post('/api/v1/apps/:app/rebuild', async (request, reply) => {
return { triggered: true, app }; return { triggered: true, app };
}); });
// Rollback. Optional body { to_ref } rolls back to a specific tag/commit/branch; // Rollback
// omitted = previous successful tagged deploy.
fastify.post('/api/v1/apps/:app/rollback', async (request) => { fastify.post('/api/v1/apps/:app/rollback', async (request) => {
const to_ref = request.body && typeof request.body.to_ref === 'string' ? request.body.to_ref : undefined; return await rollback(request.params.app);
return await rollback(request.params.app, { to_ref });
}); });
// --- Deploy Progress --- // --- Deploy Progress ---

View File

@ -0,0 +1,61 @@
// Unit tests for registry loading — validates the path resolution and
// structural invariants of deploy-registry.json.
// Run with: node --test test/registry-loader.test.js
import assert from 'node:assert/strict';
import { test } from 'node:test';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// On Bruno the registry is bind-mounted into the container at /app/deploy-registry.json
// (process.cwd() = /app). Locally (dev on gal), it lives in coder-core.
// Accept an override via env var so CI / test runners can point at any copy.
// Service root: the parent directory of the directory holding this test file.
const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');

// Default registry locations, in priority order.
const REGISTRY_CANDIDATES = [
  path.join(ROOT, 'deploy-registry.json'), // container bind-mount
  path.join(ROOT, '../../coder-core/services/kua-deploy/deploy-registry.json'), // local dev fallback
];

// Resolution order: explicit env override, then the first candidate that exists on
// disk. path.join() always returns a string, so chaining the candidates with `??`
// would never reach the second one; existence has to be checked explicitly.
// When nothing exists we fall back to the first candidate so the "registry not found"
// failure reports the primary expected location.
const REGISTRY_FILE = process.env.REGISTRY_FILE
  ?? REGISTRY_CANDIDATES.find((candidate) => fs.existsSync(candidate))
  ?? REGISTRY_CANDIDATES[0];
// ------------------------------------------------------------------
// Bug #7 regression: DEPLOY_REGISTRY_PATH must resolve to a real file.
// In kua-mcp-core the old ROOT was computed via __dirname + "../../../"
// which overshoots to "/" inside the container. Here we validate the path
// resolved above: the REGISTRY_FILE env override when set, otherwise a
// location derived from this test file's own directory (not process.cwd()).
// ------------------------------------------------------------------
// Parsed registry, cached after the first successful read.
let registry;

// Read and parse the registry on first use. Every test goes through this helper, so
// a single test can run (or fail) on its own without the others crashing on an
// undefined `registry`.
function readRegistry() {
  if (registry === undefined) {
    assert.ok(fs.existsSync(REGISTRY_FILE), `registry not found at ${REGISTRY_FILE}`);
    const raw = fs.readFileSync(REGISTRY_FILE, 'utf-8');
    registry = JSON.parse(raw);
  }
  return registry;
}

// Return registry.apps after checking it is a real (non-null, non-array) object.
function registeredApps() {
  const { apps } = readRegistry() ?? {};
  assert.ok(apps !== null && typeof apps === 'object' && !Array.isArray(apps),
    'registry.apps must be an object');
  return apps;
}

test('registry file exists at expected path', () => {
  readRegistry();
});

test('registry has apps object', () => {
  const count = Object.keys(registeredApps()).length;
  assert.ok(count >= 5, `expected at least 5 registered apps, got ${count}`);
});

test('every app has required fields', () => {
  for (const [name, cfg] of Object.entries(registeredApps())) {
    assert.ok(cfg !== null && typeof cfg === 'object',
      `${name}: entry must be an object`);
    assert.ok(typeof cfg.repo_dir === 'string' && cfg.repo_dir.length > 0,
      `${name}: repo_dir must be a non-empty string`);
    assert.ok(typeof cfg.deploy_mode === 'string',
      `${name}: deploy_mode must be present`);
    assert.ok(cfg.production && typeof cfg.production === 'object',
      `${name}: production config must be present`);
  }
});

test('no app uses webhook deploy_mode (webhook path is retired)', () => {
  const webhookApps = Object.entries(registeredApps())
    .filter(([, cfg]) => cfg.deploy_mode === 'webhook')
    .map(([name]) => name);
  assert.deepEqual(webhookApps, [],
    `These apps still have deploy_mode=webhook (retire them to direct): ${webhookApps.join(', ')}`);
});

test('kua-deploy is registered in its own registry', () => {
  const apps = registeredApps();
  assert.ok('kua-deploy' in apps, 'kua-deploy must be in deploy-registry.json');
  assert.equal(apps['kua-deploy'].deploy_mode, 'direct');
});

View File

@ -0,0 +1,53 @@
// Unit tests for SHA normalization in verify paths.
// Run with: node --test test/sha-comparison.test.js
import assert from 'node:assert/strict';
import { test } from 'node:test';
// Canonical digest normalization, as used by verifyStatelessRecreated() and
// runtime-status — keep this copy in sync with server.js.
// Drops one leading "sha256:" prefix; null, undefined and '' all normalize to ''.
const stripSha = (s) => {
  const prefix = 'sha256:';
  const digest = s || '';
  return digest.startsWith(prefix) ? digest.slice(prefix.length) : digest;
};
// ------------------------------------------------------------------
// Bug #3 regression: docker compose images returns bare hex,
// docker inspect .Image returns sha256:<hex>. They must compare equal.
// ------------------------------------------------------------------
test('sha: bare hex == bare hex', () => {
  const a = 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2';
  // Compare against the literal input: comparing stripSha(a) with itself can never fail.
  assert.equal(stripSha(a), a, 'bare hex must pass through unchanged');
});

test('sha: sha256-prefixed == bare hex (the failing case before the fix)', () => {
  const bare = 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2';
  const prefixed = `sha256:${bare}`;
  // Before fix: prefixed === bare => false
  assert.notEqual(prefixed, bare, 'raw strings are indeed unequal — this is the bug');
  // After fix: stripSha(prefixed) === stripSha(bare) => true
  assert.equal(stripSha(prefixed), stripSha(bare), 'normalized strings must be equal');
  // And the normalized form is exactly the bare digest.
  assert.equal(stripSha(prefixed), bare, 'prefix must be removed, nothing else');
});

test('sha: both sha256-prefixed', () => {
  const a = 'sha256:a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2';
  const b = 'sha256:a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2';
  assert.equal(stripSha(a), stripSha(b));
});

test('sha: different digests stay different after normalization', () => {
  const a = 'sha256:aaaa0000000000000000000000000000000000000000000000000000000000001111';
  const b = 'sha256:bbbb0000000000000000000000000000000000000000000000000000000000002222';
  assert.notEqual(stripSha(a), stripSha(b));
});

test('sha: empty/null input returns empty string', () => {
  assert.equal(stripSha(''), '');
  assert.equal(stripSha(null), '');
  assert.equal(stripSha(undefined), '');
});

test('sha: imageMatch logic mirrors server.js verifyStatelessRecreated', () => {
  const expectedSha = 'a0845a6c5772e01234567890abcdef01234567890abcdef01234567890abcdef01';
  const actualSha = `sha256:${expectedSha}`;
  const imageMatch = !!expectedSha && stripSha(actualSha) === stripSha(expectedSha);
  assert.ok(imageMatch, 'imageMatch must be true when digests are the same modulo sha256: prefix');
});