Compare commits

...

7 Commits
v0.1.0 ... main

Author SHA1 Message Date
Kavi d5508249f2 test: SHA normalization and registry-loader unit tests
Regression coverage for:
- Bug #3: sha256: prefix comparison — tests the stripSha helper and the
  imageMatch logic as used in verifyStatelessRecreated()
- Bug #7 / #6: registry file must exist and have all required fields;
  no app may use deploy_mode=webhook (webhook path retired)
  Run: REGISTRY_FILE=<path> node --test test/registry-loader.test.js

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-23 04:34:30 -04:00
Kavi 6a583a8572 fix(deploy): SHA normalization + kua-vault wrap in verifyStatelessRecreated; retire webhook mandatory check; add /api/v1/apps/registry endpoint
- verifyStatelessRecreated(): strip sha256: prefix before comparing image SHAs
  (same fix already applied to runtime-status in 2551af4, completeSelfRecreate
  in 06852c2; now consistent across all three verify paths)
- verifyStatelessRecreated(): add kua-vault wrap on compose images/ps calls
  (mirrors 2551af4 runtime-status fix; env interpolation was causing empty
  output for vault-injected apps, making the verify always fail with
  'no running container')
- start(): downgrade missing WEBHOOK_SECRET from fatal error to warning; the
  Forgejo webhook path is retired in favour of the admin API — handler stays
  but the startup guard no longer blocks kua-deploy from booting
- Add GET /api/v1/apps/registry so kua-mcp-core can fetch the full app list
  over HTTP without depending on a filesystem path inside its container

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-23 04:29:26 -04:00
claude-v2-deploy-coordination 58733939e2 fix(runtime-status): strip sha256: prefix in stale comparison (was always-stale false positive) 2026-05-21 22:36:21 -04:00
claude-v2-deploy-coordination 2551af4051 fix(runtime-status): kua-vault wrap on compose config/images/ps (was the real reason services list returned empty) 2026-05-21 22:07:01 -04:00
kua-deploy-split db647bcd0a fix(self-recreate): capture built image SHA via docker images tag, normalize SHA comparison
The prior expected_image_sha was captured via docker compose images, which returns the image of the existing (pre-recreate) container — not the freshly-built one. Switch to docker images ${project}-${service}:latest --quiet --no-trunc which returns the post-build image SHA. Also normalize sha256: prefix in completeSelfRecreate comparison so docker compose images output (sha256-prefixed) and docker inspect output (also sha256-prefixed) match cleanly.
2026-05-21 18:35:37 -04:00
kua-deploy-split 06852c227c feat: self-recreate handoff for kua-deploy + startup completeSelfRecreate()
When kua-deploy is recreating itself (target appName == kua-deploy on same host), the OLD process is about to be killed by the docker daemon mid-flight. Without a handoff, progress would be stuck at deploy:running forever and release-app would poll until timeout.

Self-recreate path: (1) pre-mark progress phase=self_recreate_pending with the freshly-built image SHA + deployStartTs + stateless services list; (2) fire-and-forget recreateService (do not await its return — the OLD process is dying anyway); (3) sleep 90s as a ceiling — if we're still alive, the recreate failed and we throw.

On startup, completeSelfRecreate() reads progress-kua-deploy.json; if phase is self_recreate_pending, queries its own container via docker inspect, compares running image SHA to the pre-recreate expected SHA, checks StartedAt > recreate_started_at + state=running, then writes phase=succeeded (or failed) plus a verify struct on the deploy step. Idempotent — no-op if no marker is found.
2026-05-21 18:31:45 -04:00
kua-deploy-split 9169c84381 feat: recreateService transient-container helper + use it in deploy() + rollback()
Replaces the runOnServer("docker compose up -d --force-recreate") pattern with a one-shot transient docker:cli container that runs OUTSIDE kua-deploy lifecycle. Solves the self-recreate chicken-and-egg: when the target is kua-deploy itself, the recreate completes because the transient survives kua-deploy stopping (docker daemon does the actual work).

Secrets are fetched via kua-vault export, written to a 600-perm tempfile on /app/data, passed via --env-file (docker CLI reads it from kua-deploy's perspective; never on the docker run command line). Tempfile is unlinked in finally{}.

Replaces: deploy() stateless recreate (force=true), deploy() stateful up (force=false), rollback() recreate (force=true with all-services svcList).

Build step keeps runOnServer (local exec on bruno) since build doesn't kill kua-deploy. envPrefix/kvPrefix vars retained for the build command.
2026-05-21 18:24:13 -04:00
3 changed files with 410 additions and 18 deletions

314
server.js
View File

@ -3,12 +3,16 @@ import fs from 'fs/promises';
import path from 'path'; import path from 'path';
import crypto from 'crypto'; import crypto from 'crypto';
import http from 'http'; import http from 'http';
import { exec as execCb, execFile as execFileCb } from 'child_process'; import { exec as execCb, execFile as execFileCb, spawn } from 'child_process';
import { promisify } from 'util'; import { promisify } from 'util';
const exec = promisify(execCb); const exec = promisify(execCb);
const execFile = promisify(execFileCb); const execFile = promisify(execFileCb);
// The app name this kua-deploy reports itself as in deploy-registry.json.
// Used to detect "I am being deployed" cases so we can do the self-recreate handoff.
const SELF_APP_NAME = 'kua-deploy';
// Input validation // Input validation
const SAFE_MESSAGE_RE = /^[a-zA-Z0-9 _.,!?:;@#/()[\]{}<>='"+*&^%$~`|-]{1,500}$/; const SAFE_MESSAGE_RE = /^[a-zA-Z0-9 _.,!?:;@#/()[\]{}<>='"+*&^%$~`|-]{1,500}$/;
function validateMessage(msg) { function validateMessage(msg) {
@ -270,6 +274,99 @@ function composeEnvPrefix(server) {
return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : ''; return tailscaleIp ? `TAILSCALE_IP=${tailscaleIp} ` : '';
} }
// recreateService — spawn a one-shot transient docker:cli container that runs
// `docker compose up -d` against the host docker socket. The transient container
// is OUTSIDE the lifecycle of the service it recreates, so even when the target
// is kua-deploy itself, the recreate completes (the transient survives kua-deploy
// being stopped/started; the docker daemon does the actual work).
//
// Bind paths MUST be identical between host and transient container
// (e.g. -v /root/apps/X:/root/apps/X) so compose's path resolution matches host
// reality. Secrets are pre-fetched via kua-vault into a private --env-file rather
// than being passed on the docker run command line.
//
// Parameters (single options object):
//   project      compose project name (callers pass basename(deployDir))
//   deployDir    absolute path to the deploy dir on the docker host
//   services     array of service names to recreate; empty/non-array => skipped
//   force        when true, adds --force-recreate
//   vault        { project, env } — if set, secrets are fetched via kua-vault export
//   server       used to look up the TAILSCALE_IP env var
//   composeFile  compose file name passed to `-f`
//   timeout      ms before the docker CLI child is SIGKILLed
//
// Returns { ok, stdout, stderr, error? } (plus `skipped: true` when there is
// nothing to recreate). Failures — including invalid arguments and timeouts —
// are reported through `ok: false` + `error` rather than thrown, so call sites
// can treat it like run()/runOnServer().
async function recreateService({
  project,
  deployDir,
  services,
  force = true,
  vault = null,
  server = 'bruno',
  composeFile = 'docker-compose.yml',
  timeout = 300000,
} = {}) {
  if (!Array.isArray(services) || services.length === 0) {
    return { ok: true, stdout: '', stderr: '', error: null, skipped: true };
  }
  // spawn() throws on non-string args; report bad input in the documented
  // result shape instead of letting that escape as an exception.
  if (typeof project !== 'string' || project === '' || typeof deployDir !== 'string' || deployDir === '') {
    return {
      ok: false,
      stdout: '',
      stderr: 'recreateService: project and deployDir are required',
      error: 'invalid arguments',
    };
  }

  // Stage env file on the kua-deploy data volume so the docker CLI (running
  // inside kua-deploy) can read it. The transient container picks up vars via
  // --env-file processed at submit time — no host-side mount needed.
  const tmpName = `.env-recreate-${crypto.randomBytes(8).toString('hex')}`;
  const envFilePath = `/app/data/${tmpName}`;
  let envFileWritten = false;

  try {
    if (vault && vault.project) {
      const envEnv = vault.env || 'prod';
      // kua-vault export emits KEY=VALUE lines — directly compatible with --env-file
      const exportRes = await run(`kua-vault export --project ${vault.project} --env ${envEnv}`, { timeout: 30000 });
      if (!exportRes.ok) {
        return { ok: false, stdout: '', stderr: `kua-vault export ${vault.project}/${envEnv} failed: ${exportRes.stderr?.slice(-300) || exportRes.error}`, error: 'vault export failed' };
      }
      // Strip any non KEY=VALUE lines (e.g. status banners) and validate
      const envLines = exportRes.stdout
        .split('\n')
        .filter(l => /^[A-Z_][A-Z0-9_]*=/.test(l));
      if (envLines.length === 0) {
        return { ok: false, stdout: '', stderr: `kua-vault export returned no KEY=VALUE lines for ${vault.project}/${envEnv}`, error: 'empty vault export' };
      }
      await fs.writeFile(envFilePath, envLines.join('\n') + '\n', { mode: 0o600 });
      envFileWritten = true;
    }

    // Build docker run args
    const runArgs = [
      'run', '--rm',
      '-v', '/var/run/docker.sock:/var/run/docker.sock',
      '-v', `${deployDir}:${deployDir}`,
      '-w', deployDir,
    ];
    const tailscaleIp = tailscaleIpForServer(server);
    if (tailscaleIp) runArgs.push('-e', `TAILSCALE_IP=${tailscaleIp}`);
    if (envFileWritten) runArgs.push('--env-file', envFilePath);
    runArgs.push('docker:cli');

    // Compose command (transient container will run it)
    runArgs.push('docker', 'compose', '-p', project, '-f', composeFile, 'up', '-d', '--no-deps', '--remove-orphans');
    if (force) runArgs.push('--force-recreate');
    runArgs.push(...services);

    return await new Promise((resolve) => {
      const child = spawn('docker', runArgs);
      let stdout = '';
      let stderr = '';
      let timedOut = false;

      // NOTE(review): this kills the local docker CLI process only; whether the
      // transient container itself is also stopped is not established here.
      const tHandle = setTimeout(() => {
        timedOut = true;
        try { child.kill('SIGKILL'); } catch (_) { /* ignore */ }
      }, timeout);

      child.stdout.on('data', d => { stdout += d.toString(); });
      child.stderr.on('data', d => { stderr += d.toString(); });

      child.on('close', (code, signal) => {
        clearTimeout(tHandle);
        const out = stdout.trim();
        const err = stderr.trim();
        if (timedOut) {
          // Distinguish our own timeout kill from a genuine non-zero exit.
          resolve({ ok: false, stdout: out, stderr: err, error: `docker run timed out after ${timeout}ms` });
        } else if (code === 0) {
          resolve({ ok: true, stdout: out, stderr: err });
        } else if (code === null) {
          // Exit code is null when the child was terminated by a signal.
          resolve({ ok: false, stdout: out, stderr: err, error: `docker run killed by ${signal}` });
        } else {
          resolve({ ok: false, stdout: out, stderr: err, error: `docker run exit ${code}` });
        }
      });

      child.on('error', (err) => {
        clearTimeout(tHandle);
        resolve({ ok: false, stdout: '', stderr: String(err?.message || err), error: 'spawn failed' });
      });
    });
  } finally {
    // Secrets must not outlive the call, whatever path we returned through.
    if (envFileWritten) {
      try { await fs.unlink(envFilePath); } catch (_) { /* ignore */ }
    }
  }
}
async function run(cmd, opts = {}) { async function run(cmd, opts = {}) {
const timeout = opts.timeout || 30000; const timeout = opts.timeout || 30000;
try { try {
@ -539,8 +636,8 @@ ${detail}`);
if (verifyMode === 'off') return { ok: true, results: [], skipped: true }; if (verifyMode === 'off') return { ok: true, results: [], skipped: true };
const results = []; const results = [];
for (const svc of services) { for (const svc of services) {
const exp = await runOnServer(server, `cd ${deployDir} && docker compose images --quiet ${svc} 2>/dev/null | head -1`); const exp = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose images --quiet ${svc} 2>/dev/null | head -1`);
const cid = await runOnServer(server, `cd ${deployDir} && docker compose ps --quiet ${svc} 2>/dev/null | head -1`); const cid = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose ps --quiet ${svc} 2>/dev/null | head -1`);
const expectedSha = (exp.stdout || '').trim(); const expectedSha = (exp.stdout || '').trim();
const containerId = (cid.stdout || '').trim(); const containerId = (cid.stdout || '').trim();
if (!containerId) { if (!containerId) {
@ -550,7 +647,8 @@ ${detail}`);
const insp = await runOnServer(server, `docker inspect --format '{{.Image}}|{{.State.StartedAt}}' ${containerId}`); const insp = await runOnServer(server, `docker inspect --format '{{.Image}}|{{.State.StartedAt}}' ${containerId}`);
const [actualSha, startedAtStr] = (insp.stdout || '').trim().split('|'); const [actualSha, startedAtStr] = (insp.stdout || '').trim().split('|');
const startedAt = new Date(startedAtStr || 0); const startedAt = new Date(startedAtStr || 0);
const imageMatch = !!expectedSha && actualSha === expectedSha; const stripSha = (s) => (s || '').replace(/^sha256:/, '');
const imageMatch = !!expectedSha && stripSha(actualSha) === stripSha(expectedSha);
const freshlyStarted = !isNaN(startedAt) && startedAt >= deployStartTs; const freshlyStarted = !isNaN(startedAt) && startedAt >= deployStartTs;
results.push({ results.push({
service: svc, ok: imageMatch && freshlyStarted, service: svc, ok: imageMatch && freshlyStarted,
@ -598,10 +696,69 @@ ${detail}`);
const stateless = allServices.filter(s => !stateful.includes(s)); const stateless = allServices.filter(s => !stateful.includes(s));
const deployStartTs = new Date(); const deployStartTs = new Date();
if (stateless.length > 0) { if (stateless.length > 0) {
const upRes = await runOnServer(server, `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose up -d --force-recreate --remove-orphans ${stateless.join(' ')}`, { timeout: 300000 }); // Use transient-container recreate so kua-deploy can self-update without
// killing the compose-up process mid-flight. Same pattern works for all
// apps (not just kua-deploy) and replaces the old runOnServer + kua-vault-run
// shell prefix approach.
const composeProject = path.basename(deployDir);
// SELF-RECREATE HANDOFF — when the target IS kua-deploy on the same host,
// this process is about to be killed. We pre-mark progress with a
// self-recreate-pending sentinel so the NEW kua-deploy can pick up the
// verification on startup. See completeSelfRecreate() near init.
const selfRecreate = appName === SELF_APP_NAME && isLocal(server) && stateless.includes(SELF_APP_NAME);
if (selfRecreate) {
// Capture the freshly-built image SHA for post-restart verification.
// `docker compose images` returns the image used by the EXISTING container
// (still the OLD one before recreate). For the just-built image, query the
// image tag that compose builds into: ${project}-${service}:latest.
const builtImageTag = `${composeProject}-${SELF_APP_NAME}:latest`;
const builtSha = (await run(`docker images ${builtImageTag} --quiet --no-trunc | head -1`)).stdout.trim() || null;
steps[steps.length - 1] = {
step: 'deploy',
status: 'running',
self_recreate: true,
note: 'self-recreate handoff — NEW kua-deploy will verify on startup',
};
await markProgressPhase(appName, 'self_recreate_pending', {
action,
triggered_by: opts.triggered_by || 'api',
steps,
commit: deployCommit,
self_recreate_expected_image: builtSha,
self_recreate_started_at: deployStartTs.toISOString(),
self_recreate_stateless: stateless,
});
// Fire-and-forget recreate. The OLD process is about to die; spawn close
// handler may resolve with ok=false because of the kill, which is expected.
// We don't throw on its failure — the docker daemon owns the lifecycle now.
recreateService({
project: composeProject,
deployDir,
services: stateless,
force: true,
vault: prod.vault || null,
server,
}).catch(() => { /* swallowing — we're dying anyway */ });
// Block here so the process keeps the lock until the daemon kills us.
// 90s ceiling so the lock doesn't leak if the recreate truly fails.
await new Promise(r => setTimeout(r, 90000));
// If we're still alive at this point, the recreate didn't take. Bail.
steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: 'self-recreate timed out — container was not replaced' };
throw new Error('self-recreate did not replace container within 90s');
}
const upRes = await recreateService({
project: composeProject,
deployDir,
services: stateless,
force: true,
vault: prod.vault || null,
server,
});
if (!upRes.ok) { if (!upRes.ok) {
steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: upRes.stderr?.slice(-500) }; steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: upRes.stderr?.slice(-500) || upRes.error };
throw new Error('docker compose up failed for stateless services'); throw new Error('recreateService failed for stateless services');
} }
// POST-DEPLOY VERIFY — catches false-success (see helper comment above). // POST-DEPLOY VERIFY — catches false-success (see helper comment above).
const verify = await verifyStatelessRecreated(server, deployDir, stateless, deployStartTs); const verify = await verifyStatelessRecreated(server, deployDir, stateless, deployStartTs);
@ -618,10 +775,20 @@ ${detail}`);
} }
} }
if (stateful.length > 0) { if (stateful.length > 0) {
const upRes = await runOnServer(server, `cd ${deployDir} && ${envPrefix}${kvPrefix} docker compose up -d --remove-orphans ${stateful.join(' ')}`, { timeout: 300000 }); // Stateful services: start if not running but don't force-recreate
// (db/redis must keep their volume + connection state).
const composeProject = path.basename(deployDir);
const upRes = await recreateService({
project: composeProject,
deployDir,
services: stateful,
force: false,
vault: prod.vault || null,
server,
});
if (!upRes.ok) { if (!upRes.ok) {
steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: upRes.stderr?.slice(-500) }; steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: upRes.stderr?.slice(-500) || upRes.error };
throw new Error('docker compose up failed for stateful services'); throw new Error('recreateService failed for stateful services');
} }
} }
} }
@ -782,11 +949,25 @@ async function rollback(appName) {
const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune ${remote} && git checkout ${tag}`, { timeout: 60000 }); const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune ${remote} && git checkout ${tag}`, { timeout: 60000 });
if (!checkoutRes.ok) throw new Error(`Checkout ${tag} failed: ${checkoutRes.stderr}`); if (!checkoutRes.ok) throw new Error(`Checkout ${tag} failed: ${checkoutRes.stderr}`);
// Rebuild and restart // Rebuild + recreate via transient-container pattern (consistent with deploy()).
// Build runs via runOnServer (local exec when server=bruno); the recreate uses
// the transient docker:cli so kua-deploy can roll back itself reliably.
const kvPrefix = prod.vault const kvPrefix = prod.vault
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --` ? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
: ''; : '';
await runOnServer(server, `cd ${deployDir} && ${composeEnvPrefix(server)}${kvPrefix} docker compose up -d --force-recreate --build`, { timeout: 600000 }); const buildRes = await runOnServer(server, `cd ${deployDir} && ${composeEnvPrefix(server)}${kvPrefix} docker compose build`, { timeout: 600000 });
if (!buildRes.ok) throw new Error(`rollback build failed: ${buildRes.stderr?.slice(-500)}`);
// Recreate all services for the rollback target.
const svcList = (await runOnServer(server, `cd ${deployDir} && docker compose config --services`)).stdout.split('\n').filter(Boolean);
const recreateRes = await recreateService({
project: path.basename(deployDir),
deployDir,
services: svcList,
force: true,
vault: prod.vault || null,
server,
});
if (!recreateRes.ok) throw new Error(`rollback recreate failed: ${recreateRes.stderr?.slice(-500) || recreateRes.error}`);
// Health check // Health check
let healthy = true; let healthy = true;
@ -982,6 +1163,12 @@ fastify.post('/webhook/forgejo', async (request, reply) => {
// --- Apps --- // --- Apps ---
// GET /api/v1/apps/registry — returns the in-memory registry object as-is.
// Lets kua-mcp-core discover all apps at startup over HTTP instead of relying
// on a filesystem path that may not resolve inside its container.
fastify.get('/api/v1/apps/registry', async () => registry);
// List all apps // List all apps
fastify.get('/api/v1/apps', async () => { fastify.get('/api/v1/apps', async () => {
const results = []; const results = [];
@ -1125,14 +1312,23 @@ fastify.get('/api/v1/apps/:app/runtime-status', async (request, reply) => {
const server = prod.server || cfg.deploy_server || 'bruno'; const server = prod.server || cfg.deploy_server || 'bruno';
const deployDir = prod.deploy_dir || cfg.repo_dir; const deployDir = prod.deploy_dir || cfg.repo_dir;
if (!deployDir) return reply.code(400).send({ error: 'no deploy_dir for app' }); if (!deployDir) return reply.code(400).send({ error: 'no deploy_dir for app' });
// kua-vault wrap — compose files in this org use ${VAR} interpolations
// backed by vault-injected env (KUA_SESSIONS_ADMIN_TOKEN, AGENT_API_KEY,
// STRIPE_*, etc). Without the wrap, `docker compose config --services`
// emits empty/error output for most apps, which made /runtime-status
// return services: [] (the original symptom the coordinator session
// diagnosed). Mirrors the canonical kvPrefix pattern from deploy().
const kvPrefix = prod.vault
? `kua-vault run --project ${prod.vault.project} --env ${prod.vault.env} --`
: '';
try { try {
const svcRes = await runOnServer(server, `cd ${deployDir} && docker compose config --services`); const svcRes = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose config --services`);
const services = (svcRes.stdout || '').split('\n').filter(Boolean); const services = (svcRes.stdout || '').split('\n').filter(Boolean);
const out = []; const out = [];
let anyStale = false; let anyStale = false;
for (const svc of services) { for (const svc of services) {
const exp = await runOnServer(server, `cd ${deployDir} && docker compose images --quiet ${svc} 2>/dev/null | head -1`); const exp = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose images --quiet ${svc} 2>/dev/null | head -1`);
const cid = await runOnServer(server, `cd ${deployDir} && docker compose ps --quiet ${svc} 2>/dev/null | head -1`); const cid = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose ps --quiet ${svc} 2>/dev/null | head -1`);
const expectedSha = (exp.stdout || '').trim(); const expectedSha = (exp.stdout || '').trim();
const containerId = (cid.stdout || '').trim(); const containerId = (cid.stdout || '').trim();
let running_image_sha = null, started_at = null, state = null, health = null; let running_image_sha = null, started_at = null, state = null, health = null;
@ -1144,7 +1340,8 @@ fastify.get('/api/v1/apps/:app/runtime-status', async (request, reply) => {
state = parts[2] || null; state = parts[2] || null;
health = parts[3] || null; health = parts[3] || null;
} }
const stale = !!expectedSha && !!running_image_sha && expectedSha !== running_image_sha; const stripSha = (s) => (s || '').replace(/^sha256:/, '');
const stale = !!expectedSha && !!running_image_sha && stripSha(expectedSha) !== stripSha(running_image_sha);
if (stale) anyStale = true; if (stale) anyStale = true;
out.push({ service: svc, container_id: containerId || null, expected_image_sha: expectedSha || null, running_image_sha, started_at, state, health, stale }); out.push({ service: svc, container_id: containerId || null, expected_image_sha: expectedSha || null, running_image_sha, started_at, state, health, stale });
} }
@ -1236,15 +1433,96 @@ process.on('unhandledRejection', (reason, promise) => {
fastify.log.error({ reason, promise }, 'Unhandled promise rejection — investigate immediately'); fastify.log.error({ reason, promise }, 'Unhandled promise rejection — investigate immediately');
}); });
// completeSelfRecreate — on startup, if the previous kua-deploy left progress
// in "phase: self_recreate_pending" state, this NEW instance verifies the
// recreate landed (image SHA matches what was built, container is fresh and
// running) and flips progress to succeeded or failed. Without this, release-app
// would poll /progress forever after a self-deploy.
//
// No-op when the progress file is missing/unreadable or the phase is anything
// other than self_recreate_pending. Returns nothing; the outcome is written
// back to the progress file and logged.
async function completeSelfRecreate() {
  const progressFile = path.join(DATA_DIR, `progress-${SELF_APP_NAME}.json`);
  let progress;
  try {
    progress = JSON.parse(await fs.readFile(progressFile, 'utf-8'));
  } catch {
    return; /* no progress file yet (or not parseable) */
  }
  // `?.` guards against a file that parses to null / a non-object.
  if (progress?.phase !== 'self_recreate_pending') return;

  const expectedSha = progress.self_recreate_expected_image || null;
  const recreateStartedAt = progress.self_recreate_started_at
    ? new Date(progress.self_recreate_started_at)
    : null;

  // Check our own container via docker
  let runningSha = null;
  let startedAtStr = null;
  let state = null;
  try {
    const insp = await run(`docker inspect ${SELF_APP_NAME} --format '{{.Image}}|{{.State.StartedAt}}|{{.State.Status}}'`);
    if (insp.ok) {
      // Trim before splitting (as the other inspect call sites do) so a
      // trailing newline cannot end up in `state` and break the
      // `state === 'running'` comparison below.
      const parts = (insp.stdout || '').trim().split('|');
      runningSha = parts[0] || null;
      startedAtStr = parts[1] || null;
      state = parts[2] || null;
    }
  } catch {
    // Inspect failed: the fields stay null, so the verification below
    // resolves to "failed" rather than leaving the marker pending.
  }

  // Normalize: strip sha256: prefix from both sides for tolerant comparison.
  const normSha = s => (s || '').replace(/^sha256:/, '').trim();
  const imageMatches = expectedSha && runningSha && normSha(expectedSha) === normSha(runningSha);
  const freshlyStarted = startedAtStr && recreateStartedAt && new Date(startedAtStr) >= recreateStartedAt;
  const ok = !!(imageMatches && freshlyStarted && state === 'running');

  const verify = {
    expected_image_sha: expectedSha,
    running_image_sha: runningSha,
    started_at: startedAtStr,
    recreate_started_at: progress.self_recreate_started_at,
    state,
    image_matches: !!imageMatches,
    freshly_started: !!freshlyStarted,
    completed_at: new Date().toISOString(),
  };

  // Mark the deploy step done, append the verify, flip phase + status
  const steps = Array.isArray(progress.steps) ? progress.steps : [];
  const deployIdx = steps.findIndex(s => s.step === 'deploy');
  if (deployIdx >= 0) {
    steps[deployIdx] = ok
      ? { ...steps[deployIdx], status: 'done', verify, note: 'self-recreate completed; verified by NEW kua-deploy on startup' }
      : { ...steps[deployIdx], status: 'failed', error: 'self-recreate verify failed', verify };
  }

  const updated = {
    ...progress,
    phase: ok ? 'succeeded' : 'failed',
    status: 'done',
    current_step: ok ? 'done' : 'deploy',
    steps,
    updated_at: Math.floor(Date.now() / 1000),
    self_recreate_completed: true,
  };
  // Clear the marker fields so a later startup does not re-run the verify.
  delete updated.self_recreate_expected_image;
  delete updated.self_recreate_started_at;
  delete updated.self_recreate_stateless;

  await fs.writeFile(progressFile, JSON.stringify(updated, null, 2));
  fastify.log.info({ ok, verify }, 'self-recreate completed and verified');
}
const start = async () => { const start = async () => {
try { try {
// Fail fast if webhook secret is missing in production // WEBHOOK_SECRET is optional — the Forgejo webhook path is now retired in
// favour of the admin API (/api/v1/apps/:app/deploy). The handler remains
// but returns 503 when the secret is absent, which is safe.
if (!DEV_MODE && !WEBHOOK_SECRET) { if (!DEV_MODE && !WEBHOOK_SECRET) {
throw new Error('KUA_DEPLOY_WEBHOOK_SECRET must be set in production — refusing to start'); fastify.log.warn('KUA_DEPLOY_WEBHOOK_SECRET not set — /webhook/forgejo will return 503. Set the secret to re-enable Forgejo push triggers.');
} }
await loadRegistry(); await loadRegistry();
await loadHistory(); await loadHistory();
await fs.mkdir(DATA_DIR, { recursive: true }); await fs.mkdir(DATA_DIR, { recursive: true });
// Self-recreate recovery — handles handoff from previous instance that was
// killed mid-recreate during a kua-deploy self-deploy. Idempotent.
try { await completeSelfRecreate(); } catch (e) {
fastify.log.error({ err: e }, 'completeSelfRecreate failed (non-fatal)');
}
await fastify.listen({ port: 3200, host: '0.0.0.0' }); await fastify.listen({ port: 3200, host: '0.0.0.0' });
} catch (err) { } catch (err) {
fastify.log.error(err); fastify.log.error(err);

View File

@ -0,0 +1,61 @@
// Unit tests for registry loading — validates the path resolution and
// structural invariants of deploy-registry.json.
// Run with: node --test test/registry-loader.test.js
import assert from 'node:assert/strict';
import { test } from 'node:test';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// On Bruno the registry is bind-mounted into the container at /app/deploy-registry.json
// (process.cwd() = /app). Locally (dev on gal), it lives in coder-core.
// Accept an override via env var so CI / test runners can point at any copy.

// Pick the registry path: an explicit override wins; otherwise the first
// candidate that exists on disk; otherwise the first candidate, so the
// "registry not found at …" assertion names the primary location.
// (`a ?? b ?? c` over path.join() results can never reach `c`, because
// path.join() always returns a string.)
function pickRegistryFile(override, candidates, exists = fs.existsSync) {
  return override ?? candidates.find((candidate) => exists(candidate)) ?? candidates[0];
}

const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
const REGISTRY_FILE = pickRegistryFile(process.env.REGISTRY_FILE, [
  path.join(ROOT, 'deploy-registry.json'), // container bind-mount
  path.join(ROOT, '../../coder-core/services/kua-deploy/deploy-registry.json'), // local dev fallback
]);
// ------------------------------------------------------------------
// Bug #7 regression: DEPLOY_REGISTRY_PATH must resolve to a real file.
// In kua-mcp-core the old ROOT was computed via __dirname + "../../../"
// which overshoots to "/" inside the container. Here we validate the
// kua-deploy-native path (process.cwd() relative) and the mcp-core
// path (CODER_CORE_ROOT env-var-backed).
// ------------------------------------------------------------------
// Parsed registry, populated by the first test and shared by the rest.
let registry;

// Returns registry.apps after asserting it is a real (non-null, non-array)
// object. `typeof null === 'object'`, so a bare typeof check is not enough;
// going through this guard also makes dependent tests fail with a clear
// assertion instead of a TypeError when the load test did not succeed.
function requireApps() {
  const apps = registry?.apps;
  assert.ok(
    apps !== null && typeof apps === 'object' && !Array.isArray(apps),
    'registry.apps must be an object',
  );
  return apps;
}

test('registry file exists at expected path', () => {
  assert.ok(fs.existsSync(REGISTRY_FILE), `registry not found at ${REGISTRY_FILE}`);
  const raw = fs.readFileSync(REGISTRY_FILE, 'utf-8');
  registry = JSON.parse(raw);
});

test('registry has apps object', () => {
  const count = Object.keys(requireApps()).length;
  assert.ok(count >= 5, `expected at least 5 registered apps, got ${count}`);
});

test('every app has required fields', () => {
  for (const [name, cfg] of Object.entries(requireApps())) {
    assert.ok(typeof cfg.repo_dir === 'string' && cfg.repo_dir.length > 0,
      `${name}: repo_dir must be a non-empty string`);
    assert.ok(typeof cfg.deploy_mode === 'string',
      `${name}: deploy_mode must be present`);
    assert.ok(cfg.production && typeof cfg.production === 'object',
      `${name}: production config must be present`);
  }
});

test('no app uses webhook deploy_mode (webhook path is retired)', () => {
  const webhookApps = Object.entries(requireApps())
    .filter(([, cfg]) => cfg.deploy_mode === 'webhook')
    .map(([name]) => name);
  assert.deepEqual(webhookApps, [],
    `These apps still have deploy_mode=webhook (retire them to direct): ${webhookApps.join(', ')}`);
});

test('kua-deploy is registered in its own registry', () => {
  const apps = requireApps();
  assert.ok('kua-deploy' in apps, 'kua-deploy must be in deploy-registry.json');
  assert.equal(apps['kua-deploy'].deploy_mode, 'direct');
});

View File

@ -0,0 +1,53 @@
// Unit tests for SHA normalization in verify paths.
// Run with: node --test test/sha-comparison.test.js
import assert from 'node:assert/strict';
import { test } from 'node:test';
// Local copy of the digest normalizer that server.js applies in
// verifyStatelessRecreated() and runtime-status. Keep the two in sync.
// Falsy input is treated as an empty string; only a leading "sha256:" is removed.
const stripSha = (s) => {
  const text = s || '';
  return text.replace(/^sha256:/, '');
};

// ------------------------------------------------------------------
// Bug #3 regression: docker compose images returns bare hex,
// docker inspect .Image returns sha256:<hex>. They must compare equal.
// ------------------------------------------------------------------
const SAMPLE_DIGEST = 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2';

test('sha: bare hex == bare hex', () => {
  const normalized = stripSha(SAMPLE_DIGEST);
  assert.equal(normalized, stripSha(SAMPLE_DIGEST));
  assert.ok(normalized === stripSha(SAMPLE_DIGEST));
});

test('sha: sha256-prefixed == bare hex (the failing case before the fix)', () => {
  const withPrefix = `sha256:${SAMPLE_DIGEST}`;
  // Raw comparison is what the buggy code did.
  assert.notEqual(withPrefix, SAMPLE_DIGEST, 'raw strings are indeed unequal — this is the bug');
  // Normalized comparison is what the fixed code does.
  assert.equal(stripSha(withPrefix), stripSha(SAMPLE_DIGEST), 'normalized strings must be equal');
});

test('sha: both sha256-prefixed', () => {
  const left = `sha256:${SAMPLE_DIGEST}`;
  const right = `sha256:${SAMPLE_DIGEST}`;
  assert.equal(stripSha(left), stripSha(right));
});

test('sha: different digests stay different after normalization', () => {
  const first = 'sha256:aaaa0000000000000000000000000000000000000000000000000000000000001111';
  const second = 'sha256:bbbb0000000000000000000000000000000000000000000000000000000000002222';
  assert.notEqual(stripSha(first), stripSha(second));
});

test('sha: empty/null input returns empty string', () => {
  for (const blank of ['', null, undefined]) {
    assert.equal(stripSha(blank), '');
  }
});

test('sha: imageMatch logic mirrors server.js verifyStatelessRecreated', () => {
  const expectedSha = 'a0845a6c5772e01234567890abcdef01234567890abcdef01234567890abcdef01';
  const actualSha = `sha256:${expectedSha}`;
  const imageMatch = Boolean(expectedSha) && stripSha(actualSha) === stripSha(expectedSha);
  assert.ok(imageMatch, 'imageMatch must be true when digests are the same modulo sha256: prefix');
});