feat(engine): ensure-checkout before deploy/rollback + rollback to_ref
- ensureCheckout(server, deployDir, repoUrl): clone-if-missing. It runs inside the per-app acquireLock and is called before deploy()'s git_pull step and before rollback()'s cd. It is a no-op when .git is present (asserting origin == repo_url if one is set), requires the registry repo_url when the checkout is absent, and refuses to clobber a non-empty non-repo directory.
- rollback(appName, opts): opts.to_ref (validated against /^[A-Za-z0-9._/-]+$/ and rejected before any mutation) checks out that ref; the default is the previous successful tag from deployHistory. The fetch now uses --prune --tags.
- Route POST /api/v1/apps/:app/rollback reads body.to_ref.
This commit is contained in:
parent
58733939e2
commit
e33b1e96cb
133
server.js
133
server.js
|
|
@ -397,6 +397,49 @@ async function runOnServer(server, cmd, opts = {}) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureCheckout — guarantee deployDir is a usable git checkout before deploy/rollback.
// The engine historically assumed the repo already existed (`cd ${deployDir} && git fetch`,
// see deploy()/rollback()); a registered app whose deploy_dir was never cloned failed at the
// very first `cd`. This makes a first-time API deploy self-heal by cloning from the
// registry-declared `repo_url`. It is a NO-OP for existing checkouts, so the conforming apps
// (which carry no `repo_url`) keep working untouched. Runs inside the caller's per-app lock.
// Clone source is NOT derived from the app name — origins are heterogeneous (Forgejo :2222,
// scp-style, and at least one GitHub repo whose name differs from the app) — so it MUST come
// from the registry. The caller still performs its own branch/tag checkout afterwards.
//
// Parameters:
//   server    — passed through unchanged to runOnServer().
//   deployDir — directory on the server that should hold the checkout.
//   repoUrl   — optional clone source; when set on an existing checkout it is compared
//               (exact string match) against remote.origin.url.
// Returns { cloned: false } for an existing checkout, { cloned: true } after a fresh clone.
// Throws when: a probe yields neither expected sentinel, the existing origin differs from
// repoUrl, the checkout is missing and repoUrl is unset, deployDir is a non-empty non-repo
// directory, the clone fails, or the cloned origin does not equal repoUrl.
async function ensureCheckout(server, deployDir, repoUrl) {
  const probe = await runOnServer(server, `test -e ${deployDir}/.git && echo REPO || echo MISSING`);
  const probeState = (probe.stdout || '').trim();
  // The probe command always echoes one of the two sentinels when it actually runs. Anything
  // else means the command itself did not execute as intended (NOTE(review): presumably a
  // transport failure surfaced by runOnServer as empty stdout — confirm), so fail loud rather
  // than misreport the checkout as missing.
  if (probeState !== 'REPO' && probeState !== 'MISSING') {
    throw new Error(`ensure-checkout: could not probe ${deployDir} (unexpected output ${JSON.stringify(probeState)}): ${probe.stderr || 'no stderr'}`);
  }

  if (probeState === 'REPO') {
    // Already a checkout — leave branch/tag selection to the caller. Optionally assert origin.
    if (repoUrl) {
      const originRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`);
      const actual = (originRes.stdout || '').trim();
      // An empty origin (no remote configured) is tolerated; only a differing one is refused.
      if (actual && actual !== repoUrl) {
        throw new Error(`ensure-checkout: ${deployDir} origin (${actual}) != registry repo_url (${repoUrl}) — refusing to deploy a mismatched checkout`);
      }
    }
    return { cloned: false };
  }

  if (!repoUrl) {
    throw new Error(`ensure-checkout: ${deployDir} is not a git checkout and no "repo_url" is set in the registry — cannot clone. Add repo_url to the app's registry entry (or create the checkout manually).`);
  }

  // Refuse to clobber a non-empty, non-repo directory.
  const dirState = await runOnServer(server, `if [ -e ${deployDir} ] && [ -n "$(ls -A ${deployDir} 2>/dev/null)" ]; then echo NONEMPTY; else echo OK; fi`);
  const dirVerdict = (dirState.stdout || '').trim();
  if (dirVerdict === 'NONEMPTY') {
    throw new Error(`ensure-checkout: ${deployDir} exists, is not a git repo, and is non-empty — refusing to clobber. Inspect/remove it manually.`);
  }
  // Same reasoning as the first probe: only an explicit OK authorises the clone.
  if (dirVerdict !== 'OK') {
    throw new Error(`ensure-checkout: could not inspect ${deployDir} before cloning (unexpected output ${JSON.stringify(dirVerdict)}): ${dirState.stderr || 'no stderr'}`);
  }

  const cloneRes = await runOnServer(server, `git clone ${repoUrl} ${deployDir}`, { timeout: 180000 });
  if (!cloneRes.ok) {
    throw new Error(`ensure-checkout: git clone ${repoUrl} -> ${deployDir} failed: ${cloneRes.stderr}`);
  }

  // Verify the clone landed and origin matches what we asked for.
  const verifyRes = await runOnServer(server, `git -C ${deployDir} config --get remote.origin.url || true`);
  const landed = (verifyRes.stdout || '').trim();
  if (landed !== repoUrl) {
    throw new Error(`ensure-checkout: cloned ${deployDir} but origin is ${landed || '(none)'} (expected ${repoUrl})`);
  }
  return { cloned: true };
}
|
||||||
|
|
||||||
// --- kua-db integration ---
|
// --- kua-db integration ---
|
||||||
async function kuaDbSafeCheck(app) {
|
async function kuaDbSafeCheck(app) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -573,6 +616,16 @@ async function deploy(appName, opts = {}) {
|
||||||
// Step 2: Git pull on production server
|
// Step 2: Git pull on production server
|
||||||
steps.push({ step: 'git_pull', status: 'running' });
|
steps.push({ step: 'git_pull', status: 'running' });
|
||||||
await markProgressPhase(appName, 'git_pull', { action, triggered_by: opts.triggered_by || 'api', steps });
|
await markProgressPhase(appName, 'git_pull', { action, triggered_by: opts.triggered_by || 'api', steps });
|
||||||
|
// ensure-checkout (TUBE step 1): self-heal a missing deploy_dir by cloning from the
|
||||||
|
// registry repo_url, so a first-time API deploy doesn't die at the `cd` below. No-op
|
||||||
|
// for existing checkouts. Inside the per-app lock acquired above.
|
||||||
|
try {
|
||||||
|
const ec = await ensureCheckout(server, deployDir, app.repo_url);
|
||||||
|
if (ec.cloned) steps[steps.length - 1].cloned = true;
|
||||||
|
} catch (err) {
|
||||||
|
steps[steps.length - 1] = { step: 'git_pull', status: 'failed', error: err.message };
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
const fetchCmd = `cd ${deployDir} && git fetch --prune ${remote}`;
|
const fetchCmd = `cd ${deployDir} && git fetch --prune ${remote}`;
|
||||||
const fetchRes = await runOnServer(server, fetchCmd, { timeout: 60000 });
|
const fetchRes = await runOnServer(server, fetchCmd, { timeout: 60000 });
|
||||||
if (!fetchRes.ok) {
|
if (!fetchRes.ok) {
|
||||||
|
|
@ -636,8 +689,8 @@ ${detail}`);
|
||||||
if (verifyMode === 'off') return { ok: true, results: [], skipped: true };
|
if (verifyMode === 'off') return { ok: true, results: [], skipped: true };
|
||||||
const results = [];
|
const results = [];
|
||||||
for (const svc of services) {
|
for (const svc of services) {
|
||||||
const exp = await runOnServer(server, `cd ${deployDir} && docker compose images --quiet ${svc} 2>/dev/null | head -1`);
|
const exp = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose images --quiet ${svc} 2>/dev/null | head -1`);
|
||||||
const cid = await runOnServer(server, `cd ${deployDir} && docker compose ps --quiet ${svc} 2>/dev/null | head -1`);
|
const cid = await runOnServer(server, `cd ${deployDir} && ${kvPrefix} docker compose ps --quiet ${svc} 2>/dev/null | head -1`);
|
||||||
const expectedSha = (exp.stdout || '').trim();
|
const expectedSha = (exp.stdout || '').trim();
|
||||||
const containerId = (cid.stdout || '').trim();
|
const containerId = (cid.stdout || '').trim();
|
||||||
if (!containerId) {
|
if (!containerId) {
|
||||||
|
|
@ -647,7 +700,8 @@ ${detail}`);
|
||||||
const insp = await runOnServer(server, `docker inspect --format '{{.Image}}|{{.State.StartedAt}}' ${containerId}`);
|
const insp = await runOnServer(server, `docker inspect --format '{{.Image}}|{{.State.StartedAt}}' ${containerId}`);
|
||||||
const [actualSha, startedAtStr] = (insp.stdout || '').trim().split('|');
|
const [actualSha, startedAtStr] = (insp.stdout || '').trim().split('|');
|
||||||
const startedAt = new Date(startedAtStr || 0);
|
const startedAt = new Date(startedAtStr || 0);
|
||||||
const imageMatch = !!expectedSha && actualSha === expectedSha;
|
const stripSha = (s) => (s || '').replace(/^sha256:/, '');
|
||||||
|
const imageMatch = !!expectedSha && stripSha(actualSha) === stripSha(expectedSha);
|
||||||
const freshlyStarted = !isNaN(startedAt) && startedAt >= deployStartTs;
|
const freshlyStarted = !isNaN(startedAt) && startedAt >= deployStartTs;
|
||||||
results.push({
|
results.push({
|
||||||
service: svc, ok: imageMatch && freshlyStarted,
|
service: svc, ok: imageMatch && freshlyStarted,
|
||||||
|
|
@ -673,6 +727,15 @@ ${detail}`);
|
||||||
const svcRes = await runOnServer(server, `cd ${deployDir} && docker compose config --services`);
|
const svcRes = await runOnServer(server, `cd ${deployDir} && docker compose config --services`);
|
||||||
const allServices = svcRes.stdout.split('\n').filter(Boolean);
|
const allServices = svcRes.stdout.split('\n').filter(Boolean);
|
||||||
|
|
||||||
|
// Fail-loud (TUBE step 1): if compose resolved NO services, the recreate+verify
|
||||||
|
// block below is skipped entirely and the deploy would silently report `done`
|
||||||
|
// having recreated nothing (a false-success path). Refuse it.
|
||||||
|
if (allServices.length === 0) {
|
||||||
|
steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: 'docker compose config returned no services — nothing to recreate' };
|
||||||
|
await markProgressPhase(appName, 'deploy', { action, triggered_by: opts.triggered_by || 'api', steps, commit: deployCommit });
|
||||||
|
throw new Error('deploy: docker compose config returned no services — refusing to report success without recreating anything');
|
||||||
|
}
|
||||||
|
|
||||||
let stateful = prod.stateful_services || [];
|
let stateful = prod.stateful_services || [];
|
||||||
if (stateful.length === 0) {
|
if (stateful.length === 0) {
|
||||||
// Auto-detect stateful services from image names so db/redis are never force-recreated
|
// Auto-detect stateful services from image names so db/redis are never force-recreated
|
||||||
|
|
@ -910,7 +973,7 @@ ${detail}`);
|
||||||
// ROLLBACK ENGINE
|
// ROLLBACK ENGINE
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
async function rollback(appName) {
|
async function rollback(appName, opts = {}) {
|
||||||
const app = getApp(appName);
|
const app = getApp(appName);
|
||||||
if (!app) throw new Error(`Unknown app: ${appName}`);
|
if (!app) throw new Error(`Unknown app: ${appName}`);
|
||||||
|
|
||||||
|
|
@ -919,13 +982,25 @@ async function rollback(appName) {
|
||||||
const deployDir = prod.deploy_dir;
|
const deployDir = prod.deploy_dir;
|
||||||
const remote = app.git_remote || 'origin';
|
const remote = app.git_remote || 'origin';
|
||||||
|
|
||||||
// Find the previous successful deploy
|
// Determine the rollback target. An explicit to_ref (tag/commit/branch, from MCP or admin)
|
||||||
|
// overrides the default "previous successful tagged deploy" behavior — this preserves the
|
||||||
|
// MCP deploy.rollback contract (arbitrary to_ref) now that it routes through this endpoint
|
||||||
|
// instead of the legacy systemd path. Validate to_ref to keep it out of shell-injection
|
||||||
|
// range (it interpolates into `git checkout` below).
|
||||||
const history = deployHistory[appName] || [];
|
const history = deployHistory[appName] || [];
|
||||||
const current = history[0];
|
const current = history[0];
|
||||||
|
let tag;
|
||||||
|
if (opts.to_ref) {
|
||||||
|
if (!/^[A-Za-z0-9._/-]+$/.test(opts.to_ref)) {
|
||||||
|
throw new Error(`Invalid to_ref ${JSON.stringify(opts.to_ref)} — must match [A-Za-z0-9._/-]+`);
|
||||||
|
}
|
||||||
|
tag = opts.to_ref;
|
||||||
|
} else {
|
||||||
const previous = history.find((d, i) => i > 0 && d.result === 'success' && d.tag && d.tag !== 'untagged');
|
const previous = history.find((d, i) => i > 0 && d.result === 'success' && d.tag && d.tag !== 'untagged');
|
||||||
|
|
||||||
if (!previous) {
|
if (!previous) {
|
||||||
return { app: appName, result: 'no_rollback_target', message: 'No previous successful deploy with a tag found' };
|
return { app: appName, result: 'no_rollback_target', message: 'No previous successful deploy with a tag found (pass to_ref to roll back to a specific tag/commit)' };
|
||||||
|
}
|
||||||
|
tag = previous.tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!acquireLock(appName)) {
|
if (!acquireLock(appName)) {
|
||||||
|
|
@ -933,7 +1008,6 @@ async function rollback(appName) {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const tag = previous.tag;
|
|
||||||
await writeProgress(appName, {
|
await writeProgress(appName, {
|
||||||
action: 'rollback',
|
action: 'rollback',
|
||||||
triggered_by: 'api',
|
triggered_by: 'api',
|
||||||
|
|
@ -944,8 +1018,12 @@ async function rollback(appName) {
|
||||||
rolled_back_from: current?.tag || current?.commit || 'unknown',
|
rolled_back_from: current?.tag || current?.commit || 'unknown',
|
||||||
});
|
});
|
||||||
|
|
||||||
// Checkout the previous tag on production
|
// ensure-checkout (TUBE step 1): rollback shares deploy()'s `cd ${deployDir}` assumption,
|
||||||
const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune ${remote} && git checkout ${tag}`, { timeout: 60000 });
|
// so the deploy_dir must exist here too. No-op for existing checkouts.
|
||||||
|
await ensureCheckout(server, deployDir, app.repo_url);
|
||||||
|
|
||||||
|
// Checkout the rollback target on production (--tags so an explicit to_ref tag resolves).
|
||||||
|
const checkoutRes = await runOnServer(server, `cd ${deployDir} && git fetch --prune --tags ${remote} && git checkout ${tag}`, { timeout: 60000 });
|
||||||
if (!checkoutRes.ok) throw new Error(`Checkout ${tag} failed: ${checkoutRes.stderr}`);
|
if (!checkoutRes.ok) throw new Error(`Checkout ${tag} failed: ${checkoutRes.stderr}`);
|
||||||
|
|
||||||
// Rebuild + recreate via transient-container pattern (consistent with deploy()).
|
// Rebuild + recreate via transient-container pattern (consistent with deploy()).
|
||||||
|
|
@ -1162,6 +1240,29 @@ fastify.post('/webhook/forgejo', async (request, reply) => {
|
||||||
|
|
||||||
// --- Apps ---
|
// --- Apps ---
|
||||||
|
|
||||||
|
// Registry dump endpoint: responds with the in-memory `registry` object as-is.
// Consumed by kua-mcp-core at startup for app discovery, so it does not have to rely
// on a filesystem path that may not resolve inside its container.
fastify.get('/api/v1/apps/registry', async () => registry);
|
||||||
|
|
||||||
|
// Hot-reload the deploy registry from disk WITHOUT restarting the engine (TUBE step 3).
// kua-deploy caches the registry at startup, so without this a newly registered app would
// need `docker restart kua-deploy`. Per the original author's note, loadRegistry()
// JSON-parses the file and throws BEFORE reassigning `registry` on a parse failure, so the
// in-memory last-good registry survives a bad file. Auth is handled by the global
// onRequest hook.
//
// Responses:
//   200 { ok: true, apps: <count after>, before: <count before> }
//   500 { ok: false, error, apps: <count before> } when loadRegistry() rejects
fastify.post('/api/v1/registry/reload', async (request, reply) => {
  // Number of apps currently held in memory; tolerates a registry without an `apps` key.
  const countApps = () => Object.keys(registry.apps || {}).length;

  const before = countApps();
  try {
    await loadRegistry();
  } catch (err) {
    const error = `registry reload failed (kept last-good ${before} apps): ${err.message}`;
    return reply.code(500).send({ ok: false, error, apps: before });
  }

  const after = countApps();
  fastify.log.info({ before, after }, 'registry reloaded via /api/v1/registry/reload');
  return { ok: true, apps: after, before };
});
|
||||||
|
|
||||||
// List all apps
|
// List all apps
|
||||||
fastify.get('/api/v1/apps', async () => {
|
fastify.get('/api/v1/apps', async () => {
|
||||||
const results = [];
|
const results = [];
|
||||||
|
|
@ -1257,9 +1358,11 @@ fastify.post('/api/v1/apps/:app/rebuild', async (request, reply) => {
|
||||||
return { triggered: true, app };
|
return { triggered: true, app };
|
||||||
});
|
});
|
||||||
|
|
||||||
// Rollback
|
// Rollback. Optional body { to_ref } rolls back to a specific tag/commit/branch;
|
||||||
|
// omitted = previous successful tagged deploy.
|
||||||
fastify.post('/api/v1/apps/:app/rollback', async (request) => {
|
fastify.post('/api/v1/apps/:app/rollback', async (request) => {
|
||||||
return await rollback(request.params.app);
|
const to_ref = request.body && typeof request.body.to_ref === 'string' ? request.body.to_ref : undefined;
|
||||||
|
return await rollback(request.params.app, { to_ref });
|
||||||
});
|
});
|
||||||
|
|
||||||
// --- Deploy Progress ---
|
// --- Deploy Progress ---
|
||||||
|
|
@ -1502,9 +1605,11 @@ async function completeSelfRecreate() {
|
||||||
|
|
||||||
const start = async () => {
|
const start = async () => {
|
||||||
try {
|
try {
|
||||||
// Fail fast if webhook secret is missing in production
|
// WEBHOOK_SECRET is optional — the Forgejo webhook path is now retired in
|
||||||
|
// favour of the admin API (/api/v1/apps/:app/deploy). The handler remains
|
||||||
|
// but returns 503 when the secret is absent, which is safe.
|
||||||
if (!DEV_MODE && !WEBHOOK_SECRET) {
|
if (!DEV_MODE && !WEBHOOK_SECRET) {
|
||||||
throw new Error('KUA_DEPLOY_WEBHOOK_SECRET must be set in production — refusing to start');
|
fastify.log.warn('KUA_DEPLOY_WEBHOOK_SECRET not set — /webhook/forgejo will return 503. Set the secret to re-enable Forgejo push triggers.');
|
||||||
}
|
}
|
||||||
await loadRegistry();
|
await loadRegistry();
|
||||||
await loadHistory();
|
await loadHistory();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue