diff --git a/server.js b/server.js
index fd1e2b9..52e4c24 100644
--- a/server.js
+++ b/server.js
@@ -9,6 +9,10 @@ import { promisify } from 'util';
 const exec = promisify(execCb);
 const execFile = promisify(execFileCb);
 
+// The app name this kua-deploy reports itself as in deploy-registry.json.
+// Used to detect "I am being deployed" cases so we can do the self-recreate handoff.
+const SELF_APP_NAME = 'kua-deploy';
+
 // Input validation
 const SAFE_MESSAGE_RE = /^[a-zA-Z0-9 _.,!?:;@#/()[\]{}<>='"+*&^%$~`|-]{1,500}$/;
 function validateMessage(msg) {
@@ -696,6 +700,52 @@ ${detail}`);
     // apps (not just kua-deploy) and replaces the old runOnServer + kua-vault-run
     // shell prefix approach.
     const composeProject = path.basename(deployDir);
+
+    // SELF-RECREATE HANDOFF — when the target IS kua-deploy on the same host,
+    // this process is about to be killed. We pre-mark progress with a
+    // self-recreate-pending sentinel so the NEW kua-deploy can pick up the
+    // verification on startup. See completeSelfRecreate() near init.
+    const selfRecreate = appName === SELF_APP_NAME && isLocal(server) && stateless.includes(SELF_APP_NAME);
+    if (selfRecreate) {
+      // Capture the freshly-built image SHA for post-restart verification.
+      // NOTE(review): `docker compose images` lists images used by the project's
+      // existing containers — confirm this yields the freshly built image and
+      // not the one the still-running OLD container was created from.
+      const builtSha = (await run(`docker compose -p ${composeProject} -f ${deployDir}/docker-compose.yml images --quiet ${SELF_APP_NAME} 2>/dev/null | head -1`)).stdout.trim() || null;
+      steps[steps.length - 1] = {
+        step: 'deploy',
+        status: 'running',
+        self_recreate: true,
+        note: 'self-recreate handoff — NEW kua-deploy will verify on startup',
+      };
+      await markProgressPhase(appName, 'self_recreate_pending', {
+        action,
+        triggered_by: opts.triggered_by || 'api',
+        steps,
+        commit: deployCommit,
+        self_recreate_expected_image: builtSha,
+        self_recreate_started_at: deployStartTs.toISOString(),
+        self_recreate_stateless: stateless,
+      });
+      // Fire-and-forget recreate. The OLD process is about to die; spawn close
+      // handler may resolve with ok=false because of the kill, which is expected.
+      // We don't throw on its failure — the docker daemon owns the lifecycle now.
+      recreateService({
+        project: composeProject,
+        deployDir,
+        services: stateless,
+        force: true,
+        vault: prod.vault || null,
+        server,
+      }).catch(() => { /* swallowing — we're dying anyway */ });
+      // Block here so the process keeps the lock until the daemon kills us.
+      // 90s ceiling so the lock doesn't leak if the recreate truly fails.
+      await new Promise(r => setTimeout(r, 90000));
+      // If we're still alive at this point, the recreate didn't take. Bail.
+      steps[steps.length - 1] = { step: 'deploy', status: 'failed', error: 'self-recreate timed out — container was not replaced' };
+      throw new Error('self-recreate did not replace container within 90s');
+    }
+
     const upRes = await recreateService({
       project: composeProject,
       deployDir,
@@ -1365,6 +1415,95 @@ process.on('unhandledRejection', (reason, promise) => {
   fastify.log.error({ reason, promise }, 'Unhandled promise rejection — investigate immediately');
 });
 
+// completeSelfRecreate — on startup, if the previous kua-deploy left progress
+// in "phase: self_recreate_pending" state, this NEW instance verifies the
+// recreate landed (image SHA matches what was built, container started no
+// earlier than the recorded start time, container state is running) and flips
+// progress to succeeded or failed. Without this, release-app would poll
+// /progress forever after a self-deploy.
+//
+// A missing or unparseable progress file, or any phase other than
+// self_recreate_pending, is a no-op, so this is safe to call on every startup.
+// Rejects if writing the updated progress file fails.
+async function completeSelfRecreate() {
+  const progressFile = path.join(DATA_DIR, `progress-${SELF_APP_NAME}.json`);
+  let progress;
+  try {
+    progress = JSON.parse(await fs.readFile(progressFile, 'utf-8'));
+  } catch { return; /* no progress file yet */ }
+
+  if (!progress || progress.phase !== 'self_recreate_pending') return;
+
+  const expectedSha = progress.self_recreate_expected_image || null;
+  const recreateStartedAt = progress.self_recreate_started_at
+    ? new Date(progress.self_recreate_started_at)
+    : null;
+
+  // Check our own container via docker
+  let runningSha = null;
+  let startedAtStr = null;
+  let state = null;
+  try {
+    const insp = await run(`docker inspect ${SELF_APP_NAME} --format '{{.Image}}|{{.State.StartedAt}}|{{.State.Status}}'`);
+    if (insp.ok) {
+      // The formatted line ends with a newline; without trim() the last field
+      // would be "running\n" and the state check below could never pass.
+      const parts = insp.stdout.trim().split('|');
+      runningSha = parts[0] || null;
+      startedAtStr = parts[1] || null;
+      state = parts[2] || null;
+    }
+  } catch (err) {
+    // docker unreachable — nothing can be verified, so the fields stay null and
+    // the deploy is recorded as failed below instead of being left pending.
+    fastify.log.warn({ err }, 'self-recreate verify: docker inspect failed');
+  }
+
+  const imageMatches = expectedSha && runningSha && expectedSha.endsWith(runningSha.replace(/^sha256:/, ''));
+  const freshlyStarted = startedAtStr && recreateStartedAt && new Date(startedAtStr) >= recreateStartedAt;
+  const ok = !!(imageMatches && freshlyStarted && state === 'running');
+
+  const verify = {
+    expected_image_sha: expectedSha,
+    running_image_sha: runningSha,
+    started_at: startedAtStr,
+    recreate_started_at: progress.self_recreate_started_at,
+    state,
+    image_matches: !!imageMatches,
+    freshly_started: !!freshlyStarted,
+    completed_at: new Date().toISOString(),
+  };
+
+  // Mark the deploy step done, append the verify, flip phase + status
+  const steps = Array.isArray(progress.steps) ? progress.steps : [];
+  const deployIdx = steps.findIndex(s => s.step === 'deploy');
+  if (deployIdx >= 0) {
+    steps[deployIdx] = ok
+      ? { ...steps[deployIdx], status: 'done', verify, note: 'self-recreate completed; verified by NEW kua-deploy on startup' }
+      : { ...steps[deployIdx], status: 'failed', error: 'self-recreate verify failed', verify };
+  }
+  const updated = {
+    ...progress,
+    phase: ok ? 'succeeded' : 'failed',
+    status: 'done',
+    current_step: ok ? 'done' : 'deploy',
+    steps,
+    updated_at: Math.floor(Date.now() / 1000),
+    self_recreate_completed: true,
+  };
+  // Clear the marker fields
+  delete updated.self_recreate_expected_image;
+  delete updated.self_recreate_started_at;
+  delete updated.self_recreate_stateless;
+
+  await fs.writeFile(progressFile, JSON.stringify(updated, null, 2));
+  if (ok) {
+    fastify.log.info({ ok, verify }, 'self-recreate completed and verified');
+  } else {
+    fastify.log.error({ ok, verify }, 'self-recreate verification failed');
+  }
+}
+
 const start = async () => {
   try {
     // Fail fast if webhook secret is missing in production
@@ -1374,6 +1513,11 @@ const start = async () => {
     await loadRegistry();
     await loadHistory();
     await fs.mkdir(DATA_DIR, { recursive: true });
+    // Self-recreate recovery — handles handoff from previous instance that was
+    // killed mid-recreate during a kua-deploy self-deploy. Idempotent.
+    try { await completeSelfRecreate(); } catch (e) {
+      fastify.log.error({ err: e }, 'completeSelfRecreate failed (non-fatal)');
+    }
     await fastify.listen({ port: 3200, host: '0.0.0.0' });
   } catch (err) {
     fastify.log.error(err);