feat(registry): engine-owned registry + mutation API (end the git-reconcile loop)

The registry was a git-committed file that was both bind-mounted :ro from coder-core AND
hand-patched live — two writers, neither authoritative. That split was the root cause of the
reconcile commits, dirty-tree deploy blocks, main-push gating, and multi-session collisions
(codex/gpt-5.5 audited).

- Registry is now ENGINE-OWNED at /app/data/registry.json (persistent volume), seeded once
  from the legacy bind-mount on first boot, never overwritten thereafter.
- New authenticated mutation API: PUT/PATCH/DELETE /api/v1/apps/:app (+ ?dry_run=1 returns
  a normalized diff, no write), GET /api/v1/registry/export (+ timestamped snapshot).
- Atomic writeRegistry (temp+rename), in-process mutation lock, schema validateEntry
  (allowed servers, branch-name safety, required fields).
- Append-only hash-chained audit at /app/data/registry-events.jsonl (actor from request
  identity, before/after, prev_hash/event_hash).
- FIX: audit.log moved /app/logs -> /app/data (the /app/logs path was never mounted, so the
  audit trail was lost on every restart).
- compose: removed the deploy-registry.json git bind-mount.

Clients (kua-app, kua-mcp-core) migrate to the API in a follow-up; git becomes an export
sink, not the source of truth.
This commit is contained in:
Kavi 2026-05-26 20:36:55 -04:00
parent d75a145d90
commit ed476804f9
2 changed files with 184 additions and 8 deletions

View File

@ -5,8 +5,11 @@
# - kua-services: reach kua-vault, kua-db, kua-mcp-core, etc.
# - production_proxy: reach forgejo (git operations) + Caddy edge labels
#
# Registry: deploy-registry.json is bind-mounted from coder-core's checkout
# during this transition. Future cleanup can migrate it into this repo.
# Registry: ENGINE-OWNED at /app/data/registry.json (on the kua-deploy-data volume),
# mutated only via the authenticated API (PUT/PATCH/DELETE /api/v1/apps/:app). The old
# git bind-mount was removed 2026-05-26 — git is no longer in the registry path. On a
# fresh volume the engine seeds from KUA_REGISTRY_SEED (default /app/deploy-registry.json)
# if present; the cutover pre-seeds /app/data/registry.json from the live registry first.
services:
kua-deploy:
build:
@ -31,7 +34,6 @@ services:
- kua-deploy-data:/app/data
- /root/.ssh:/root/.ssh:ro
- /root/apps:/root/apps
- /root/apps/coder-core/services/kua-deploy/deploy-registry.json:/app/deploy-registry.json:ro
- /usr/local/bin/kua-vault:/usr/local/bin/kua-vault:ro
- /root/.config/kua-vault:/root/.config/kua-vault:ro
networks:

184
server.js
View File

@ -34,10 +34,18 @@ function validateBranchName(name, label) {
// --- Configuration ---
const DATA_DIR = path.join(process.cwd(), 'data');
const LOG_DIR = path.join(process.cwd(), 'logs');
const AUDIT_LOG_FILE = path.join(LOG_DIR, 'audit.log');
// Audit log moved under DATA_DIR (the only mounted/persistent volume) — the old
// /app/logs path was never mounted, so the audit trail was lost on every restart.
const LOG_DIR = DATA_DIR;
const AUDIT_LOG_FILE = path.join(DATA_DIR, 'audit.log');
const DEPLOY_HISTORY_FILE = path.join(DATA_DIR, 'deploys.json');
const REGISTRY_FILE = path.join(process.cwd(), 'deploy-registry.json');
// The registry is ENGINE-OWNED runtime state on the persistent volume — NOT a
// git-committed bind-mount. Mutated only via the authenticated API below. On first
// boot it seeds from the legacy bind-mount (if still present) or a seed file, then
// owns the file thereafter. See REGISTRY_SEED.
const REGISTRY_FILE = path.join(DATA_DIR, 'registry.json');
const REGISTRY_SEED = process.env.KUA_REGISTRY_SEED || path.join(process.cwd(), 'deploy-registry.json');
const REGISTRY_EVENTS_FILE = path.join(DATA_DIR, 'registry-events.jsonl');
const ADMIN_TOKEN = process.env.KUA_DEPLOY_ADMIN_TOKEN;
const TAILSCALE_SOCKET = '/var/run/tailscale/tailscaled.sock';
const HOSTNAME = process.env.HOSTNAME || 'gal';
@ -104,14 +112,106 @@ function getDeployId(app) {
return deployLocks.get(app)?.deployId ?? null;
}
// --- Load Registry ---
// --- Load Registry (engine-owned) ---
let registry = { apps: {} };
// Seed-once: if the engine-owned registry file does not exist yet, create it from
// the legacy seed (the old git bind-mount) so existing apps survive the cutover.
// An existing engine-owned file is NEVER overwritten.
//
// Returns true when a registry file was written, false when one already existed.
// Throws when the existence check fails for a reason other than "not found", or
// when the data directory / registry file cannot be written.
async function seedRegistryIfMissing() {
  try {
    await fs.access(REGISTRY_FILE);
    return false; // already engine-owned
  } catch (err) {
    // Only a genuinely missing file means "seed now". Any other failure (EACCES,
    // EIO, ...) must surface instead of being mistaken for a fresh volume.
    if (err.code !== 'ENOENT') throw err;
  }

  let seed;
  let seededFrom = REGISTRY_SEED;
  try {
    seed = await fs.readFile(REGISTRY_SEED, 'utf-8');
    const parsed = JSON.parse(seed);
    // Check the shape as well as the syntax: a seed without an "apps" object would
    // be written once and then make every later load fail, because the file would
    // exist and therefore never be re-seeded.
    const apps = parsed && typeof parsed === 'object' ? parsed.apps : undefined;
    if (!apps || typeof apps !== 'object' || Array.isArray(apps)) {
      throw new Error('seed has no "apps" object');
    }
  } catch (err) {
    fastify.log.warn(`No registry seed at ${REGISTRY_SEED} (${err.message}); starting with empty registry`);
    seed = JSON.stringify({ apps: {} }, null, 2) + '\n';
    seededFrom = null;
  }

  await fs.mkdir(DATA_DIR, { recursive: true });
  // temp + rename: a crash mid-write must not leave a truncated registry.json behind,
  // since the existence check above would then refuse to seed again.
  const tmp = `${REGISTRY_FILE}.seed.${process.pid}.${Date.now()}`;
  try {
    await fs.writeFile(tmp, seed, 'utf-8');
    await fs.rename(tmp, REGISTRY_FILE);
  } catch (err) {
    await fs.unlink(tmp).catch(() => {}); // best-effort cleanup; report the original failure
    throw err;
  }

  if (seededFrom) {
    fastify.log.info(`Registry seeded from ${seededFrom} -> ${REGISTRY_FILE}`);
  } else {
    fastify.log.info(`Registry initialized empty at ${REGISTRY_FILE}`);
  }
  return true;
}
// Load the engine-owned registry file into the in-memory `registry`, seeding the
// file first if it does not exist yet.
//
// The in-memory registry is replaced only after the parsed document has been
// validated, so a malformed file never clobbers the currently loaded state.
// Throws on unreadable/unparsable files and when "apps" is not a plain object
// (null and arrays are rejected too — `typeof` alone reports both as 'object').
async function loadRegistry() {
  await seedRegistryIfMissing();
  const data = await fs.readFile(REGISTRY_FILE, 'utf-8');
  const parsed = JSON.parse(data);
  const apps = parsed && typeof parsed === 'object' ? parsed.apps : undefined;
  if (!apps || typeof apps !== 'object' || Array.isArray(apps)) {
    throw new Error('registry.json malformed: missing "apps" object');
  }
  registry = parsed;
  fastify.log.info(`Registry loaded: ${Object.keys(registry.apps).length} apps`);
}
// Atomic write of the in-memory registry to the engine-owned file (temp + rename).
//
// The document is first written to a uniquely named sibling temp file and then
// renamed over REGISTRY_FILE, so readers never observe a partially written file.
// If either step fails the temp file is removed (best-effort) and the original
// error is rethrown, so failed writes do not accumulate stray files on the volume.
async function writeRegistry() {
  await fs.mkdir(DATA_DIR, { recursive: true });
  const tmp = `${REGISTRY_FILE}.tmp.${process.pid}.${Date.now()}`;
  try {
    await fs.writeFile(tmp, JSON.stringify(registry, null, 2) + '\n', 'utf-8');
    await fs.rename(tmp, REGISTRY_FILE);
  } catch (err) {
    await fs.unlink(tmp).catch(() => {}); // cleanup only; the write error is what matters
    throw err;
  }
}
// --- Registry mutation lock (engine is single-process; serialize writers) ---
// `registryMutating` is true while a holder's callback is in flight.
let registryMutating = false;

// Run `fn` while holding the registry lock and resolve/reject with its outcome.
// Waiters re-check the flag every 25 ms; the flag is always cleared when `fn`
// settles, whether it fulfils or throws.
async function withRegistryLock(fn) {
  const pause = () => new Promise((resolve) => {
    setTimeout(resolve, 25);
  });
  for (;;) {
    if (!registryMutating) break;
    await pause();
  }
  registryMutating = true;
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    registryMutating = false;
  }
}
// Append-only hash-chained audit of registry mutations.
// Holds the event_hash of the most recently appended record; null until the
// first append, at which point it is recovered from the events file.
let lastRegistryEventHash = null;

// Append one audit record for `ev` to REGISTRY_EVENTS_FILE and return it.
// The stored record is `ev` plus `ts` (ISO timestamp), `prev_hash` (hash of the
// previous record, '' at the start of a chain) and `event_hash` (SHA-256 hex of
// the JSON serialization of the record before `event_hash` is added).
async function appendRegistryEvent(ev) {
  if (lastRegistryEventHash === null) {
    // Best-effort recovery of the chain tip: an unreadable file, an empty file or
    // an unparsable last line all restart the chain from ''.
    let recovered = '';
    try {
      const contents = await fs.readFile(REGISTRY_EVENTS_FILE, 'utf-8');
      const entries = contents.trim().split('\n').filter(Boolean);
      if (entries.length) {
        recovered = JSON.parse(entries[entries.length - 1]).event_hash || '';
      }
    } catch {
      recovered = '';
    }
    lastRegistryEventHash = recovered;
  }

  // Key order matters: the hash covers the serialization of exactly these fields
  // in this order, and event_hash is appended only afterwards.
  const unsigned = { ...ev, ts: new Date().toISOString(), prev_hash: lastRegistryEventHash };
  const digest = crypto.createHash('sha256').update(JSON.stringify(unsigned)).digest('hex');
  const record = { ...unsigned, event_hash: digest };

  await fs.mkdir(DATA_DIR, { recursive: true });
  await fs.appendFile(REGISTRY_EVENTS_FILE, `${JSON.stringify(record)}\n`, 'utf-8');
  lastRegistryEventHash = digest;
  return record;
}
// Validate a registry entry coming from the API (request input — privileged but typed).
// Allowed production servers: comma-separated KUA_REGISTRY_SERVERS, or the built-in default list.
const REGISTRY_SERVERS = new Set(
  (process.env.KUA_REGISTRY_SERVERS || 'bruno,gal,genesis,eva')
    .split(',')
    .map((server) => server.trim()),
);

// Check app `name` and its registry `entry`; returns an array of human-readable
// problems (empty when the entry is acceptable). Never throws for bad input:
// branch-name failures raised by validateBranchName are collected as messages.
// Validation stops early when `entry` or `entry.production` is not an object,
// because nothing further can be checked.
function validateEntry(name, entry) {
  const problems = [];
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object';

  if (!/^[a-z0-9][a-z0-9._-]*$/.test(name)) {
    problems.push(`app name "${name}" must be lowercase [a-z0-9._-]`);
  }
  if (!isObjectLike(entry)) {
    problems.push('entry must be an object');
    return problems;
  }

  // Required top-level fields must be non-empty strings.
  ['repo_dir', 'source_branch', 'deploy_branch', 'deploy_mode'].forEach((field) => {
    const value = entry[field];
    if (typeof value !== 'string' || !value) problems.push(`missing/invalid "${field}"`);
  });

  const mode = entry.deploy_mode;
  if (mode && !['direct', 'webhook', 'script'].includes(mode)) {
    problems.push(`deploy_mode "${mode}" not direct|webhook|script`);
  }

  // Branch names are checked only when present; absence was already reported above.
  for (const field of ['source_branch', 'deploy_branch']) {
    if (!entry[field]) continue;
    try {
      validateBranchName(entry[field], field);
    } catch (err) {
      problems.push(err.message);
    }
  }

  if (entry.repo_url && typeof entry.repo_url !== 'string') {
    problems.push('repo_url must be a string');
  }

  const target = entry.production;
  if (!isObjectLike(target)) {
    problems.push('missing "production" object');
    return problems;
  }

  if (!REGISTRY_SERVERS.has(target.server)) {
    problems.push(`production.server "${target.server}" not in allowed set {${[...REGISTRY_SERVERS].join(',')}}`);
  }
  const deployDirOk = typeof target.deploy_dir === 'string' && target.deploy_dir.startsWith('/');
  if (!deployDirOk) {
    problems.push('production.deploy_dir must be an absolute path');
  }
  if ('has_migrations' in target && typeof target.has_migrations !== 'boolean') {
    problems.push('production.has_migrations must be boolean');
  }
  if (target.stateful_services && !Array.isArray(target.stateful_services)) {
    problems.push('production.stateful_services must be an array');
  }
  return problems;
}
// Compare two registry entries independent of key order.
//
// Returns { changed, before, after } where `changed` is true when the entries
// differ at any depth, and `before`/`after` echo the inputs (null when falsy).
//
// Keys are sorted recursively before serializing. Passing a key array as the
// JSON.stringify replacer is NOT equivalent: the array acts as an allow-list at
// every nesting level, so nested keys such as production.server would be dropped
// and a change confined to a nested object would be reported as "unchanged".
function normalizedDiff(before, after) {
  const canonical = (value) => {
    if (Array.isArray(value)) return value.map(canonical); // element order is significant
    if (value && typeof value === 'object') {
      return Object.fromEntries(
        Object.keys(value).sort().map((key) => [key, canonical(value[key])]),
      );
    }
    return value;
  };
  const serialize = (value) => (value ? JSON.stringify(canonical(value)) : null);
  return {
    changed: serialize(before) !== serialize(after),
    before: before || null,
    after: after || null,
  };
}
// Look up a registered app by name; returns its entry, or null when unknown.
// Only own properties of registry.apps count, so names that collide with
// Object.prototype members ("constructor", "toString", "__proto__", ...) are
// reported as unknown instead of returning an inherited function or object.
function getApp(name) {
  if (!Object.prototype.hasOwnProperty.call(registry.apps, name)) return null;
  return registry.apps[name] || null;
}
@ -1372,6 +1472,80 @@ fastify.post('/api/v1/registry/reload', async (request, reply) => {
return { ok: true, apps: after, before };
});
// Export the full registry + write a timestamped snapshot to the data volume.
// Git can consume these as generated audit artifacts; it is NOT the source of truth.
//
// Responds 200 { ok, apps, snapshot, registry } where `snapshot` is the path of the
// file just written, or 500 { ok: false, error } when the snapshot cannot be written.
fastify.get('/api/v1/registry/export', async (request, reply) => {
  try {
    // Compact UTC stamp with second resolution, e.g. 20260526T203655. Dashes must be
    // stripped along with ':' and '.', otherwise the 15-character slice ends at the
    // minute and every export within the same minute overwrites the previous snapshot.
    const ts = new Date().toISOString().replace(/[-:.]/g, '').slice(0, 15);
    const snap = path.join(DATA_DIR, `registry-snapshot-${ts}.json`);
    await fs.mkdir(DATA_DIR, { recursive: true });
    await fs.writeFile(snap, JSON.stringify(registry, null, 2) + '\n', 'utf-8');
    return { ok: true, apps: Object.keys(registry.apps).length, snapshot: snap, registry };
  } catch (err) {
    return reply.code(500).send({ ok: false, error: err.message });
  }
});
// --- Registry mutations (engine-owned) — authenticated via the global onRequest hook ---
// Upsert a full app entry. ?dry_run=1 validates + returns a diff without writing.
//
// Responses:
//   400 { ok: false, errors }           validation failed or the name is reserved
//   200 { ok, dry_run, op, app, diff }  dry run; nothing is written
//   200 { ok, op, app, apps }           entry persisted and audited
fastify.put('/api/v1/apps/:app', async (request, reply) => {
  const name = request.params.app;
  const entry = request.body;
  const errs = validateEntry(name, entry);
  if (name === 'registry') errs.push('"registry" is a reserved name');
  if (errs.length) return reply.code(400).send({ ok: false, errors: errs });

  // Own-property lookup: inherited names such as "constructor" are not existing apps.
  const isRegistered = () => Object.prototype.hasOwnProperty.call(registry.apps, name);

  if (request.query.dry_run === '1' || request.query.dry_run === 'true') {
    const before = (isRegistered() && registry.apps[name]) || null;
    return { ok: true, dry_run: true, op: before ? 'update' : 'create', app: name, diff: normalizedDiff(before, entry) };
  }

  const op = await withRegistryLock(async () => {
    // Read the previous state only once the lock is held: a mutation that was in
    // flight while this request waited may have created or removed the entry, and
    // the audit record must describe the transition that actually happens.
    const existed = isRegistered();
    const previous = existed ? registry.apps[name] : undefined;
    const before = previous || null;
    const kind = before ? 'update' : 'create';

    registry.apps[name] = entry;
    try {
      await writeRegistry();
    } catch (err) {
      // Persisting failed: restore memory so it keeps matching what is on disk.
      if (existed) registry.apps[name] = previous;
      else delete registry.apps[name];
      throw err;
    }
    await appendRegistryEvent({ op: kind, app: name, actor: request.identity?.user || 'unknown', before, after: entry });
    return kind;
  });
  return { ok: true, op, app: name, apps: Object.keys(registry.apps).length };
});
// Deep-merge a partial update onto an existing entry.
// Nested plain objects are merged key by key; arrays and scalars in the patch
// replace the existing value. ?dry_run=1 validates + returns a diff without writing.
//
// Responses:
//   404 { ok: false, error }            no such app
//   400 { ok: false, errors }           body is not a JSON object, or the merged entry is invalid
//   200 { ok, dry_run, op, app, diff }  dry run; nothing is written
//   200 { ok, op, app }                 merged entry persisted and audited
fastify.patch('/api/v1/apps/:app', async (request, reply) => {
  const name = request.params.app;
  const patch = request.body ?? {};

  // Keys that would rewrite an object's prototype instead of storing data.
  const UNSAFE_KEYS = new Set(['__proto__', 'constructor', 'prototype']);
  const deepMerge = (a, b) => {
    const out = Array.isArray(a) ? [...a] : { ...a };
    for (const [k, v] of Object.entries(b || {})) {
      if (UNSAFE_KEYS.has(k)) continue;
      const bothPlainObjects = v && typeof v === 'object' && !Array.isArray(v)
        && a && typeof a[k] === 'object' && !Array.isArray(a[k]);
      out[k] = bothPlainObjects ? deepMerge(a[k], v) : v;
    }
    return out;
  };

  // Compute the merge against the registry as it is *now*. Returns either
  // { status, body } for a rejection or { current, merged } for a valid update.
  const plan = () => {
    const known = Object.prototype.hasOwnProperty.call(registry.apps, name) && registry.apps[name];
    if (!known) return { status: 404, body: { ok: false, error: `Unknown app: ${name}` } };
    if (typeof patch !== 'object' || Array.isArray(patch)) {
      return { status: 400, body: { ok: false, errors: ['request body must be a JSON object'] } };
    }
    const current = registry.apps[name];
    const merged = deepMerge(current, patch);
    const errs = validateEntry(name, merged);
    if (errs.length) return { status: 400, body: { ok: false, errors: errs } };
    return { current, merged };
  };

  if (request.query.dry_run === '1' || request.query.dry_run === 'true') {
    const preview = plan();
    if (preview.status) return reply.code(preview.status).send(preview.body);
    return { ok: true, dry_run: true, op: 'patch', app: name, diff: normalizedDiff(preview.current, preview.merged) };
  }

  const rejection = await withRegistryLock(async () => {
    // Merge only once the lock is held. Merging beforehand would base the result on
    // a stale entry and silently discard a concurrent mutation (lost update).
    const step = plan();
    if (step.status) return step;
    registry.apps[name] = step.merged;
    try {
      await writeRegistry();
    } catch (err) {
      registry.apps[name] = step.current; // keep memory in sync with disk
      throw err;
    }
    await appendRegistryEvent({ op: 'patch', app: name, actor: request.identity?.user || 'unknown', before: step.current, after: step.merged });
    return null;
  });
  if (rejection) return reply.code(rejection.status).send(rejection.body);
  return { ok: true, op: 'patch', app: name };
});
// Remove an app from the registry.
//
// Responses:
//   404 { ok: false, error }  no such app
//   200 { ok, op, app, apps } entry removed, persisted and audited
fastify.delete('/api/v1/apps/:app', async (request, reply) => {
  const name = request.params.app;

  const removed = await withRegistryLock(async () => {
    // Check existence only once the lock is held, so two concurrent deletes cannot
    // both pass the check and write duplicate audit events. Own-property lookup
    // keeps inherited names ("constructor", ...) from passing as registered apps.
    if (!Object.prototype.hasOwnProperty.call(registry.apps, name)) return false;
    const before = registry.apps[name];
    delete registry.apps[name];
    try {
      await writeRegistry();
    } catch (err) {
      registry.apps[name] = before; // persisting failed: restore the in-memory entry
      throw err;
    }
    await appendRegistryEvent({ op: 'delete', app: name, actor: request.identity?.user || 'unknown', before, after: null });
    return true;
  });

  if (!removed) return reply.code(404).send({ ok: false, error: `Unknown app: ${name}` });
  return { ok: true, op: 'delete', app: name, apps: Object.keys(registry.apps).length };
});
// List all apps
fastify.get('/api/v1/apps', async () => {
const results = [];