import fs from 'node:fs/promises';
import path from 'node:path';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { archiveRawEmail } from './mailArchive.js';

// NOTE(review): these defaults are hard-coded to one user's home directory.
// Callers running as any other user must pass mailRoot / indexPath /
// accountsDbPath explicitly.
const DEFAULT_MAIL_ROOT = '/Users/kavi/Library/Mail/V10';
const DEFAULT_ENVELOPE_INDEX = path.join(DEFAULT_MAIL_ROOT, 'MailData', 'Envelope Index');
const DEFAULT_ACCOUNTS_DB = '/Users/kavi/Library/Accounts/Accounts4.sqlite';

const execFileAsync = promisify(execFile);

/**
 * Lists the account directories under `mailRoot` that contain at least one
 * `*.mbox` directory (at any depth). The `MailData` directory is skipped.
 *
 * @param {string} [mailRoot] - Directory to scan; unreadable paths yield `[]`.
 * @returns {Promise<Array<{accountDir: string, accountPath: string,
 *   mailboxes: Array<{path: string, label: string}>}>>}
 */
export async function listAppleMailSources(mailRoot = DEFAULT_MAIL_ROOT) {
  const sources = [];
  const accountDirs = await safeReaddir(mailRoot, { withFileTypes: true });
  for (const accountDir of accountDirs.filter((entry) => entry.isDirectory())) {
    if (accountDir.name === 'MailData') continue;
    const accountPath = path.join(mailRoot, accountDir.name);
    const mailboxes = await findMailboxDirs(accountPath);
    if (!mailboxes.length) continue;
    sources.push({
      accountDir: accountDir.name,
      accountPath,
      mailboxes: mailboxes.map((mailbox) => ({
        path: mailbox,
        label: mailboxLabel(mailbox, accountPath),
      })),
    });
  }
  return sources;
}

/**
 * Archives up to `limit` `.emlx` files found (recursively) under `sourcePath`.
 * Files ending in `.partial.emlx` are ignored. Files are visited in sorted
 * path order; when `since` is given, files whose mtime is older are skipped.
 *
 * @param {object} options
 * @param {object} options.account - Passed through to `archiveRawEmail`.
 * @param {string} options.sourcePath - Directory to scan for `.emlx` files.
 * @param {string} options.archiveRoot - Passed through to `archiveRawEmail`.
 * @param {number} [options.limit=25] - Maximum number of files to import.
 * @param {string|number|Date} [options.since] - Anything `new Date()` accepts.
 * @returns {Promise<{sourcePath: string, archiveRoot: string, imported: number,
 *   scanned: number, records: object[]}>}
 */
export async function importAppleMailEmlx({ account, sourcePath, archiveRoot, limit = 25, since }) {
  const files = await findFiles(
    sourcePath,
    (file) => file.endsWith('.emlx') && !file.endsWith('.partial.emlx'),
  );
  const selectedFiles = [];
  const sinceDate = since ? new Date(since) : null;
  for (const file of files.sort()) {
    // Check the quota before selecting, so that `limit: 0` selects nothing
    // (checking after the push always let one file through).
    if (selectedFiles.length >= limit) break;
    if (sinceDate) {
      const stat = await fs.stat(file);
      if (stat.mtime < sinceDate) continue;
    }
    selectedFiles.push(file);
  }

  const records = [];
  for (const file of selectedFiles) {
    const source = await readEmlxRawMessage(file);
    const record = await archiveRawEmail({
      account,
      mailbox: mailboxLabel(file, sourcePath),
      uid: path.basename(file, '.emlx'),
      source,
      archiveRoot,
    });
    record.localSource = file;
    records.push(record);
  }

  return {
    sourcePath,
    archiveRoot,
    imported: records.length,
    scanned: files.length,
    records,
  };
}

/**
 * Imports the most recently received messages of one mailbox, using the
 * envelope index database to pick candidates and the on-disk `.emlx` files
 * for the message bodies.
 *
 * More candidates than `limit` are queried (max of `limit * 20` and
 * `limit + 100`) because some indexed messages have no local `.emlx` file;
 * those row ids are reported in `missing`.
 *
 * @param {object} options
 * @param {object} options.account - Passed through to `archiveRawEmail`; also
 *   the source of the default `email`.
 * @param {string} [options.email] - Defaults to `account.email` or
 *   `account.auth.user`.
 * @param {string} [options.mailRoot]
 * @param {string} [options.indexPath] - Path of the envelope index database.
 * @param {string} [options.accountsDbPath] - Path of the accounts database.
 * @param {string} [options.mailbox='all'] - See `mailboxCandidates`.
 * @param {string} options.archiveRoot - Passed through to `archiveRawEmail`.
 * @param {number} [options.limit=25] - Maximum number of messages to import.
 * @param {string|number|Date} [options.since] - Lower bound, compared in
 *   whole seconds against `date_received`.
 * @returns {Promise<object>} Summary with `imported`, `scanned`, `missing`
 *   and the archived `records`.
 * @throws {Error} When `email` cannot be determined or no mailbox resolves.
 */
export async function importAppleMailFromIndex({
  account,
  email = account.email || account.auth?.user,
  mailRoot = DEFAULT_MAIL_ROOT,
  indexPath = DEFAULT_ENVELOPE_INDEX,
  accountsDbPath = DEFAULT_ACCOUNTS_DB,
  mailbox = 'all',
  archiveRoot,
  limit = 25,
  since,
}) {
  if (!email) throw new Error('email is required to resolve Apple Mail account');

  const { imapAccount, mailboxRecord } = await resolveAppleMailAccountMailbox({
    email,
    accountsDbPath,
    indexPath,
    mailbox,
  });
  const sourcePath = mailboxPathForUrl(mailRoot, mailboxRecord.url);
  const sinceTimestamp = since ? Math.floor(new Date(since).getTime() / 1000) : 0;
  const importLimit = Number(limit);
  const candidateLimit = Math.max(importLimit * 20, importLimit + 100);

  // Only numbers are interpolated here; non-finite values fall back to
  // constants so the statement stays well-formed.
  const messages = await queryJson(indexPath, `
    select
      m.ROWID as rowid,
      m.remote_id as remoteId,
      m.date_received as dateReceived
    from messages m
    where m.mailbox = ${Number(mailboxRecord.rowid)}
      and m.deleted = 0
      and m.date_received >= ${Number.isFinite(sinceTimestamp) ? sinceTimestamp : 0}
    order by m.date_received desc
    limit ${Number.isFinite(candidateLimit) ? candidateLimit : 25}
  `);

  const records = [];
  const missing = [];
  for (const message of messages) {
    // Stop before touching the filesystem once the quota is filled. With the
    // check placed after the lookup, the loop kept searching for (and
    // recording as missing) further candidates until it reached one that
    // existed on disk.
    if (records.length >= importLimit) break;

    const localSource = await findEmlxByRowId(sourcePath, message.rowid);
    if (!localSource) {
      missing.push(message.rowid);
      continue;
    }

    const source = await readEmlxRawMessage(localSource);
    const record = await archiveRawEmail({
      account,
      mailbox: decodeMailboxUrl(mailboxRecord.url),
      uid: String(message.rowid),
      source,
      archiveRoot,
    });
    record.localSource = localSource;
    record.appleMail = {
      accountIdentifier: imapAccount.identifier,
      mailboxUrl: mailboxRecord.url,
      rowid: message.rowid,
      remoteId: message.remoteId,
      dateReceived: message.dateReceived,
    };
    records.push(record);
  }

  return {
    email,
    imapAccount,
    mailbox: mailboxRecord,
    sourcePath,
    archiveRoot,
    imported: records.length,
    scanned: messages.length,
    missing,
    records,
  };
}

/**
 * Resolves the IMAP account / mailbox pair to use for `email`. Every IMAP
 * account candidate is tried; among those that have the requested mailbox,
 * the one with the highest `totalCount` wins.
 *
 * @param {object} options
 * @param {string} options.email
 * @param {string} [options.accountsDbPath]
 * @param {string} [options.indexPath]
 * @param {string} [options.mailbox='all']
 * @returns {Promise<{imapAccount: object, mailboxRecord: object}>}
 * @throws {Error} When no candidate account has the mailbox. The last lookup
 *   failure is attached as `cause`.
 */
export async function resolveAppleMailAccountMailbox({
  email,
  accountsDbPath = DEFAULT_ACCOUNTS_DB,
  indexPath = DEFAULT_ENVELOPE_INDEX,
  mailbox = 'all',
}) {
  const imapAccounts = await resolveAppleMailImapAccounts({ email, accountsDbPath });
  const attempts = [];
  let lastError;
  for (const imapAccount of imapAccounts) {
    try {
      const mailboxRecord = await resolveMailboxRecord({
        indexPath,
        imapAccountId: imapAccount.identifier,
        mailbox,
      });
      attempts.push({ imapAccount, mailboxRecord });
    } catch (error) {
      // Some macOS account records are stale or have no Mail mailbox. Try the next candidate.
      // The error is remembered so that a real failure (e.g. sqlite3 missing)
      // is still visible if every candidate fails.
      lastError = error;
    }
  }

  if (!attempts.length) {
    throw new Error(
      `Apple Mail mailbox not found for ${email}/${mailbox}`,
      lastError ? { cause: lastError } : undefined,
    );
  }

  return attempts.sort(
    (a, b) => Number(b.mailboxRecord.totalCount || 0) - Number(a.mailboxRecord.totalCount || 0),
  )[0];
}

/**
 * Returns the first IMAP account candidate for `email`.
 *
 * @param {object} options
 * @param {string} options.email
 * @param {string} [options.accountsDbPath]
 * @returns {Promise<object>}
 * @throws {Error} When no IMAP account matches.
 */
export async function resolveAppleMailImapAccount({ email, accountsDbPath = DEFAULT_ACCOUNTS_DB }) {
  const rows = await resolveAppleMailImapAccounts({ email, accountsDbPath });
  // resolveAppleMailImapAccounts already throws on an empty result; this
  // guard only protects against that contract changing.
  if (!rows.length) {
    throw new Error(`Apple Mail IMAP account not found for ${email}`);
  }
  return rows[0];
}

/**
 * Looks up IMAP accounts whose user name (or whose parent account's user
 * name) equals `email`, case-insensitively. Rows where the IMAP account
 * itself matches come first, followed by IMAP children of a matching parent;
 * duplicates (by identifier) are removed, keeping the first occurrence.
 *
 * @param {object} options
 * @param {string} options.email
 * @param {string} [options.accountsDbPath]
 * @returns {Promise<Array<{identifier: string, email: string,
 *   description: string, parentIdentifier: (string|null), source: string}>>}
 * @throws {Error} When neither query returns a row.
 */
export async function resolveAppleMailImapAccounts({ email, accountsDbPath = DEFAULT_ACCOUNTS_DB }) {
  const childRows = await queryJson(accountsDbPath, `
    select
      child.ZIDENTIFIER as identifier,
      parent.ZUSERNAME as email,
      parent.ZACCOUNTDESCRIPTION as description,
      parent.ZIDENTIFIER as parentIdentifier,
      'child' as source
    from ZACCOUNT parent
    join ZACCOUNT child on child.ZPARENTACCOUNT = parent.Z_PK
    join ZACCOUNTTYPE childType on child.ZACCOUNTTYPE = childType.Z_PK
    where lower(parent.ZUSERNAME) = lower('${sqlString(email)}')
      and childType.ZIDENTIFIER = 'com.apple.account.IMAP'
    order by child.Z_PK
  `);
  const directRows = await queryJson(accountsDbPath, `
    select
      account.ZIDENTIFIER as identifier,
      account.ZUSERNAME as email,
      account.ZACCOUNTDESCRIPTION as description,
      null as parentIdentifier,
      'direct' as source
    from ZACCOUNT account
    join ZACCOUNTTYPE accountType on account.ZACCOUNTTYPE = accountType.Z_PK
    where lower(account.ZUSERNAME) = lower('${sqlString(email)}')
      and accountType.ZIDENTIFIER = 'com.apple.account.IMAP'
    order by account.Z_PK
  `);

  const rows = [...directRows, ...childRows];
  if (!rows.length) {
    throw new Error(`Apple Mail IMAP account not found for ${email}`);
  }

  const unique = new Map();
  for (const row of rows) {
    if (!unique.has(row.identifier)) unique.set(row.identifier, row);
  }
  return [...unique.values()];
}

/**
 * Finds the `mailboxes` row for one of the candidate names of `mailbox`
 * under `imap://<imapAccountId>/`. Candidates are tried in order and the
 * first match is returned.
 *
 * @param {object} options
 * @param {string} [options.indexPath]
 * @param {string} options.imapAccountId
 * @param {string} [options.mailbox='all']
 * @returns {Promise<{rowid: number, url: string, totalCount: number,
 *   unreadCount: number, unseenCount: number}>}
 * @throws {Error} When no candidate exists; the message lists up to 20
 *   mailbox URLs of that account.
 */
export async function resolveMailboxRecord({
  indexPath = DEFAULT_ENVELOPE_INDEX,
  imapAccountId,
  mailbox = 'all',
}) {
  const accountPrefix = `imap://${imapAccountId}/`;
  const candidates = mailboxCandidates(mailbox)
    .map((name) => `${accountPrefix}${encodeMailboxPath(name)}`);

  for (const url of candidates) {
    const rows = await queryJson(indexPath, `
      select
        ROWID as rowid,
        url,
        total_count as totalCount,
        unread_count as unreadCount,
        unseen_count as unseenCount
      from mailboxes
      where url = '${sqlString(url)}'
      limit 1
    `);
    if (rows.length) return rows[0];
  }

  // Nothing matched: gather the account's largest mailboxes for the message.
  const available = await queryJson(indexPath, `
    select ROWID as rowid, url, total_count as totalCount
    from mailboxes
    where url like '${sqlString(accountPrefix)}%'
    order by total_count desc
    limit 20
  `);
  throw new Error(`Apple Mail mailbox not found for ${imapAccountId}/${mailbox}. Available: ${available.map((row) => row.url).join(', ')}`);
}

/**
 * Reads an `.emlx` file and returns the raw message bytes. The first line is
 * parsed as the byte length of the message that follows it; anything after
 * that many bytes is dropped. If the first line is not a positive number,
 * everything after the first line is returned.
 *
 * @param {string} filePath
 * @returns {Promise<Buffer>}
 * @throws {Error} When the file contains no newline at all.
 */
export async function readEmlxRawMessage(filePath) {
  const content = await fs.readFile(filePath);
  const newlineIndex = content.indexOf(0x0a);
  if (newlineIndex < 0) throw new Error(`invalid emlx file without first line: ${filePath}`);

  const sizeText = content.subarray(0, newlineIndex).toString('utf8').trim();
  const declaredSize = Number(sizeText);
  if (!Number.isFinite(declaredSize) || declaredSize <= 0) {
    return content.subarray(newlineIndex + 1);
  }

  const start = newlineIndex + 1;
  // Clamp so that a declared size larger than the file cannot overrun it.
  const end = Math.min(start + declaredSize, content.length);
  return content.subarray(start, end);
}

/**
 * Locates `<rowid>.emlx` below `sourcePath`. The bucketed location
 * `<base>/Data/<bucket parts>/Messages/` is probed first for every base
 * directory; if none exists, the whole tree is searched recursively.
 *
 * @param {string} sourcePath
 * @param {number|string} rowid
 * @returns {Promise<string|null>} Absolute path, or `null` when not found.
 */
export async function findEmlxByRowId(sourcePath, rowid) {
  const fileName = `${rowid}.emlx`;
  const baseDirs = await mailDataBaseDirs(sourcePath);
  const bucketParts = bucketPathParts(rowid);
  for (const baseDir of baseDirs) {
    const candidate = path.join(baseDir, 'Data', ...bucketParts, 'Messages', fileName);
    if (await fileExists(candidate)) return candidate;
  }
  return findFirstFile(sourcePath, fileName);
}

/**
 * Computes the bucket directory segments for a row id: the row id is divided
 * by 1000 (rounded down) and the digits of the result are returned in reverse
 * order, one per segment. Example: `123456` gives `['3', '2', '1']`; row ids
 * below 1000 (and non-numeric input) give `[]`.
 *
 * @param {number|string} rowid
 * @returns {string[]}
 */
export function bucketPathParts(rowid) {
  const bucket = Math.floor(Number(rowid) / 1000);
  if (!bucket) return [];
  return String(bucket).split('').reverse();
}

/**
 * Splits an `imap://<account>/<mailbox path>` URL into the account segment
 * and the URI-decoded mailbox path segments. The scheme is optional.
 *
 * @param {string} url
 * @returns {{accountDir: string, mailboxParts: string[]}}
 */
function splitMailboxUrl(url) {
  const withoutScheme = url.replace(/^imap:\/\//, '');
  const slashIndex = withoutScheme.indexOf('/');
  const accountDir = slashIndex >= 0 ? withoutScheme.slice(0, slashIndex) : withoutScheme;
  const mailboxPath = slashIndex >= 0 ? withoutScheme.slice(slashIndex + 1) : '';
  const mailboxParts = mailboxPath
    .split('/')
    .filter(Boolean)
    .map((part) => decodeURIComponent(part));
  return { accountDir, mailboxParts };
}

/**
 * Maps a mailbox URL to a directory below `mailRoot`:
 * `<mailRoot>/<account>/<part1>.mbox/<part2>.mbox/...`.
 *
 * @param {string} mailRoot
 * @param {string} url
 * @returns {string}
 */
export function mailboxPathForUrl(mailRoot, url) {
  const { accountDir, mailboxParts } = splitMailboxUrl(url);
  return path.join(mailRoot, accountDir, ...mailboxParts.map((part) => `${part}.mbox`));
}

/**
 * Returns the URI-decoded mailbox path of a mailbox URL, without scheme and
 * account, joined by `/`.
 *
 * @param {string} url
 * @returns {string}
 */
export function decodeMailboxUrl(url) {
  return splitMailboxUrl(url).mailboxParts.join('/');
}

/**
 * Recursively collects every directory below `root` whose name ends in
 * `.mbox`, including `.mbox` directories nested inside other ones.
 *
 * @param {string} root
 * @returns {Promise<string[]>}
 */
async function findMailboxDirs(root) {
  const dirs = [];
  const entries = await safeReaddir(root, { withFileTypes: true });
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    const fullPath = path.join(root, entry.name);
    if (entry.name.endsWith('.mbox')) dirs.push(fullPath);
    const nested = await findMailboxDirs(fullPath);
    dirs.push(...nested);
  }
  return dirs;
}

/**
 * Recursively collects the paths of all regular files below `root` for which
 * `predicate(fullPath)` is truthy.
 *
 * @param {string} root
 * @param {(fullPath: string) => boolean} predicate
 * @returns {Promise<string[]>}
 */
async function findFiles(root, predicate) {
  const found = [];
  const entries = await safeReaddir(root, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path.join(root, entry.name);
    if (entry.isDirectory()) {
      found.push(...await findFiles(fullPath, predicate));
    } else if (entry.isFile() && predicate(fullPath)) {
      found.push(fullPath);
    }
  }
  return found;
}

/**
 * Depth-first search for the first regular file named `fileName` below
 * `root`.
 *
 * @param {string} root
 * @param {string} fileName
 * @returns {Promise<string|null>}
 */
async function findFirstFile(root, fileName) {
  const entries = await safeReaddir(root, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path.join(root, entry.name);
    if (entry.isFile() && entry.name === fileName) return fullPath;
    if (entry.isDirectory()) {
      const nested = await findFirstFile(fullPath, fileName);
      if (nested) return nested;
    }
  }
  return null;
}

/**
 * Returns the directories in which a `Data` folder should be looked for:
 * `sourcePath` itself, plus every direct subdirectory that contains an entry
 * named `Data`.
 *
 * @param {string} sourcePath
 * @returns {Promise<string[]>}
 */
async function mailDataBaseDirs(sourcePath) {
  const baseDirs = [sourcePath];
  const entries = await safeReaddir(sourcePath, { withFileTypes: true });
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    const fullPath = path.join(sourcePath, entry.name);
    if (await fileExists(path.join(fullPath, 'Data'))) baseDirs.push(fullPath);
  }
  return baseDirs;
}

/**
 * Runs `sql` against the SQLite database at `dbPath` through the `sqlite3`
 * command-line tool in JSON output mode and returns the parsed rows. Empty
 * output (no rows) yields `[]`. Output is capped at 20 MiB.
 *
 * @param {string} dbPath
 * @param {string} sql
 * @returns {Promise<object[]>}
 */
async function queryJson(dbPath, sql) {
  const { stdout } = await execFileAsync('sqlite3', ['-json', dbPath, sql], {
    maxBuffer: 1024 * 1024 * 20,
  });
  return stdout.trim() ? JSON.parse(stdout) : [];
}

/**
 * Expands a mailbox selector into the mailbox names to try, in order.
 * NOTE(review): '[Gmail]/Todos' is presumably a localized name of Gmail's
 * "All Mail" folder - confirm.
 *
 * @param {string} [mailbox]
 * @returns {string[]}
 */
function mailboxCandidates(mailbox) {
  if (!mailbox || mailbox === 'all') return ['[Gmail]/Todos', '[Gmail]/All Mail', 'INBOX'];
  if (mailbox === 'inbox') return ['INBOX'];
  return [mailbox];
}

/**
 * URI-encodes each `/`-separated segment of a mailbox path, keeping the
 * separators.
 *
 * @param {string} mailboxPath
 * @returns {string}
 */
function encodeMailboxPath(mailboxPath) {
  return mailboxPath
    .split('/')
    .map((part) => encodeURIComponent(part))
    .join('/');
}

/**
 * Escapes a value for use inside a single-quoted SQL string literal by
 * doubling single quotes.
 *
 * @param {unknown} value
 * @returns {string}
 */
function sqlString(value) {
  return String(value).replaceAll("'", "''");
}

/**
 * Reports whether `filePath` is accessible to the current process.
 *
 * @param {string} filePath
 * @returns {Promise<boolean>}
 */
async function fileExists(filePath) {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    return false;
  }
}

/**
 * `fs.readdir` that returns `[]` instead of rejecting, so that missing or
 * unreadable directories are treated as empty.
 *
 * @param {string} dir
 * @param {object} [options] - Forwarded to `fs.readdir`.
 * @returns {Promise<Array>}
 */
async function safeReaddir(dir, options) {
  try {
    return await fs.readdir(dir, options);
  } catch {
    return [];
  }
}

/**
 * Builds a `/`-separated label from the path of `itemPath` relative to
 * `rootPath`, leaving out `Data` and `Messages` segments.
 *
 * @param {string} itemPath
 * @param {string} rootPath
 * @returns {string}
 */
function mailboxLabel(itemPath, rootPath) {
  return path.relative(rootPath, itemPath)
    .split(path.sep)
    .filter((part) => part && part !== 'Data' && part !== 'Messages')
    .join('/');
}