403 lines
13 KiB
JavaScript
403 lines
13 KiB
JavaScript
import fs from 'node:fs/promises';
|
|
import path from 'node:path';
|
|
import { execFile } from 'node:child_process';
|
|
import { promisify } from 'node:util';
|
|
import { archiveRawEmail } from './mailArchive.js';
|
|
|
|
// Home directory of the current user. Falls back to the previously hard-coded
// location when HOME is not set, so existing behavior is kept on that machine.
const USER_HOME = process.env.HOME || '/Users/kavi';

// Default locations used when callers do not pass explicit paths.
const DEFAULT_MAIL_ROOT = path.join(USER_HOME, 'Library', 'Mail', 'V10');
const DEFAULT_ENVELOPE_INDEX = path.join(DEFAULT_MAIL_ROOT, 'MailData', 'Envelope Index');
const DEFAULT_ACCOUNTS_DB = path.join(USER_HOME, 'Library', 'Accounts', 'Accounts4.sqlite');

// Promise-returning variant of child_process.execFile.
const execFileAsync = promisify(execFile);
|
|
|
|
/**
 * Enumerates the account directories below an Apple Mail root together with
 * the `.mbox` directories found inside each of them.
 *
 * @param {string} [mailRoot] - Directory to scan; defaults to DEFAULT_MAIL_ROOT.
 * @returns {Promise<Array<{accountDir: string, accountPath: string, mailboxes: Array<{path: string, label: string}>}>>}
 *   One entry per account directory that contains at least one mailbox.
 */
export async function listAppleMailSources(mailRoot = DEFAULT_MAIL_ROOT) {
  const entries = await safeReaddir(mailRoot, { withFileTypes: true });
  const result = [];

  for (const entry of entries) {
    // Only directories are considered, and the MailData directory is skipped.
    if (!entry.isDirectory() || entry.name === 'MailData') continue;

    const accountPath = path.join(mailRoot, entry.name);
    const mailboxDirs = await findMailboxDirs(accountPath);
    if (mailboxDirs.length === 0) continue;

    const mailboxes = [];
    for (const dir of mailboxDirs) {
      mailboxes.push({ path: dir, label: mailboxLabel(dir, accountPath) });
    }
    result.push({ accountDir: entry.name, accountPath, mailboxes });
  }

  return result;
}
|
|
|
|
/**
 * Archives `.emlx` files found (recursively) below `sourcePath`.
 *
 * Files ending in `.partial.emlx` are ignored. Candidates are processed in
 * sorted path order; when `since` is given, files whose mtime is older than
 * that date are skipped.
 *
 * @param {object} options
 * @param {object} options.account - Passed through to archiveRawEmail.
 * @param {string} options.sourcePath - Directory to scan for `.emlx` files.
 * @param {string} options.archiveRoot - Passed through to archiveRawEmail.
 * @param {number} [options.limit=25] - Maximum number of files to import.
 * @param {string|number|Date} [options.since] - Value accepted by `new Date()`.
 * @returns {Promise<{sourcePath: string, archiveRoot: string, imported: number, scanned: number, records: object[]}>}
 *   `scanned` is the number of `.emlx` files found, `imported` the number archived.
 */
export async function importAppleMailEmlx({ account, sourcePath, archiveRoot, limit = 25, since }) {
  const files = await findFiles(sourcePath, (file) => file.endsWith('.emlx') && !file.endsWith('.partial.emlx'));
  const sinceDate = since ? new Date(since) : null;
  const selectedFiles = [];

  for (const file of files.sort()) {
    // Check the cap before selecting so that a limit of 0 imports nothing
    // (previously one file was always selected before the limit was checked).
    if (selectedFiles.length >= limit) break;

    if (sinceDate) {
      const stat = await fs.stat(file);
      if (stat.mtime < sinceDate) continue;
    }
    selectedFiles.push(file);
  }

  const records = [];
  for (const file of selectedFiles) {
    const source = await readEmlxRawMessage(file);
    const record = await archiveRawEmail({
      account,
      mailbox: mailboxLabel(file, sourcePath),
      // The file name without its extension serves as the message uid.
      uid: path.basename(file, '.emlx'),
      source,
      archiveRoot,
    });
    record.localSource = file;
    records.push(record);
  }

  return {
    sourcePath,
    archiveRoot,
    imported: records.length,
    scanned: files.length,
    records,
  };
}
|
|
|
|
/**
 * Imports the most recently received messages of one Apple Mail mailbox by
 * querying the Envelope Index database and archiving the matching local
 * `.emlx` files.
 *
 * @param {object} options
 * @param {object} options.account - Passed through to archiveRawEmail; also the
 *   source of the default `email` (`account.email` or `account.auth.user`).
 * @param {string} [options.email] - Address used to look up the Apple Mail account.
 * @param {string} [options.mailRoot] - Apple Mail root directory.
 * @param {string} [options.indexPath] - Path of the Envelope Index database.
 * @param {string} [options.accountsDbPath] - Path of the accounts database.
 * @param {string} [options.mailbox='all'] - Mailbox selector, see resolveMailboxRecord.
 * @param {string} options.archiveRoot - Passed through to archiveRawEmail.
 * @param {number} [options.limit=25] - Maximum number of messages to import.
 * @param {string|number|Date} [options.since] - Only messages received at or after this date.
 * @returns {Promise<object>} Summary with `email`, `imapAccount`, `mailbox`,
 *   `sourcePath`, `archiveRoot`, `imported`, `scanned` (rows returned by the
 *   index query), `missing` (rowids without a local file) and `records`.
 * @throws {Error} When no email is available or no account/mailbox can be resolved.
 */
export async function importAppleMailFromIndex({
  account,
  email = account.email || account.auth?.user,
  mailRoot = DEFAULT_MAIL_ROOT,
  indexPath = DEFAULT_ENVELOPE_INDEX,
  accountsDbPath = DEFAULT_ACCOUNTS_DB,
  mailbox = 'all',
  archiveRoot,
  limit = 25,
  since,
}) {
  if (!email) throw new Error('email is required to resolve Apple Mail account');

  const { imapAccount, mailboxRecord } = await resolveAppleMailAccountMailbox({
    email,
    accountsDbPath,
    indexPath,
    mailbox,
  });
  const sourcePath = mailboxPathForUrl(mailRoot, mailboxRecord.url);
  // `since` is converted to whole seconds before being compared with date_received.
  const sinceTimestamp = since ? Math.floor(new Date(since).getTime() / 1000) : 0;
  const importLimit = Number(limit);
  // Query more rows than requested; presumably so that rows without a local
  // .emlx file (reported in `missing`) do not shrink the import.
  const candidateLimit = Math.max(importLimit * 20, importLimit + 100);

  // Every interpolated value is coerced to a number, so no quoting is needed.
  const messages = await queryJson(indexPath, `
    select m.ROWID as rowid, m.remote_id as remoteId, m.date_received as dateReceived
    from messages m
    where m.mailbox = ${Number(mailboxRecord.rowid)}
      and m.deleted = 0
      and m.date_received >= ${Number.isFinite(sinceTimestamp) ? sinceTimestamp : 0}
    order by m.date_received desc
    limit ${Number.isFinite(candidateLimit) ? candidateLimit : 25}
  `);

  const records = [];
  const missing = [];
  for (const message of messages) {
    // Stop before touching the filesystem once enough messages are archived.
    // Checking after the lookup kept running recursive file searches for
    // missing rows even though the limit had already been reached.
    if (records.length >= importLimit) break;

    const localSource = await findEmlxByRowId(sourcePath, message.rowid);
    if (!localSource) {
      missing.push(message.rowid);
      continue;
    }

    const source = await readEmlxRawMessage(localSource);
    const record = await archiveRawEmail({
      account,
      mailbox: decodeMailboxUrl(mailboxRecord.url),
      uid: String(message.rowid),
      source,
      archiveRoot,
    });
    record.localSource = localSource;
    record.appleMail = {
      accountIdentifier: imapAccount.identifier,
      mailboxUrl: mailboxRecord.url,
      rowid: message.rowid,
      remoteId: message.remoteId,
      dateReceived: message.dateReceived,
    };
    records.push(record);
  }

  return {
    email,
    imapAccount,
    mailbox: mailboxRecord,
    sourcePath,
    archiveRoot,
    imported: records.length,
    scanned: messages.length,
    missing,
    records,
  };
}
|
|
|
|
/**
 * Finds the IMAP account / mailbox pair to import from for an email address.
 *
 * Every IMAP account matching `email` is tried. Accounts for which the mailbox
 * lookup fails are skipped. Among the remaining ones, the pair whose mailbox
 * has the highest `totalCount` wins (the earliest candidate wins on ties).
 *
 * @param {object} options
 * @param {string} options.email - Address used to look up IMAP accounts.
 * @param {string} [options.accountsDbPath] - Path of the accounts database.
 * @param {string} [options.indexPath] - Path of the Envelope Index database.
 * @param {string} [options.mailbox='all'] - Mailbox selector, see resolveMailboxRecord.
 * @returns {Promise<{imapAccount: object, mailboxRecord: object}>}
 * @throws {Error} When no candidate account has a matching mailbox.
 */
export async function resolveAppleMailAccountMailbox({
  email,
  accountsDbPath = DEFAULT_ACCOUNTS_DB,
  indexPath = DEFAULT_ENVELOPE_INDEX,
  mailbox = 'all',
}) {
  const candidates = await resolveAppleMailImapAccounts({ email, accountsDbPath });

  let best = null;
  let bestCount = 0;
  for (const imapAccount of candidates) {
    let mailboxRecord;
    try {
      mailboxRecord = await resolveMailboxRecord({ indexPath, imapAccountId: imapAccount.identifier, mailbox });
    } catch {
      // Some macOS account records are stale or have no Mail mailbox. Try the next candidate.
      continue;
    }

    const count = Number(mailboxRecord.totalCount || 0);
    // Strictly greater keeps the first candidate when counts are equal.
    if (best === null || count > bestCount) {
      best = { imapAccount, mailboxRecord };
      bestCount = count;
    }
  }

  if (best === null) {
    throw new Error(`Apple Mail mailbox not found for ${email}/${mailbox}`);
  }
  return best;
}
|
|
|
|
/**
 * Returns the first IMAP account record found for an email address.
 *
 * @param {object} options
 * @param {string} options.email - Address used to look up IMAP accounts.
 * @param {string} [options.accountsDbPath] - Path of the accounts database.
 * @returns {Promise<object>} First entry returned by resolveAppleMailImapAccounts.
 * @throws {Error} When no IMAP account matches.
 */
export async function resolveAppleMailImapAccount({ email, accountsDbPath = DEFAULT_ACCOUNTS_DB }) {
  const candidates = await resolveAppleMailImapAccounts({ email, accountsDbPath });
  if (candidates.length > 0) {
    return candidates[0];
  }
  throw new Error(`Apple Mail IMAP account not found for ${email}`);
}
|
|
|
|
/**
 * Looks up every IMAP account record whose username matches `email`
 * (case-insensitively) in the accounts database.
 *
 * Two queries are run: one for IMAP accounts that are children of an account
 * with that username, one for IMAP accounts carrying the username themselves.
 * Direct matches are listed before child matches and duplicate identifiers are
 * dropped, keeping the first occurrence.
 *
 * @param {object} options
 * @param {string} options.email - Username to match.
 * @param {string} [options.accountsDbPath] - Path of the accounts database.
 * @returns {Promise<Array<{identifier: string, email: string, description: string, parentIdentifier: (string|null), source: string}>>}
 * @throws {Error} When neither query returns a row.
 */
export async function resolveAppleMailImapAccounts({ email, accountsDbPath = DEFAULT_ACCOUNTS_DB }) {
  // Escaped once and reused in both statements.
  const emailLiteral = sqlString(email);

  const viaParent = await queryJson(accountsDbPath, `
    select child.ZIDENTIFIER as identifier,
           parent.ZUSERNAME as email,
           parent.ZACCOUNTDESCRIPTION as description,
           parent.ZIDENTIFIER as parentIdentifier,
           'child' as source
    from ZACCOUNT parent
    join ZACCOUNT child on child.ZPARENTACCOUNT = parent.Z_PK
    join ZACCOUNTTYPE childType on child.ZACCOUNTTYPE = childType.Z_PK
    where lower(parent.ZUSERNAME) = lower('${emailLiteral}')
      and childType.ZIDENTIFIER = 'com.apple.account.IMAP'
    order by child.Z_PK
  `);

  const direct = await queryJson(accountsDbPath, `
    select account.ZIDENTIFIER as identifier,
           account.ZUSERNAME as email,
           account.ZACCOUNTDESCRIPTION as description,
           null as parentIdentifier,
           'direct' as source
    from ZACCOUNT account
    join ZACCOUNTTYPE accountType on account.ZACCOUNTTYPE = accountType.Z_PK
    where lower(account.ZUSERNAME) = lower('${emailLiteral}')
      and accountType.ZIDENTIFIER = 'com.apple.account.IMAP'
    order by account.Z_PK
  `);

  const combined = direct.concat(viaParent);
  if (combined.length === 0) {
    throw new Error(`Apple Mail IMAP account not found for ${email}`);
  }

  // Keep only the first row seen for each identifier, preserving order.
  const seen = new Set();
  return combined.filter((row) => {
    if (seen.has(row.identifier)) return false;
    seen.add(row.identifier);
    return true;
  });
}
|
|
|
|
/**
 * Finds the Envelope Index `mailboxes` row for a mailbox of an IMAP account.
 *
 * The candidate names produced by mailboxCandidates are turned into
 * `imap://<account>/<encoded path>` URLs and looked up in order; the first URL
 * that exists wins.
 *
 * @param {object} options
 * @param {string} [options.indexPath] - Path of the Envelope Index database.
 * @param {string} options.imapAccountId - Account identifier used in the URL.
 * @param {string} [options.mailbox='all'] - Mailbox selector or mailbox path.
 * @returns {Promise<{rowid: number, url: string, totalCount: number, unreadCount: number, unseenCount: number}>}
 * @throws {Error} When no candidate URL exists; the message lists up to 20
 *   mailbox URLs of the account, ordered by total_count descending.
 */
export async function resolveMailboxRecord({ indexPath = DEFAULT_ENVELOPE_INDEX, imapAccountId, mailbox = 'all' }) {
  const accountPrefix = `imap://${imapAccountId}/`;

  const urls = [];
  for (const name of mailboxCandidates(mailbox)) {
    urls.push(accountPrefix + encodeMailboxPath(name));
  }

  for (const url of urls) {
    const matches = await queryJson(indexPath, `
      select ROWID as rowid, url, total_count as totalCount, unread_count as unreadCount, unseen_count as unseenCount
      from mailboxes
      where url = '${sqlString(url)}'
      limit 1
    `);
    if (matches.length > 0) {
      return matches[0];
    }
  }

  // Nothing matched: gather the account's mailboxes to make the error actionable.
  const available = await queryJson(indexPath, `
    select ROWID as rowid, url, total_count as totalCount
    from mailboxes
    where url like '${sqlString(accountPrefix)}%'
    order by total_count desc
    limit 20
  `);
  const availableUrls = available.map((row) => row.url).join(', ');
  throw new Error(`Apple Mail mailbox not found for ${imapAccountId}/${mailbox}. Available: ${availableUrls}`);
}
|
|
|
|
/**
 * Reads an `.emlx` file and returns the raw message bytes it contains.
 *
 * The first line of the file is treated as a declared byte count. When it is a
 * positive finite number, that many bytes following the first line are
 * returned (clamped to the end of the file). Otherwise everything after the
 * first line is returned.
 *
 * @param {string} filePath - Path of the `.emlx` file.
 * @returns {Promise<Buffer>} View onto the message bytes.
 * @throws {Error} When the file contains no newline at all.
 */
export async function readEmlxRawMessage(filePath) {
  const bytes = await fs.readFile(filePath);

  const firstLineEnd = bytes.indexOf(0x0a);
  if (firstLineEnd === -1) {
    throw new Error(`invalid emlx file without first line: ${filePath}`);
  }

  const bodyStart = firstLineEnd + 1;
  const declaredSize = Number(bytes.toString('utf8', 0, firstLineEnd).trim());
  const hasUsableSize = Number.isFinite(declaredSize) && declaredSize > 0;

  // Without a usable size the message is taken to run to the end of the file.
  const bodyEnd = hasUsableSize ? Math.min(bodyStart + declaredSize, bytes.length) : bytes.length;
  return bytes.subarray(bodyStart, bodyEnd);
}
|
|
|
|
/**
 * Locates the `<rowid>.emlx` file of a message below a mailbox directory.
 *
 * The bucketed location `Data/<bucket digits...>/Messages/<rowid>.emlx` is
 * probed first, relative to each base directory reported by mailDataBaseDirs.
 * If none exists, the whole mailbox directory is searched recursively.
 *
 * @param {string} sourcePath - Mailbox directory.
 * @param {number|string} rowid - Message row id from the Envelope Index.
 * @returns {Promise<string|null>} Path of the file, or null when not found.
 */
export async function findEmlxByRowId(sourcePath, rowid) {
  const fileName = `${rowid}.emlx`;
  const baseDirs = await mailDataBaseDirs(sourcePath);
  const relativeLocation = path.join('Data', ...bucketPathParts(rowid), 'Messages', fileName);

  for (const baseDir of baseDirs) {
    const candidate = path.join(baseDir, relativeLocation);
    const exists = await fileExists(candidate);
    if (exists) {
      return candidate;
    }
  }

  // Fall back to a full recursive search of the mailbox directory.
  return findFirstFile(sourcePath, fileName);
}
|
|
|
|
/**
 * Computes the directory segments for a message row id.
 *
 * The row id is divided by 1000 (rounded down) and the digits of the result
 * are returned in reverse order, one digit per segment.
 *
 * @param {number|string} rowid - Message row id.
 * @returns {string[]} Reversed digits, or an empty array when the quotient is
 *   0 or not a number.
 */
export function bucketPathParts(rowid) {
  const bucket = Math.floor(Number(rowid) / 1000);
  if (bucket === 0 || Number.isNaN(bucket)) {
    return [];
  }
  return Array.from(String(bucket)).reverse();
}
|
|
|
|
/**
 * Maps a mailbox URL of the form `imap://<account>/<path>` to a directory
 * below `mailRoot`.
 *
 * The account part becomes the first directory. Every non-empty path segment
 * is URI-decoded and becomes a `<segment>.mbox` directory.
 *
 * @param {string} mailRoot - Apple Mail root directory.
 * @param {string} url - Mailbox URL.
 * @returns {string} Joined directory path.
 */
export function mailboxPathForUrl(mailRoot, url) {
  // After removing the scheme, the first segment is the account and the rest the mailbox path.
  const [accountDir, ...segments] = url.replace(/^imap:\/\//, '').split('/');

  const mboxDirs = [];
  for (const segment of segments) {
    if (segment !== '') {
      mboxDirs.push(`${decodeURIComponent(segment)}.mbox`);
    }
  }

  return path.join(mailRoot, accountDir, ...mboxDirs);
}
|
|
|
|
/**
 * Extracts the human-readable mailbox path from a mailbox URL of the form
 * `imap://<account>/<path>`.
 *
 * @param {string} url - Mailbox URL.
 * @returns {string} URI-decoded, non-empty path segments joined with `/`;
 *   an empty string when the URL has no path.
 */
export function decodeMailboxUrl(url) {
  // Drop the scheme, then discard the leading account segment.
  const [, ...segments] = url.replace(/^imap:\/\//, '').split('/');

  const decoded = [];
  for (const segment of segments) {
    if (segment !== '') {
      decoded.push(decodeURIComponent(segment));
    }
  }
  return decoded.join('/');
}
|
|
|
|
/**
 * Recursively collects every directory below `root` whose name ends in `.mbox`.
 *
 * A matching directory is listed before the matches found inside it, and the
 * search continues into every directory whether or not it matched.
 *
 * @param {string} root - Directory to search.
 * @returns {Promise<string[]>} Paths of all `.mbox` directories found.
 */
async function findMailboxDirs(root) {
  const entries = await safeReaddir(root, { withFileTypes: true });
  const collected = [];

  for (const entry of entries) {
    if (entry.isDirectory()) {
      const dirPath = path.join(root, entry.name);
      if (entry.name.endsWith('.mbox')) {
        collected.push(dirPath);
      }
      for (const nestedDir of await findMailboxDirs(dirPath)) {
        collected.push(nestedDir);
      }
    }
  }

  return collected;
}
|
|
|
|
/**
 * Recursively collects the regular files below `root` accepted by `predicate`.
 *
 * @param {string} root - Directory to search.
 * @param {(filePath: string) => boolean} predicate - Called with the full path
 *   of every regular file encountered.
 * @returns {Promise<string[]>} Full paths of the accepted files.
 */
async function findFiles(root, predicate) {
  const entries = await safeReaddir(root, { withFileTypes: true });
  const matches = [];

  for (const entry of entries) {
    const entryPath = path.join(root, entry.name);

    if (entry.isDirectory()) {
      const nested = await findFiles(entryPath, predicate);
      for (const nestedFile of nested) {
        matches.push(nestedFile);
      }
      continue;
    }

    if (entry.isFile() && predicate(entryPath)) {
      matches.push(entryPath);
    }
  }

  return matches;
}
|
|
|
|
/**
 * Depth-first search for the first regular file named `fileName` below `root`.
 *
 * @param {string} root - Directory to search.
 * @param {string} fileName - Exact file name to look for.
 * @returns {Promise<string|null>} Full path of the first match, or null.
 */
async function findFirstFile(root, fileName) {
  const entries = await safeReaddir(root, { withFileTypes: true });

  for (const entry of entries) {
    const entryPath = path.join(root, entry.name);

    if (entry.isFile() && entry.name === fileName) {
      return entryPath;
    }
    if (!entry.isDirectory()) {
      continue;
    }

    const nestedMatch = await findFirstFile(entryPath, fileName);
    if (nestedMatch) {
      return nestedMatch;
    }
  }

  return null;
}
|
|
|
|
/**
 * Lists the directories that may hold a `Data` directory for a mailbox.
 *
 * `sourcePath` itself always comes first, followed by each of its immediate
 * subdirectories that contains an entry named `Data`.
 *
 * @param {string} sourcePath - Mailbox directory.
 * @returns {Promise<string[]>} Candidate base directories.
 */
async function mailDataBaseDirs(sourcePath) {
  const entries = await safeReaddir(sourcePath, { withFileTypes: true });
  const candidates = [sourcePath];

  for (const entry of entries) {
    if (entry.isDirectory()) {
      const childPath = path.join(sourcePath, entry.name);
      const hasData = await fileExists(path.join(childPath, 'Data'));
      if (hasData) {
        candidates.push(childPath);
      }
    }
  }

  return candidates;
}
|
|
|
|
/**
 * Runs a SQL statement through the `sqlite3` command-line tool in JSON mode.
 *
 * @param {string} dbPath - Path of the SQLite database file.
 * @param {string} sql - Statement to execute.
 * @returns {Promise<object[]>} Parsed output, or an empty array when the tool
 *   prints nothing but whitespace.
 */
async function queryJson(dbPath, sql) {
  const args = ['-json', dbPath, sql];
  // Allow up to 20 MiB of output.
  const options = { maxBuffer: 1024 * 1024 * 20 };

  const result = await execFileAsync('sqlite3', args, options);
  const output = result.stdout;
  if (output.trim() === '') {
    return [];
  }
  return JSON.parse(output);
}
|
|
|
|
/**
 * Expands a mailbox selector into the mailbox paths to try, in order.
 *
 * @param {string} [mailbox] - `'all'` (or any falsy value), `'inbox'`, or a
 *   literal mailbox path.
 * @returns {string[]} Candidate mailbox paths.
 */
function mailboxCandidates(mailbox) {
  if (mailbox === 'inbox') {
    return ['INBOX'];
  }
  if (mailbox && mailbox !== 'all') {
    // Anything else is used as a literal mailbox path.
    return [mailbox];
  }
  return ['[Gmail]/Todos', '[Gmail]/All Mail', 'INBOX'];
}
|
|
|
|
/**
 * URI-encodes each `/`-separated segment of a mailbox path while keeping the
 * separators themselves intact.
 *
 * @param {string} mailboxPath - Mailbox path such as `[Gmail]/All Mail`.
 * @returns {string} Encoded path.
 */
function encodeMailboxPath(mailboxPath) {
  const encodedSegments = [];
  for (const segment of mailboxPath.split('/')) {
    encodedSegments.push(encodeURIComponent(segment));
  }
  return encodedSegments.join('/');
}
|
|
|
|
/**
 * Escapes a value for use inside a single-quoted SQL string literal by
 * doubling every single quote.
 *
 * @param {*} value - Value to escape; converted with `String()` first.
 * @returns {string} Escaped text (without surrounding quotes).
 */
function sqlString(value) {
  const text = String(value);
  return text.split("'").join("''");
}
|
|
|
|
/**
 * Reports whether a path can be accessed.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} true when `fs.access` succeeds, false when it
 *   fails for any reason.
 */
async function fileExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
|
|
|
|
/**
 * Reads a directory, treating every failure as "no entries".
 *
 * @param {string} dir - Directory to read.
 * @param {object} [options] - Options forwarded to `fs.readdir`.
 * @returns {Promise<Array>} The directory entries, or an empty array when
 *   reading fails for any reason.
 */
async function safeReaddir(dir, options) {
  return fs.readdir(dir, options).catch(() => []);
}
|
|
|
|
/**
 * Builds a `/`-separated label for a path relative to a root directory,
 * leaving out empty segments and the `Data` and `Messages` segments.
 *
 * @param {string} itemPath - Path of the mailbox directory or message file.
 * @param {string} rootPath - Directory the label is relative to.
 * @returns {string} Label such as `INBOX.mbox/1/5.emlx`.
 */
function mailboxLabel(itemPath, rootPath) {
  const skipped = new Set(['', 'Data', 'Messages']);
  const segments = path.relative(rootPath, itemPath).split(path.sep);
  return segments.filter((segment) => !skipped.has(segment)).join('/');
}
|