Add 2019-2024 backfill ledger (separate) + ?ledger= dashboard param
- parsers/parse_backfill.py: reads the decrypted staging tree and parses Santander / Banco de Chile / Tenpo / BancoEstado / CopecPay into a SEPARATE web/ledger-backfill.json (2363 txns, 2018-01 to 2024-12, 480 statements)
- web/engine.js: dashboard now honors ?ledger=<file>.json, so the backfill can be reviewed via dashboard.html?ledger=ledger-backfill.json without touching the live ledger.json
- .gitignore: exclude documents/ (decrypted bank PDFs must never be committed)
This commit is contained in:
parent
6d09b0673d
commit
bdda30afa1
|
|
@ -4,3 +4,5 @@ data/mail-oauth/
|
|||
data/*.state.json
|
||||
.env
|
||||
.env.*
|
||||
documents/
|
||||
documents/decrypted-backfill/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env python3
|
||||
'''Parse the 2019-2024 backfill staging tree into a SEPARATE ledger file.
|
||||
Reads documents/decrypted-backfill/<Bank>/, writes web/ledger-backfill.json.
|
||||
Does NOT touch the live web/ledger.json.'''
|
||||
import glob, json, os, importlib.util, sys
|
||||
# Repository root: the parent of the directory that contains this script.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Put <ROOT>/parsers first on the import path so the helper modules below
# resolve by bare name no matter where the script is launched from.
sys.path.insert(0, os.path.join(ROOT, 'parsers'))
import common
import classify

# Staging tree of decrypted statements; main() reads one sub-directory per
# entry in PARSERS from here.
STAGE = os.path.join(ROOT, 'documents/decrypted-backfill')
|
||||
def load(m):
    '''Load ``<ROOT>/parsers/<m>.py`` by file path and return the module.

    The module is built from an explicit file-location spec and executed
    on every call; the resulting module object is handed back to the caller.

    Args:
        m: Bare module name (no ``.py`` suffix) of a file in ``parsers/``.

    Returns:
        The freshly executed module object.
    '''
    source_path = os.path.join(ROOT, 'parsers', m + '.py')
    spec = importlib.util.spec_from_file_location(m, source_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
||||
|
||||
# (staging sub-directory, parser module name) pairs, processed in this order.
PARSERS = [
    ('Santander', 'santander'),
    ('Banco_de_Chile', 'banco_de_chile'),
    ('BancoEstado', 'bancoestado'),
    ('CopecPay', 'copecpay'),
    ('Tenpo', 'tenpo'),
]

# Optional per-sub-directory override of the parser entry-point name;
# sub-directories not listed here fall back to 'parse_file' in main().
PARSE_FN = {}
|
||||
|
||||
def gid_map(path='/tmp/unlock-results-backfill.json'):
    '''Build a ``sha8 -> gmail_id`` lookup from an unlock-results JSON file.

    The file is expected to hold a JSON object whose ``ok`` key is a list of
    objects. Every entry with a truthy ``sha8`` contributes one mapping; its
    ``gmail_id`` is stringified, and a missing ``gmail_id`` becomes ``''``.
    When the same ``sha8`` appears more than once, the last entry wins.

    Args:
        path: Location of the results file. Defaults to the path this
            script has always read from.

    Returns:
        dict mapping sha8 strings to gmail-id strings; empty when the file
        does not exist.
    '''
    try:
        # Context manager so the handle is closed promptly; JSON is UTF-8.
        with open(path, encoding='utf-8') as fh:
            results = json.load(fh)
    except FileNotFoundError:
        # A missing results file is normal: there are simply no ids to map.
        return {}
    return {
        entry['sha8']: str(entry.get('gmail_id', ''))
        for entry in results.get('ok', [])
        if entry.get('sha8')
    }
|
||||
|
||||
def _dump_json(path, payload, **dump_kwargs):
    '''Write *payload* to *path* as UTF-8 JSON and close the file.'''
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned rather than left to the platform default.
    with open(path, 'w', encoding='utf-8') as fh:
        json.dump(payload, fh, ensure_ascii=False, **dump_kwargs)


def _summarise(statements):
    '''Return ``(flow_summary, real_totals)`` for a list of statements.

    ``flow_summary`` maps each ``flow_type`` (``'other'`` when absent) to its
    transaction count and summed amount. ``real_totals`` splits the summed
    amounts into internal transfers, credits (income) and everything else.
    '''
    from collections import defaultdict

    flow = defaultdict(lambda: [0, 0])
    income = outflow = internal = 0
    for stmt in statements:
        for txn in stmt['transactions']:
            bucket = flow[txn.get('flow_type', 'other')]
            bucket[0] += 1
            bucket[1] += txn['amount']
            if txn.get('internal'):
                internal += txn['amount']
            elif txn['direction'] == 'credit':
                income += txn['amount']
            else:
                outflow += txn['amount']
    flow_summary = {k: {'count': v[0], 'amount': v[1]} for k, v in flow.items()}
    real_totals = {'income': income, 'expense_outflow': outflow, 'internal': internal}
    return flow_summary, real_totals


def main():
    '''Parse every staged PDF and write the separate backfill ledger.

    For each (sub-directory, parser module) pair in PARSERS:
      * load the parser module and parse each ``*.pdf`` / ``*.PDF`` file;
        a file whose parser raises is reported with ``FAIL`` and skipped;
      * enrich, clean and classify every transaction;
      * attach the Gmail id looked up by ``statement_id`` (keeping the
        parser-supplied one when there is no match);
      * write the statement to ``data/ledger-backfill/<subdir>/<id>.json``.

    Finally writes the combined ``web/ledger-backfill.json`` with per-bank
    counts, all statements, a per-flow-type summary and real/internal totals,
    and prints a short report.
    '''
    gids = gid_map()
    web = {'banks': {}, 'statements': []}
    grand = 0
    for subdir, modname in PARSERS:
        mod = load(modname)
        d_in = os.path.join(STAGE, subdir)
        # set(): where glob matching is case-insensitive both patterns return
        # the same files, which would otherwise be parsed and counted twice.
        files = sorted(set(
            glob.glob(os.path.join(d_in, '*.pdf'))
            + glob.glob(os.path.join(d_in, '*.PDF'))
        ))
        outdir = os.path.join(ROOT, 'data/ledger-backfill', subdir)
        os.makedirs(outdir, exist_ok=True)
        fn = getattr(mod, PARSE_FN.get(subdir, 'parse_file'))
        n_txn = n_doc = 0
        for f in files:
            # Best effort: one unparseable PDF must not abort the whole run.
            try:
                d = fn(f)
            except Exception as e:
                print(f' FAIL {os.path.basename(f)}: {e}')
                continue
            for t in d['transactions']:
                common.enrich(t)
                t['description'] = common.clean_desc(t['description'])
                classify.classify(t)
            d['gmail_id'] = gids.get(d['statement_id'], d.get('gmail_id'))
            _dump_json(os.path.join(outdir, d['statement_id'] + '.json'), d, indent=1)
            # The web copy carries a pdf_url; the per-statement file does not.
            dw = dict(d)
            dw['pdf_url'] = '/' + d['source_pdf']
            web['statements'].append(dw)
            n_txn += len(d['transactions'])
            n_doc += 1
        web['banks'][subdir] = {'docs': n_doc, 'txns': n_txn}
        grand += n_txn
        print(f'{subdir}: {n_doc} docs, {n_txn} txns')
    web['flow_summary'], web['real_totals'] = _summarise(web['statements'])
    _dump_json(os.path.join(ROOT, 'web/ledger-backfill.json'), web)
    ri = web['real_totals']['income']
    ro = web['real_totals']['expense_outflow']
    ia = web['real_totals']['internal']
    print(f'TOTAL: {grand} txns -> web/ledger-backfill.json')
    print(f'REAL in ${ri:,} REAL out ${ro:,} INTERNAL ${ia:,}')
|
||||
|
||||
# Run the backfill only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
|
|
@ -103,7 +103,8 @@
|
|||
let RAW = null, TX = [], MONTHS = [], BANKS = [], ACCT_LAST4 = new Set();
|
||||
|
||||
async function load() {
|
||||
const res = await fetch('ledger.json');
|
||||
const _lp = new URLSearchParams(location.search).get('ledger');
|
||||
const res = await fetch(_lp && /^[A-Za-z0-9_.-]+\.json$/.test(_lp) ? _lp : 'ledger.json');
|
||||
RAW = await res.json();
|
||||
TX = [];
|
||||
for (const s of RAW.statements) {
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue