import fs from 'node:fs';

const CLIENT_ID = '1idZRzFTMDBLO5z-dbd';
const LEDGERS = ['web/ledger.json', 'web/ledger-backfill.json'];
const OUTPUT_FILE = 'web/merchant-logos.json';

/** Only transactions with one of these `flow_type` values are considered for logos. */
const LOGO_FLOW_TYPES = ['expense', 'fee'];

/** Pause between consecutive Brandfetch search requests, in milliseconds. */
const SEARCH_DELAY_MS = 110;

/** Maximum number of unresolved terms echoed in the final report. */
const MAX_MISSED_REPORTED = 20;

// High-frequency / important brands: regex on the RAW upper description -> canonical domain.
// These are resolved directly (logo via Brandfetch CDN by domain) with high precision.
// Order matters: the first matching row wins (e.g. "Uber Eats" must precede "Uber").
// NOTE(review): `\bMP\b` makes every "MP *MERCHANT" description resolve to Mercado Pago,
// so the MP/MERPAGO prefix stripping in brandTerm() is never reached for those — confirm intent.
const ALIAS = [
  [/UBER\s*EATS|UBR.*EATS|PS UBER EATS/, 'Uber Eats', 'ubereats.com'],
  [/\bUBER\b|\bUBR\b/, 'Uber', 'uber.com'],
  [/RAPPI/, 'Rappi', 'rappi.com'],
  [/MERCADO ?PAGO|MERPAGO|\bMP\b/, 'Mercado Pago', 'mercadopago.cl'],
  [/APPLE/, 'Apple', 'apple.com'],
  [/NETFLIX/, 'Netflix', 'netflix.com'],
  [/SPOTIFY/, 'Spotify', 'spotify.com'],
  [/\bTEMU\b/, 'Temu', 'temu.com'],
  [/ALIEXPRESS/, 'AliExpress', 'aliexpress.com'],
  [/\bAMAZON\b/, 'Amazon', 'amazon.com'],
  [/\bSHEIN\b/, 'Shein', 'shein.com'],
  [/CABIFY/, 'Cabify', 'cabify.com'],
  [/\bDIDI\b/, 'DiDi', 'didiglobal.com'],
  [/PAYPAL/, 'PayPal', 'paypal.com'],
  [/PAYSEND/, 'Paysend', 'paysend.com'],
  [/\bMC ?DONALD/, "McDonald's", 'mcdonalds.com'],
  [/\bKFC\b/, 'KFC', 'kfc.com'],
  [/STA ISABEL|SANTA ISABEL/, 'Santa Isabel', 'santaisabel.cl'],
  [/\bJUMBO\b/, 'Jumbo', 'jumbo.cl'],
  [/HIP ?LIDER|\bLIDER\b/, 'Lider', 'lider.cl'],
  [/TOTTUS/, 'Tottus', 'tottus.cl'],
  [/UNIMARC/, 'Unimarc', 'unimarc.cl'],
  [/\bCOPEC\b/, 'Copec', 'copec.cl'],
  [/\bSHELL\b/, 'Shell', 'shell.com'],
  [/FALABELLA/, 'Falabella', 'falabella.com'],
  [/RIPLEY/, 'Ripley', 'ripley.cl'],
  [/SODIMAC/, 'Sodimac', 'sodimac.cl'],
  [/\bPARIS\b/, 'Paris', 'paris.cl'],
  [/\bENTEL\b/, 'Entel', 'entel.cl'],
  [/\bWOM\b/, 'WOM', 'wom.cl'],
  [/MOVISTAR/, 'Movistar', 'movistar.cl'],
  [/\bCLARO\b/, 'Claro', 'claro.cl'],
  [/TURBUS|TUR BUS/, 'Turbus', 'turbus.cl'],
  [/CRUZ VERDE/, 'Cruz Verde', 'cruzverde.cl'],
  [/SALCOBRAND/, 'Salcobrand', 'salcobrand.cl'],
  [/FARMACIA.*AHUMADA|AHUMADA/, 'Farmacias Ahumada', 'farmaciasahumada.cl'],
  [/INTEGRAMEDICA/, 'IntegraMédica', 'integramedica.cl'],
  [/\bGOOGLE\b/, 'Google', 'google.com'],
  [/MICROSOFT/, 'Microsoft', 'microsoft.com'],
  [/\bOPENAI\b/, 'OpenAI', 'openai.com'],
  [/ANTHROPIC/, 'Anthropic', 'anthropic.com'],
  [/HETZNER/, 'Hetzner', 'hetzner.com'],
  [/SPOTMINDERS/, 'Spotminders', 'spotminders.com'],
  [/ADIDAS/, 'adidas', 'adidas.com'],
  [/NEW BALANCE/, 'New Balance', 'newbalance.com'],
  // Leading word boundary so "ENEL" embedded in another word (PANEL, CHANEL, ...) is not
  // attributed to Enel; "ENEL ..." and "ENELxxx" still match.
  [/\bENEL/, 'Enel', 'enel.cl'],
  [/SERVIPAG/, 'Servipag', 'servipag.com'],
  [/SENCILLITO/, 'Sencillito', 'sencillito.com'],
];

/** Descriptions matching this are skipped by brandTerm() (it returns null for them). */
const NON_MERCHANT_RE =
  /INTERES|IMPUEST|COMISION|COM\.|MANTENCION|PASAJE QR|RENDIMIENTO|COBRANZA|\bMORA\b|DL ?3475|CARGO INTERES|GASTO DE|TRASPASO|\bABONO\b|\bGIRO\b|RETIRO|TEF |TRANSF|SEGURO|DIVISAS|PREPAGO EN CUOTAS|PAGO (AUTOMATICO|APP|TARJETA)|O\.GERENCIA/;

/** Terms that are pure boilerplate once everything else has been stripped. */
const GENERIC_TERM_RE = /^(COMPRA|PAGO|TARJETA|NACIONAL|INTERNET|VENTA|DIGITAL|VIRTUAL|PUN|WEB)$/;

/**
 * Resolve a transaction description against the hand-maintained ALIAS table.
 *
 * @param {string|null|undefined} descRaw - Raw transaction description.
 * @returns {{name: string, domain: string, via: 'alias'}|null} The first matching
 *   alias row, or null when no row matches (including for empty/missing input).
 */
function aliasOf(descRaw) {
  const upper = (descRaw || '').toUpperCase();
  for (const [re, name, domain] of ALIAS) {
    if (re.test(upper)) return { name, domain, via: 'alias' };
  }
  return null;
}

/**
 * Reduce a raw transaction description to a search term for brand lookup.
 *
 * The description is upper-cased, then leading purchase/payment words, leading
 * gateway prefixes, purchase boilerplate, anything from a "·" or a
 * ".COM/.CO/.CL" suffix onwards, and a fixed list of location/status words are
 * stripped; separators are collapsed to single spaces.
 *
 * @param {string|null|undefined} descRaw - Raw transaction description.
 * @returns {string|null} The cleaned term (at most 40 characters), or null when the
 *   description matches NON_MERCHANT_RE, is shorter than 3 characters after cleaning,
 *   or is a single generic word.
 */
function brandTerm(descRaw) {
  let s = (descRaw || '').toUpperCase();
  if (NON_MERCHANT_RE.test(s)) return null;

  // Leading purchase/payment word, then leading payment-gateway prefix.
  s = s.replace(/^(PS|PAGO|PAGO:|PREPAGO|COMPRA)\s+/i, '');
  s = s.replace(
    /^(PAYU|MP|CV ?\d*|DL|PAYSCAN|MERPAGO|MERCADOPAGO|SUMUP|TUU|MACH|WEBPAY|ONECLICK|KUSHKI|SERVIPAG\.COM)\s*[\*—\-·:]*\s*/i,
    '',
  );
  // Purchase-type boilerplate anywhere in the string.
  s = s.replace(
    /\b(COMPRA POR INTERNET|COMPRA NACIONAL|COMPRA INTERNAC\w*|COMPRA( \d+)? CUOTAS( PRECIO CONTADO)?|PRECIO CONTADO|RECARGA ONLINE|WEB ?\d*|ONLINE)\b/gi,
    ' ',
  );
  // Drop everything from a "·", a domain suffix, or a "HELP." marker onwards.
  s = s
    .replace(/·.*/, ' ')
    .replace(/\.(COM|CO|CL)\b.*/i, ' ')
    .replace(/HELP\..*/i, ' ');
  // Location / status words.
  s = s.replace(/\b(PENDING|PEND|SANTIAGO|CL|LAS CONDES|PROVIDENCIA|VITACURA|CURICO)\b/g, ' ');
  // Remaining separators -> spaces, collapse runs, trim.
  s = s
    .replace(/[\*—·|\/]/g, ' ')
    .replace(/\s{2,}/g, ' ')
    .trim();

  if (s.length < 3) return null;
  if (GENERIC_TERM_RE.test(s)) return null;
  return s.slice(0, 40);
}

/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Query the Brandfetch search endpoint for `q`.
 *
 * Best-effort: any failure (network error, non-OK status, invalid JSON,
 * non-array payload) yields null. Network/HTTP failures are reported on stderr
 * so an outage can be told apart from "no match".
 *
 * @param {string} q - Search term.
 * @returns {Promise<Array<object>|null>} The parsed JSON array, or null.
 */
async function search(q) {
  const url = `https://api.brandfetch.io/v2/search/${encodeURIComponent(q)}?c=${CLIENT_ID}`;
  try {
    const res = await fetch(url);
    if (!res.ok) {
      console.warn(`brandfetch search for "${q}" failed: HTTP ${res.status}`);
      return null;
    }
    const body = await res.json();
    return Array.isArray(body) ? body : null;
  } catch (err) {
    console.warn(`brandfetch search for "${q}" failed: ${err?.message ?? err}`);
    return null;
  }
}

/**
 * Upper-case `x` and strip everything that is not A-Z or 0-9.
 *
 * @param {string|null|undefined} x
 * @returns {string} Normalised string ('' for missing input).
 */
function norm(x) {
  return (x || '').toUpperCase().replace(/[^A-Z0-9]/g, '');
}

/**
 * Choose the first acceptable search result for `term`.
 *
 * A result is skipped unless it is `verified`, has an `icon`, and does not
 * have a `qualityScore` below 0.6. It is accepted when its normalised name
 * contains the term's first word (only words longer than 2 characters count), or when
 * the normalised name and normalised term contain one another.
 *
 * Results whose name normalises to the empty string are skipped, and an empty
 * normalised term never counts as a whole-term match: `String.prototype.includes('')`
 * is always true, so without these guards a nameless result matched every term.
 *
 * @param {string} term - Search term produced by brandTerm().
 * @param {Array<object>|null} results - Search results; fields read are
 *   `verified`, `icon`, `qualityScore`, `name`, `domain`.
 * @returns {{name: string, domain: string, via: 'search'}|null}
 */
function pick(term, results) {
  if (!results) return null;

  const wholeTerm = norm(term);
  const words = term
    .toUpperCase()
    .split(/\s+/)
    .filter((w) => w.length > 2);
  const firstWord = words.length ? norm(words[0]) : '';

  for (const brand of results) {
    if (!brand.verified || !brand.icon || brand.qualityScore < 0.6) continue;

    const brandName = norm(brand.name);
    if (!brandName) continue;

    const firstWordMatch = firstWord.length > 2 && brandName.includes(firstWord);
    const wholeMatch =
      wholeTerm.length > 0 && (brandName.includes(wholeTerm) || wholeTerm.includes(brandName));
    if (firstWordMatch || wholeMatch) {
      return { name: brand.name, domain: brand.domain, via: 'search' };
    }
  }
  return null;
}

/**
 * Scan the ledgers, resolve merchants to brand domains (alias table first,
 * Brandfetch search for the rest), write OUTPUT_FILE and print a report.
 *
 * When none of the LEDGERS files exist, nothing is written so an existing
 * OUTPUT_FILE is not replaced by an empty map.
 */
async function main() {
  const aliasHits = new Map(); // domain -> alias match
  const terms = new Map(); // search term -> number of transactions using it
  let totalTx = 0;
  let aliasTx = 0;
  let ledgersRead = 0;

  for (const ledgerFile of LEDGERS) {
    if (!fs.existsSync(ledgerFile)) continue;
    ledgersRead++;
    const ledger = JSON.parse(fs.readFileSync(ledgerFile, 'utf8'));
    for (const statement of ledger.statements ?? []) {
      for (const tx of statement.transactions ?? []) {
        if (!LOGO_FLOW_TYPES.includes(tx.flow_type)) continue;
        totalTx++;

        const alias = aliasOf(tx.description);
        if (alias) {
          aliasHits.set(alias.domain, alias);
          aliasTx++;
          continue;
        }

        const term = brandTerm(tx.description);
        if (!term) continue;
        terms.set(term, (terms.get(term) || 0) + 1);
      }
    }
  }

  if (ledgersRead === 0) {
    console.warn(`No ledger files found (${LEDGERS.join(', ')}); ${OUTPUT_FILE} left untouched.`);
    return;
  }

  const out = {};
  for (const alias of aliasHits.values()) {
    out[alias.name.toUpperCase()] = { name: alias.name, domain: alias.domain, via: 'alias' };
  }

  let searchHits = 0;
  let searchMisses = 0;
  const missed = [];
  // Most frequent terms first, so the "still missed" list shows the highest-impact gaps.
  const byFrequency = [...terms.entries()].sort((a, b) => b[1] - a[1]);
  for (const [term, count] of byFrequency) {
    const match = pick(term, await search(term));
    if (match) {
      out[term] = match;
      searchHits++;
    } else {
      searchMisses++;
      if (missed.length < MAX_MISSED_REPORTED) missed.push(term + ' (' + count + ')');
    }
    await sleep(SEARCH_DELAY_MS);
  }

  // build coverage: how many TRANSACTIONS get a logo
  // Every alias-matched transaction has an entry in `out`; every other transaction is
  // covered exactly when its term has one. The per-term counts gathered above therefore
  // give the same number as re-scanning the ledgers.
  let covered = aliasTx;
  for (const [term, count] of terms) {
    if (Object.hasOwn(out, term)) covered += count;
  }
  const coveragePct = totalTx > 0 ? Math.round((covered / totalTx) * 100) : 0;

  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(out, null, 1));

  console.log('alias brands:', aliasHits.size, '| search resolved:', searchHits, '| search missed:', searchMisses);
  console.log('TOTAL logo entries:', Object.keys(out).length);
  console.log('TXN COVERAGE: ' + covered + ' / ' + totalTx + ' expense+fee txns (' + coveragePct + '%)');
  console.log('\n--- still missed (top) ---');
  missed.forEach((m) => console.log(' ' + m));
}

await main();