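// Builds web/merchant-logos.json: resolves ledger transaction descriptions to
// brand domains (alias table first, then Brandfetch search) and reports coverage.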
import fs from 'node:fs';
/** Brandfetch client id; sent as the `c` query parameter of every search request. */
const CLIENT_ID = '1idZRzFTMDBLO5z-dbd';
/** Ledger JSON files to scan; a path that does not exist on disk is skipped. */
const LEDGERS = [
  'web/ledger.json',
  'web/ledger-backfill.json',
];

// High-frequency / important brands: regex on the RAW upper-cased description -> canonical domain.
// These are resolved directly (logo via Brandfetch CDN by domain) with high precision.
/**
 * Alias table. Each row is [pattern, display name, canonical domain].
 *
 * Patterns are tested against the upper-cased description in array order and
 * the first hit wins, so a more specific pattern must stay ahead of a broader
 * one that would also match (e.g. "Uber Eats" before "Uber").
 */
const ALIAS = [
  [/UBER\s*EATS|UBR.*EATS|PS UBER EATS/, 'Uber Eats', 'ubereats.com'],
  [/\bUBER\b|\bUBR\b/, 'Uber', 'uber.com'],
  [/RAPPI/, 'Rappi', 'rappi.com'],
  [/MERCADO ?PAGO|MERPAGO|\bMP\b/, 'Mercado Pago', 'mercadopago.cl'],
  [/APPLE/, 'Apple', 'apple.com'],
  [/NETFLIX/, 'Netflix', 'netflix.com'],
  [/SPOTIFY/, 'Spotify', 'spotify.com'],
  [/\bTEMU\b/, 'Temu', 'temu.com'],
  [/ALIEXPRESS/, 'AliExpress', 'aliexpress.com'],
  [/\bAMAZON\b/, 'Amazon', 'amazon.com'],
  [/\bSHEIN\b/, 'Shein', 'shein.com'],
  [/CABIFY/, 'Cabify', 'cabify.com'],
  [/\bDIDI\b/, 'DiDi', 'didiglobal.com'],
  [/PAYPAL/, 'PayPal', 'paypal.com'],
  [/PAYSEND/, 'Paysend', 'paysend.com'],
  [/\bMC ?DONALD/, "McDonald's", 'mcdonalds.com'],
  [/\bKFC\b/, 'KFC', 'kfc.com'],
  [/STA ISABEL|SANTA ISABEL/, 'Santa Isabel', 'santaisabel.cl'],
  [/\bJUMBO\b/, 'Jumbo', 'jumbo.cl'],
  [/HIP ?LIDER|\bLIDER\b/, 'Lider', 'lider.cl'],
  [/TOTTUS/, 'Tottus', 'tottus.cl'],
  [/UNIMARC/, 'Unimarc', 'unimarc.cl'],
  [/\bCOPEC\b/, 'Copec', 'copec.cl'],
  [/\bSHELL\b/, 'Shell', 'shell.com'],
  [/FALABELLA/, 'Falabella', 'falabella.com'],
  [/RIPLEY/, 'Ripley', 'ripley.cl'],
  [/SODIMAC/, 'Sodimac', 'sodimac.cl'],
  [/\bPARIS\b/, 'Paris', 'paris.cl'],
  [/\bENTEL\b/, 'Entel', 'entel.cl'],
  [/\bWOM\b/, 'WOM', 'wom.cl'],
  [/MOVISTAR/, 'Movistar', 'movistar.cl'],
  [/\bCLARO\b/, 'Claro', 'claro.cl'],
  [/TURBUS|TUR BUS/, 'Turbus', 'turbus.cl'],
  [/CRUZ VERDE/, 'Cruz Verde', 'cruzverde.cl'],
  [/SALCOBRAND/, 'Salcobrand', 'salcobrand.cl'],
  [/FARMACIA.*AHUMADA|AHUMADA/, 'Farmacias Ahumada', 'farmaciasahumada.cl'],
  [/INTEGRAMEDICA/, 'IntegraMédica', 'integramedica.cl'],
  [/\bGOOGLE\b/, 'Google', 'google.com'],
  [/MICROSOFT/, 'Microsoft', 'microsoft.com'],
  [/\bOPENAI\b/, 'OpenAI', 'openai.com'],
  [/ANTHROPIC/, 'Anthropic', 'anthropic.com'],
  [/HETZNER/, 'Hetzner', 'hetzner.com'],
  [/SPOTMINDERS/, 'Spotminders', 'spotminders.com'],
  [/ADIDAS/, 'adidas', 'adidas.com'],
  [/NEW BALANCE/, 'New Balance', 'newbalance.com'],
  [/ENEL/, 'Enel', 'enel.cl'],
  [/SERVIPAG/, 'Servipag', 'servipag.com'],
  [/SENCILLITO/, 'Sencillito', 'sencillito.com'],
];
/**
 * Resolve a raw transaction description against the ALIAS table.
 *
 * @param {string|null|undefined} descRaw Raw description; anything that is not
 *   a string is treated as empty instead of throwing.
 * @returns {{name: string, domain: string, via: 'alias'}|null} The first ALIAS
 *   row (in table order) whose pattern matches, or null when none does.
 */
function aliasOf(descRaw) {
  const raw = typeof descRaw === 'string' ? descRaw : '';
  // Fold diacritics before matching: the ALIAS patterns are plain ASCII, so an
  // accented spelling such as "IntegraMédica" would otherwise never match.
  const text = raw.normalize('NFD').replace(/\p{M}+/gu, '').toUpperCase();
  for (const [pattern, name, domain] of ALIAS) {
    if (pattern.test(text)) {
      return { name, domain, via: 'alias' };
    }
  }
  return null;
}
/**
 * Derive a short, upper-cased brand search term from a raw transaction
 * description by stripping boilerplate around the merchant name.
 *
 * @param {string|null|undefined} descRaw Raw description; non-strings are
 *   treated as empty.
 * @returns {string|null} The cleaned term (at most 40 characters, no
 *   surrounding whitespace), or null when the description matches one of the
 *   non-merchant keywords, is shorter than 3 characters after cleaning, or is
 *   only a generic word such as "COMPRA" or "WEB".
 */
function brandTerm(descRaw) {
  let s = (typeof descRaw === 'string' ? descRaw : '').toUpperCase();

  // Descriptions carrying these keywords (interest, taxes, commissions,
  // transfers, withdrawals, insurance, ...) are rejected outright.
  if (/INTERES|IMPUEST|COMISION|COM\.|MANTENCION|PASAJE QR|RENDIMIENTO|COBRANZA|\bMORA\b|DL ?3475|CARGO INTERES|GASTO DE|TRASPASO|\bABONO\b|\bGIRO\b|RETIRO|TEF |TRANSF|SEGURO|DIVISAS|PREPAGO EN CUOTAS|PAGO (AUTOMATICO|APP|TARJETA)|O\.GERENCIA/.test(s)) {
    return null;
  }

  // Drop purchase-channel phrases BEFORE the leading-keyword strip below.
  // In the opposite order the leading "COMPRA " was removed first, so
  // "COMPRA NACIONAL FOO" could no longer match and came out as "NACIONAL FOO".
  s = s
    .replace(/\b(COMPRA POR INTERNET|COMPRA NACIONAL|COMPRA INTERNAC\w*|COMPRA( \d+)? CUOTAS( PRECIO CONTADO)?|PRECIO CONTADO|RECARGA ONLINE|WEB ?\d*|ONLINE)\b/gi, ' ')
    .trimStart(); // keep the ^-anchored strips below effective

  // Leading generic keyword, then a leading prefix token (presumably payment
  // processors / gateways) together with its separator characters.
  s = s.replace(/^(PS|PAGO|PAGO:|PREPAGO|COMPRA)\s+/i, '');
  s = s.replace(/^(PAYU|MP|CV ?\d*|DL|PAYSCAN|MERPAGO|MERCADOPAGO|SUMUP|TUU|MACH|WEBPAY|ONECLICK|KUSHKI|SERVIPAG\.COM)\s*[\*—\-·:]*\s*/i, '');

  // Cut everything from a middle dot, from a .COM/.CO/.CL suffix, or from "HELP.".
  s = s.replace(/·.*/, ' ').replace(/\.(COM|CO|CL)\b.*/i, ' ').replace(/HELP\..*/i, ' ');

  // Remove status and location words that say nothing about the merchant.
  s = s.replace(/\b(PENDING|PEND|SANTIAGO|CL|LAS CONDES|PROVIDENCIA|VITACURA|CURICO)\b/g, ' ');

  // Separators become spaces; collapse runs of whitespace.
  s = s.replace(/[\*—·|\/]/g, ' ').replace(/\s{2,}/g, ' ').trim();

  if (s.length < 3) return null;
  if (/^(COMPRA|PAGO|TARJETA|NACIONAL|INTERNET|VENTA|DIGITAL|VIRTUAL|PUN|WEB)$/.test(s)) return null;

  // The 40-character cut can land right after a space; do not keep it.
  return s.slice(0, 40).trimEnd();
}
// Pass 1: scan every ledger and split expense/fee transactions into
// alias-resolved merchants and free-text brand terms still to be searched.
const aliasHits = new Map(); // domain -> alias match ({name, domain, via})
const terms = new Map(); // brand term -> number of transactions yielding it
let totalTx = 0; // expense + fee transactions seen
let aliasTx = 0; // of those, how many were resolved by the alias table

for (const ledgerPath of LEDGERS) {
  if (!fs.existsSync(ledgerPath)) continue;
  const ledger = JSON.parse(fs.readFileSync(ledgerPath, 'utf8'));

  // A ledger without `statements`, or a statement without `transactions`,
  // contributes nothing instead of aborting the whole run.
  for (const statement of ledger.statements ?? []) {
    for (const tx of statement.transactions ?? []) {
      if (!['expense', 'fee'].includes(tx.flow_type)) continue;
      totalTx++;

      const alias = aliasOf(tx.description);
      if (alias) {
        aliasHits.set(alias.domain, alias);
        aliasTx++;
        continue;
      }

      const term = brandTerm(tx.description);
      if (!term) continue;
      terms.set(term, (terms.get(term) || 0) + 1);
    }
  }
}
/** Returns a promise that settles (with no value) once a `ms`-millisecond timer fires. */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Query the Brandfetch search endpoint for `q`.
 *
 * Best effort: never throws. A non-OK HTTP status or a network/parse error is
 * reported with console.warn, so a failing API (outage, rate limit) can be
 * told apart from a term that simply has no results.
 *
 * @param {string} q Search term; URL-encoded into the request path.
 * @returns {Promise<Array|null>} The parsed JSON body when it is an array,
 *   otherwise null (failed request, non-array body, or any thrown error).
 */
async function search(q) {
  try {
    const url = 'https://api.brandfetch.io/v2/search/' + encodeURIComponent(q) + '?c=' + CLIENT_ID;
    const res = await fetch(url);
    if (!res.ok) {
      console.warn(`brandfetch search failed for "${q}": HTTP ${res.status}`);
      return null;
    }
    const payload = await res.json();
    return Array.isArray(payload) ? payload : null;
  } catch (err) {
    console.warn(`brandfetch search failed for "${q}": ${err?.message ?? err}`);
    return null;
  }
}
/**
 * Comparison key for a name: upper-cased, with every character other than
 * A-Z and 0-9 removed. Falsy input gives the empty string.
 */
function norm(x) {
  const text = x || '';
  const upper = text.toUpperCase();
  return upper.replace(/[^A-Z0-9]/g, '');
}
/**
 * Choose the first acceptable search result for a brand term.
 *
 * A result is considered only when it is verified, has an icon and its
 * qualityScore is not below 0.6. It is accepted when, comparing norm() keys,
 * the brand name contains the term's first word longer than two characters,
 * or the brand name and the whole term contain one another.
 *
 * @param {string} term Brand term being resolved.
 * @param {Array<object>|null} results Search results, or null.
 * @returns {{name: string, domain: string, via: 'search'}|null} The first
 *   accepted result, or null when there is none.
 */
function pick(term, results) {
  if (!results) return null;

  // Loop-invariant keys, computed once.
  const termKey = norm(term);
  const firstWord = term.toUpperCase().split(/\s+/).find((w) => w.length > 2);
  const firstWordKey = firstWord ? norm(firstWord) : '';

  for (const brand of results) {
    if (!brand || !brand.verified || !brand.icon || brand.qualityScore < 0.6) continue;

    const brandKey = norm(brand.name);
    // An empty key (missing name, or a name with no A-Z/0-9 characters) is a
    // substring of every string and used to match any term; skip it.
    if (!brandKey) continue;

    const firstWordMatch = firstWordKey.length > 2 && brandKey.includes(firstWordKey);
    // Same reasoning for an empty term key.
    const wholeMatch = termKey.length > 0 && (brandKey.includes(termKey) || termKey.includes(brandKey));

    if (firstWordMatch || wholeMatch) {
      return { name: brand.name, domain: brand.domain, via: 'search' };
    }
  }
  return null;
}
// Lookup table written to disk at the end: key (upper-cased alias name or
// brand term) -> { name, domain, via }.
const out = {};
for (const { name, domain } of aliasHits.values()) {
  out[name.toUpperCase()] = { name, domain, via: 'alias' };
}

let shit = 0; // search hits
let smiss = 0; // search misses
const missed = []; // up to 20 unresolved terms, kept for the final report

// Resolve the remaining terms one request at a time, most frequent first.
const termsByFrequency = [...terms.entries()].sort((a, b) => b[1] - a[1]);
for (const [term, cnt] of termsByFrequency) {
  const results = await search(term);
  const m = pick(term, results);
  if (m) {
    out[term] = m;
    shit++;
  } else {
    smiss++;
    if (missed.length < 20) {
      missed.push(`${term} (${cnt})`);
    }
  }
  await sleep(110); // pause between consecutive search requests
}

// Build coverage: how many TRANSACTIONS get a logo.
/**
 * Report whether a transaction description resolves to an entry of `out`.
 *
 * The alias match is looked up first (keyed by its upper-cased name); when
 * that fails, the brand term derived from the description is looked up.
 *
 * @param {string|null|undefined} desc Raw transaction description.
 * @returns {boolean} true when either lookup finds an entry.
 */
function logoForTx(desc) {
  const alias = aliasOf(desc);
  if (alias && out[alias.name.toUpperCase()]) return true;

  const term = brandTerm(desc);
  return Boolean(term && out[term]);
}
// Pass 2: re-read the ledgers and count the expense/fee transactions that
// resolve to a logo entry.
let covered = 0;
for (const ledgerPath of LEDGERS) {
  if (!fs.existsSync(ledgerPath)) continue;
  const ledger = JSON.parse(fs.readFileSync(ledgerPath, 'utf8'));
  for (const statement of ledger.statements ?? []) {
    for (const tx of statement.transactions ?? []) {
      if (!['expense', 'fee'].includes(tx.flow_type)) continue;
      if (logoForTx(tx.description)) covered++;
    }
  }
}

fs.writeFileSync('web/merchant-logos.json', JSON.stringify(out, null, 1));

console.log('alias brands:', aliasHits.size, '| search resolved:', shit, '| search missed:', smiss);
console.log('TOTAL logo entries:', Object.keys(out).length);

// With no transactions at all the ratio is 0/0; report 0% instead of "NaN%".
const coveragePct = totalTx > 0 ? Math.round(covered / totalTx * 100) : 0;
console.log('TXN COVERAGE: ' + covered + ' / ' + totalTx + ' expense+fee txns (' + coveragePct + '%)');

console.log('\n--- still missed (top) ---');
for (const entry of missed) {
  console.log(' ' + entry);
}