diff --git a/robot.js b/robot.js index 100a8bf..5e8cf0b 100644 --- a/robot.js +++ b/robot.js @@ -12,18 +12,17 @@ const pool = new pg.Pool({ connectionString: DATABASE_URL, ssl: false }); const HEADLESS = true; async function main() { - console.log("馃 ROBOT MODO: ASPIRADORA INDUSTRIAL (CAPTURA TODO)"); + console.log("馃 ROBOT MODO: CIRUJANO + ASPIRADORA INTELIGENTE"); while (true) { const client = await pool.connect(); try { const res = await client.query("SELECT * FROM provider_credentials WHERE status = 'active'"); const credentials = res.rows; - console.log(`馃搵 Cuentas activas: ${credentials.length}`); for (const cred of credentials) { let password = Buffer.from(cred.password_hash, 'base64').toString('utf-8'); - console.log(`\n馃攧 Escaneando ${cred.provider.toUpperCase()} (ID: ${cred.owner_id})...`); + console.log(`\n馃攧 Procesando ${cred.provider.toUpperCase()}...`); if (cred.provider === 'multiasistencia') { await runMultiasistencia(cred.owner_id, cred.username, password); @@ -43,7 +42,7 @@ async function main() { } // ========================================== -// 馃Ч MULTIASISTENCIA (EXTRACCI脫N TOTAL) +// 馃彞 MULTIASISTENCIA V3 (CIRUJANO) // ========================================== async function runMultiasistencia(ownerId, user, pass) { const browser = await chromium.launch({ headless: HEADLESS, args: ['--no-sandbox'] }); @@ -51,7 +50,7 @@ async function runMultiasistencia(ownerId, user, pass) { const page = await context.newPage(); try { - console.log("馃實 [Multi] Entrando..."); + console.log("馃實 [Multi] Conectando..."); await page.goto('https://web.multiasistencia.com/w3multi/acceso.php', { timeout: 60000 }); const userInput = await page.$('input[name="usuario"]') || await page.$('input[type="text"]'); @@ -69,45 +68,71 @@ async function runMultiasistencia(ownerId, user, pass) { return Array.from(new Set(links.map(a => a.href.match(/reparacion=(\d+)/)?.[1]).filter(Boolean))); }); - console.log(`馃攳 [Multi] ${expedientes.length} expedientes.`); + console.log(`馃攳 [Multi] ${expedientes.length} expedientes a analizar.`); for (const ref of expedientes) { await page.goto(`https://web.multiasistencia.com/w3multi/repasos1.php?reparacion=${ref}`, { waitUntil: 'domcontentloaded' }); - // --- L脫GICA AGRESIVA DE CAPTURA --- const fullData = await page.evaluate(() => { const data = {}; + const bodyText = document.body.innerText; + + // --- 1. CIRUJANO (Extracci贸n manual de lo importante) --- + // Intentamos sacar la direcci贸n de la cabecera t铆pica de Multi + const boldElements = Array.from(document.querySelectorAll('b, strong, .titulo')); - // 1. Recorrer TODAS las filas de TODAS las tablas + // Buscamos nombre: Suele estar cerca de "Asegurado" o es un texto en may煤sculas aislado + let clientNameCandidate = ""; + // Buscamos direcci贸n: Suele contener "CL", "AV", "PZ" + let addressCandidate = ""; + + // Estrategia por palabras clave vecinas (m谩s seguro) + const findNeighbor = (keywords) => { + const cells = Array.from(document.querySelectorAll('td')); + for (let i = 0; i < cells.length; i++) { + const txt = cells[i].innerText.toUpperCase(); + if (keywords.some(k => txt.includes(k))) { + // Devolvemos el texto de la siguiente celda que no est茅 vac铆a + for(let j=1; j<=3; j++) { + if(cells[i+j] && cells[i+j].innerText.trim().length > 2) return cells[i+j].innerText.trim(); + } + } + } + return ""; + }; + + data.clientName_fixed = findNeighbor(['ASEGURADO', 'NOMBRE CLIENTE', 'CONTACTO']); + data.address_fixed = findNeighbor(['DIRECCI脫N', 'DOMICILIO', 'RIESGO', 'UBICACI脫N']); + data.phone_fixed = (bodyText.match(/[6789]\d{8}/) || [])[0] || ""; + data.description_fixed = findNeighbor(['DESCRIPCI脫N', 'DA脩OS', 'AVER脥A']); + + // --- 2. ASPIRADORA INTELIGENTE (Tablas) --- const rows = document.querySelectorAll('tr'); rows.forEach(row => { - const cells = Array.from(row.querySelectorAll('td, th')); - // Intentamos emparejar: Celda 1 (Clave) -> Celda 2 (Valor) + const cells = Array.from(row.querySelectorAll('td')); for (let i = 0; i < cells.length - 1; i++) { let key = cells[i].innerText.trim().replace(':', ''); let val = cells[i+1]?.innerText.trim(); - // Filtros de limpieza b谩sicos - if (key.length > 2 && key.length < 60 && val && val.length > 0) { - // Si la clave ya existe, no la machacamos (o le a帽adimos un indice) - if (!data[key]) { - data[key] = val; - // Saltamos la celda de valor para no leerla como clave en la siguiente vuelta - i++; - } + // FILTRO DE SEGURIDAD PARA ETIQUETAS + // 1. La clave no puede ser largu铆sima (eso es una descripci贸n o direcci贸n) + if (key.length > 35) continue; + // 2. La clave no puede contener d铆gitos (ej: "28001" no es una clave) + if (/\d/.test(key) && key.length > 10) continue; + // 3. La clave no debe empezar por tipo de v铆a + if (/^(CL|AV|PZ|UR|CJ)\s/.test(key.toUpperCase())) continue; + + if (key.length > 2 && val && val.length > 0) { + // Guardamos normalizando la clave + data[key] = val; } } }); - // Asegurar m铆nimos (fallback) - if (!data.clientName) data.clientName = data["Nombre Cliente"] || data["Asegurado"] || "Desconocido"; - if (!data.address) data.address = data["Direcci贸n"] || data["Domicilio"] || data["Riesgo"] || ""; - if (!data.phone) data.phone = (document.body.innerText.match(/[6789]\d{8}/) || [])[0] || ""; - return data; }); - if (fullData && Object.keys(fullData).length > 2) { + if (fullData) { await saveServiceToDB(ownerId, 'multiasistencia', ref, fullData); } } @@ -115,78 +140,56 @@ async function runMultiasistencia(ownerId, user, pass) { } // ========================================== -// 馃Ч HOMESERVE (EXTRACCI脫N TOTAL) +// 馃Ч HOMESERVE (SE MANTIENE IGUAL, FUNCIONABA BIEN) // ========================================== async function runHomeserve(ownerId, user, pass) { const browser = await chromium.launch({ headless: HEADLESS, args: ['--no-sandbox'] }); const page = await browser.newPage(); - try { console.log("馃實 [HomeServe] Entrando..."); await page.goto('https://www.clientes.homeserve.es/cgi-bin/fccgi.exe?w3exec=PROF_PASS', { timeout: 60000 }); - if (await page.isVisible('input[name="CODIGO"]')) { await page.fill('input[name="CODIGO"]', user); await page.fill('input[type="password"]', pass); await page.keyboard.press('Enter'); await page.waitForTimeout(5000); } - await page.goto('https://www.clientes.homeserve.es/cgi-bin/fccgi.exe?w3exec=lista_servicios_total'); - const refs = await page.evaluate(() => { const filas = Array.from(document.querySelectorAll('table tr')); const found = []; filas.forEach(tr => { - const txt = tr.innerText; - const match = txt.match(/(\d{6,10})/); + const txt = tr.innerText; const match = txt.match(/(\d{6,10})/); if (match) found.push(match[1]); }); return [...new Set(found)]; }); - console.log(`馃攳 [HomeServe] ${refs.length} expedientes.`); - for (const ref of refs) { try { await page.goto('https://www.clientes.homeserve.es/cgi-bin/fccgi.exe?w3exec=lista_servicios_total'); const link = await page.getByText(ref).first(); if (await link.isVisible()) { - await link.click(); - await page.waitForTimeout(1500); - + await link.click(); await page.waitForTimeout(1500); const fullData = await page.evaluate(() => { const d = {}; - // HomeServe suele ser tablas limpias: TR -> TD (Clave) | TD (Valor) const rows = Array.from(document.querySelectorAll('tr')); rows.forEach(r => { const cells = r.querySelectorAll('td'); - // Si tiene al menos 2 celdas if(cells.length >= 2) { const k = cells[0].innerText.toUpperCase().trim().replace(':', ''); const v = cells[1].innerText.trim(); if(k.length > 1 && v.length > 0) d[k] = v; } - // A veces son tablas de 4 columnas (Clave | Valor | Clave | Valor) if(cells.length >= 4) { const k2 = cells[2].innerText.toUpperCase().trim().replace(':', ''); const v2 = cells[3].innerText.trim(); if(k2.length > 1 && v2.length > 0) d[k2] = v2; } }); - - // Mapeo m铆nimo - d.clientName = d["CLIENTE"] || d["ASEGURADO"] || "Desconocido"; - d.phone = d["TELEFONOS"] || d["MOVIL"] || ""; - d.address = d["DOMICILIO"] || d["DIRECCION"] || ""; - if(d["POBLACION"]) d.address += ", " + d["POBLACION"]; - return d; }); - - if (fullData) { - await saveServiceToDB(ownerId, 'homeserve', ref, fullData); - } + if (fullData) await saveServiceToDB(ownerId, 'homeserve', ref, fullData); } } catch (errRef) { console.error(`鈿狅笍 Error ref ${ref}`); } } @@ -194,7 +197,7 @@ async function runHomeserve(ownerId, user, pass) { } async function saveServiceToDB(ownerId, provider, ref, data) { - console.log(`馃捑 Guardando ${ref} con ${Object.keys(data).length} variables encontradas.`); + console.log(`馃捑 Guardando ${ref}...`); await pool.query(` INSERT INTO scraped_services (owner_id, provider, service_ref, raw_data, status) VALUES ($1, $2, $3, $4, 'pending')