414 lines
19 KiB
JavaScript
414 lines
19 KiB
JavaScript
import { chromium } from 'playwright';
|
|
import pg from 'pg';
|
|
|
|
// PostgreSQL connection string; the process cannot do anything useful without it.
const DATABASE_URL = process.env.DATABASE_URL;

if (!DATABASE_URL) {
  console.error("❌ Error: No hay DATABASE_URL.");
  process.exit(1);
}

// Shared connection pool used by every query in this file (TLS disabled).
const pool = new pg.Pool({
  connectionString: DATABASE_URL,
  ssl: false,
});

// Whether the Playwright browsers are launched without a visible window.
const HEADLESS = true;
|
|
|
|
// ==========================================
|
|
// 🧠 MOTOR DE CLASIFICACIÓN DE GREMIOS PRO
|
|
// ==========================================
|
|
/**
 * Keyword rules used by `clasificarGremio` to map a free-text job description
 * to a guild (trade).
 *
 * - `nombres_db`: guild names (as stored in the `guilds` table) this rule may resolve to.
 * - `keywords`: fragments searched for in the description. Matching is done after
 *   lower-casing and stripping accents, so entries only need to be listed once,
 *   in any casing.
 *
 * ORDER MATTERS: rules are evaluated top to bottom and the first rule with a
 * matching keyword wins, so the specific "MANITAS ..." rules must stay above
 * the generic trade rules, and the catch-all "MANITAS GENERAL" rule stays last.
 */
const REGLAS_GREMIOS = [
  {
    nombres_db: ["MANITAS ELECTRICISTA", "MANITAS ELECTRICO", "MANITAS ELECTRICIDAD"],
    keywords: ["manitas electric", "cambiar bombilla", "colgar lampara", "instalar foco", "fluorescente", "casquillo", "lampara del dormitorio", "cambiar enchufe", "embellecedor"]
  },
  {
    nombres_db: ["MANITAS FONTANERIA", "MANITAS FONTANERO"],
    keywords: ["manitas fontaner", "cambiar grifo", "sellar bañera", "silicona", "latiguillo", "alcachofa", "tapon", "cambiar cisterna", "descargador"]
  },
  {
    nombres_db: ["MANITAS PERSIANAS", "MANITAS PERSIANISTA"],
    keywords: ["manitas persian", "cambiar cinta", "cuerda persiana", "recogedor", "atasco persiana", "lamas rotas", "persiana descolgada"]
  },
  {
    nombres_db: ["ELECTRICISTA", "ELECTRICIDAD"],
    // Redundant entries ("SIN LUZ", "CUADRO ELECTRICO", a second "cableado") were
    // removed: they were identical to existing keywords once normalised.
    keywords: ["electric", "cortocircuito", "cuadro electrico", "salto de plomos", "apagon", "diferencial", "icp", "magnetotermico", "chispazo", "sin luz", "cableado", "derivacion", "no hay luz", "salta el termico"]
  },
  {
    nombres_db: ["FONTANERO", "FONTANERIA"],
    keywords: ["fontaner", "fuga de agua", "tuberia", "atasco", "desatasco", "bote sifonico", "llave de paso", "calentador", "termo", "radiador", "caldera", "gotera", "inundacion", "filtracion", "bajante", "humedad"]
  },
  {
    nombres_db: ["CRISTALERO", "CRISTALERIA"],
    keywords: ["cristal", "vidrio", "ventana rota", "escaparate", "luna", "espejo", "climalit", "doble acristalamiento", "velux", "rotura"]
  },
  {
    nombres_db: ["PERSIANISTA", "PERSIANERO", "PERSIANAS"],
    keywords: ["motor persiana", "eje persiana", "persianista", "persiana atascada", "rotura de persiana", "domotica persiana"]
  },
  {
    nombres_db: ["CARPINTERO", "CARPINTERIA", "MADERA"],
    keywords: ["carpinter", "puerta de madera", "bisagra", "marco", "rodapie", "tarima", "armario", "cepillar puerta", "cajon", "encimera", "madera hinchada"]
  },
  {
    nombres_db: ["MANITAS GENERAL", "MANITAS", "BRICOLAJE"],
    keywords: ["bombin", "colgar cuadro", "soporte tv", "estanteria", "montar mueble", "ikea", "cortina", "riel", "estor", "agujero", "taladro", "picaporte", "colgar espejo"]
  }
];
|
|
|
|
/**
 * Normalises text for accent- and case-insensitive comparison.
 *
 * The value is lower-cased, decomposed (NFD) and stripped of combining marks,
 * so "Fontanería" becomes "fontaneria" and "bañera" becomes "banera".
 *
 * @param {*} texto - Value to normalise. Falsy values yield an empty string;
 *   non-string values are coerced with `String()` instead of throwing.
 * @returns {string} The normalised text.
 */
function normalizarTexto(texto) {
  if (!texto) return "";

  return String(texto)
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "");
}
|
|
|
|
/**
 * Picks the guild that best fits a job description using `REGLAS_GREMIOS`.
 *
 * Rules are tried in order; the first rule with a keyword contained in the
 * (normalised) description is used to look up a guild. A guild whose name is
 * exactly one of the rule's `nombres_db` is preferred; only if none exists do
 * we fall back to a substring match. Without that preference the generic
 * "ELECTRICISTA" rule could resolve to a "MANITAS ELECTRICISTA" guild simply
 * because it appeared first in the list.
 *
 * If the matching rule has no guild in the list, the next rules are tried.
 *
 * @param {string} descripcion - Free-text description of the job.
 * @param {Array<{id: *, name: string}>} gremiosActivosDB - Guild rows to choose from.
 * @returns {*} The `id` of the selected guild, or `null` when nothing matches.
 */
function clasificarGremio(descripcion, gremiosActivosDB) {
  if (!descripcion || !Array.isArray(gremiosActivosDB) || gremiosActivosDB.length === 0) {
    return null;
  }

  const descNormalizada = normalizarTexto(descripcion);

  // Normalise every guild name once instead of once per rule.
  const gremiosNormalizados = gremiosActivosDB.map((gremio) => ({
    gremio,
    nombre: normalizarTexto(gremio?.name).trim(),
  }));

  for (const regla of REGLAS_GREMIOS) {
    const coincide = regla.keywords.some((kw) => descNormalizada.includes(normalizarTexto(kw)));
    if (!coincide) continue;

    const nombresRegla = regla.nombres_db.map((nombre) => normalizarTexto(nombre));

    const candidato =
      gremiosNormalizados.find(({ nombre }) => nombresRegla.includes(nombre)) ??
      gremiosNormalizados.find(({ nombre }) => nombresRegla.some((nRegla) => nombre.includes(nRegla)));

    if (candidato) {
      const gremioEncontrado = candidato.gremio;
      console.log(` 🧠 Gremio detectado automáticamente: ${gremioEncontrado.name} (ID: ${gremioEncontrado.id})`);
      return gremioEncontrado.id;
    }
  }

  return null;
}
|
|
|
|
// Función auxiliar para reintentar la navegación si hay fallo de red
|
|
/**
 * Navigates `page` to `url`, retrying when the navigation throws.
 *
 * Waits only for `domcontentloaded` so heavy images/scripts cannot stall the
 * navigation, with a 45 s timeout per attempt and a 3 s pause between attempts.
 *
 * @param {object} page - Playwright page (needs `goto` and `waitForTimeout`).
 * @param {string} url - Address to open.
 * @param {number} [retries=3] - Maximum number of attempts. Values below 1 or
 *   non-numeric values are treated as a single attempt, so the function never
 *   returns without having tried to navigate.
 * @returns {Promise<void>} Resolves once a navigation attempt succeeds.
 * @throws Re-throws the error of the last attempt when all attempts fail.
 */
async function gotoWithRetry(page, url, retries = 3) {
  const maxIntentos = Math.max(1, Math.trunc(Number(retries)) || 1);

  for (let intento = 1; intento <= maxIntentos; intento++) {
    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 45000 });
      return;
    } catch (e) {
      // Out of attempts: surface the original error to the caller.
      if (intento === maxIntentos) throw e;

      console.log(`⚠️ Fallo de red detectado al ir a ${url}. Reintentando (${intento}/${maxIntentos})...`);
      await page.waitForTimeout(3000);
    }
  }
}
|
|
|
|
/**
 * Endless worker loop: every 15 minutes it loads the guild list, makes sure the
 * `scraped_services.is_urgent` column exists, and runs the matching scraper for
 * every active row of `provider_credentials`.
 *
 * Failures are contained so the loop keeps running:
 * - a failure to obtain a DB connection (or any other cycle-level error) is
 *   logged and the cycle is retried after the sleep;
 * - a failure while handling one credential is logged and the remaining
 *   credentials are still processed.
 *
 * @returns {Promise<never>} Never resolves under normal operation.
 */
async function main() {
  console.log("🤖 ROBOT MODO: CIRUJANO + ASPIRADORA + CLASIFICADOR PRO");

  while (true) {
    let client = null;

    try {
      // Acquired inside the try so a transient DB outage does not kill the robot.
      client = await pool.connect();

      const { rows: gremiosDB } = await client.query("SELECT id, name FROM guilds");

      // Idempotent migration: adds the urgency flag column if it is missing.
      await client.query(`
        DO $$ BEGIN
          IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='scraped_services' AND column_name='is_urgent') THEN
            ALTER TABLE scraped_services ADD COLUMN is_urgent BOOLEAN DEFAULT FALSE;
          END IF;
        END $$;
      `);

      const { rows: credenciales } = await client.query("SELECT * FROM provider_credentials WHERE status = 'active'");

      for (const cred of credenciales) {
        try {
          // Despite the column name, `password_hash` holds a base64-encoded
          // (reversible) password that is decoded here to log in.
          const password = Buffer.from(cred.password_hash, 'base64').toString('utf-8');
          console.log(`\n🔄 Procesando ${cred.provider.toUpperCase()}...`);

          if (cred.provider === 'multiasistencia') {
            await runMultiasistencia(cred.owner_id, cred.username, password, gremiosDB);
          } else if (cred.provider === 'homeserve') {
            await runHomeserve(cred.owner_id, cred.username, password, gremiosDB);
          }

          // NOTE(review): the scrapers log and swallow their own errors, so
          // last_sync is stamped even when the scrape itself failed.
          await client.query("UPDATE provider_credentials SET last_sync = NOW() WHERE id = $1", [cred.id]);
        } catch (e) {
          console.error(`❌ Error credencial ${cred.id}:`, e.message);
        }
      }
    } catch (e) {
      console.error("❌ Error ciclo:", e.message);
    } finally {
      client?.release();
    }

    console.log("\n💤 Durmiendo 15 minutos...");
    await new Promise(r => setTimeout(r, 15 * 60 * 1000));
  }
}
|
|
|
|
// ==========================================
|
|
// 🏥 MULTIASISTENCIA (CON REINTENTOS ANTI-CAÍDAS)
|
|
// ==========================================
|
|
/**
 * Scrapes the Multiasistencia provider portal for one account.
 *
 * Steps: log in, open the inbox, page through the listing collecting every
 * `reparacion=<id>` reference, archive services that disappeared from the
 * listing, then open each service, extract its fields, classify its guild and
 * upsert it through `saveServiceToDB`.
 *
 * Errors are logged, never thrown; the browser is always closed.
 *
 * @param {*} ownerId - Owner the scraped services belong to.
 * @param {string} user - Portal user name.
 * @param {string} pass - Portal password (plain text).
 * @param {Array<{id: *, name: string}>} gremiosDB - Guild rows used for classification.
 * @returns {Promise<void>}
 */
async function runMultiasistencia(ownerId, user, pass, gremiosDB) {
  // Safety cap on how many listing pages are scanned.
  const MAX_PAGINAS = 15;

  const browser = await chromium.launch({
    headless: HEADLESS,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu'
    ]
  });

  try {
    // Created inside the try so the browser is closed even if this fails.
    const context = await browser.newContext();
    const page = await context.newPage();

    // --- Login ---
    await gotoWithRetry(page, 'https://web.multiasistencia.com/w3multi/acceso.php');

    await page.fill('input[name="usuario"]', user);
    await page.fill('input[type="password"]', pass);
    await page.click('input[type="submit"]');
    await page.waitForTimeout(4000);

    // --- Inbox ---
    await gotoWithRetry(page, 'https://web.multiasistencia.com/w3multi/frepasos_new.php?refresh=1');
    await page.waitForTimeout(3000);

    console.log("🔄 [Multi] Forzando recarga segura mediante script interno...");
    try {
      // Calls the page's own `refrescar()` helper when the page defines one.
      await page.evaluate(() => {
        if (typeof refrescar === 'function') refrescar();
      });
      await page.waitForLoadState('networkidle');
      await page.waitForTimeout(3000);
    } catch (e) {
      // Best effort: the listing is still scanned, but the failure is no longer silent.
      console.log(`⚠️ [Multi] La recarga interna falló (se continúa): ${e.message}`);
    }

    // --- Pagination loop ---
    const todosExpedientes = new Set();
    let paginaActual = 1;
    // Becomes false when the page cap stops the scan before the last page.
    let listadoCompleto = true;

    while (true) {
      console.log(`📄 [Multi] Escaneando página ${paginaActual}...`);

      const expedientesPagina = await page.evaluate(() => {
        const links = Array.from(document.querySelectorAll('a[href*="reparacion="]'));
        return links.map(a => a.href.match(/reparacion=(\d+)/)?.[1]).filter(Boolean);
      });

      for (const ref of expedientesPagina) {
        todosExpedientes.add(ref);
      }

      // Clicks the "next page" link inside the page when there is one.
      const hasNextPage = await page.evaluate(() => {
        const links = Array.from(document.querySelectorAll('a.lnkheader'));
        const nextBtn = links.find(a => a.innerText.trim() === 'Página siguiente');
        if (nextBtn) { nextBtn.click(); return true; }
        return false;
      });

      if (!hasNextPage) {
        console.log("🛑 [Multi] No hay más páginas.");
        break;
      }

      await page.waitForLoadState('networkidle');
      await page.waitForTimeout(2500);
      paginaActual++;

      if (paginaActual > MAX_PAGINAS) {
        console.log(`⚠️ [Multi] Límite de ${MAX_PAGINAS} páginas alcanzado por seguridad.`);
        listadoCompleto = false;
        break;
      }
    }

    const expedientes = Array.from(todosExpedientes);
    console.log(`✅ [Multi] Total expedientes detectados: ${expedientes.length}`);

    if (expedientes.length > 0) {
      if (listadoCompleto) {
        await syncAndArchive(ownerId, 'multiasistencia', expedientes);
      } else {
        // A truncated listing would make still-active services look as if
        // they had disappeared, so nothing is archived in that case.
        console.log("⚠️ [Multi] Listado incompleto: se omite el archivado de expedientes.");
      }
    }

    // --- Detail pages ---
    for (const ref of expedientes) {
      try {
        await gotoWithRetry(page, `https://web.multiasistencia.com/w3multi/repasos1.php?reparacion=${ref}`);
        await page.waitForTimeout(1500);

        let scrapData = null;

        // The service sheet may live in any frame, so each one is probed
        // until one yields a client name.
        for (const frame of page.frames()) {
          try {
            scrapData = await frame.evaluate(() => {
              const clean = (t) => t ? t.replace(/\s+/g, ' ').trim() : "";
              const body = document.body?.innerText || "";
              if (!body.includes("Nombre Cliente") && !body.includes("Asegurado")) return null;

              // Header cells (.tcab) and value cells (.tdet) are paired by position.
              const cabeceras = Array.from(document.querySelectorAll('.tcab'));
              const detalles = Array.from(document.querySelectorAll('.tdet'));
              const findByCab = (texto) => {
                const idx = cabeceras.findIndex(el => el.innerText.includes(texto));
                return idx !== -1 && detalles[idx] ? clean(detalles[idx].innerText) : null;
              };

              // Description: keep only the text before the first dd/mm/yyyy date.
              let rawDesc = "";
              const descHeader = Array.from(document.querySelectorAll('td.tcab')).find(td => td.innerText.includes("Descripción de la Reparación"));
              if (descHeader && descHeader.nextElementSibling) rawDesc = clean(descHeader.nextElementSibling.innerText);
              const idxDate = rawDesc.search(/\b\d{2}\/\d{2}\/\d{4}\b/);
              const cleanDesc = idxDate !== -1 ? rawDesc.substring(0, idxDate).trim() : rawDesc;

              // "Distrito Postal" mixes the 5-digit postal code and the town name.
              const rawCPField = findByCab("Distrito Postal") || "";
              const cpMatch = rawCPField.match(/\b\d{5}\b/);
              const cpOnly = cpMatch ? cpMatch[0] : "";
              const popOnly = rawCPField.replace(cpOnly, '').replace('-', '').trim();

              return {
                "Expediente": findByCab("Número Reparación") || "",
                "Nombre Cliente": findByCab("Nombre Cliente") || "",
                "Dirección": findByCab("Dirección") || "",
                "Población": popOnly,
                "Código Postal": cpOnly,
                "Compañía": "MULTI - " + (findByCab("Procedencia") || "MULTIASISTENCIA"),
                "Descripción": cleanDesc,
                // First 9-digit run starting with 6-9 found anywhere in the page text.
                "Teléfono": (document.body.innerText.match(/[6789]\d{8}/) || [])[0] || "",
                "Urgente": findByCab("Urgente") || "No"
              };
            });

            if (scrapData && scrapData['Nombre Cliente']) break;
          } catch (e) {
            // Deliberate best effort: a frame that cannot be evaluated is skipped.
            continue;
          }
        }

        if (scrapData && scrapData['Nombre Cliente']) {
          const idGremioDetectado = clasificarGremio(scrapData['Descripción'], gremiosDB);
          if (idGremioDetectado) {
            scrapData['guild_id'] = idGremioDetectado;
          }
          await saveServiceToDB(ownerId, 'multiasistencia', ref, scrapData);
        }
      } catch (e) {
        // One broken service must not prevent the remaining ones from syncing.
        console.error(`❌ [Multi] Error en expediente ${ref}:`, e.message);
      }
    }
  } catch (e) {
    console.error("❌ Error Multi:", e.message);
  } finally {
    await browser.close();
  }
}
|
|
|
|
// ==========================================
|
|
// 🧹 HOMESERVE
|
|
// ==========================================
|
|
/**
 * Scrapes the HomeServe professionals portal for one account.
 *
 * Steps: log in (when the login form is shown), read the service listing,
 * archive services that disappeared from it, then open each service, turn its
 * table rows into a key/value object, classify its guild and upsert it through
 * `saveServiceToDB`.
 *
 * Errors are logged, never thrown; the browser is always closed.
 *
 * @param {*} ownerId - Owner the scraped services belong to.
 * @param {string} user - Portal user code.
 * @param {string} pass - Portal password (plain text).
 * @param {Array<{id: *, name: string}>} gremiosDB - Guild rows used for classification.
 * @returns {Promise<void>}
 */
async function runHomeserve(ownerId, user, pass, gremiosDB) {
  const browser = await chromium.launch({
    headless: HEADLESS,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu'
    ]
  });

  try {
    // Created inside the try so the browser is closed even if this fails.
    const page = await browser.newPage();

    console.log("🌍 [HomeServe] Entrando...");
    await gotoWithRetry(page, 'https://www.clientes.homeserve.es/cgi-bin/fccgi.exe?w3exec=PROF_PASS');

    // The login form is only filled in when it is actually displayed.
    if (await page.isVisible('input[name="CODIGO"]')) {
      await page.fill('input[name="CODIGO"]', user);
      await page.fill('input[type="password"]', pass);
      await page.keyboard.press('Enter');
      await page.waitForTimeout(5000);
    }

    await gotoWithRetry(page, 'https://www.clientes.homeserve.es/cgi-bin/fccgi.exe?w3exec=lista_servicios_total');
    await page.waitForTimeout(3000);

    // Service references: table cells whose whole text is "15" followed by six digits.
    const refs = await page.evaluate(() => {
      const found = [];
      for (const td of document.querySelectorAll('td')) {
        const match = td.innerText.trim().match(/^15\d{6}$/);
        if (match) found.push(match[0]);
      }
      return [...new Set(found)];
    });

    if (refs.length > 0) {
      await syncAndArchive(ownerId, 'homeserve', refs);
    }

    console.log(`🔍 [HomeServe] ${refs.length} expedientes detectados.`);

    for (const ref of refs) {
      try {
        await gotoWithRetry(page, `https://www.clientes.homeserve.es/cgi-bin/fccgi.exe?w3exec=ver_servicioencurso&Servicio=${ref}`);
        await page.waitForTimeout(2000);

        const scrapData = await page.evaluate(() => {
          const d = {};
          // Log lines containing any of these markers are dropped from the comments box.
          const RUIDO = ["CAMBIO DE ESTADO", "ESTADO ASIGNADO", "SMS NO ENVIADO", "CONTACTO CON PROF", "0000"];

          for (const r of document.querySelectorAll('tr')) {
            const cells = r.querySelectorAll('td');
            if (cells.length < 2) continue;

            // First cell is the label (upper-cased, first colon removed), second the value.
            const k = cells[0].innerText.toUpperCase().trim().replace(':', '');
            const v = cells[1].innerText.trim();

            if (k.includes("COMENTARIOS")) {
              // Description = first three non-noise lines of the comments textarea.
              const txt = cells[1].querySelector('textarea')?.value || "";
              const cleanDesc = txt.split('\n').filter(line => {
                const l = line.toUpperCase();
                return !RUIDO.some(b => l.includes(b));
              }).slice(0, 3).join('\n').trim();
              d['Descripción'] = cleanDesc;
            } else if (k.length > 1 && v.length > 0 && !k.includes("MENU")) {
              d[k] = v;
            }
          }

          // "POBLACION-PROVINCIA" mixes the 5-digit postal code and the town name.
          const rawPop = d['POBLACION-PROVINCIA'] || "";
          const cpMatch = rawPop.match(/\b\d{5}\b/);
          d['Código Postal'] = cpMatch ? cpMatch[0] : "";
          d['Población'] = rawPop.replace(d['Código Postal'], '').replace('-', '').trim();
          d['Compañía'] = "HOME - " + (d['COMPAÑIA'] || "HOMESERVE");
          d['Nombre Cliente'] = d['CLIENTE'] || "";
          d['Dirección'] = d['DOMICILIO'] || "";
          return d;
        });

        if (scrapData && scrapData['Nombre Cliente']) {
          const idGremioDetectado = clasificarGremio(scrapData['Descripción'], gremiosDB);
          if (idGremioDetectado) {
            scrapData['guild_id'] = idGremioDetectado;
          }
          await saveServiceToDB(ownerId, 'homeserve', ref, scrapData);
        }
      } catch (e) {
        // One broken service must not prevent the remaining ones from syncing.
        console.error(`❌ [HomeServe] Error en expediente ${ref}:`, e.message);
      }
    }
  } catch (e) {
    console.error("❌ [HomeServe] Error:", e.message);
  } finally {
    await browser.close();
  }
}
|
|
|
|
/**
 * Archives the pending services of an owner/provider that are no longer
 * present in the provider's web listing.
 *
 * References are compared as strings, so numeric and string forms of the same
 * reference are treated as equal. Errors (including failures to reach the
 * database) are logged and swallowed so the calling scraper can carry on.
 *
 * NOTE: every pending service missing from `currentWebRefs` is archived, so an
 * empty list archives all of them; the callers in this file only invoke this
 * function with a non-empty list.
 *
 * @param {*} ownerId - Owner whose services are reconciled.
 * @param {string} provider - Provider key stored in `scraped_services.provider`.
 * @param {Iterable<string|number>} currentWebRefs - References currently visible on the web.
 * @returns {Promise<void>}
 */
async function syncAndArchive(ownerId, provider, currentWebRefs) {
  try {
    // Set lookup keeps the comparison linear instead of quadratic.
    const refsEnWeb = new Set(Array.from(currentWebRefs, String));

    const { rows: dbServices } = await pool.query(
      `SELECT service_ref FROM scraped_services
       WHERE owner_id = $1 AND provider = $2 AND status = 'pending'`,
      [ownerId, provider]
    );

    const refsToArchive = dbServices
      .map(s => s.service_ref)
      .filter(ref => !refsEnWeb.has(String(ref)));

    if (refsToArchive.length === 0) return;

    await pool.query(
      `UPDATE scraped_services
       SET status = 'archived'
       WHERE owner_id = $1
         AND provider = $2
         AND service_ref = ANY($3)`,
      [ownerId, provider, refsToArchive]
    );

    console.log(`📦 [${provider.toUpperCase()}] Archivados ${refsToArchive.length} expedientes desaparecidos.`);
  } catch (error) {
    console.error(`❌ Error archivando ${provider}:`, error.message);
  }
}
|
|
|
|
/**
 * Inserts or updates one scraped service in `scraped_services`.
 *
 * On conflict (same owner, provider and reference) the stored `raw_data` is
 * merged with the new data, `is_urgent` is refreshed, and the status is kept
 * when it is already 'archived' or 'imported' (otherwise it is set to 'pending').
 *
 * The urgency flag is true when `data['Urgente']` reads "sí"/"si" in any
 * casing, with or without accent and surrounding whitespace. Non-string values
 * are coerced instead of throwing.
 *
 * @param {*} ownerId - Owner the service belongs to.
 * @param {string} provider - Provider key, e.g. 'multiasistencia' or 'homeserve'.
 * @param {string} ref - Provider-side service reference.
 * @param {object} data - Scraped fields; stored as JSON in `raw_data`.
 * @returns {Promise<void>}
 * @throws Propagates database errors from `pool.query`.
 */
async function saveServiceToDB(ownerId, provider, ref, data) {
  console.log(`💾 Guardando/Actualizando ${provider.toUpperCase()} ${ref}...`);

  // Strip accents so "Sí", "SÍ", "si" (pre-composed or decomposed) all compare equal.
  const urgenteNormalizado = String(data['Urgente'] ?? '')
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .trim()
    .toLowerCase();
  const isUrgent = urgenteNormalizado === 'si';

  await pool.query(`
    INSERT INTO scraped_services (owner_id, provider, service_ref, raw_data, status, is_urgent)
    VALUES ($1, $2, $3, $4, 'pending', $5)
    ON CONFLICT (owner_id, provider, service_ref)
    DO UPDATE SET
      raw_data = scraped_services.raw_data || EXCLUDED.raw_data,
      is_urgent = EXCLUDED.is_urgent,
      status = CASE
        WHEN scraped_services.status = 'archived' THEN 'archived'
        WHEN scraped_services.status = 'imported' THEN 'imported'
        ELSE 'pending'
      END
  `, [ownerId, provider, ref, JSON.stringify(data), isUrgent]);
}
|
|
|
|
// Entry point: start the worker loop. Any rejection that escapes `main` is
// reported explicitly and ends the process with a failure exit code.
main().catch((e) => {
  console.error("❌ Error fatal:", e);
  process.exit(1);
});