const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const XLSX = require('xlsx');
const { once } = require('events');
const { incrementProcessed, incrementErrors, finishJob } = require('./jobStore.service');

/** Number of leading bytes inspected when sniffing the file type. */
const SNIFF_BYTES = 512;

/** Magic bytes at the start of an OLE compound file (legacy .xls). */
const OLE_SIGNATURE = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];

/** Normalized header names accepted as the "street number" column. */
const NUMBER_HEADERS = ['numero', 'num', 'nº', 'n°'];

/** Result columns prepended to every output row, in output order. */
const OUTPUT_HEADERS = ['Provedor', 'Distancia', 'Dedicado', 'Nao Dedicado', 'Erro'];

/**
 * Normalizes a header cell for comparison: trimmed, lower-cased, combining
 * accents removed, runs of `_`/`-` turned into a space, whitespace collapsed.
 *
 * @param {*} value - Raw header cell; falsy values normalize to ''.
 * @returns {string} Normalized header text.
 */
function normalizeHeader(value) {
  return String(value || '')
    .trim()
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ');
}

/** True when a normalized header names the CEP (postal code) column. */
const isCepHeader = header => /\bcep\b/.test(header) || header === 'codigo postal';

/** True when a normalized header names the street-number column. */
const isNumberHeader = header => NUMBER_HEADERS.includes(header);

/** True when a normalized header names the address column. */
const isAddressHeader = header => header.includes('endereco') || header.includes('logradouro');

/**
 * Reads at most `byteCount` bytes from the start of a file without loading
 * the rest of it.
 *
 * @param {string} filePath - File to read.
 * @param {number} byteCount - Maximum number of bytes to read.
 * @returns {Buffer} The bytes actually read (shorter for small files).
 */
function readFileStart(filePath, byteCount) {
  const buffer = Buffer.alloc(byteCount);
  const fd = fs.openSync(filePath, 'r');
  try {
    const bytesRead = fs.readSync(fd, buffer, 0, byteCount, 0);
    return buffer.subarray(0, bytesRead);
  } finally {
    fs.closeSync(fd);
  }
}

/**
 * Decides whether a file should be parsed as a spreadsheet: by extension
 * first, then by sniffing its first bytes.
 *
 * @param {string} filePath - Path of the uploaded file.
 * @returns {boolean} True when the file should go through the XLSX reader.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  const fileStart = readFileStart(filePath, SNIFF_BYTES);
  // "PK" is the ZIP local-file header that .xlsx containers start with.
  const isZipBasedXlsx = fileStart[0] === 0x50 && fileStart[1] === 0x4b;
  const isOleBasedXls = OLE_SIGNATURE.every((byte, i) => fileStart[i] === byte);

  // NOTE(review): the source text for the HTML check and the return statement
  // was lost in the copy under review; the prefixes below and the combined
  // return are a reconstruction - confirm against the original file.
  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel =
    startText.startsWith('<html') ||
    startText.startsWith('<!doctype html') ||
    startText.startsWith('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}

/**
 * Picks the delimiter that splits the given line into the most columns.
 *
 * NOTE(review): the candidate list was lost in the copy under review; only
 * the "count the splits, take the highest" logic is original. Confirm the
 * candidates (and their tie-break order) against the original file.
 *
 * @param {string} line - First non-empty line of the file.
 * @returns {string} The chosen delimiter.
 */
function detectDelimiter(line) {
  return [';', ',', '\t']
    .map(delimiter => ({ delimiter, count: line.split(delimiter).length }))
    .sort((a, b) => b.count - a.count)[0].delimiter;
}

/**
 * Splits one delimited line into trimmed cells. Delimiters inside double
 * quotes are kept as data and a doubled quote (`""`) yields a literal quote.
 *
 * @param {string} line - A single line of text.
 * @param {string} delimiter - Single-character delimiter.
 * @returns {string[]} The cells of the line.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"' && line[i + 1] === '"') {
      current += '"';
      i++; // consume the second quote of the escaped pair
    } else if (char === '"') {
      inQuotes = !inQuotes;
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  cols.push(current.trim());
  return cols;
}

/**
 * Reads a delimited text file as rows of cells. A leading BOM and blank lines
 * are dropped; the delimiter is detected from the first remaining line.
 *
 * @param {string} filePath - Path of the text file (read as UTF-8).
 * @returns {string[][]} Rows of cells, header row included.
 */
function readDelimitedRows(filePath) {
  const content = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');
  const lines = content.split(/\r?\n/).filter(line => line.trim());
  if (!lines.length) return [];

  const delimiter = detectDelimiter(lines[0]);
  return lines.map(line => splitDelimitedLine(line.replace(/\r$/, ''), delimiter));
}

/**
 * Reads the first sheet of a workbook as rows of trimmed string cells.
 *
 * @param {string} filePath - Path of the workbook.
 * @returns {string[][]} Rows of cells; [] when the workbook has no sheets.
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) return [];

  return XLSX.utils
    .sheet_to_json(workbook.Sheets[firstSheetName], { header: 1, blankrows: false, defval: '' })
    .map(row => row.map(cell => String(cell ?? '').trim()));
}

/**
 * Reads any supported input file as rows of cells.
 *
 * @param {string} filePath - Path of the input file.
 * @returns {string[][]} Rows of cells.
 */
function readRows(filePath) {
  return isExcelFile(filePath) ? readExcelRows(filePath) : readDelimitedRows(filePath);
}

/**
 * Index of the first header whose normalized form satisfies `predicate`.
 *
 * @param {Array} headers - Raw header cells.
 * @param {(header: string) => boolean} predicate - Test on the normalized header.
 * @returns {number} Column index, or -1 when none matches.
 */
function findFirstHeaderIndex(headers, predicate) {
  return headers.map(normalizeHeader).findIndex(predicate);
}

/** @returns {boolean} True when the row contains a CEP header. */
function hasCepHeader(headers) {
  return headers.map(normalizeHeader).some(isCepHeader);
}

/** @returns {boolean} True when the row contains a street-number or address header. */
function hasAddressOrNumberHeader(headers) {
  return headers
    .map(normalizeHeader)
    .some(header => isNumberHeader(header) || isAddressHeader(header));
}

/** @returns {boolean} True when the row contains both a latitude and a longitude header. */
function hasGeoHeaders(headers) {
  const normalizedHeaders = headers.map(normalizeHeader);
  return (
    normalizedHeaders.some(header => header.includes('latitude')) &&
    normalizedHeaders.some(header => header.includes('longitude'))
  );
}

/**
 * Locates the header row: the first row that has CEP plus address/number
 * headers, or latitude plus longitude headers.
 *
 * @param {string[][]} rows - All rows of the file.
 * @returns {number} Index of the header row; 0 when none is recognized.
 */
function findHeaderRowIndex(rows) {
  const index = rows.findIndex(
    row => (hasCepHeader(row) && hasAddressOrNumberHeader(row)) || hasGeoHeaders(row)
  );
  return index >= 0 ? index : 0;
}

/**
 * Maps the header row to the column indexes the payload builders use.
 *
 * @param {Array} headers - Raw header cells.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number,
 *   idxLatitude: number, idxLongitude: number}} Indexes, -1 for absent columns.
 */
function resolveColumnIndexes(headers) {
  return {
    idxCep: findFirstHeaderIndex(headers, isCepHeader),
    idxNumero: findFirstHeaderIndex(headers, isNumberHeader),
    idxEndereco: findFirstHeaderIndex(headers, isAddressHeader),
    idxLatitude: findFirstHeaderIndex(headers, header => header.includes('latitude')),
    idxLongitude: findFirstHeaderIndex(headers, header => header.includes('longitude'))
  };
}

/**
 * Extracts a street number from a free-text address. Road/km markers and
 * numbers right before a closing parenthesis are discarded first; then a
 * labeled number wins, then a number after a comma, then the last standalone
 * number.
 *
 * @param {*} address - Free-text address.
 * @returns {string} The number found, or '1' when there is none.
 */
function extractAddressNumber(address) {
  const value = String(address || '').trim();
  if (!value) return '1';

  const withoutRoadKm = value
    .replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ')
    .replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  const labeledNumber = withoutRoadKm.match(
    /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i
  );
  if (labeledNumber) return labeledNumber[1];

  const commaNumber = withoutRoadKm.match(/,\s*(\d+[A-Z]?)\b/i);
  if (commaNumber) return commaNumber[1];

  const standaloneNumbers = withoutRoadKm.match(/\b\d+[A-Z]?\b/gi) || [];
  return standaloneNumbers.length ? standaloneNumbers[standaloneNumbers.length - 1] : '1';
}

/**
 * Builds the CEP lookup payload for a row.
 *
 * @param {string[]} cols - Cells of the row.
 * @param {object} indexes - Result of resolveColumnIndexes.
 * @returns {{cep: string, numero: string}|null} Payload, or null when the row
 *   has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const cepRaw = indexes.idxCep >= 0 ? cols[indexes.idxCep] : '';
  const cep = String(cepRaw || '').replace(/\D/g, '');
  if (!cep) return null;

  const numeroRaw = indexes.idxNumero >= 0 ? cols[indexes.idxNumero] : '';
  const enderecoRaw = indexes.idxEndereco >= 0 ? cols[indexes.idxEndereco] : '';
  // With no explicit number column value, fall back to parsing the address.
  const numero = String(numeroRaw || '').trim() || extractAddressNumber(enderecoRaw);
  return { cep, numero };
}

/**
 * Parses a coordinate cell, accepting a decimal comma.
 *
 * @param {*} value - Raw cell value.
 * @returns {number} The parsed number, or NaN when it is not finite.
 */
function parseCoordinate(value) {
  const parsed = parseFloat(String(value ?? '').trim().replace(',', '.'));
  return Number.isFinite(parsed) ? parsed : NaN;
}

/**
 * Builds the latitude/longitude lookup payload for a row.
 *
 * @param {string[]} cols - Cells of the row.
 * @param {object} indexes - Result of resolveColumnIndexes.
 * @returns {{latitude: number, longitude: number}|null} Payload, or null when
 *   either coordinate is missing or unparsable.
 */
function buildGeoPayload(cols, indexes) {
  const latitude = indexes.idxLatitude >= 0 ? parseCoordinate(cols[indexes.idxLatitude]) : NaN;
  const longitude = indexes.idxLongitude >= 0 ? parseCoordinate(cols[indexes.idxLongitude]) : NaN;
  if (Number.isNaN(latitude) || Number.isNaN(longitude)) return null;
  return { latitude, longitude };
}

/**
 * Queries viability with the geo payload first and the CEP payload second,
 * returning the first response that carries no `error`.
 *
 * @param {object|null} geoPayload - Latitude/longitude payload, if any.
 * @param {object|null} cepPayload - CEP/number payload, if any.
 * @returns {Promise<*>} The first error-free response.
 * @throws {Error} The last failure seen, or a generic error when both
 *   payloads are null.
 */
async function consultarComFallback(geoPayload, cepPayload) {
  let lastError = null;

  for (const payload of [geoPayload, cepPayload]) {
    if (!payload) continue;
    try {
      const result = await consultarViabilidade(payload);
      if (!result || !result.error) return result;
      lastError = new Error(result.error);
    } catch (err) {
      lastError = err;
    }
  }

  throw lastError || new Error('Linha sem latitude/longitude ou CEP valido');
}

/**
 * Makes a value safe for one cell of the `;`-separated output: line breaks
 * and `;` become spaces, double quotes become single quotes.
 *
 * @param {*} value - Any value; null/undefined become ''.
 * @returns {string} The sanitized cell text.
 */
function cleanCsvValue(value) {
  return String(value ?? '')
    .replace(/[\r\n;]/g, ' ')
    .replace(/"/g, "'");
}

/**
 * Extracts a readable message from a failed request, preferring the response
 * body (`error`, then `message`, then the whole body as JSON) over the
 * error's own message.
 *
 * @param {*} error - The thrown value.
 * @returns {string|undefined} The message; undefined/falsy when `error` is.
 */
function formatApiErrorResponse(error) {
  const responseData = error && error.response && error.response.data;
  if (responseData !== undefined && responseData !== null) {
    if (typeof responseData === 'string') return responseData;
    const detail = responseData.error || responseData.message;
    if (!detail) return JSON.stringify(responseData);
    // Structured details would otherwise be rendered as "[object Object]".
    return typeof detail === 'string' ? detail : JSON.stringify(detail);
  }
  return error && (error.message || String(error));
}

/**
 * Loads the input file and resolves its header row and column indexes.
 *
 * @param {string} inputPath - Path of the input file.
 * @returns {Promise<{headers: string[], dataRows: string[][], indexes: object}>}
 */
async function loadInput(inputPath) {
  // The result is not used here; the call is kept for whatever validation or
  // side effects discoverDataType performs.
  await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const headers = rows[headerRowIndex] || [];
  return {
    headers,
    dataRows: rows.slice(headerRowIndex + 1),
    indexes: resolveColumnIndexes(headers)
  };
}

/**
 * Counts the data rows that can be looked up, i.e. rows yielding a geo
 * payload or a CEP payload.
 *
 * @param {string} inputPath - Path of the input file.
 * @returns {Promise<number>} Number of rows that will be processed.
 */
async function countValidLines(inputPath) {
  const { dataRows, indexes } = await loadInput(inputPath);
  return dataRows.filter(
    cols => buildGeoPayload(cols, indexes) || buildCepPayload(cols, indexes)
  ).length;
}

/**
 * Converts a viability response into the result cells, in OUTPUT_HEADERS order.
 *
 * @param {object} viab - Response returned by consultarComFallback.
 * @returns {Array} [provedor, distancia, dedicado, naoDedicado, erro].
 */
function buildResultColumns(viab) {
  const provedor = viab.provedor ?? '';
  // `??` rather than `||` so that a distance of 0 is not discarded.
  const distancia = viab.distancia ?? viab.raw?.distancia ?? viab.raw?.distance ?? '';
  const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
  const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
  return [provedor, distancia, dedicado, naoDedicado, viab.error || ''];
}

/**
 * Runs the viability lookup for every valid row of the input file and writes
 * a `;`-separated CSV (with BOM) made of the result columns followed by the
 * original row. Job counters are updated per row and the job is marked
 * finished once the file is flushed.
 *
 * @param {*} jobId - Job identifier passed to the job store.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original upload name, used to name the output.
 * @returns {Promise<string>} Path of the generated CSV.
 * @throws {Error} When the input cannot be read or the output cannot be written.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  const { headers, dataRows, indexes } = await loadInput(inputPath);

  const baseName = path.parse(originalName || inputPath).name;
  const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
  const outputDir = path.join(__dirname, '..', 'outputs');
  fs.mkdirSync(outputDir, { recursive: true });
  const outputPath = path.join(outputDir, outputFilename);

  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
  // Without a listener a stream 'error' is an uncaught exception; record it
  // and surface it from the loop instead.
  let streamError = null;
  outStream.on('error', err => {
    streamError = err;
  });

  const writeRow = cells => outStream.write(`${cells.map(cleanCsvValue).join(';')}\n`);

  try {
    outStream.write('\uFEFF');
    writeRow([...OUTPUT_HEADERS, ...headers]);

    for (const cols of dataRows) {
      const geoPayload = buildGeoPayload(cols, indexes);
      const cepPayload = buildCepPayload(cols, indexes);
      if (!geoPayload && !cepPayload) continue;
      if (streamError) throw streamError;

      let resultCols;
      let failed = false;
      try {
        const viab = await consultarComFallback(geoPayload, cepPayload);
        resultCols = buildResultColumns(viab);
      } catch (err) {
        failed = true;
        resultCols = ['', '', '', '', formatApiErrorResponse(err)];
      }

      writeRow([...resultCols, ...cols]);
      if (failed) incrementErrors(jobId);
      incrementProcessed(jobId);
    }

    outStream.end();
    // 'finish' never fires on a stream that has already failed.
    if (streamError) throw streamError;
    await once(outStream, 'finish');
  } catch (err) {
    outStream.destroy();
    throw err;
  }

  finishJob(jobId, path.basename(outputPath));
  return outputPath;
}

module.exports = { processCsvFile, countValidLines };