const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const XLSX = require('xlsx');
const ExcelJS = require('exceljs');
const { once } = require('events');
const { incrementProcessed, incrementErrors, finishJob } = require('./jobStore.service');

/** Result columns prepended to every output file, in output order. */
const RESULT_HEADERS = ['Provedor', 'Distancia', 'Dedicado', 'Nao Dedicado', 'Erro'];

/** Number of leading bytes inspected when sniffing a file's format. */
const FILE_HEAD_BYTES = 512;

/** Magic number found at the start of OLE2 compound documents (legacy .xls). */
const OLE_SIGNATURE = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];

// NOTE(review): the candidate list was lost in the damaged part of the original
// source; this set is a reconstruction. Order matters: on a tie the first
// candidate wins because Array.prototype.sort is stable.
const DELIMITER_CANDIDATES = [';', ',', '\t', '|'];

/**
 * Canonicalizes a header cell for comparison: trims, lowercases, strips
 * combining diacritics, and collapses `_`, `-` and whitespace runs into a
 * single space.
 *
 * @param {*} value - Raw header cell.
 * @returns {string} Normalized header text ('' for falsy input).
 */
function normalizeHeader(value) {
  return String(value || '')
    .trim()
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ');
}

/**
 * Reads at most `length` bytes from the start of a file without loading the
 * whole file into memory.
 *
 * @param {string} filePath - File to inspect.
 * @param {number} [length=FILE_HEAD_BYTES] - Maximum number of bytes to read.
 * @returns {Buffer} The bytes actually read (may be shorter than `length`).
 */
function readFileHead(filePath, length = FILE_HEAD_BYTES) {
  const buffer = Buffer.alloc(length);
  const fd = fs.openSync(filePath, 'r');
  try {
    const bytesRead = fs.readSync(fd, buffer, 0, length, 0);
    return buffer.subarray(0, bytesRead);
  } finally {
    fs.closeSync(fd);
  }
}

/**
 * Tells whether a buffer starts with the "PK" marker used by ZIP containers
 * (and therefore by .xlsx workbooks).
 *
 * @param {Buffer} head - Leading bytes of the file.
 * @returns {boolean}
 */
function hasZipSignature(head) {
  return head[0] === 0x50 && head[1] === 0x4b;
}

/**
 * Decides whether a file should be read as a spreadsheet. A `.xls`/`.xlsx`
 * extension is accepted outright; otherwise the leading bytes are sniffed for
 * a ZIP signature, an OLE2 signature, or HTML markup.
 *
 * @param {string} filePath - File to inspect.
 * @returns {boolean}
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  const fileStart = readFileHead(filePath);
  const isZipBasedXlsx = hasZipSignature(fileStart);
  const isOleBasedXls = OLE_SIGNATURE.every((byte, index) => fileStart[index] === byte);
  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  // NOTE(review): the original prefixes tested here were lost in the damaged
  // source; this list of HTML markers is a reconstruction - confirm.
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.startsWith('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}

/**
 * Tells whether a file is a ZIP-based workbook, i.e. one the ExcelJS xlsx
 * reader can open.
 *
 * NOTE(review): the original definition was lost in the damaged source. This
 * reconstruction relies on content only, so a mislabelled legacy .xls is not
 * handed to ExcelJS - confirm against the intended behaviour.
 *
 * @param {string} filePath - File to inspect.
 * @returns {boolean}
 */
function isXlsxFile(filePath) {
  return hasZipSignature(readFileHead(filePath, 4));
}

/**
 * Picks the candidate delimiter that splits `line` into the most columns.
 *
 * @param {string} line - First non-empty line of the file.
 * @returns {string} The chosen delimiter.
 */
function detectDelimiter(line) {
  return DELIMITER_CANDIDATES
    .map(delimiter => ({ delimiter, count: line.split(delimiter).length }))
    .sort((a, b) => b.count - a.count)[0].delimiter;
}

/**
 * Splits one delimited line into trimmed fields, honouring double-quoted
 * fields and the `""` escape for a literal quote inside them.
 *
 * @param {string} line - A single line (no line terminator).
 * @param {string} delimiter - Single-character field separator.
 * @returns {string[]} Field values with surrounding quotes removed.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      // `""` is an escaped quote only INSIDE a quoted field; outside, the first
      // quote opens a field, so `a;"";b` yields an empty middle column.
      if (inQuotes && line[i + 1] === '"') {
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  cols.push(current.trim());
  return cols;
}

/**
 * Reads a UTF-8 delimited text file into rows of trimmed strings. A leading
 * BOM is removed, blank lines are dropped, and the delimiter is detected from
 * the first remaining line.
 *
 * @param {string} filePath - File to read.
 * @returns {string[][]} Parsed rows ([] when the file has no content).
 */
function readDelimitedRows(filePath) {
  const content = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');
  const lines = content.split(/\r?\n/).filter(line => line.trim());
  if (!lines.length) return [];
  const delimiter = detectDelimiter(lines[0]);
  return lines.map(line => splitDelimitedLine(line.replace(/\r$/, ''), delimiter));
}

/**
 * Reads the first sheet of a workbook into rows of trimmed strings. Blank
 * rows are kept so row positions stay aligned with the sheet.
 *
 * @param {string} filePath - Workbook to read.
 * @returns {string[][]} Rows of the first sheet ([] when there is no sheet).
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) return [];
  return XLSX.utils
    .sheet_to_json(workbook.Sheets[firstSheetName], { header: 1, blankrows: true, defval: '' })
    .map(row => row.map(cell => String(cell ?? '').trim()));
}

/**
 * Reads any supported input file into rows of strings.
 *
 * @param {string} filePath - File to read.
 * @returns {string[][]}
 */
function readRows(filePath) {
  return isExcelFile(filePath) ? readExcelRows(filePath) : readDelimitedRows(filePath);
}

/**
 * Finds the first header whose normalized text satisfies `predicate`.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @param {(header: string) => boolean} predicate - Test on the normalized header.
 * @returns {number} Column index, or -1 when nothing matches.
 */
function findFirstHeaderIndex(headers, predicate) {
  return headers.map(normalizeHeader).findIndex(predicate);
}

/**
 * Tells whether any header equals one of the aliases after normalization.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @param {string[]} aliases - Accepted header names.
 * @returns {boolean}
 */
function hasHeaderAlias(headers, aliases) {
  const normalizedAliases = aliases.map(normalizeHeader);
  return headers.map(normalizeHeader).some(header => normalizedAliases.includes(header));
}

/**
 * Tells whether the row has a CEP (postal code) header.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @returns {boolean}
 */
function hasCepHeader(headers) {
  return headers
    .map(normalizeHeader)
    .some(header => /\bcep\b/.test(header) || header === 'codigo postal');
}

/**
 * Tells whether the row has a street-number or address header.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @returns {boolean}
 */
function hasAddressOrNumberHeader(headers) {
  return headers
    .map(normalizeHeader)
    .some(header => ['numero', 'num', 'nº', 'n°'].includes(header)
      || header.includes('endereco')
      || header.includes('logradouro'));
}

/**
 * Tells whether the row has both a latitude and a longitude header.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @returns {boolean}
 */
function hasGeoHeaders(headers) {
  return hasHeaderAlias(headers, ['latitude', 'lat'])
    && hasHeaderAlias(headers, ['longitude', 'long', 'lng', 'lon']);
}

/**
 * Locates the header row: the first row with CEP plus address/number headers,
 * or with latitude plus longitude headers.
 *
 * @param {Array<Array<*>>} rows - All rows of the file.
 * @returns {number} Index of the header row, or 0 when none is recognised.
 */
function findHeaderRowIndex(rows) {
  const index = rows.findIndex(
    row => (hasCepHeader(row) && hasAddressOrNumberHeader(row)) || hasGeoHeaders(row)
  );
  return index >= 0 ? index : 0;
}

/**
 * Resolves the column positions of the fields used to build lookup payloads.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number,
 *   idxLatitude: number, idxLongitude: number}} Column indexes; -1 for any
 *   column that is absent.
 */
function resolveColumnIndexes(headers) {
  const normalizedHeaders = headers.map(normalizeHeader);
  const exactIndex = aliases => {
    const normalizedAliases = aliases.map(normalizeHeader);
    return normalizedHeaders.findIndex(header => normalizedAliases.includes(header));
  };

  return {
    idxCep: findFirstHeaderIndex(headers, header => /\bcep\b/.test(header) || header === 'codigo postal'),
    idxNumero: exactIndex(['numero', 'número', 'num', 'nº', 'n°']),
    idxEndereco: findFirstHeaderIndex(headers, header => header.includes('endereco') || header.includes('logradouro')),
    idxLatitude: exactIndex(['latitude', 'lat']),
    idxLongitude: exactIndex(['longitude', 'long', 'lng', 'lon'])
  };
}

/**
 * Extracts a street number from a free-text address. Preference order: a
 * labelled number ("n 12", "nº 12"), a number right after a comma, then the
 * last standalone number. Road/kilometre markers (e.g. "BR-101", "KM 12") and
 * numbers directly before a closing parenthesis are ignored.
 *
 * @param {*} address - Free-text address.
 * @returns {string} The number found, or '1' when there is none.
 */
function extractAddressNumber(address) {
  const value = String(address || '').trim();
  if (!value) return '1';

  const withoutRoadKm = value
    .replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ')
    .replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  const labeledNumber = withoutRoadKm.match(/\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i);
  if (labeledNumber) return labeledNumber[1];

  const commaNumber = withoutRoadKm.match(/,\s*(\d+[A-Z]?)\b/i);
  if (commaNumber) return commaNumber[1];

  const standaloneNumbers = withoutRoadKm.match(/\b\d+[A-Z]?\b/gi) || [];
  return standaloneNumbers.length ? standaloneNumbers[standaloneNumbers.length - 1] : '1';
}

/**
 * Builds the CEP-based lookup payload for a row. The number comes from the
 * number column when filled, otherwise it is extracted from the address.
 *
 * @param {string[]} cols - Row cells.
 * @param {object} indexes - Result of `resolveColumnIndexes`.
 * @returns {{cep: string, numero: string}|null} Payload, or null when the row
 *   has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const cepRaw = indexes.idxCep >= 0 ? cols[indexes.idxCep] : '';
  const cep = String(cepRaw || '').replace(/\D/g, '');
  if (!cep) return null;

  const numeroRaw = indexes.idxNumero >= 0 ? cols[indexes.idxNumero] : '';
  const enderecoRaw = indexes.idxEndereco >= 0 ? cols[indexes.idxEndereco] : '';
  const numero = String(numeroRaw || '').trim() || extractAddressNumber(enderecoRaw);
  return { cep, numero };
}

/**
 * Parses a coordinate that may use a decimal comma.
 *
 * @param {*} value - Raw cell value.
 * @returns {number} The parsed number, or NaN when it is not a finite number.
 */
function parseCoordinate(value) {
  const parsed = parseFloat(String(value ?? '').trim().replace(',', '.'));
  return Number.isFinite(parsed) ? parsed : NaN;
}

/**
 * Builds the coordinate-based lookup payload for a row.
 *
 * @param {string[]} cols - Row cells.
 * @param {object} indexes - Result of `resolveColumnIndexes`.
 * @returns {{latitude: number, longitude: number}|null} Payload, or null when
 *   either coordinate is missing or unparseable.
 */
function buildGeoPayload(cols, indexes) {
  const latitude = indexes.idxLatitude >= 0 ? parseCoordinate(cols[indexes.idxLatitude]) : NaN;
  const longitude = indexes.idxLongitude >= 0 ? parseCoordinate(cols[indexes.idxLongitude]) : NaN;
  if (Number.isNaN(latitude) || Number.isNaN(longitude)) return null;
  return { latitude, longitude };
}

/**
 * Queries viability by coordinates first and falls back to CEP when the first
 * attempt throws, reports an error, or returns nothing.
 *
 * @param {object|null} geoPayload - Coordinate payload, if the row has one.
 * @param {object|null} cepPayload - CEP payload, if the row has one.
 * @returns {Promise<object>} The first result that carries no `error`.
 * @throws {Error} The last failure, or a generic error when no payload exists.
 */
async function consultarComFallback(geoPayload, cepPayload) {
  let lastError = null;

  for (const payload of [geoPayload, cepPayload]) {
    if (!payload) continue;
    try {
      const result = await consultarViabilidade(payload);
      if (!result) {
        // An empty answer cannot be rendered as a result row; treat it as a
        // failed attempt so the next payload is still tried.
        lastError = new Error('Consulta de viabilidade retornou resposta vazia');
      } else if (result.error) {
        lastError = new Error(result.error);
      } else {
        return result;
      }
    } catch (err) {
      lastError = err;
    }
  }

  throw lastError || new Error('Linha sem latitude/longitude ou CEP valido');
}

/**
 * Makes a value safe for the unquoted `;`-separated output: line breaks and
 * semicolons become spaces, double quotes become single quotes.
 *
 * @param {*} value - Any cell value.
 * @returns {string}
 */
function cleanCsvValue(value) {
  return String(value ?? '')
    .replace(/[\r\n;]/g, ' ')
    .replace(/"/g, "'");
}

/**
 * Extracts the most specific message from a failed lookup, preferring the
 * response body (`error.response.data`) over the error's own message.
 *
 * @param {*} error - Thrown value.
 * @returns {*} Message text; the falsy input itself when `error` is falsy.
 */
function formatApiErrorResponse(error) {
  const responseData = error && error.response && error.response.data;
  if (responseData !== undefined && responseData !== null) {
    if (typeof responseData === 'string') return responseData;
    if (responseData.error) return responseData.error;
    if (responseData.message) return responseData.message;
    return JSON.stringify(responseData);
  }
  return error && (error.message || String(error));
}

/**
 * Maps a successful lookup onto the RESULT_HEADERS columns.
 *
 * @param {object} viab - Lookup result.
 * @returns {Array<*>} Values in RESULT_HEADERS order.
 */
function buildSuccessResultColumns(viab) {
  const provedor = viab.provedor ?? '';
  const distancia = viab.distancia ?? (viab.raw && (viab.raw.distancia || viab.raw.distance)) ?? '';
  const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
  const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
  const error = viab.error ? cleanCsvValue(viab.error) : '';
  return [provedor, distancia, dedicado, naoDedicado, error];
}

/**
 * Maps a failed lookup onto the RESULT_HEADERS columns (only "Erro" filled).
 *
 * @param {*} err - Thrown value.
 * @returns {string[]} Values in RESULT_HEADERS order.
 */
function buildErrorResultColumns(err) {
  return ['', '', '', '', cleanCsvValue(formatApiErrorResponse(err))];
}

/**
 * Resolves the result columns for one data row and updates the job counters:
 * `incrementErrors` on failure, `incrementProcessed` for every looked-up row.
 *
 * @param {*} jobId - Job identifier passed to the job store.
 * @param {string[]} cols - Row cells.
 * @param {object} indexes - Result of `resolveColumnIndexes`.
 * @returns {Promise<Array<*>|null>} Values in RESULT_HEADERS order, or null
 *   when the row has neither coordinates nor a CEP and must be skipped.
 */
async function lookupRowResult(jobId, cols, indexes) {
  const geoPayload = buildGeoPayload(cols, indexes);
  const cepPayload = buildCepPayload(cols, indexes);
  if (!geoPayload && !cepPayload) return null;

  let values;
  try {
    const viab = await consultarComFallback(geoPayload, cepPayload);
    values = buildSuccessResultColumns(viab);
  } catch (err) {
    values = buildErrorResultColumns(err);
    incrementErrors(jobId);
  }
  incrementProcessed(jobId);
  return values;
}

/**
 * Shallow-copies the style of an ExcelJS cell so the copy can be assigned to
 * another cell without sharing style objects.
 *
 * @param {object} cell - Source cell.
 * @returns {object} Style object.
 */
function cloneCellStyle(cell) {
  return {
    numFmt: cell.numFmt,
    font: cell.font ? { ...cell.font } : undefined,
    alignment: cell.alignment ? { ...cell.alignment } : undefined,
    border: cell.border ? { ...cell.border } : undefined,
    fill: cell.fill ? { ...cell.fill } : undefined,
    protection: cell.protection ? { ...cell.protection } : undefined
  };
}

/**
 * Writes the result headers into the freshly inserted leading columns, giving
 * them the width (minimum 16) and header style of the first original column.
 *
 * @param {object} worksheet - ExcelJS worksheet, already shifted right.
 * @param {number} headerRowNumber - 1-based header row number.
 */
function styleInsertedResultColumns(worksheet, headerRowNumber) {
  // After the splice, the first original column sits right after the results.
  const sourceColumnNumber = RESULT_HEADERS.length + 1;
  const sourceColumn = worksheet.getColumn(sourceColumnNumber);
  const headerRow = worksheet.getRow(headerRowNumber);
  const sourceHeaderCell = headerRow.getCell(sourceColumnNumber);

  RESULT_HEADERS.forEach((header, index) => {
    const columnNumber = index + 1;
    worksheet.getColumn(columnNumber).width = Math.max(16, sourceColumn.width || 0);
    const headerCell = headerRow.getCell(columnNumber);
    headerCell.value = header;
    headerCell.style = cloneCellStyle(sourceHeaderCell);
  });
}

/**
 * Processes a ZIP-based workbook with ExcelJS: prepends the result columns to
 * the first worksheet, fills them for every row that has a usable payload,
 * and saves the workbook to `outputPath`.
 *
 * NOTE(review): `rows` come from the SheetJS reader and are mapped onto
 * worksheet rows as `rowIndex + 1`, which presumably holds only when the
 * sheet's used range starts at its first row - confirm.
 *
 * @param {*} jobId - Job identifier passed to the job store.
 * @param {string} inputPath - Source workbook.
 * @param {string} outputPath - Destination workbook.
 * @param {string[][]} rows - Rows as returned by `readRows`.
 * @param {number} headerRowIndex - 0-based header row index within `rows`.
 * @param {object} indexes - Result of `resolveColumnIndexes`.
 * @returns {Promise<void>}
 */
async function processXlsxFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes) {
  const workbook = new ExcelJS.Workbook();
  await workbook.xlsx.readFile(inputPath);
  const worksheet = workbook.worksheets[0];
  const headerRowNumber = headerRowIndex + 1;

  worksheet.spliceColumns(1, 0, ...RESULT_HEADERS.map(() => []));
  styleInsertedResultColumns(worksheet, headerRowNumber);

  for (let rowIndex = headerRowIndex + 1; rowIndex < rows.length; rowIndex++) {
    const values = await lookupRowResult(jobId, rows[rowIndex], indexes);
    if (!values) continue;

    const row = worksheet.getRow(rowIndex + 1);
    const styleSource = row.getCell(RESULT_HEADERS.length + 1);
    values.forEach((value, index) => {
      const cell = row.getCell(index + 1);
      cell.value = value;
      cell.style = cloneCellStyle(styleSource);
    });
    row.commit();
  }

  await workbook.xlsx.writeFile(outputPath);
}

/**
 * Shifts an A1-style cell address to the right.
 *
 * @param {string} address - Cell address such as "B3".
 * @param {number} colOffset - Number of columns to shift by.
 * @returns {string} Shifted address.
 */
function shiftCellAddress(address, colOffset) {
  const decoded = XLSX.utils.decode_cell(address);
  decoded.c += colOffset;
  return XLSX.utils.encode_cell(decoded);
}

/**
 * Shifts a range to the right.
 *
 * @param {string|object} range - A1-style range or decoded range object.
 * @param {number} colOffset - Number of columns to shift by.
 * @returns {{s: {r: number, c: number}, e: {r: number, c: number}}}
 */
function shiftRange(range, colOffset) {
  const decoded = typeof range === 'string' ? XLSX.utils.decode_range(range) : range;
  return {
    s: { r: decoded.s.r, c: decoded.s.c + colOffset },
    e: { r: decoded.e.r, c: decoded.e.c + colOffset }
  };
}

/**
 * Builds a copy of a SheetJS worksheet with every cell moved right by
 * RESULT_HEADERS.length columns and the result columns written in front.
 * `!ref`, `!cols`, `!merges` and `!autofilter` are adjusted; other `!`
 * metadata keys are not carried over.
 *
 * @param {object} worksheet - Source SheetJS worksheet (not modified).
 * @param {number} headerRowIndex - 0-based header row index.
 * @param {Array<{rowIndex: number, values: Array<*>}>} rowResults - Result
 *   columns per 0-based row index.
 * @returns {object} The new worksheet.
 */
function prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults) {
  const colOffset = RESULT_HEADERS.length;
  const shiftedWorksheet = {};

  Object.keys(worksheet).forEach(key => {
    if (key[0] === '!') return;
    shiftedWorksheet[shiftCellAddress(key, colOffset)] = worksheet[key];
  });

  const originalRange = worksheet['!ref']
    ? XLSX.utils.decode_range(worksheet['!ref'])
    : { s: { r: 0, c: 0 }, e: { r: headerRowIndex, c: 0 } };
  shiftedWorksheet['!ref'] = XLSX.utils.encode_range({
    s: { r: Math.min(originalRange.s.r, headerRowIndex), c: 0 },
    e: { r: originalRange.e.r, c: originalRange.e.c + colOffset }
  });

  if (worksheet['!cols']) {
    // One object per column so a later edit of one width cannot leak to others.
    const resultCols = Array.from({ length: colOffset }, () => ({ wch: 16 }));
    shiftedWorksheet['!cols'] = resultCols.concat(worksheet['!cols']);
  }
  if (worksheet['!merges']) {
    shiftedWorksheet['!merges'] = worksheet['!merges'].map(merge => shiftRange(merge, colOffset));
  }
  if (worksheet['!autofilter'] && worksheet['!autofilter'].ref) {
    shiftedWorksheet['!autofilter'] = {
      ...worksheet['!autofilter'],
      ref: XLSX.utils.encode_range(shiftRange(worksheet['!autofilter'].ref, colOffset))
    };
  }

  RESULT_HEADERS.forEach((value, index) => {
    const address = XLSX.utils.encode_cell({ r: headerRowIndex, c: index });
    shiftedWorksheet[address] = { t: 's', v: value };
  });
  rowResults.forEach(({ rowIndex, values }) => {
    values.forEach((value, index) => {
      const address = XLSX.utils.encode_cell({ r: rowIndex, c: index });
      shiftedWorksheet[address] = { t: 's', v: String(value ?? '') };
    });
  });

  return shiftedWorksheet;
}

/**
 * Counts the data rows that carry a usable payload (coordinates or CEP).
 *
 * NOTE(review): `discoverDataType` is awaited and its result discarded,
 * presumably for validation or side effects - confirm.
 *
 * @param {string} inputPath - File to inspect.
 * @returns {Promise<number>} Number of rows that will be looked up.
 */
async function countValidLines(inputPath) {
  await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const indexes = resolveColumnIndexes(rows[headerRowIndex] || []);

  return rows
    .slice(headerRowIndex + 1)
    .filter(cols => buildGeoPayload(cols, indexes) || buildCepPayload(cols, indexes))
    .length;
}

/**
 * Processes an uploaded file (delimited text or spreadsheet): looks up the
 * viability of every data row and writes a copy with the result columns in
 * front into the `outputs` directory next to this module's parent folder.
 * Spreadsheets are written as .xlsx, everything else as `;`-separated CSV
 * with a UTF-8 BOM. The job is marked finished with the output file name.
 *
 * NOTE(review): `discoverDataType` is awaited and its result discarded,
 * presumably for validation or side effects - confirm.
 *
 * @param {*} jobId - Job identifier passed to the job store.
 * @param {string} inputPath - Uploaded file.
 * @param {string} [originalName] - Original file name, used for the output name.
 * @returns {Promise<string>} Absolute path of the written file.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const headers = rows[headerRowIndex] || [];
  const indexes = resolveColumnIndexes(headers);

  const baseName = path.parse(originalName || inputPath).name;
  const isExcel = isExcelFile(inputPath);
  const outputFilename = `processed_${Date.now()}_${baseName}${isExcel ? '.xlsx' : '.csv'}`;
  const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });

  if (isXlsxFile(inputPath)) {
    // ExcelJS keeps the workbook's styling, so it is preferred when it can read the file.
    await processXlsxFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes);
    finishJob(jobId, path.basename(outputPath));
    return outputPath;
  }

  if (isExcel) {
    const workbook = XLSX.readFile(inputPath, { cellDates: false, raw: false, cellStyles: true });
    const firstSheetName = workbook.SheetNames[0];
    const worksheet = workbook.Sheets[firstSheetName];
    const rowResults = [];

    for (let rowIndex = headerRowIndex + 1; rowIndex < rows.length; rowIndex++) {
      const values = await lookupRowResult(jobId, rows[rowIndex], indexes);
      if (values) rowResults.push({ rowIndex, values });
    }

    workbook.Sheets[firstSheetName] = prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults);
    XLSX.writeFile(workbook, outputPath, { bookType: 'xlsx' });
    finishJob(jobId, path.basename(outputPath));
    return outputPath;
  }

  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
  const writeLine = values => outStream.write(`${values.map(cleanCsvValue).join(';')}\n`);

  outStream.write('\uFEFF');
  // Headers are sanitised like data cells so a `;` inside one cannot shift columns.
  writeLine([...RESULT_HEADERS, ...headers]);

  for (const cols of rows.slice(headerRowIndex + 1)) {
    const values = await lookupRowResult(jobId, cols, indexes);
    if (values) writeLine([...values, ...cols]);
  }

  outStream.end();
  await once(outStream, 'finish');
  finishJob(jobId, path.basename(outputPath));
  return outputPath;
}

module.exports = { processCsvFile, countValidLines };