From e42621642bcd541eb960d47f655e5cbb4631906e Mon Sep 17 00:00:00 2001 From: Rafael Lopes Date: Mon, 4 May 2026 16:52:23 -0300 Subject: [PATCH] =?UTF-8?q?FIX/FEAT:=20Fun=C3=A7=C3=A3o=20auxiliar=20para?= =?UTF-8?q?=20realizar=20a=20valida=C3=A7=C3=A3o=20de=20arquivos=20excel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- service/csvService.js | 52 +++++++++++++++++++++++++++++++---- service/viabilidadeService.js | 42 +++++++++++++++++++++++++--- 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/service/csvService.js b/service/csvService.js index 8332a40..332a7c5 100644 --- a/service/csvService.js +++ b/service/csvService.js @@ -20,7 +20,27 @@ function normalizeHeader(value) { } function isExcelFile(filePath) { - return ['.xls', '.xlsx'].includes(path.extname(filePath).toLowerCase()); + const ext = path.extname(filePath).toLowerCase(); + if (['.xls', '.xlsx'].includes(ext)) return true; + + const fileStart = fs.readFileSync(filePath).subarray(0, 512); + const signature = fileStart.subarray(0, 8); + const isZipBasedXlsx = signature[0] === 0x50 && signature[1] === 0x4b; + const isOleBasedXls = signature[0] === 0xd0 + && signature[1] === 0xcf + && signature[2] === 0x11 + && signature[3] === 0xe0 + && signature[4] === 0xa1 + && signature[5] === 0xb1 + && signature[6] === 0x1a + && signature[7] === 0xe1; + + const startText = fileStart.toString('latin1').trimStart().toLowerCase(); + const isHtmlExcel = startText.startsWith(' /\bcep\b/.test(header) || header === 'codigo postal'); +} + +function hasAddressOrNumberHeader(headers) { + return headers.map(normalizeHeader).some(header => ['numero', 'num', 'nº', 'n°'].includes(header) + || header.includes('endereco') + || header.includes('logradouro')); +} + +function hasGeoHeaders(headers) { + const normalizedHeaders = headers.map(normalizeHeader); + return normalizedHeaders.includes('latitude') && normalizedHeaders.includes('longitude'); +} + +function findHeaderRowIndex(rows) { + const index = rows.findIndex(row => (hasCepHeader(row) && hasAddressOrNumberHeader(row)) || hasGeoHeaders(row)); + return index >= 0 ? index : 0; +} + function resolveColumnIndexes(headers) { const normalizedHeaders = headers.map(normalizeHeader); const exactIndex = aliases => { @@ -138,11 +178,12 @@ function cleanCsvValue(value) { async function countValidLines(inputPath) { const dataType = await discoverDataType(inputPath); const rows = readRows(inputPath); - const headers = rows[0] || []; + const headerRowIndex = findHeaderRowIndex(rows); + const headers = rows[headerRowIndex] || []; const indexes = resolveColumnIndexes(headers); let total = 0; - for (const cols of rows.slice(1)) { + for (const cols of rows.slice(headerRowIndex + 1)) { if (dataType === 'cep') { if (buildCepPayload(cols, indexes)) total++; } else if (dataType === 'geolocalizacao') { @@ -158,7 +199,8 @@ async function countValidLines(inputPath) { async function processCsvFile(jobId, inputPath, originalName) { const dataType = await discoverDataType(inputPath); const rows = readRows(inputPath); - const headers = rows[0] || []; + const headerRowIndex = findHeaderRowIndex(rows); + const headers = rows[headerRowIndex] || []; const indexes = resolveColumnIndexes(headers); const baseName = path.parse(originalName || inputPath).name; const outputFilename = `processed_${Date.now()}_${baseName}.csv`; @@ -167,7 +209,7 @@ async function processCsvFile(jobId, inputPath, originalName) { outStream.write('\uFEFF'); outStream.write(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers].join(';') + '\n'); - for (const cols of rows.slice(1)) { + for (const cols of rows.slice(headerRowIndex + 1)) { let dataToSend = {}; if (dataType === 'cep') { diff --git a/service/viabilidadeService.js b/service/viabilidadeService.js index ca9806d..8949492 100644 --- a/service/viabilidadeService.js +++ b/service/viabilidadeService.js @@ -21,7 +21,27 @@ function hasHeader(headers, aliases) { } function isExcelFile(filePath) { - return ['.xls', '.xlsx'].includes(path.extname(filePath).toLowerCase()); + const ext = path.extname(filePath).toLowerCase(); + if (['.xls', '.xlsx'].includes(ext)) return true; + + const fileStart = fs.readFileSync(filePath).subarray(0, 512); + const signature = fileStart.subarray(0, 8); + const isZipBasedXlsx = signature[0] === 0x50 && signature[1] === 0x4b; + const isOleBasedXls = signature[0] === 0xd0 + && signature[1] === 0xcf + && signature[2] === 0x11 + && signature[3] === 0xe0 + && signature[4] === 0xa1 + && signature[5] === 0xb1 + && signature[6] === 0x1a + && signature[7] === 0xe1; + + const startText = fileStart.toString('latin1').trimStart().toLowerCase(); + const isHtmlExcel = startText.startsWith(' b.count - a.count)[0].delimiter; } +function findHeaderRow(rows) { + return rows.find(row => { + const headers = row.map(normalizeHeader); + const hasCepNumero = hasCepHeader(headers) && hasAddressOrNumberHeader(headers); + const hasGeo = headers.includes('latitude') && headers.includes('longitude'); + return hasCepNumero || hasGeo; + }) || []; +} + function readExcelHeaders(filePath) { const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false }); const firstSheetName = workbook.SheetNames[0]; @@ -42,7 +71,7 @@ function readExcelHeaders(filePath) { defval: '' }); - return (rows[0] || []).map(normalizeHeader); + return findHeaderRow(rows).map(normalizeHeader); } async function readDelimitedHeaders(filePath) { @@ -52,8 +81,13 @@ async function readDelimitedHeaders(filePath) { for await (const rawLine of rl) { const line = rawLine.replace(/^\uFEFF/, '').replace(/\r$/, ''); if (!line.trim()) continue; - rl.close(); - return line.split(detectDelimiter(line)).map(normalizeHeader); + const headers = line.split(detectDelimiter(line)).map(normalizeHeader); + const hasCepNumero = hasCepHeader(headers) && hasAddressOrNumberHeader(headers); + const hasGeo = headers.includes('latitude') && headers.includes('longitude'); + if (hasCepNumero || hasGeo) { + rl.close(); + return headers; + } } rl.close();