FIX/FEAT: Função auxiliar para realizar a validação de arquivos Excel

This commit is contained in:
Rafael Alves Lopes 2026-05-04 16:52:23 -03:00
parent 827855295e
commit e42621642b
2 changed files with 85 additions and 9 deletions

View File

@ -20,7 +20,27 @@ function normalizeHeader(value) {
}
/**
 * Decides whether a file should be treated as an Excel workbook.
 *
 * A `.xls` / `.xlsx` extension is accepted immediately, without touching the
 * disk. For any other extension the first 512 bytes of the file are inspected
 * and the file is accepted when they start with the ZIP marker "PK", start
 * with the 8-byte OLE2 compound-file signature, or look like HTML/table markup
 * (presumably spreadsheets exported as HTML; confirm against the callers).
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {boolean} `true` when the extension or the leading bytes match.
 * @throws {Error} Propagates the `fs` error when the file cannot be opened or
 *   read (only reachable when the extension is not `.xls` / `.xlsx`).
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Read only the head of the file instead of loading the whole thing into
  // memory just to look at its first bytes.
  const headBuffer = Buffer.alloc(512);
  const fd = fs.openSync(filePath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = fs.readSync(fd, headBuffer, 0, headBuffer.length, 0);
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = headBuffer.subarray(0, bytesRead);

  // "PK": every ZIP container (which is what .xlsx is) starts with these bytes.
  const isZipBasedXlsx = fileStart[0] === 0x50 && fileStart[1] === 0x4b;

  // OLE2 compound-file header used by the legacy binary .xls format.
  const oleSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
  const isOleBasedXls = fileStart.subarray(0, 8).equals(oleSignature);

  // latin1 maps every byte to one character, so decoding never fails
  // regardless of the file's real encoding.
  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
function detectDelimiter(line) {
@ -85,6 +105,26 @@ function findFirstHeaderIndex(headers, predicate) {
return headers.map(normalizeHeader).findIndex(predicate);
}
/**
 * Reports whether any header, once passed through `normalizeHeader`, contains
 * the standalone word `cep` or is exactly `codigo postal`.
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when at least one header matches.
 */
function hasCepHeader(headers) {
  const cepWord = /\bcep\b/;
  const looksLikeCep = (name) => {
    if (cepWord.test(name)) return true;
    return name === 'codigo postal';
  };
  const normalized = headers.map(normalizeHeader);
  return normalized.some(looksLikeCep);
}
/**
 * Reports whether any header, once passed through `normalizeHeader`, is one of
 * the house-number aliases (`numero`, `num`, `nº`, `n°`) or contains
 * `endereco` or `logradouro`.
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when at least one header matches.
 */
function hasAddressOrNumberHeader(headers) {
  const numberAliases = ['numero', 'num', 'nº', 'n°'];
  const isAddressOrNumber = (name) => {
    if (numberAliases.includes(name)) return true;
    if (name.includes('endereco')) return true;
    return name.includes('logradouro');
  };
  const normalized = headers.map(normalizeHeader);
  return normalized.some(isAddressOrNumber);
}
/**
 * Reports whether the row carries both a `latitude` and a `longitude` header
 * after each cell is passed through `normalizeHeader`.
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` only when both names are present.
 */
function hasGeoHeaders(headers) {
  const names = new Set(headers.map(normalizeHeader));
  if (!names.has('latitude')) return false;
  return names.has('longitude');
}
/**
 * Finds the position of the first row that qualifies as a header row: it has
 * a CEP header together with an address/number header, or it has both geo
 * headers. Falls back to index 0 when no row qualifies.
 *
 * @param {Array<Array>} rows - All rows of the sheet/file, in order.
 * @returns {number} Index of the header row, or 0 when none is found.
 */
function findHeaderRowIndex(rows) {
  const isHeaderRow = (row) => {
    if (hasCepHeader(row) && hasAddressOrNumberHeader(row)) return true;
    return hasGeoHeaders(row);
  };
  const position = rows.findIndex(isHeaderRow);
  if (position === -1) return 0;
  return position;
}
function resolveColumnIndexes(headers) {
const normalizedHeaders = headers.map(normalizeHeader);
const exactIndex = aliases => {
@ -138,11 +178,12 @@ function cleanCsvValue(value) {
async function countValidLines(inputPath) {
const dataType = await discoverDataType(inputPath);
const rows = readRows(inputPath);
const headers = rows[0] || [];
const headerRowIndex = findHeaderRowIndex(rows);
const headers = rows[headerRowIndex] || [];
const indexes = resolveColumnIndexes(headers);
let total = 0;
for (const cols of rows.slice(1)) {
for (const cols of rows.slice(headerRowIndex + 1)) {
if (dataType === 'cep') {
if (buildCepPayload(cols, indexes)) total++;
} else if (dataType === 'geolocalizacao') {
@ -158,7 +199,8 @@ async function countValidLines(inputPath) {
async function processCsvFile(jobId, inputPath, originalName) {
const dataType = await discoverDataType(inputPath);
const rows = readRows(inputPath);
const headers = rows[0] || [];
const headerRowIndex = findHeaderRowIndex(rows);
const headers = rows[headerRowIndex] || [];
const indexes = resolveColumnIndexes(headers);
const baseName = path.parse(originalName || inputPath).name;
const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
@ -167,7 +209,7 @@ async function processCsvFile(jobId, inputPath, originalName) {
outStream.write('\uFEFF');
outStream.write(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers].join(';') + '\n');
for (const cols of rows.slice(1)) {
for (const cols of rows.slice(headerRowIndex + 1)) {
let dataToSend = {};
if (dataType === 'cep') {

View File

@ -21,7 +21,27 @@ function hasHeader(headers, aliases) {
}
/**
 * Decides whether a file should be treated as an Excel workbook.
 *
 * A `.xls` / `.xlsx` extension is accepted immediately, without touching the
 * disk. For any other extension the first 512 bytes of the file are inspected
 * and the file is accepted when they start with the ZIP marker "PK", start
 * with the 8-byte OLE2 compound-file signature, or look like HTML/table markup
 * (presumably spreadsheets exported as HTML; confirm against the callers).
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {boolean} `true` when the extension or the leading bytes match.
 * @throws {Error} Propagates the `fs` error when the file cannot be opened or
 *   read (only reachable when the extension is not `.xls` / `.xlsx`).
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Read only the head of the file instead of loading the whole thing into
  // memory just to look at its first bytes.
  const headBuffer = Buffer.alloc(512);
  const fd = fs.openSync(filePath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = fs.readSync(fd, headBuffer, 0, headBuffer.length, 0);
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = headBuffer.subarray(0, bytesRead);

  // "PK": every ZIP container (which is what .xlsx is) starts with these bytes.
  const isZipBasedXlsx = fileStart[0] === 0x50 && fileStart[1] === 0x4b;

  // OLE2 compound-file header used by the legacy binary .xls format.
  const oleSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
  const isOleBasedXls = fileStart.subarray(0, 8).equals(oleSignature);

  // latin1 maps every byte to one character, so decoding never fails
  // regardless of the file's real encoding.
  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
function detectDelimiter(line) {
@ -31,6 +51,15 @@ function detectDelimiter(line) {
.sort((a, b) => b.count - a.count)[0].delimiter;
}
/**
 * Returns the first row that qualifies as a header row, or an empty array when
 * no row does. A row qualifies when its normalized cells include a CEP header
 * together with an address/number header, or include both `latitude` and
 * `longitude`.
 *
 * @param {Array<Array>} rows - All rows of the sheet, in order.
 * @returns {Array} The matching row as it appears in `rows` (not normalized),
 *   or `[]` when nothing matches.
 */
function findHeaderRow(rows) {
  const isHeaderRow = (row) => {
    const names = row.map(normalizeHeader);
    if (hasCepHeader(names) && hasAddressOrNumberHeader(names)) return true;
    return names.includes('latitude') && names.includes('longitude');
  };
  const match = rows.find(isHeaderRow);
  return match || [];
}
function readExcelHeaders(filePath) {
const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
const firstSheetName = workbook.SheetNames[0];
@ -42,7 +71,7 @@ function readExcelHeaders(filePath) {
defval: ''
});
return (rows[0] || []).map(normalizeHeader);
return findHeaderRow(rows).map(normalizeHeader);
}
async function readDelimitedHeaders(filePath) {
@ -52,8 +81,13 @@ async function readDelimitedHeaders(filePath) {
for await (const rawLine of rl) {
const line = rawLine.replace(/^\uFEFF/, '').replace(/\r$/, '');
if (!line.trim()) continue;
rl.close();
return line.split(detectDelimiter(line)).map(normalizeHeader);
const headers = line.split(detectDelimiter(line)).map(normalizeHeader);
const hasCepNumero = hasCepHeader(headers) && hasAddressOrNumberHeader(headers);
const hasGeo = headers.includes('latitude') && headers.includes('longitude');
if (hasCepNumero || hasGeo) {
rl.close();
return headers;
}
}
rl.close();