FIX/FEAT: Função auxiliar para realizar a validação de arquivos Excel
This commit is contained in:
parent
827855295e
commit
e42621642b
@ -20,7 +20,27 @@ function normalizeHeader(value) {
|
||||
}
|
||||
|
||||
/**
 * Decide whether a file should be handled as an Excel workbook.
 *
 * A `.xls` / `.xlsx` extension (case-insensitive) is accepted without touching
 * the disk. For any other extension, the first 512 bytes are sniffed for:
 *   - the ZIP local-file-header signature `PK\x03\x04` (zip-based xlsx),
 *   - the 8-byte OLE2 compound-document signature (legacy xls),
 *   - HTML markup: text starting with `<html` or `<!doctype html`, or
 *     containing `<table` anywhere in the sniffed window.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} `true` when the extension or the leading bytes match.
 * @throws {Error} Propagates the `fs` error when sniffing is required and the
 *   file cannot be opened or read.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Read only the sniffing window; loading the whole file is wasteful for
  // large uploads when just the first bytes are inspected.
  const sniffLength = 512;
  const buffer = Buffer.alloc(sniffLength);
  const fd = fs.openSync(filePath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = fs.readSync(fd, buffer, 0, sniffLength, 0);
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = buffer.subarray(0, bytesRead);

  const startsWithBytes = (expected) => fileStart.length >= expected.length
    && expected.every((byte, position) => fileStart[position] === byte);

  // Require the full 4-byte local-file-header signature: matching only "PK"
  // would misclassify plain text (e.g. a CSV whose first header starts with "PK").
  const isZipBasedXlsx = startsWithBytes([0x50, 0x4b, 0x03, 0x04]);
  const isOleBasedXls = startsWithBytes([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);

  // latin1 maps every byte to one character, so decoding never fails; a UTF-8
  // BOM therefore shows up as the three characters EF BB BF and is dropped
  // before the prefix checks.
  const startText = fileStart
    .toString('latin1')
    .replace(/^\u00ef\u00bb\u00bf/, '')
    .trimStart()
    .toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
|
||||
|
||||
function detectDelimiter(line) {
|
||||
@ -85,6 +105,26 @@ function findFirstHeaderIndex(headers, predicate) {
|
||||
return headers.map(normalizeHeader).findIndex(predicate);
|
||||
}
|
||||
|
||||
/**
 * Tell whether a header row contains a CEP (postal code) column.
 *
 * Every header is passed through `normalizeHeader` first; a header matches
 * when it contains `cep` as a standalone word or equals `codigo postal`.
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when at least one header matches.
 */
function hasCepHeader(headers) {
  const cepWord = /\bcep\b/;
  const normalized = headers.map(normalizeHeader);

  for (const name of normalized) {
    if (cepWord.test(name) || name === 'codigo postal') {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Tell whether a header row contains an address or street-number column.
 *
 * Every header is passed through `normalizeHeader` first; a header matches
 * when it is exactly one of the number aliases, or contains `endereco` or
 * `logradouro`.
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when at least one header matches.
 */
function hasAddressOrNumberHeader(headers) {
  const numberAliases = ['numero', 'num', 'nº', 'n°'];
  const normalized = headers.map(normalizeHeader);

  for (const name of normalized) {
    if (numberAliases.includes(name)) return true;
    if (name.includes('endereco')) return true;
    if (name.includes('logradouro')) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Tell whether a header row carries both geographic coordinate columns.
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when, after `normalizeHeader`, the row contains
 *   both a `latitude` and a `longitude` header.
 */
function hasGeoHeaders(headers) {
  const names = headers.map(normalizeHeader);
  return ['latitude', 'longitude'].every(required => names.includes(required));
}
|
||||
|
||||
/**
 * Locate the row that holds the column headers.
 *
 * A row qualifies when it has a CEP header together with an address/number
 * header, or when it has both geographic headers.
 *
 * @param {Array<Array>} rows - All rows of the sheet/file.
 * @returns {number} Index of the first qualifying row, or `0` when none does.
 */
function findHeaderRowIndex(rows) {
  for (let position = 0; position < rows.length; position += 1) {
    const candidate = rows[position];
    const looksLikeCep = hasCepHeader(candidate) && hasAddressOrNumberHeader(candidate);
    if (looksLikeCep || hasGeoHeaders(candidate)) {
      return position;
    }
  }
  // No recognisable header row: fall back to the first row.
  return 0;
}
|
||||
|
||||
function resolveColumnIndexes(headers) {
|
||||
const normalizedHeaders = headers.map(normalizeHeader);
|
||||
const exactIndex = aliases => {
|
||||
@ -138,11 +178,12 @@ function cleanCsvValue(value) {
|
||||
async function countValidLines(inputPath) {
|
||||
const dataType = await discoverDataType(inputPath);
|
||||
const rows = readRows(inputPath);
|
||||
const headers = rows[0] || [];
|
||||
const headerRowIndex = findHeaderRowIndex(rows);
|
||||
const headers = rows[headerRowIndex] || [];
|
||||
const indexes = resolveColumnIndexes(headers);
|
||||
let total = 0;
|
||||
|
||||
for (const cols of rows.slice(1)) {
|
||||
for (const cols of rows.slice(headerRowIndex + 1)) {
|
||||
if (dataType === 'cep') {
|
||||
if (buildCepPayload(cols, indexes)) total++;
|
||||
} else if (dataType === 'geolocalizacao') {
|
||||
@ -158,7 +199,8 @@ async function countValidLines(inputPath) {
|
||||
async function processCsvFile(jobId, inputPath, originalName) {
|
||||
const dataType = await discoverDataType(inputPath);
|
||||
const rows = readRows(inputPath);
|
||||
const headers = rows[0] || [];
|
||||
const headerRowIndex = findHeaderRowIndex(rows);
|
||||
const headers = rows[headerRowIndex] || [];
|
||||
const indexes = resolveColumnIndexes(headers);
|
||||
const baseName = path.parse(originalName || inputPath).name;
|
||||
const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
|
||||
@ -167,7 +209,7 @@ async function processCsvFile(jobId, inputPath, originalName) {
|
||||
outStream.write('\uFEFF');
|
||||
outStream.write(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers].join(';') + '\n');
|
||||
|
||||
for (const cols of rows.slice(1)) {
|
||||
for (const cols of rows.slice(headerRowIndex + 1)) {
|
||||
let dataToSend = {};
|
||||
|
||||
if (dataType === 'cep') {
|
||||
|
||||
@ -21,7 +21,27 @@ function hasHeader(headers, aliases) {
|
||||
}
|
||||
|
||||
/**
 * Decide whether a file should be handled as an Excel workbook.
 *
 * A `.xls` / `.xlsx` extension (case-insensitive) is accepted without touching
 * the disk. For any other extension, the first 512 bytes are sniffed for:
 *   - the ZIP local-file-header signature `PK\x03\x04` (zip-based xlsx),
 *   - the 8-byte OLE2 compound-document signature (legacy xls),
 *   - HTML markup: text starting with `<html` or `<!doctype html`, or
 *     containing `<table` anywhere in the sniffed window.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} `true` when the extension or the leading bytes match.
 * @throws {Error} Propagates the `fs` error when sniffing is required and the
 *   file cannot be opened or read.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Read only the sniffing window; loading the whole file is wasteful for
  // large uploads when just the first bytes are inspected.
  const sniffLength = 512;
  const buffer = Buffer.alloc(sniffLength);
  const fd = fs.openSync(filePath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = fs.readSync(fd, buffer, 0, sniffLength, 0);
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = buffer.subarray(0, bytesRead);

  const startsWithBytes = (expected) => fileStart.length >= expected.length
    && expected.every((byte, position) => fileStart[position] === byte);

  // Require the full 4-byte local-file-header signature: matching only "PK"
  // would misclassify plain text (e.g. a CSV whose first header starts with "PK").
  const isZipBasedXlsx = startsWithBytes([0x50, 0x4b, 0x03, 0x04]);
  const isOleBasedXls = startsWithBytes([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);

  // latin1 maps every byte to one character, so decoding never fails; a UTF-8
  // BOM therefore shows up as the three characters EF BB BF and is dropped
  // before the prefix checks.
  const startText = fileStart
    .toString('latin1')
    .replace(/^\u00ef\u00bb\u00bf/, '')
    .trimStart()
    .toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
|
||||
|
||||
function detectDelimiter(line) {
|
||||
@ -31,6 +51,15 @@ function detectDelimiter(line) {
|
||||
.sort((a, b) => b.count - a.count)[0].delimiter;
|
||||
}
|
||||
|
||||
/**
 * Return the first row that looks like a header row.
 *
 * Each row is normalized with `normalizeHeader`; it qualifies when it has a
 * CEP header plus an address/number header, or when it contains both
 * `latitude` and `longitude`.
 *
 * @param {Array<Array>} rows - All rows of the sheet.
 * @returns {Array} The original (un-normalized) matching row, or an empty
 *   array when no row qualifies.
 */
function findHeaderRow(rows) {
  for (const candidate of rows) {
    const normalized = candidate.map(normalizeHeader);

    if (hasCepHeader(normalized) && hasAddressOrNumberHeader(normalized)) {
      return candidate;
    }
    if (normalized.includes('latitude') && normalized.includes('longitude')) {
      return candidate;
    }
  }
  return [];
}
|
||||
|
||||
function readExcelHeaders(filePath) {
|
||||
const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
|
||||
const firstSheetName = workbook.SheetNames[0];
|
||||
@ -42,7 +71,7 @@ function readExcelHeaders(filePath) {
|
||||
defval: ''
|
||||
});
|
||||
|
||||
return (rows[0] || []).map(normalizeHeader);
|
||||
return findHeaderRow(rows).map(normalizeHeader);
|
||||
}
|
||||
|
||||
async function readDelimitedHeaders(filePath) {
|
||||
@ -52,8 +81,13 @@ async function readDelimitedHeaders(filePath) {
|
||||
for await (const rawLine of rl) {
|
||||
const line = rawLine.replace(/^\uFEFF/, '').replace(/\r$/, '');
|
||||
if (!line.trim()) continue;
|
||||
rl.close();
|
||||
return line.split(detectDelimiter(line)).map(normalizeHeader);
|
||||
const headers = line.split(detectDelimiter(line)).map(normalizeHeader);
|
||||
const hasCepNumero = hasCepHeader(headers) && hasAddressOrNumberHeader(headers);
|
||||
const hasGeo = headers.includes('latitude') && headers.includes('longitude');
|
||||
if (hasCepNumero || hasGeo) {
|
||||
rl.close();
|
||||
return headers;
|
||||
}
|
||||
}
|
||||
|
||||
rl.close();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user