FIX/FEAT: Função auxiliar para realizar a validação de arquivos excel
This commit is contained in:
parent
827855295e
commit
e42621642b
@ -20,7 +20,27 @@ function normalizeHeader(value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Decides whether a file should be treated as an Excel spreadsheet.
 *
 * A `.xls` / `.xlsx` extension is accepted immediately, without touching the
 * disk. For any other extension the first 512 bytes of the file are inspected
 * and the file is accepted when they start with a ZIP header ("PK"), start
 * with the 8-byte OLE2 compound-file signature, or look like HTML / contain a
 * `<table` tag.
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {boolean} `true` when the extension or the leading bytes match.
 * @throws {Error} Propagates the `fs` error when the file has a non-Excel
 *   extension and cannot be opened or read.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Only the first 512 bytes are ever inspected, so read just that window
  // instead of loading the whole (possibly very large) file into memory.
  const sniffBuffer = Buffer.alloc(512);
  let bytesRead = 0;
  const fd = fs.openSync(filePath, 'r');
  try {
    // readSync may return fewer bytes than requested; loop until the window
    // is full or end-of-file is reached.
    while (bytesRead < sniffBuffer.length) {
      const chunk = fs.readSync(fd, sniffBuffer, bytesRead, sniffBuffer.length - bytesRead, bytesRead);
      if (chunk === 0) break;
      bytesRead += chunk;
    }
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = sniffBuffer.subarray(0, bytesRead);

  // ZIP container: first two bytes are "PK".
  const isZipBasedXlsx = fileStart.length >= 2 && fileStart[0] === 0x50 && fileStart[1] === 0x4b;

  // OLE2 compound file: fixed 8-byte signature. A file shorter than 8 bytes
  // yields a shorter slice, which never equals the signature.
  const oleSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
  const isOleBasedXls = fileStart.subarray(0, 8).equals(oleSignature);

  // HTML saved with a spreadsheet extension elsewhere: match on the leading
  // markup, ignoring leading whitespace and letter case.
  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
|
||||||
|
|
||||||
function detectDelimiter(line) {
|
function detectDelimiter(line) {
|
||||||
@ -85,6 +105,26 @@ function findFirstHeaderIndex(headers, predicate) {
|
|||||||
return headers.map(normalizeHeader).findIndex(predicate);
|
return headers.map(normalizeHeader).findIndex(predicate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether any header, once passed through `normalizeHeader`, names a
 * CEP column: either it contains "cep" as a whole word or it is exactly
 * "codigo postal".
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when at least one header matches.
 */
function hasCepHeader(headers) {
  const cepWord = /\bcep\b/;
  const normalizedNames = headers.map(normalizeHeader);

  for (const name of normalizedNames) {
    if (cepWord.test(name)) return true;
    if (name === 'codigo postal') return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether any header, once passed through `normalizeHeader`, names a
 * house-number or address column: it is exactly one of the number aliases, or
 * it contains "endereco" or "logradouro".
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` when at least one header matches.
 */
function hasAddressOrNumberHeader(headers) {
  const numberAliases = ['numero', 'num', 'nº', 'n°'];
  const normalizedNames = headers.map(normalizeHeader);

  for (const name of normalizedNames) {
    if (numberAliases.includes(name)) return true;
    if (name.includes('endereco')) return true;
    if (name.includes('logradouro')) return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the row carries both coordinate columns, i.e. the headers
 * passed through `normalizeHeader` include "latitude" and "longitude".
 *
 * @param {Array} headers - Raw header cells of one row.
 * @returns {boolean} `true` only when both names are present.
 */
function hasGeoHeaders(headers) {
  const names = new Set(headers.map(normalizeHeader));
  if (!names.has('latitude')) return false;
  return names.has('longitude');
}
|
||||||
|
|
||||||
|
/**
 * Locates the header row inside a sheet: the first row that has both a CEP
 * header and an address/number header, or that has the latitude/longitude
 * pair.
 *
 * @param {Array<Array>} rows - All rows of the sheet, each an array of cells.
 * @returns {number} Index of the first matching row, or 0 when none matches.
 */
function findHeaderRowIndex(rows) {
  for (let position = 0; position < rows.length; position += 1) {
    const candidate = rows[position];
    const looksLikeCepRow = hasCepHeader(candidate) && hasAddressOrNumberHeader(candidate);
    if (looksLikeCepRow || hasGeoHeaders(candidate)) {
      return position;
    }
  }
  // No recognisable header row: fall back to the first row.
  return 0;
}
|
||||||
|
|
||||||
function resolveColumnIndexes(headers) {
|
function resolveColumnIndexes(headers) {
|
||||||
const normalizedHeaders = headers.map(normalizeHeader);
|
const normalizedHeaders = headers.map(normalizeHeader);
|
||||||
const exactIndex = aliases => {
|
const exactIndex = aliases => {
|
||||||
@ -138,11 +178,12 @@ function cleanCsvValue(value) {
|
|||||||
async function countValidLines(inputPath) {
|
async function countValidLines(inputPath) {
|
||||||
const dataType = await discoverDataType(inputPath);
|
const dataType = await discoverDataType(inputPath);
|
||||||
const rows = readRows(inputPath);
|
const rows = readRows(inputPath);
|
||||||
const headers = rows[0] || [];
|
const headerRowIndex = findHeaderRowIndex(rows);
|
||||||
|
const headers = rows[headerRowIndex] || [];
|
||||||
const indexes = resolveColumnIndexes(headers);
|
const indexes = resolveColumnIndexes(headers);
|
||||||
let total = 0;
|
let total = 0;
|
||||||
|
|
||||||
for (const cols of rows.slice(1)) {
|
for (const cols of rows.slice(headerRowIndex + 1)) {
|
||||||
if (dataType === 'cep') {
|
if (dataType === 'cep') {
|
||||||
if (buildCepPayload(cols, indexes)) total++;
|
if (buildCepPayload(cols, indexes)) total++;
|
||||||
} else if (dataType === 'geolocalizacao') {
|
} else if (dataType === 'geolocalizacao') {
|
||||||
@ -158,7 +199,8 @@ async function countValidLines(inputPath) {
|
|||||||
async function processCsvFile(jobId, inputPath, originalName) {
|
async function processCsvFile(jobId, inputPath, originalName) {
|
||||||
const dataType = await discoverDataType(inputPath);
|
const dataType = await discoverDataType(inputPath);
|
||||||
const rows = readRows(inputPath);
|
const rows = readRows(inputPath);
|
||||||
const headers = rows[0] || [];
|
const headerRowIndex = findHeaderRowIndex(rows);
|
||||||
|
const headers = rows[headerRowIndex] || [];
|
||||||
const indexes = resolveColumnIndexes(headers);
|
const indexes = resolveColumnIndexes(headers);
|
||||||
const baseName = path.parse(originalName || inputPath).name;
|
const baseName = path.parse(originalName || inputPath).name;
|
||||||
const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
|
const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
|
||||||
@ -167,7 +209,7 @@ async function processCsvFile(jobId, inputPath, originalName) {
|
|||||||
outStream.write('\uFEFF');
|
outStream.write('\uFEFF');
|
||||||
outStream.write(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers].join(';') + '\n');
|
outStream.write(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers].join(';') + '\n');
|
||||||
|
|
||||||
for (const cols of rows.slice(1)) {
|
for (const cols of rows.slice(headerRowIndex + 1)) {
|
||||||
let dataToSend = {};
|
let dataToSend = {};
|
||||||
|
|
||||||
if (dataType === 'cep') {
|
if (dataType === 'cep') {
|
||||||
|
|||||||
@ -21,7 +21,27 @@ function hasHeader(headers, aliases) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Decides whether a file should be treated as an Excel spreadsheet.
 *
 * A `.xls` / `.xlsx` extension is accepted immediately, without touching the
 * disk. For any other extension the first 512 bytes of the file are inspected
 * and the file is accepted when they start with a ZIP header ("PK"), start
 * with the 8-byte OLE2 compound-file signature, or look like HTML / contain a
 * `<table` tag.
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {boolean} `true` when the extension or the leading bytes match.
 * @throws {Error} Propagates the `fs` error when the file has a non-Excel
 *   extension and cannot be opened or read.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Only the first 512 bytes are ever inspected, so read just that window
  // instead of loading the whole (possibly very large) file into memory.
  const sniffBuffer = Buffer.alloc(512);
  let bytesRead = 0;
  const fd = fs.openSync(filePath, 'r');
  try {
    // readSync may return fewer bytes than requested; loop until the window
    // is full or end-of-file is reached.
    while (bytesRead < sniffBuffer.length) {
      const chunk = fs.readSync(fd, sniffBuffer, bytesRead, sniffBuffer.length - bytesRead, bytesRead);
      if (chunk === 0) break;
      bytesRead += chunk;
    }
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = sniffBuffer.subarray(0, bytesRead);

  // ZIP container: first two bytes are "PK".
  const isZipBasedXlsx = fileStart.length >= 2 && fileStart[0] === 0x50 && fileStart[1] === 0x4b;

  // OLE2 compound file: fixed 8-byte signature. A file shorter than 8 bytes
  // yields a shorter slice, which never equals the signature.
  const oleSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
  const isOleBasedXls = fileStart.subarray(0, 8).equals(oleSignature);

  // HTML saved with a spreadsheet extension elsewhere: match on the leading
  // markup, ignoring leading whitespace and letter case.
  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
|
||||||
|
|
||||||
function detectDelimiter(line) {
|
function detectDelimiter(line) {
|
||||||
@ -31,6 +51,15 @@ function detectDelimiter(line) {
|
|||||||
.sort((a, b) => b.count - a.count)[0].delimiter;
|
.sort((a, b) => b.count - a.count)[0].delimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the first row that looks like a header row: after mapping its cells
 * through `normalizeHeader`, it has both a CEP header and an address/number
 * header, or it includes both "latitude" and "longitude".
 *
 * @param {Array<Array>} rows - All rows of the sheet, each an array of cells.
 * @returns {Array} The matching row as found in `rows` (not normalized), or an
 *   empty array when no row matches.
 */
function findHeaderRow(rows) {
  for (const candidate of rows) {
    const names = candidate.map(normalizeHeader);
    const isCepRow = hasCepHeader(names) && hasAddressOrNumberHeader(names);
    const isGeoRow = names.includes('latitude') && names.includes('longitude');
    if (isCepRow || isGeoRow) {
      return candidate;
    }
  }
  return [];
}
|
||||||
|
|
||||||
function readExcelHeaders(filePath) {
|
function readExcelHeaders(filePath) {
|
||||||
const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
|
const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
|
||||||
const firstSheetName = workbook.SheetNames[0];
|
const firstSheetName = workbook.SheetNames[0];
|
||||||
@ -42,7 +71,7 @@ function readExcelHeaders(filePath) {
|
|||||||
defval: ''
|
defval: ''
|
||||||
});
|
});
|
||||||
|
|
||||||
return (rows[0] || []).map(normalizeHeader);
|
return findHeaderRow(rows).map(normalizeHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function readDelimitedHeaders(filePath) {
|
async function readDelimitedHeaders(filePath) {
|
||||||
@ -52,8 +81,13 @@ async function readDelimitedHeaders(filePath) {
|
|||||||
for await (const rawLine of rl) {
|
for await (const rawLine of rl) {
|
||||||
const line = rawLine.replace(/^\uFEFF/, '').replace(/\r$/, '');
|
const line = rawLine.replace(/^\uFEFF/, '').replace(/\r$/, '');
|
||||||
if (!line.trim()) continue;
|
if (!line.trim()) continue;
|
||||||
rl.close();
|
const headers = line.split(detectDelimiter(line)).map(normalizeHeader);
|
||||||
return line.split(detectDelimiter(line)).map(normalizeHeader);
|
const hasCepNumero = hasCepHeader(headers) && hasAddressOrNumberHeader(headers);
|
||||||
|
const hasGeo = headers.includes('latitude') && headers.includes('longitude');
|
||||||
|
if (hasCepNumero || hasGeo) {
|
||||||
|
rl.close();
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rl.close();
|
rl.close();
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user