482 lines
16 KiB
JavaScript
482 lines
16 KiB
JavaScript
const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const XLSX = require('xlsx');
|
|
const ExcelJS = require('exceljs');
|
|
const { once } = require('events');
|
|
const {
|
|
incrementProcessed,
|
|
incrementErrors,
|
|
finishJob
|
|
} = require('./jobStore.service');
|
|
|
|
// Titles of the result columns, in output order. They are placed before the
// original columns in every generated CSV/XLSX file.
const RESULT_HEADERS = ['Provedor', 'Distancia', 'Dedicado', 'Nao Dedicado', 'Erro'];
|
|
|
|
/**
 * Normalizes a header cell so spelling variants compare equal.
 *
 * The text is lower-cased, stripped of combining diacritics, has runs of
 * underscores/hyphens replaced by a single space and whitespace collapsed.
 *
 * @param {*} value - Raw header cell; falsy values are treated as ''.
 * @returns {string} Normalized header without leading or trailing spaces.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    // After NFD, accents are separate combining marks that can be dropped.
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: separators at the edges ("_cep_") have just become spaces.
    .trim();
}
|
|
|
|
/**
 * Tells whether a file should be read as a spreadsheet rather than as
 * delimited text.
 *
 * A `.xls`/`.xlsx` extension is accepted without touching the file. Otherwise
 * the first bytes are inspected for a ZIP signature ("PK"), the 8-byte OLE
 * compound-file signature, or HTML/table markup.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} True when the file looks like an Excel workbook.
 * @throws {Error} Propagates file-system errors when the file must be opened.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Read only the sniffing window instead of loading the whole upload.
  const window = Buffer.alloc(512);
  const fd = fs.openSync(filePath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = fs.readSync(fd, window, 0, window.length, 0);
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = window.subarray(0, bytesRead);

  const isZipBasedXlsx = fileStart[0] === 0x50 && fileStart[1] === 0x4b;

  const oleSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
  const isOleBasedXls = fileStart.subarray(0, oleSignature.length).equals(oleSignature);

  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
|
|
|
|
/**
 * Tells whether a file should be handled as an `.xlsx` workbook.
 *
 * A `.xlsx` extension is accepted without touching the file; otherwise the
 * file must start with the ZIP signature bytes "PK".
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} True for `.xlsx` names or ZIP-signed content.
 * @throws {Error} Propagates file-system errors when the file must be opened.
 */
function isXlsxFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (ext === '.xlsx') return true;

  // Only the two signature bytes are needed, so avoid loading the whole file.
  const signature = Buffer.alloc(2);
  const fd = fs.openSync(filePath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = fs.readSync(fd, signature, 0, signature.length, 0);
  } finally {
    fs.closeSync(fd);
  }

  return bytesRead === signature.length && signature[0] === 0x50 && signature[1] === 0x4b;
}
|
|
|
|
/**
 * Guesses the column delimiter of a delimited-text line.
 *
 * Candidates are `;`, tab and `,`. The one occurring most often outside
 * double-quoted sections wins; ties go to the earlier candidate in that
 * order, so a line with no delimiter at all yields `;`.
 *
 * @param {string} line - Line to inspect (typically the first line of a file).
 * @returns {string} The detected delimiter character.
 */
function detectDelimiter(line) {
  const candidates = [';', '\t', ','];
  const counts = new Map(candidates.map(candidate => [candidate, 0]));

  // Delimiters inside quotes belong to the field text and must not vote.
  // An escaped quote ("") toggles twice and leaves the state unchanged.
  let inQuotes = false;
  for (const char of String(line ?? '')) {
    if (char === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && counts.has(char)) {
      counts.set(char, counts.get(char) + 1);
    }
  }

  let best = candidates[0];
  for (const candidate of candidates) {
    if (counts.get(candidate) > counts.get(best)) best = candidate;
  }
  return best;
}
|
|
|
|
/**
 * Splits one delimited-text line into trimmed column values.
 *
 * Double quotes group text so that delimiters inside them are kept as data.
 * Inside a quoted section, `""` stands for one literal quote character.
 *
 * @param {string} line - Line to split (without the line terminator).
 * @param {string} delimiter - Single-character column delimiter.
 * @returns {string[]} Column values, each trimmed; always at least one entry.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (char === '"') {
      // A doubled quote is an escape only while inside quotes. Outside, the
      // pair opens and closes an empty quoted field (e.g. a;"";b).
      if (inQuotes && line[i + 1] === '"') {
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  cols.push(current.trim());
  return cols;
}
|
|
|
|
/**
 * Reads a delimited-text file into an array of column arrays.
 *
 * The file is decoded as UTF-8, a leading BOM is removed and blank lines are
 * discarded. The delimiter is detected from the first non-blank line and then
 * applied to every line.
 *
 * @param {string} filePath - Path of the text file.
 * @returns {string[][]} One array of columns per non-blank line.
 */
function readDelimitedRows(filePath) {
  const text = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');

  const nonBlankLines = [];
  for (const candidate of text.split(/\r?\n/)) {
    if (candidate.trim()) nonBlankLines.push(candidate);
  }
  if (nonBlankLines.length === 0) return [];

  const [firstLine] = nonBlankLines;
  const delimiter = detectDelimiter(firstLine);

  const rows = [];
  for (const rawLine of nonBlankLines) {
    // Drop a stray carriage return left at the end of the line.
    const withoutCarriageReturn = rawLine.replace(/\r$/, '');
    rows.push(splitDelimitedLine(withoutCarriageReturn, delimiter));
  }
  return rows;
}
|
|
|
|
/**
 * Reads the first sheet of a workbook into an array of string rows.
 *
 * Blank rows are kept and missing cells become '' so that array positions
 * stay stable. Every cell is converted to a trimmed string.
 *
 * @param {string} filePath - Path of the workbook.
 * @returns {string[][]} Rows of the first sheet, or [] when there is no sheet.
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) return [];

  return XLSX.utils.sheet_to_json(workbook.Sheets[firstSheetName], {
    header: 1,
    // Start at sheet row 1 even when the sheet's used range begins lower.
    // processXlsxFile and prependResultColumnsToWorksheet use the array index
    // as the zero-based sheet row when writing results back; without this,
    // results land on the wrong rows for sheets with leading empty rows.
    range: 0,
    blankrows: true,
    defval: ''
  }).map(row => row.map(cell => String(cell ?? '').trim()));
}
|
|
|
|
/**
 * Reads any supported input file into an array of column arrays, choosing the
 * spreadsheet reader or the delimited-text reader based on isExcelFile.
 *
 * @param {string} filePath - Path of the input file.
 * @returns {string[][]} Rows of the file.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}
|
|
|
|
/**
 * Finds the position of the first header whose normalized text satisfies a
 * predicate.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @param {function(string, number, string[]): boolean} predicate - Test
 *   applied to each normalized header.
 * @returns {number} Zero-based index of the first match, or -1.
 */
function findFirstHeaderIndex(headers, predicate) {
  const normalized = headers.map(header => normalizeHeader(header));
  return normalized.findIndex(predicate);
}
|
|
|
|
/**
 * Checks whether any header equals one of the given aliases once both sides
 * are normalized.
 *
 * @param {Array<*>} headers - Raw header cells.
 * @param {string[]} aliases - Accepted spellings for the column.
 * @returns {boolean} True when at least one header matches an alias exactly.
 */
function hasHeaderAlias(headers, aliases) {
  const accepted = new Set(aliases.map(alias => normalizeHeader(alias)));
  return headers.some(header => accepted.has(normalizeHeader(header)));
}
|
|
|
|
/**
 * Checks whether a row contains a postal-code header: either the standalone
 * word "cep" or exactly "codigo postal" after normalization.
 *
 * @param {Array<*>} headers - Raw cells of a candidate header row.
 * @returns {boolean} True when a postal-code header is present.
 */
function hasCepHeader(headers) {
  for (const rawHeader of headers) {
    const header = normalizeHeader(rawHeader);
    if (header === 'codigo postal' || /\bcep\b/.test(header)) {
      return true;
    }
  }
  return false;
}
|
|
|
|
/**
 * Checks whether a row contains a street-number header (exact alias match) or
 * an address header (text containing "endereco" or "logradouro").
 *
 * @param {Array<*>} headers - Raw cells of a candidate header row.
 * @returns {boolean} True when such a header is present.
 */
function hasAddressOrNumberHeader(headers) {
  const numberAliases = ['numero', 'num', 'nº', 'n°'];

  return headers.some(rawHeader => {
    const header = normalizeHeader(rawHeader);
    if (numberAliases.includes(header)) return true;
    return header.includes('endereco') || header.includes('logradouro');
  });
}
|
|
|
|
/**
 * Checks whether a row contains both a latitude header and a longitude
 * header, matched by exact alias.
 *
 * @param {Array<*>} headers - Raw cells of a candidate header row.
 * @returns {boolean} True only when both coordinates have a header.
 */
function hasGeoHeaders(headers) {
  const hasLatitude = hasHeaderAlias(headers, ['latitude', 'lat']);
  if (!hasLatitude) return false;

  return hasHeaderAlias(headers, ['longitude', 'long', 'lng', 'lon']);
}
|
|
|
|
/**
 * Locates the header row: the first row that has either a postal-code header
 * together with an address/number header, or both coordinate headers.
 *
 * @param {Array<Array<*>>} rows - All rows of the input file.
 * @returns {number} Index of the header row, or 0 when none qualifies.
 */
function findHeaderRowIndex(rows) {
  for (let position = 0; position < rows.length; position++) {
    const row = rows[position];
    const looksLikeCepHeader = hasCepHeader(row) && hasAddressOrNumberHeader(row);
    if (looksLikeCepHeader || hasGeoHeaders(row)) {
      return position;
    }
  }
  // No recognizable header: fall back to the first row.
  return 0;
}
|
|
|
|
/**
 * Maps the header row to the positions of the columns used for lookups.
 *
 * Postal code and address are matched by pattern/substring; street number,
 * latitude and longitude must equal one of their aliases exactly. All
 * comparisons use normalized header text.
 *
 * @param {Array<*>} headers - Raw cells of the header row.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number,
 *   idxLatitude: number, idxLongitude: number}} Zero-based positions, with -1
 *   for any column that was not found.
 */
function resolveColumnIndexes(headers) {
  const normalized = headers.map(header => normalizeHeader(header));

  const indexWhere = predicate => normalized.findIndex(predicate);
  const indexOfAlias = aliases => {
    const wanted = aliases.map(alias => normalizeHeader(alias));
    return indexWhere(header => wanted.includes(header));
  };

  const idxCep = indexWhere(header => /\bcep\b/.test(header) || header === 'codigo postal');
  const idxNumero = indexOfAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = indexWhere(header => header.includes('endereco') || header.includes('logradouro'));
  const idxLatitude = indexOfAlias(['latitude', 'lat']);
  const idxLongitude = indexOfAlias(['longitude', 'long', 'lng', 'lon']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}
|
|
|
|
/**
 * Extracts a street number from free-form address text.
 *
 * Highway/kilometre designations (BR, SP, GO, MT, KM followed by digits) and
 * numbers sitting right before a closing parenthesis are ignored. From what
 * remains, the first rule that matches wins:
 *   1. a number introduced by a label such as "n", "no", "num", "numero", "nº";
 *   2. a number that follows a comma;
 *   3. the last standalone number in the text.
 *
 * @param {*} address - Address text; falsy values count as empty.
 * @returns {string} The number (optionally with one letter suffix), or '1'
 *   when the address is empty or holds no usable number.
 */
function extractAddressNumber(address) {
  const text = String(address || '').trim();
  if (text === '') return '1';

  let candidateText = text.replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ');
  candidateText = candidateText.replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  // Single-capture patterns, ordered from most to least specific.
  const capturingPatterns = [
    /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i,
    /,\s*(\d+[A-Z]?)\b/i
  ];
  for (const pattern of capturingPatterns) {
    const found = candidateText.match(pattern);
    if (found) return found[1];
  }

  const allNumbers = candidateText.match(/\b\d+[A-Z]?\b/gi);
  if (!allNumbers) return '1';
  return allNumbers[allNumbers.length - 1];
}
|
|
|
|
/**
 * Builds the postal-code lookup payload for one data row.
 *
 * The postal code is reduced to its digits. The street number comes from the
 * number column when it has text, otherwise it is derived from the address
 * column via extractAddressNumber.
 *
 * @param {Array<*>} cols - Cells of the data row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes -
 *   Column positions; negative means the column is absent.
 * @returns {{cep: string, numero: string}|null} Payload, or null when the row
 *   has no postal-code digits.
 */
function buildCepPayload(cols, indexes) {
  const cellAt = index => (index >= 0 ? cols[index] : '');

  const cepDigits = String(cellAt(indexes.idxCep) || '').replace(/\D/g, '');
  const explicitNumber = String(cellAt(indexes.idxNumero) || '').trim();
  const addressText = cellAt(indexes.idxEndereco);
  const numero = explicitNumber || extractAddressNumber(addressText);

  return cepDigits ? { cep: cepDigits, numero } : null;
}
|
|
|
|
/**
 * Parses a coordinate cell into a number, accepting a decimal comma.
 *
 * @param {*} value - Raw cell value; null/undefined count as empty.
 * @returns {number} The parsed finite number, or NaN when parsing fails.
 */
function parseCoordinate(value) {
  const text = String(value ?? '').trim();
  // Only the first comma is swapped, so it acts as the decimal separator.
  const number = parseFloat(text.replace(',', '.'));

  if (!Number.isFinite(number)) {
    return NaN;
  }
  return number;
}
|
|
|
|
/**
 * Builds the coordinate lookup payload for one data row.
 *
 * @param {Array<*>} cols - Cells of the data row.
 * @param {{idxLatitude: number, idxLongitude: number}} indexes - Column
 *   positions; negative means the column is absent.
 * @returns {{latitude: number, longitude: number}|null} Payload, or null when
 *   either coordinate is missing or not numeric.
 */
function buildGeoPayload(cols, indexes) {
  const coordinateAt = index => (index >= 0 ? parseCoordinate(cols[index]) : NaN);

  const latitude = coordinateAt(indexes.idxLatitude);
  const longitude = coordinateAt(indexes.idxLongitude);

  const hasBothCoordinates = !isNaN(latitude) && !isNaN(longitude);
  return hasBothCoordinates ? { latitude, longitude } : null;
}
|
|
|
|
/**
 * Runs the viability lookup for a row, trying coordinates first and the
 * postal code second.
 *
 * Each available payload is tried in turn. An attempt fails when the lookup
 * throws, resolves to an object with a truthy `error`, or resolves to an
 * empty (falsy) result. An empty result is treated as a failure because every
 * caller in this file passes the result to buildSuccessResultColumns, which
 * reads its properties; this way the postal-code fallback still gets a chance.
 *
 * @param {?{latitude: number, longitude: number}} geoPayload - Coordinate
 *   payload, or null when the row has no valid coordinates.
 * @param {?{cep: string, numero: string}} cepPayload - Postal-code payload, or
 *   null when the row has no postal code.
 * @returns {Promise<Object>} The first successful lookup result.
 * @throws {Error} The error of the last failed attempt, or a generic error
 *   when neither payload was provided.
 */
async function consultarComFallback(geoPayload, cepPayload) {
  let lastError = null;

  for (const payload of [geoPayload, cepPayload]) {
    if (!payload) continue;

    try {
      const result = await consultarViabilidade(payload);
      if (!result) {
        lastError = new Error('Consulta de viabilidade retornou resposta vazia');
      } else if (result.error) {
        lastError = new Error(result.error);
      } else {
        return result;
      }
    } catch (err) {
      lastError = err;
    }
  }

  throw lastError || new Error('Linha sem latitude/longitude ou CEP valido');
}
|
|
|
|
/**
 * Makes a value safe to place in one `;`-separated output cell: carriage
 * returns, line feeds and semicolons become spaces, and double quotes become
 * single quotes.
 *
 * @param {*} value - Value to write; null/undefined become ''.
 * @returns {string} Single-line text without `;` or `"`.
 */
function cleanCsvValue(value) {
  const singleLine = String(value ?? '').replace(/[\r\n;]/g, ' ');
  return singleLine.replace(/"/g, "'");
}
|
|
|
|
/**
 * Produces a printable message for a failed lookup.
 *
 * When the error carries `response.data`, that body is preferred: a string
 * body is returned as is, otherwise its `error` or `message` field is used,
 * otherwise the whole body is serialized. Without a response body the error's
 * own message (or its string form) is used.
 *
 * @param {*} error - Thrown value, usually an Error that may carry `response`.
 * @returns {string} Message text; '' when there is nothing to report.
 */
function formatApiErrorResponse(error) {
  // Always return text: structured details are serialized instead of leaking
  // through as objects (which would print as "[object Object]").
  const toText = value => {
    if (typeof value === 'string') return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch (serializationError) {
      // Circular structures or BigInt values cannot be serialized.
      return String(value);
    }
  };

  const responseData = error?.response?.data;
  if (responseData !== undefined && responseData !== null) {
    if (typeof responseData === 'string') return responseData;

    const detail = responseData.error || responseData.message;
    return toText(detail || responseData);
  }

  if (!error) return '';
  return error.message || String(error);
}
|
|
|
|
/**
 * Converts a lookup result into the five result-column values, in the order
 * provider, distance, dedicated, non-dedicated, error.
 *
 * The distance is taken from `distancia`, then `raw.distancia`, then
 * `raw.distance`; the first one that is neither missing nor '' is used, so a
 * distance of 0 is kept.
 *
 * @param {Object} viab - Lookup result; must not be null or undefined.
 * @returns {Array<string|number>} Values for the result columns.
 * @throws {TypeError} When `viab` is null or undefined.
 */
function buildSuccessResultColumns(viab) {
  const provedor = viab.provedor ?? '';

  const distanceCandidates = [viab.distancia, viab.raw?.distancia, viab.raw?.distance];
  const distancia = distanceCandidates.find(
    candidate => candidate !== undefined && candidate !== null && candidate !== ''
  ) ?? '';

  const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
  const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
  const error = viab.error ? cleanCsvValue(viab.error) : '';

  return [provedor, distancia, dedicado, naoDedicado, error];
}
|
|
|
|
/**
 * Builds the result-column values for a row whose lookup failed: the four
 * data columns are empty and the last one holds the sanitized error message.
 *
 * @param {*} err - The value thrown by the lookup.
 * @returns {string[]} Values for the result columns.
 */
function buildErrorResultColumns(err) {
  const message = cleanCsvValue(formatApiErrorResponse(err));
  return ['', '', '', '', message];
}
|
|
|
|
/**
 * Copies the style-related properties of a cell into a new style object.
 *
 * Each style part (font, alignment, border, fill, protection) is copied one
 * level deep; a missing part is reported as undefined. The number format is
 * carried over as is.
 *
 * @param {Object} cell - Cell to copy the style from.
 * @returns {Object} New style object with the six style properties.
 */
function cloneCellStyle(cell) {
  const copyPart = part => (part ? { ...part } : undefined);
  const { numFmt, font, alignment, border, fill, protection } = cell;

  return {
    numFmt,
    font: copyPart(font),
    alignment: copyPart(alignment),
    border: copyPart(border),
    fill: copyPart(fill),
    protection: copyPart(protection)
  };
}
|
|
|
|
/**
 * Fills in and formats the header cells of the result columns at the start of
 * the worksheet.
 *
 * The column right after the result columns serves as the template: each
 * result column gets its width (at least 16), and each result header cell
 * gets a copy of the template header cell's style.
 *
 * @param {Object} worksheet - Worksheet exposing getColumn and getRow.
 * @param {number} headerRowNumber - One-based number of the header row.
 * @returns {void}
 */
function styleInsertedResultColumns(worksheet, headerRowNumber) {
  const templateColumnNumber = RESULT_HEADERS.length + 1;

  for (const [position, title] of RESULT_HEADERS.entries()) {
    const columnNumber = position + 1;

    const templateColumn = worksheet.getColumn(templateColumnNumber);
    worksheet.getColumn(columnNumber).width = Math.max(16, templateColumn.width || 0);

    const headerRow = worksheet.getRow(headerRowNumber);
    const headerCell = headerRow.getCell(columnNumber);
    const templateHeaderCell = worksheet.getRow(headerRowNumber).getCell(templateColumnNumber);

    headerCell.value = title;
    headerCell.style = cloneCellStyle(templateHeaderCell);
  }
}
|
|
|
|
/**
 * Processes an .xlsx input: inserts the result columns at the start of the
 * first worksheet, runs the lookup for every data row and writes the workbook
 * to the output path.
 *
 * Rows that yield neither a coordinate nor a postal-code payload are left
 * untouched. Each result cell receives a copy of the style of the first
 * original column of its row. A failed lookup fills the error column and is
 * counted as both an error and a processed row.
 *
 * @param {*} jobId - Job identifier passed to the progress counters.
 * @param {string} inputPath - Path of the .xlsx file to read.
 * @param {string} outputPath - Path of the .xlsx file to write.
 * @param {Array<Array<*>>} rows - Rows of the same sheet; index i is written
 *   back to worksheet row i + 1.
 * @param {number} headerRowIndex - Zero-based index of the header row.
 * @param {Object} indexes - Column positions from resolveColumnIndexes.
 * @returns {Promise<void>} Resolves once the output file has been written.
 */
async function processXlsxFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes) {
  const workbook = new ExcelJS.Workbook();
  await workbook.xlsx.readFile(inputPath);

  const worksheet = workbook.worksheets[0];
  const emptyColumns = RESULT_HEADERS.map(() => []);
  worksheet.spliceColumns(1, 0, ...emptyColumns);
  styleInsertedResultColumns(worksheet, headerRowIndex + 1);

  // Writes result values into the leading cells of a row, borrowing the style
  // of the first original column (now located right after the result columns).
  const writeResultCells = (row, values) => {
    values.forEach((value, position) => {
      const cell = row.getCell(position + 1);
      cell.value = value;
      cell.style = cloneCellStyle(row.getCell(RESULT_HEADERS.length + 1));
    });
  };

  for (const [rowIndex, cols] of rows.entries()) {
    if (rowIndex <= headerRowIndex) continue;

    const geoPayload = buildGeoPayload(cols, indexes);
    const cepPayload = buildCepPayload(cols, indexes);
    if (!geoPayload && !cepPayload) continue;

    const row = worksheet.getRow(rowIndex + 1);
    try {
      const viab = await consultarComFallback(geoPayload, cepPayload);
      writeResultCells(row, buildSuccessResultColumns(viab));
      incrementProcessed(jobId);
    } catch (err) {
      writeResultCells(row, buildErrorResultColumns(err));
      incrementErrors(jobId);
      incrementProcessed(jobId);
    }
    row.commit();
  }

  await workbook.xlsx.writeFile(outputPath);
}
|
|
|
|
/**
 * Moves an A1-style cell address a number of columns to the right.
 *
 * @param {string} address - Cell address such as "B3".
 * @param {number} colOffset - Number of columns to add.
 * @returns {string} Address of the shifted cell.
 */
function shiftCellAddress(address, colOffset) {
  const position = XLSX.utils.decode_cell(address);
  position.c = position.c + colOffset;
  const shiftedAddress = XLSX.utils.encode_cell(position);
  return shiftedAddress;
}
|
|
|
|
/**
 * Moves a cell range a number of columns to the right.
 *
 * @param {string|{s: {r: number, c: number}, e: {r: number, c: number}}} range -
 *   Range as an A1-style string or as a decoded range object.
 * @param {number} colOffset - Number of columns to add to both corners.
 * @returns {{s: {r: number, c: number}, e: {r: number, c: number}}} New
 *   decoded range; the input object is not modified.
 */
function shiftRange(range, colOffset) {
  let bounds = range;
  if (typeof range === 'string') {
    bounds = XLSX.utils.decode_range(range);
  }

  const { s: start, e: end } = bounds;
  return {
    s: { r: start.r, c: start.c + colOffset },
    e: { r: end.r, c: end.c + colOffset }
  };
}
|
|
|
|
/**
 * Builds a copy of a worksheet object with the result columns inserted at the
 * start.
 *
 * All existing cells, merges, the autofilter and column definitions are moved
 * right by the number of result columns. Sheet properties that do not depend
 * on column positions (`!rows`, `!margins`, `!outline`, `!protect`) are
 * carried over unchanged. Result headers go on the header row and each row
 * result is written as text cells.
 *
 * @param {Object} worksheet - Source worksheet keyed by A1 addresses plus
 *   `!`-prefixed properties; it is not modified.
 * @param {number} headerRowIndex - Zero-based sheet row of the header.
 * @param {Array<{rowIndex: number, values: Array<*>}>} rowResults - Result
 *   values per zero-based sheet row.
 * @returns {Object} New worksheet object.
 */
function prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults) {
  const colOffset = RESULT_HEADERS.length;
  const shiftedWorksheet = {};

  for (const [key, cell] of Object.entries(worksheet)) {
    if (key[0] === '!') continue;
    shiftedWorksheet[shiftCellAddress(key, colOffset)] = cell;
  }

  // Unaffected by a column shift, so keep them instead of silently dropping
  // row heights/visibility, margins, outline settings and protection.
  for (const key of ['!rows', '!margins', '!outline', '!protect']) {
    if (worksheet[key] !== undefined) shiftedWorksheet[key] = worksheet[key];
  }

  const originalRange = worksheet['!ref']
    ? XLSX.utils.decode_range(worksheet['!ref'])
    : { s: { r: 0, c: 0 }, e: { r: headerRowIndex, c: 0 } };

  shiftedWorksheet['!ref'] = XLSX.utils.encode_range({
    s: { r: Math.min(originalRange.s.r, headerRowIndex), c: 0 },
    e: { r: originalRange.e.r, c: originalRange.e.c + colOffset }
  });

  if (worksheet['!cols']) {
    // One object per inserted column: a shared object would make a later
    // width change on one result column affect all of them.
    const insertedCols = Array.from({ length: colOffset }, () => ({ wch: 16 }));
    shiftedWorksheet['!cols'] = insertedCols.concat(worksheet['!cols']);
  }

  if (worksheet['!merges']) {
    shiftedWorksheet['!merges'] = worksheet['!merges'].map(merge => shiftRange(merge, colOffset));
  }

  if (worksheet['!autofilter'] && worksheet['!autofilter'].ref) {
    shiftedWorksheet['!autofilter'] = {
      ...worksheet['!autofilter'],
      ref: XLSX.utils.encode_range(shiftRange(worksheet['!autofilter'].ref, colOffset))
    };
  }

  RESULT_HEADERS.forEach((value, index) => {
    const address = XLSX.utils.encode_cell({ r: headerRowIndex, c: index });
    shiftedWorksheet[address] = { t: 's', v: value };
  });

  for (const { rowIndex, values } of rowResults) {
    values.forEach((value, index) => {
      const address = XLSX.utils.encode_cell({ r: rowIndex, c: index });
      shiftedWorksheet[address] = { t: 's', v: String(value ?? '') };
    });
  }

  return shiftedWorksheet;
}
|
|
|
|
/**
 * Counts the data rows of an input file that can be looked up, i.e. rows
 * after the header that yield a coordinate payload or a postal-code payload.
 *
 * @param {string} inputPath - Path of the input file.
 * @returns {Promise<number>} Number of rows that will be processed.
 */
async function countValidLines(inputPath) {
  // The result is not used here; the call is awaited before reading the rows.
  await discoverDataType(inputPath);

  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const indexes = resolveColumnIndexes(rows[headerRowIndex] || []);

  return rows.slice(headerRowIndex + 1).reduce((total, cols) => {
    const geoPayload = buildGeoPayload(cols, indexes);
    const cepPayload = buildCepPayload(cols, indexes);
    return geoPayload || cepPayload ? total + 1 : total;
  }, 0);
}
|
|
|
|
/**
 * Runs the lookup for one data row and returns its result-column values,
 * updating the job counters: a success counts as processed, a failure counts
 * as both an error and a processed row.
 */
async function queryResultColumns(jobId, geoPayload, cepPayload) {
  try {
    const viab = await consultarComFallback(geoPayload, cepPayload);
    const values = buildSuccessResultColumns(viab);
    incrementProcessed(jobId);
    return values;
  } catch (err) {
    const values = buildErrorResultColumns(err);
    incrementErrors(jobId);
    incrementProcessed(jobId);
    return values;
  }
}

/**
 * Processes a non-.xlsx spreadsheet: looks up every data row, then writes the
 * first sheet with the result columns in front as an .xlsx workbook.
 */
async function processLegacyExcelFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes) {
  const workbook = XLSX.readFile(inputPath, { cellDates: false, raw: false, cellStyles: true });
  const firstSheetName = workbook.SheetNames[0];
  const worksheet = workbook.Sheets[firstSheetName];
  const rowResults = [];

  for (let rowIndex = headerRowIndex + 1; rowIndex < rows.length; rowIndex++) {
    const cols = rows[rowIndex];
    const geoPayload = buildGeoPayload(cols, indexes);
    const cepPayload = buildCepPayload(cols, indexes);
    if (!geoPayload && !cepPayload) continue;

    const values = await queryResultColumns(jobId, geoPayload, cepPayload);
    rowResults.push({ rowIndex, values });
  }

  workbook.Sheets[firstSheetName] = prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults);
  XLSX.writeFile(workbook, outputPath, { bookType: 'xlsx' });
}

/**
 * Processes delimited-text rows: writes a `;`-separated UTF-8 file (with BOM)
 * holding the result columns followed by the original columns of every row
 * that could be looked up.
 */
async function processDelimitedFile(jobId, outputPath, rows, headerRowIndex, headers, indexes) {
  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });

  // Without a listener, a write failure would surface as an unhandled 'error'
  // event; remember it and fail the job on the next write instead.
  let streamError = null;
  outStream.on('error', err => {
    streamError = err;
  });

  const writeRow = async (values, prefix = '') => {
    if (streamError) throw streamError;
    const line = `${prefix}${values.map(cleanCsvValue).join(';')}\n`;
    // Respect backpressure so large files are not buffered entirely in memory.
    if (!outStream.write(line)) await once(outStream, 'drain');
  };

  try {
    // Headers are sanitized like data cells so a `;` or line break inside a
    // header cannot shift the output columns.
    await writeRow([...RESULT_HEADERS, ...headers], '\uFEFF');

    for (const cols of rows.slice(headerRowIndex + 1)) {
      const geoPayload = buildGeoPayload(cols, indexes);
      const cepPayload = buildCepPayload(cols, indexes);
      if (!geoPayload && !cepPayload) continue;

      const resultColumns = await queryResultColumns(jobId, geoPayload, cepPayload);
      await writeRow([...resultColumns, ...cols]);
    }

    // A stream that already failed never emits 'finish'.
    if (streamError) throw streamError;
    outStream.end();
    await once(outStream, 'finish');
  } catch (err) {
    // Release the file handle before reporting the failure.
    outStream.destroy();
    throw err;
  }
}

/**
 * Processes an uploaded file for a job and writes the annotated output file.
 *
 * The input is read into rows, the header row and lookup columns are
 * resolved, and every data row with coordinates or a postal code is looked
 * up. The output is written to the `outputs` directory next to this module's
 * parent directory, as .xlsx for spreadsheet inputs and .csv otherwise, and
 * the job is marked as finished with the output file name.
 *
 * @param {*} jobId - Job identifier used for progress tracking.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original file name; its base name becomes
 *   part of the output file name (falls back to inputPath).
 * @returns {Promise<string>} Path of the generated output file.
 * @throws {Error} Propagates read, lookup-independent and write failures; the
 *   job is not marked as finished in that case.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  await discoverDataType(inputPath);

  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const headers = rows[headerRowIndex] || [];
  const indexes = resolveColumnIndexes(headers);

  const baseName = path.parse(originalName || inputPath).name;
  const isExcel = isExcelFile(inputPath);
  const outputFilename = `processed_${Date.now()}_${baseName}${isExcel ? '.xlsx' : '.csv'}`;
  const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });

  if (isXlsxFile(inputPath)) {
    await processXlsxFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes);
  } else if (isExcel) {
    await processLegacyExcelFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes);
  } else {
    await processDelimitedFile(jobId, outputPath, rows, headerRowIndex, headers, indexes);
  }

  finishJob(jobId, path.basename(outputPath));
  return outputPath;
}
|
|
|
|
// Public API of this module: the job entry point and the row counter.
module.exports = { processCsvFile, countValidLines };
|