- Excel, CSV (separado por ponto e vírgula, vírgula ou tabulação) e TXT agora são aceitos para upload. - Formato de dados atualizado para arquivos que fogem do padrão predefinido.
214 lines
7.1 KiB
JavaScript
214 lines
7.1 KiB
JavaScript
const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const XLSX = require('xlsx');
|
|
const { once } = require('events');
|
|
const {
|
|
incrementProcessed,
|
|
incrementErrors,
|
|
finishJob
|
|
} = require('./jobStore.service');
|
|
|
|
/**
 * Normalizes a header cell so that differently written column names compare equal.
 *
 * The text is lower-cased, stripped of combining diacritics (after NFD
 * decomposition), has runs of `_` / `-` replaced by a single space, and has
 * whitespace collapsed and trimmed.
 *
 * @param {*} value - Raw header cell; falsy values are treated as an empty string.
 * @returns {string} The normalized header text.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: separators at the edges ("_cep", "numero-") are turned into
    // spaces above and would otherwise survive as leading/trailing blanks.
    .trim();
}
|
|
|
|
/**
 * Tells whether a path points to an Excel workbook, judging only by its extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} `true` for `.xls` / `.xlsx` (case-insensitive), `false` otherwise.
 */
function isExcelFile(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  return extension === '.xls' || extension === '.xlsx';
}
|
|
|
|
/**
 * Guesses the column delimiter of a delimited-text line.
 *
 * Candidates are `;`, tab and `,`. The candidate that occurs most often
 * outside double-quoted sections wins; on a tie the earlier candidate in that
 * order is returned, so a line without any delimiter yields `;`.
 *
 * @param {string} line - A sample line (typically the header line).
 * @returns {string} The detected delimiter character.
 */
function detectDelimiter(line) {
  const candidates = [';', '\t', ','];
  const counts = new Map(candidates.map(candidate => [candidate, 0]));

  // Delimiters inside quotes are data, not separators, so they must not vote.
  let inQuotes = false;
  for (const char of line) {
    if (char === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && counts.has(char)) {
      counts.set(char, counts.get(char) + 1);
    }
  }

  let best = candidates[0];
  for (const candidate of candidates) {
    // Strict comparison keeps the earlier candidate on ties.
    if (counts.get(candidate) > counts.get(best)) best = candidate;
  }
  return best;
}
|
|
|
|
/**
 * Splits one line of delimited text into trimmed column values.
 *
 * Double quotes delimit sections in which the delimiter is treated as data.
 * Inside a quoted section a doubled quote (`""`) stands for one literal quote.
 * Outside a quoted section a quote always opens a section, so an empty quoted
 * field (`""`) produces an empty string rather than a stray quote character.
 *
 * @param {string} line - The line to split (without its line terminator).
 * @param {string} delimiter - Single-character column delimiter.
 * @returns {string[]} The column values, each trimmed; always at least one entry.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: keep one and skip its pair.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  cols.push(current.trim());
  return cols;
}
|
|
|
|
/**
 * Reads a delimited text file (CSV/TXT) synchronously into rows of columns.
 *
 * The file is decoded as UTF-8, a leading byte-order mark is removed, and
 * lines that are empty after trimming are dropped. The delimiter is detected
 * from the first remaining line and applied to every line.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string[][]} One array of column values per non-blank line; `[]` when there is none.
 */
function readDelimitedRows(filePath) {
  const rawText = fs.readFileSync(filePath, 'utf8');
  const text = rawText.replace(/^\uFEFF/, '');

  const nonBlankLines = [];
  for (const line of text.split(/\r?\n/)) {
    if (line.trim()) nonBlankLines.push(line);
  }

  if (nonBlankLines.length === 0) {
    return [];
  }

  const delimiter = detectDelimiter(nonBlankLines[0]);
  return nonBlankLines.map(line => {
    // Drop a stray trailing carriage return left over after the split.
    const withoutCarriageReturn = line.replace(/\r$/, '');
    return splitDelimitedLine(withoutCarriageReturn, delimiter);
  });
}
|
|
|
|
/**
 * Reads the first sheet of an Excel workbook into rows of trimmed strings.
 *
 * @param {string} filePath - Path of the workbook to read.
 * @returns {string[][]} One array of cell strings per row, or `[]` when the
 *   workbook lists no sheets.
 */
function readExcelRows(filePath) {
  const readOptions = { cellDates: false, raw: false };
  const workbook = XLSX.readFile(filePath, readOptions);

  const [firstSheetName] = workbook.SheetNames;
  if (!firstSheetName) {
    return [];
  }

  const sheet = workbook.Sheets[firstSheetName];
  const table = XLSX.utils.sheet_to_json(sheet, {
    header: 1,
    blankrows: false,
    defval: ''
  });

  // Every cell is coerced to a trimmed string; null/undefined become ''.
  return table.map(cells => cells.map(cell => String(cell ?? '').trim()));
}
|
|
|
|
/**
 * Reads an input file into rows, choosing the reader from the file extension.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string[][]} Rows produced by the Excel reader for `.xls`/`.xlsx`
 *   files and by the delimited-text reader for everything else.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}
|
|
|
|
/**
 * Finds the first header whose normalized form satisfies a predicate.
 *
 * @param {Array} headers - Raw header cells.
 * @param {function(string): boolean} predicate - Test applied to each normalized header.
 * @returns {number} Index of the first match, or -1 when nothing matches.
 */
function findFirstHeaderIndex(headers, predicate) {
  const normalized = headers.map(normalizeHeader);
  return normalized.findIndex(predicate);
}
|
|
|
|
/**
 * Locates the columns this module reads, based on the header row.
 *
 * CEP and address columns are matched loosely (word "cep" / exact
 * "codigo postal"; any header containing "endereco" or "logradouro"), while
 * number, latitude and longitude require an exact match against their aliases.
 * All comparisons are done on normalized header text.
 *
 * @param {Array} headers - Raw header cells.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number, idxLatitude: number, idxLongitude: number}}
 *   Column indexes; each is -1 when the column was not found.
 */
function resolveColumnIndexes(headers) {
  const normalizedHeaders = headers.map(normalizeHeader);

  const indexWhere = predicate => normalizedHeaders.findIndex(predicate);
  const indexOfAlias = aliases => {
    const wanted = aliases.map(normalizeHeader);
    return indexWhere(header => wanted.includes(header));
  };

  const idxCep = indexWhere(header => /\bcep\b/.test(header) || header === 'codigo postal');
  const idxNumero = indexOfAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = indexWhere(header => header.includes('endereco') || header.includes('logradouro'));
  const idxLatitude = indexOfAlias(['latitude']);
  const idxLongitude = indexOfAlias(['longitude']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}
|
|
|
|
/**
 * Heuristically extracts the street number from a free-form address string.
 *
 * Tokens that look like a CEP (`12345-678` / `12345678`), road identifiers
 * (`BR-101`, `KM 12`, ...) and numbers sitting right before a closing
 * parenthesis are ignored. From what remains, the first of these wins:
 *   1. a number introduced by a label such as `n`, `no`, `num`, `numero`, `nº`;
 *   2. a number that directly follows a comma;
 *   3. the last standalone number in the text.
 *
 * @param {*} address - Address text; falsy values are treated as empty.
 * @returns {string} The number found (optionally with one trailing letter),
 *   or `'1'` when the address is empty or holds no usable number.
 */
function extractAddressNumber(address) {
  const value = String(address || '').trim();
  if (!value) return '1';

  const candidateText = value
    // A CEP embedded in the address would otherwise be mistaken for the
    // house number (e.g. "..., SP, 01310-100" -> "01310").
    .replace(/\b\d{5}-?\d{3}\b/g, ' ')
    .replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ')
    .replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  const labeledNumber = candidateText.match(/\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i);
  if (labeledNumber) return labeledNumber[1];

  const commaNumber = candidateText.match(/,\s*(\d+[A-Z]?)\b/i);
  if (commaNumber) return commaNumber[1];

  const standaloneNumbers = candidateText.match(/\b\d+[A-Z]?\b/gi) || [];
  return standaloneNumbers.length ? standaloneNumbers[standaloneNumbers.length - 1] : '1';
}
|
|
|
|
/**
 * Builds the `{ cep, numero }` payload for one data row.
 *
 * The CEP is reduced to its digits. The number comes from the number column
 * when that cell is non-blank; otherwise it is derived from the address column
 * via `extractAddressNumber`.
 *
 * @param {Array} cols - Column values of the row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes -
 *   Column positions; a negative index means the column is absent.
 * @returns {{cep: string, numero: string}|null} The payload, or `null` when the
 *   row has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const { idxCep, idxNumero, idxEndereco } = indexes;
  const cellAt = idx => (idx >= 0 ? cols[idx] : '');

  const cep = String(cellAt(idxCep) || '').replace(/\D/g, '');
  if (!cep) {
    return null;
  }

  const explicitNumber = String(cellAt(idxNumero) || '').trim();
  const numero = explicitNumber || extractAddressNumber(cellAt(idxEndereco));

  return { cep, numero };
}
|
|
|
|
/**
 * Makes a value safe to place in one cell of the `;`-separated output.
 *
 * Carriage returns, line feeds and semicolons each become a space, and double
 * quotes become single quotes. `null` / `undefined` become an empty string.
 *
 * @param {*} value - Value to sanitize.
 * @returns {string} The sanitized text.
 */
function cleanCsvValue(value) {
  return String(value ?? '')
    .replace(/[\r\n;]/g, ' ')
    .replace(/"/g, "'");
}
|
|
|
|
/**
 * Counts the data rows of an input file that carry usable data.
 *
 * The first row is taken as the header. For data type `'cep'` a row counts
 * when `buildCepPayload` yields a payload; for `'geolocalizacao'` a row counts
 * when both the latitude and longitude cells parse to a number. Any other data
 * type yields 0.
 *
 * @param {string} inputPath - Path of the uploaded file.
 * @returns {Promise<number>} Number of valid data rows.
 */
async function countValidLines(inputPath) {
  const dataType = await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const indexes = resolveColumnIndexes(rows[0] || []);

  const coordinateAt = (cols, idx) => (idx >= 0 ? parseFloat(cols[idx]) : NaN);

  const isValidRow = cols => {
    switch (dataType) {
      case 'cep':
        return Boolean(buildCepPayload(cols, indexes));
      case 'geolocalizacao': {
        const latitude = coordinateAt(cols, indexes.idxLatitude);
        const longitude = coordinateAt(cols, indexes.idxLongitude);
        return !(isNaN(latitude) || isNaN(longitude));
      }
      default:
        return false;
    }
  };

  return rows.slice(1).filter(isValidRow).length;
}
|
|
|
|
/**
 * Processes an uploaded file row by row and writes a `;`-separated result CSV.
 *
 * Each usable data row is sent to `consultarViabilidade`; the output row holds
 * the result columns (distance, dedicated, non-dedicated, error) followed by
 * the original columns. Rows without usable data for the detected data type
 * are skipped. A failed lookup still produces an output row carrying the error
 * message and is counted both as an error and as processed.
 *
 * The output file is created under `../outputs` relative to this module (the
 * directory is created when missing) and starts with a UTF-8 BOM.
 *
 * @param {*} jobId - Job identifier forwarded to the job store helpers.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original file name, used for the output
 *   file name; falls back to `inputPath`.
 * @returns {Promise<string>} Path of the generated CSV file.
 * @throws {Error} When the input cannot be read or the output stream fails.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  const dataType = await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headers = rows[0] || [];
  const indexes = resolveColumnIndexes(headers);
  const baseName = path.parse(originalName || inputPath).name;
  const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
  const outputDir = path.join(__dirname, '..', 'outputs');
  const outputPath = path.join(outputDir, outputFilename);

  // Opening the stream fails asynchronously when the directory is missing.
  fs.mkdirSync(outputDir, { recursive: true });

  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });

  // Without a listener, an 'error' emitted while a lookup is awaited would be
  // an uncaught exception. Record it and surface it at the next write.
  let streamError = null;
  outStream.on('error', err => {
    streamError = err;
  });

  // Accept decimal commas ("-23,55"); plain parseFloat would stop at the comma.
  const parseCoordinate = (cols, idx) =>
    idx >= 0 ? parseFloat(String(cols[idx] ?? '').trim().replace(',', '.')) : NaN;

  // Header and data rows go through the same sanitizing so columns stay aligned.
  const writeRow = values => {
    if (streamError) throw streamError;
    outStream.write(values.map(cleanCsvValue).join(';') + '\n');
  };

  try {
    outStream.write('\uFEFF');
    writeRow(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers]);

    for (const cols of rows.slice(1)) {
      let dataToSend;

      if (dataType === 'cep') {
        dataToSend = buildCepPayload(cols, indexes);
        if (!dataToSend) continue;
      } else if (dataType === 'geolocalizacao') {
        const latitude = parseCoordinate(cols, indexes.idxLatitude);
        const longitude = parseCoordinate(cols, indexes.idxLongitude);
        if (Number.isNaN(latitude) || Number.isNaN(longitude)) continue;
        dataToSend = { latitude, longitude };
      } else {
        continue;
      }

      let outCols;
      let failed = false;

      try {
        const viab = await consultarViabilidade(dataToSend);
        // `??` instead of `||` so a legitimate distance of 0 is preserved.
        const distancia = viab.distancia ?? viab.raw?.distancia ?? viab.raw?.distance ?? '';
        const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
        const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
        const error = viab.error ? viab.error : '';
        outCols = [distancia, dedicado, naoDedicado, error, ...cols];
      } catch (err) {
        failed = true;
        outCols = ['', '', '', err && (err.message || String(err)), ...cols];
      }

      // Written outside the lookup try/catch so an output-stream failure is
      // not mistaken for a failed lookup.
      writeRow(outCols);
      if (failed) incrementErrors(jobId);
      incrementProcessed(jobId);
    }

    // If the stream already failed, 'finish' will never fire; fail now.
    if (streamError) throw streamError;
    outStream.end();
    await once(outStream, 'finish');
  } catch (err) {
    outStream.destroy();
    throw err;
  }

  finishJob(jobId, path.basename(outputPath));

  return outputPath;
}
|
|
|
|
// Public API of this module: the row-by-row file processor and the valid-row counter.
module.exports = { processCsvFile, countValidLines };
|