viabiliza/service/csvService.js
Rafael Lopes 827855295e FEAT: Novos tipos de arquivos são permitidos
- Excel, CSV(; , ou tabulação) e TXT são agora aceitos para upload.
- Formato de dados atualizado para arquivos que fogem do padrão pré definido.
2026-05-04 16:38:37 -03:00

214 lines
7.1 KiB
JavaScript

const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const XLSX = require('xlsx');
const { once } = require('events');
const {
incrementProcessed,
incrementErrors,
finishJob
} = require('./jobStore.service');
/**
 * Normalizes a header cell so that spelling variants compare equal.
 *
 * The text is lowercased, diacritics are stripped, underscores and hyphens are
 * treated as spaces, whitespace runs are collapsed and the result is trimmed.
 *
 * @param {*} value - Raw header cell; falsy values are treated as an empty string.
 * @returns {string} The normalized header text.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    // NFD splits accented letters into base letter + combining mark, which the
    // next replace removes ("Número" -> "numero").
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: separators at the edges (e.g. "cep_") were just turned into
    // spaces and would otherwise survive and break exact header matching.
    .trim();
}
/**
 * Tells whether a path names an Excel workbook, judging only by its extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} True for `.xls` and `.xlsx` (case-insensitive), false otherwise.
 */
function isExcelFile(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  return extension === '.xls' || extension === '.xlsx';
}
/**
 * Guesses the column delimiter of a delimited-text line.
 *
 * Counts occurrences of each candidate (`;`, tab, `,`) that appear outside
 * double-quoted sections and returns the most frequent one. Ties, including the
 * case where no candidate occurs at all, resolve in candidate order, so `;` is
 * the default.
 *
 * @param {string} line - Usually the header line of the file.
 * @returns {string} One of `';'`, `'\t'` or `','`.
 */
function detectDelimiter(line) {
  const candidates = [';', '\t', ','];
  const counts = new Map(candidates.map(candidate => [candidate, 0]));
  let inQuotes = false;
  for (const char of String(line ?? '')) {
    if (char === '"') {
      // An escaped quote ("") toggles twice and leaves the state unchanged.
      inQuotes = !inQuotes;
    } else if (!inQuotes && counts.has(char)) {
      // Only separators outside quotes split columns; quoted ones are data.
      counts.set(char, counts.get(char) + 1);
    }
  }
  let best = candidates[0];
  for (const candidate of candidates) {
    // Strictly greater keeps the earliest candidate on ties.
    if (counts.get(candidate) > counts.get(best)) {
      best = candidate;
    }
  }
  return best;
}
/**
 * Splits one delimited-text line into trimmed column values.
 *
 * Double quotes group text so that delimiters inside them are kept as data.
 * Inside a quoted section, a doubled quote (`""`) yields one literal quote.
 * Outside a quoted section, `""` is an empty quoted field, not a literal quote.
 *
 * @param {string} line - A single line without its line terminator.
 * @param {string} delimiter - Single-character column separator.
 * @returns {string[]} Column values with surrounding whitespace removed.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: emit one quote, skip its pair.
        current += '"';
        i++;
      } else {
        // Opening or closing quote; the quote character itself is dropped.
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }
  // The last column has no trailing delimiter, so flush it explicitly.
  cols.push(current.trim());
  return cols;
}
/**
 * Loads a delimited text file (CSV/TXT) into an array of rows.
 *
 * The file is read synchronously as UTF-8, a leading BOM is dropped and blank
 * lines are ignored. The delimiter is detected from the first non-blank line
 * and applied to every line.
 *
 * @param {string} filePath - Path of the text file to read.
 * @returns {string[][]} One array of column values per non-blank line; empty
 *   when the file has no content.
 */
function readDelimitedRows(filePath) {
  const rawText = fs.readFileSync(filePath, 'utf8');
  const text = rawText.replace(/^\uFEFF/, '');

  const nonBlankLines = [];
  for (const candidate of text.split(/\r?\n/)) {
    if (candidate.trim()) {
      nonBlankLines.push(candidate);
    }
  }
  if (nonBlankLines.length === 0) {
    return [];
  }

  const separator = detectDelimiter(nonBlankLines[0]);
  // A stray trailing CR (line not terminated by LF) is removed before splitting.
  return nonBlankLines.map(entry => splitDelimitedLine(entry.replace(/\r$/, ''), separator));
}
/**
 * Loads the first worksheet of an Excel workbook into an array of rows.
 *
 * Only the first sheet listed in the workbook is read. Every cell is converted
 * to a trimmed string, with null/undefined cells becoming an empty string.
 *
 * @param {string} filePath - Path of the workbook to read.
 * @returns {string[][]} One array of cell texts per row; empty when the
 *   workbook has no sheets.
 */
function readExcelRows(filePath) {
  const book = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const sheetName = book.SheetNames[0];
  if (!sheetName) {
    return [];
  }

  const toTrimmedText = cell => String(cell ?? '').trim();
  // `header: 1` asks the library for plain row arrays rather than keyed objects.
  const matrix = XLSX.utils.sheet_to_json(book.Sheets[sheetName], {
    header: 1,
    blankrows: false,
    defval: ''
  });
  return matrix.map(cells => cells.map(cell => toTrimmedText(cell)));
}
/**
 * Reads an uploaded file into rows, picking the reader from the file extension.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string[][]} Rows produced by the Excel reader for `.xls`/`.xlsx`
 *   files, or by the delimited-text reader for anything else.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}
/**
 * Finds the position of the first header whose normalized form satisfies a test.
 *
 * @param {Array} headers - Raw header cells.
 * @param {function(string, number, string[]): *} predicate - Called with the
 *   normalized header, its index and the array of normalized headers.
 * @returns {number} Index of the first match, or -1 when none matches.
 */
function findFirstHeaderIndex(headers, predicate) {
  const normalized = headers.map(header => normalizeHeader(header));
  for (let position = 0; position < normalized.length; position++) {
    if (predicate(normalized[position], position, normalized)) {
      return position;
    }
  }
  return -1;
}
/**
 * Locates the columns this module cares about in a header row.
 *
 * CEP and address columns are found by pattern/substring on the normalized
 * header. Number, latitude and longitude columns must equal one of their
 * aliases after normalization.
 *
 * @param {Array} headers - Raw header cells.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number,
 *   idxLatitude: number, idxLongitude: number}} Column positions, each -1 when
 *   the column was not found.
 */
function resolveColumnIndexes(headers) {
  const normalizedHeaders = headers.map(header => normalizeHeader(header));

  // Exact match against any alias, compared in normalized form.
  const indexOfAnyAlias = aliases => {
    const wanted = aliases.map(alias => normalizeHeader(alias));
    return normalizedHeaders.findIndex(name => wanted.includes(name));
  };
  const indexWhere = matches => findFirstHeaderIndex(headers, matches);

  const idxCep = indexWhere(name => /\bcep\b/.test(name) || name === 'codigo postal');
  const idxNumero = indexOfAnyAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = indexWhere(name => name.includes('endereco') || name.includes('logradouro'));
  const idxLatitude = indexOfAnyAlias(['latitude']);
  const idxLongitude = indexOfAnyAlias(['longitude']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}
/**
 * Extracts a house/building number from free-form address text.
 *
 * Highway-style references (e.g. "BR-153", "KM 12") and numbers placed right
 * before a closing parenthesis are discarded first. The number is then taken
 * from, in order of preference: a labeled number ("nº 45"), a number following
 * a comma, or the last standalone number in the text.
 *
 * @param {*} address - Address text; falsy values count as empty.
 * @returns {string} The number found (optionally with one trailing letter), or
 *   `'1'` when the address is empty or contains no usable number.
 */
function extractAddressNumber(address) {
  const text = String(address || '').trim();
  if (text === '') {
    return '1';
  }

  const roadRefsRemoved = text.replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ');
  const cleaned = roadRefsRemoved.replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  // Tried in priority order; the first pattern that matches wins.
  const capturePatterns = [
    /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i,
    /,\s*(\d+[A-Z]?)\b/i
  ];
  for (const pattern of capturePatterns) {
    const found = cleaned.match(pattern);
    if (found) {
      return found[1];
    }
  }

  const candidates = cleaned.match(/\b\d+[A-Z]?\b/gi);
  if (!candidates) {
    return '1';
  }
  return candidates[candidates.length - 1];
}
/**
 * Builds the CEP lookup payload for one data row.
 *
 * The CEP is reduced to its digits. The number comes from the number column
 * when it has content, otherwise it is extracted from the address column.
 *
 * @param {Array} cols - Cell values of the row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes -
 *   Column positions; a negative position means the column is absent.
 * @returns {{cep: string, numero: string}|null} The payload, or null when the
 *   row has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const cellAt = position => (position >= 0 ? cols[position] : '');

  const cepDigits = String(cellAt(indexes.idxCep) || '').replace(/\D/g, '');
  const explicitNumber = String(cellAt(indexes.idxNumero) || '').trim();
  const numero = explicitNumber || extractAddressNumber(cellAt(indexes.idxEndereco));

  if (cepDigits === '') {
    return null;
  }
  return { cep: cepDigits, numero };
}
/**
 * Makes a value safe to place in one cell of the semicolon-separated output.
 *
 * Carriage returns, line feeds and semicolons each become a space, and double
 * quotes become single quotes.
 *
 * @param {*} value - Any value; null/undefined become an empty string.
 * @returns {string} The sanitized text.
 */
function cleanCsvValue(value) {
  return String(value ?? '')
    .replace(/[\r\n;]/g, ' ')
    .replace(/"/g, "'");
}
/**
 * Counts the data rows of a file that carry enough information to be queried.
 *
 * The first row is treated as the header. For the `'cep'` data type a row
 * counts when a CEP payload can be built from it. For `'geolocalizacao'` a row
 * counts when both latitude and longitude parse as numbers. Any other data
 * type yields zero.
 *
 * @param {string} inputPath - Path of the uploaded file.
 * @returns {Promise<number>} Number of usable data rows.
 */
async function countValidLines(inputPath) {
  const dataType = await discoverDataType(inputPath);
  const allRows = readRows(inputPath);
  const headerRow = allRows[0] || [];
  const columns = resolveColumnIndexes(headerRow);

  const coordinateAt = (cells, position) => (position >= 0 ? parseFloat(cells[position]) : NaN);

  const isUsable = cells => {
    switch (dataType) {
      case 'cep':
        return Boolean(buildCepPayload(cells, columns));
      case 'geolocalizacao': {
        const lat = coordinateAt(cells, columns.idxLatitude);
        const lng = coordinateAt(cells, columns.idxLongitude);
        return !(isNaN(lat) || isNaN(lng));
      }
      default:
        return false;
    }
  };

  let usable = 0;
  for (const cells of allRows.slice(1)) {
    if (isUsable(cells)) {
      usable += 1;
    }
  }
  return usable;
}
/**
 * Runs the viability lookup for every usable data row of an uploaded file and
 * writes the results to a semicolon-separated CSV in the `outputs` directory.
 *
 * Each output row is the four result columns (distance, dedicated,
 * non-dedicated, error) followed by the original row. Rows without a usable
 * CEP or coordinate pair are skipped, and an unknown data type skips every
 * row. A failed lookup still produces an output row carrying the error
 * message, and is counted both as an error and as processed.
 *
 * @param {*} jobId - Job identifier forwarded to the job store counters.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original upload name, used to name the
 *   output file; falls back to `inputPath`.
 * @returns {Promise<string>} Absolute path of the generated CSV.
 * @throws {Error} When the input cannot be read or the output file cannot be
 *   written.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  const dataType = await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headers = rows[0] || [];
  const indexes = resolveColumnIndexes(headers);

  const baseName = path.parse(originalName || inputPath).name;
  const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
  const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });

  // Listen from the start: a write-stream 'error' with no listener (e.g. the
  // output directory is missing) would otherwise crash the process while the
  // loop below is still awaiting lookups.
  let streamError = null;
  outStream.on('error', err => {
    streamError = streamError || err;
  });

  // Every row, header included, is sanitized cell by cell so that a stray ';'
  // or line break cannot shift the output columns.
  const writeRow = cells => outStream.write(`${cells.map(cleanCsvValue).join(';')}\n`);

  // Accepts a decimal comma ("-23,55"); plain parseFloat would stop at the
  // comma and silently truncate the coordinate to -23.
  const parseCoordinate = (cells, index) =>
    index >= 0 ? parseFloat(String(cells[index] ?? '').replace(',', '.')) : NaN;

  outStream.write('\uFEFF');
  writeRow(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers]);

  for (const cols of rows.slice(1)) {
    // Stop early instead of querying the service for rows that cannot be saved.
    if (streamError) throw streamError;

    let dataToSend;
    if (dataType === 'cep') {
      dataToSend = buildCepPayload(cols, indexes);
      if (!dataToSend) continue;
    } else if (dataType === 'geolocalizacao') {
      const latitude = parseCoordinate(cols, indexes.idxLatitude);
      const longitude = parseCoordinate(cols, indexes.idxLongitude);
      if (Number.isNaN(latitude) || Number.isNaN(longitude)) continue;
      dataToSend = { latitude, longitude };
    } else {
      continue;
    }

    // Only the lookup is guarded: bookkeeping after it must not be mistaken
    // for a lookup failure, which would write a second row for the same input.
    let outCols;
    let failed = false;
    try {
      const viab = await consultarViabilidade(dataToSend);
      const distancia = viab.distancia ?? (viab.raw && (viab.raw.distancia || viab.raw.distance)) ?? '';
      const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
      const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
      outCols = [distancia, dedicado, naoDedicado, viab.error || '', ...cols];
    } catch (err) {
      failed = true;
      outCols = ['', '', '', err && (err.message || String(err)), ...cols];
    }

    writeRow(outCols);
    if (failed) incrementErrors(jobId);
    incrementProcessed(jobId);
  }

  // After an earlier failure 'finish' never fires, so waiting for it would hang.
  if (streamError) throw streamError;
  outStream.end();
  await once(outStream, 'finish');

  finishJob(jobId, path.basename(outputPath));
  return outputPath;
}
// Public API of this module: the file processor and the valid-row counter.
module.exports = { processCsvFile, countValidLines };