FEAT: Novos tipos de arquivos são permitidos

- Excel, CSV (delimitado por ponto e vírgula, vírgula ou tabulação) e TXT agora são aceitos para upload.
- Formato de dados atualizado para arquivos que fogem do padrão predefinido.
This commit is contained in:
Rafael Alves Lopes 2026-05-04 16:38:37 -03:00
parent 3e39b9c36a
commit 827855295e
6 changed files with 338 additions and 141 deletions

View File

@ -46,13 +46,13 @@ async function uploadCsvFile(req, res) {
// Verifica o tipo de dados do CSV
const dataType = await discoverDataType(filePath);
if (dataType === 'unknown') {
return res.status(400).json({ error: 'Formato de CSV inválido. Deve conter colunas CEP e Número ou Latitude e Longitude.' });
return res.status(400).json({ error: 'Formato invalido. Envie CSV, XLS ou XLSX com CEP+Numero, CEP+Endereco, ou Latitude+Longitude.' });
}
// Conta as linhas válidas primeiro
const total = await countValidLines(filePath);
if (total === 0) {
return res.status(400).json({ error: 'Nenhuma linha válida encontrada no CSV. Verifique se há colunas CEP e Número.' });
return res.status(400).json({ error: 'Nenhuma linha valida encontrada. Verifique se ha CEP+Numero, CEP+Endereco, ou Latitude+Longitude.' });
}
// Cria o job

106
package-lock.json generated
View File

@ -18,7 +18,8 @@
"fast-csv": "^4.3.6",
"ipaddr.js": "^2.2.0",
"multer": "*",
"querystring": "^0.2.1"
"querystring": "^0.2.1",
"xlsx": "^0.18.5"
}
},
"node_modules/@fast-csv/format": {
@ -69,6 +70,15 @@
"node": ">= 0.6"
}
},
"node_modules/adler-32": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
"integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/append-field": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz",
@ -206,6 +216,28 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/cfb": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
"integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
"license": "Apache-2.0",
"dependencies": {
"adler-32": "~1.3.0",
"crc-32": "~1.2.0"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/codepage": {
"version": "1.15.0",
"resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
"integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
@ -282,6 +314,18 @@
"node": ">= 0.10"
}
},
"node_modules/crc-32": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
"integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
"license": "Apache-2.0",
"bin": {
"crc32": "bin/crc32.njs"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/csv-parser": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
@ -588,6 +632,15 @@
"node": ">= 0.6"
}
},
"node_modules/frac": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
"integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/fresh": {
"version": "0.5.2",
"resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
@ -1253,6 +1306,18 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/ssf": {
"version": "0.11.2",
"resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
"integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
"license": "Apache-2.0",
"dependencies": {
"frac": "~1.1.2"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/statuses": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
@ -1352,6 +1417,45 @@
"node": ">= 0.8"
}
},
"node_modules/wmf": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
"integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/word": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
"integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/xlsx": {
"version": "0.18.5",
"resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
"integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
"license": "Apache-2.0",
"dependencies": {
"adler-32": "~1.3.0",
"cfb": "~1.2.1",
"codepage": "~1.15.0",
"crc-32": "~1.2.1",
"ssf": "~0.11.2",
"wmf": "~1.0.1",
"word": "~0.3.0"
},
"bin": {
"xlsx": "bin/xlsx.njs"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/xtend": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",

View File

@ -19,6 +19,7 @@
"fast-csv": "^4.3.6",
"ipaddr.js": "^2.2.0",
"multer": "*",
"querystring": "^0.2.1"
"querystring": "^0.2.1",
"xlsx": "^0.18.5"
}
}

View File

@ -44,7 +44,7 @@
</p>
<form id="uploadForm">
<div class="mb-3">
<input class="form-control" type="file" id="csvfile" accept=".csv" required />
<input class="form-control" type="file" id="csvfile" accept=".csv,.xls,.xlsx" required />
</div>
<div class="card-buttons__container">
<button class="btn btn-primary button-mobile" type="submit">Enviar CSV</button>

View File

@ -1,53 +1,153 @@
const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const readline = require('readline');
const XLSX = require('xlsx');
const { once } = require('events');
const {
createJob,
incrementProcessed,
incrementErrors,
finishJob,
failJob
finishJob
} = require('./jobStore.service');
// conta linhas válidas no CSV (com CEP e Número ou Latitude e Longitude)
async function countValidLines(inputPath) {
const dataType = await discoverDataType(inputPath);
const instream = fs.createReadStream(inputPath, { encoding: 'utf8' });
const rl = readline.createInterface({ input: instream, crlfDelay: Infinity });
/**
 * Normalizes a header cell so that spelling variants compare equal.
 *
 * Lower-cases the text, strips combining diacritics, turns runs of `_`/`-`
 * into a single space and collapses whitespace.
 *
 * @param {*} value - Raw header cell; falsy values are treated as ''.
 * @returns {string} Normalized header with no leading or trailing spaces.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: separators at the edges ("_numero_") are turned into spaces
    // above and would otherwise defeat exact alias matching.
    .trim();
}
let isHeader = true;
let headers = [];
let idxCep = -1;
let idxNumero = -1;
let idxLatitude = -1;
let idxLongitude = -1;
let total = 0;
/**
 * Tells whether a path names an Excel workbook, judging only by its extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} true when the extension is `.xls` or `.xlsx` (any case).
 */
function isExcelFile(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  return extension === '.xls' || extension === '.xlsx';
}
for await (const rawLine of rl) {
const line = rawLine.replace(/\r$/, '');
if (!line.trim()) continue;
/**
 * Guesses the column delimiter of a line of delimited text.
 *
 * Candidates are tried in the order `;`, tab, `,`; the one that splits the
 * line into the most pieces wins, and on a tie the earlier candidate is kept.
 *
 * @param {string} line - Line to inspect (typically the header line).
 * @returns {string} The chosen delimiter; `;` when no candidate occurs.
 */
function detectDelimiter(line) {
  let chosen = ';';
  let mostPieces = -1;
  for (const candidate of [';', '\t', ',']) {
    const pieces = line.split(candidate).length;
    // Strictly greater: ties keep the earlier candidate.
    if (pieces > mostPieces) {
      chosen = candidate;
      mostPieces = pieces;
    }
  }
  return chosen;
}
if (isHeader) {
headers = line.split(';').map(h => h.trim());
const lower = headers.map(h => h.toLowerCase());
idxCep = lower.indexOf('cep');
idxNumero = lower.indexOf('numero');
idxLatitude = lower.indexOf('latitude');
idxLongitude = lower.indexOf('longitude');
isHeader = false;
continue;
/**
 * Splits one line of delimited text into trimmed cells, honouring double quotes.
 *
 * A delimiter inside a quoted section does not split the cell. A doubled
 * quote (`""`) inside a quoted section yields one literal quote. Outside a
 * quoted section a quote always opens one, so an empty quoted field (`""`)
 * produces an empty cell instead of a stray `"` character.
 *
 * @param {string} line - A single line, without its line terminator.
 * @param {string} delimiter - Single-character column delimiter.
 * @returns {string[]} The cells, each trimmed; always at least one entry.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted section: emit one quote, skip its pair.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  // The last cell has no trailing delimiter to flush it.
  cols.push(current.trim());
  return cols;
}
/**
 * Reads a delimited text file into a matrix of cells.
 *
 * The file is read synchronously as UTF-8, a leading BOM is removed and lines
 * that are blank after trimming are discarded. The delimiter is detected from
 * the first remaining line and applied to every line.
 *
 * @param {string} filePath - Path of the text file.
 * @returns {string[][]} One array of cells per non-blank line; [] when none.
 */
function readDelimitedRows(filePath) {
  const text = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');

  const nonBlankLines = [];
  for (const line of text.split(/\r?\n/)) {
    if (line.trim()) nonBlankLines.push(line);
  }
  if (nonBlankLines.length === 0) return [];

  const delimiter = detectDelimiter(nonBlankLines[0]);
  return nonBlankLines.map(line => splitDelimitedLine(line.replace(/\r$/, ''), delimiter));
}
/**
 * Reads the first worksheet of an Excel workbook into a matrix of strings.
 *
 * Rows come from `sheet_to_json` with `header: 1` (arrays rather than
 * objects), blank rows skipped and missing cells defaulted to ''. Every cell
 * is then stringified and trimmed.
 *
 * @param {string} filePath - Path of the workbook.
 * @returns {string[][]} Rows of the first sheet; [] when there is no sheet.
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const [firstSheetName] = workbook.SheetNames;
  if (!firstSheetName) return [];

  const sheet = workbook.Sheets[firstSheetName];
  const matrix = XLSX.utils.sheet_to_json(sheet, {
    header: 1,
    blankrows: false,
    defval: ''
  });
  return matrix.map(row => row.map(cell => String(cell ?? '').trim()));
}
/**
 * Loads every row of an uploaded file, picking the reader by file extension.
 *
 * @param {string} filePath - Path of the uploaded file.
 * @returns {string[][]} Rows as arrays of cell strings.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}
/**
 * Finds the position of the first header whose normalized form satisfies a predicate.
 *
 * @param {string[]} headers - Raw header cells.
 * @param {(header: string) => boolean} predicate - Test applied to each normalized header.
 * @returns {number} Index of the first match, or -1 when none matches.
 */
function findFirstHeaderIndex(headers, predicate) {
  const normalized = headers.map(normalizeHeader);
  return normalized.findIndex(predicate);
}
/**
 * Locates the columns of interest in a header row.
 *
 * CEP and address columns are matched loosely (word `cep`, or a header that
 * contains `endereco` / `logradouro`); number, latitude and longitude must
 * equal one of their aliases after normalization.
 *
 * @param {string[]} headers - Raw header cells.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number, idxLatitude: number, idxLongitude: number}}
 *   Column positions; -1 for every column that was not found.
 */
function resolveColumnIndexes(headers) {
  const normalizedHeaders = headers.map(normalizeHeader);

  // Position of the first header equal to any of the given aliases.
  const indexOfAlias = aliases => {
    const wanted = aliases.map(normalizeHeader);
    return normalizedHeaders.findIndex(header => wanted.includes(header));
  };

  const idxCep = findFirstHeaderIndex(
    headers,
    header => /\bcep\b/.test(header) || header === 'codigo postal'
  );
  const idxNumero = indexOfAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = findFirstHeaderIndex(
    headers,
    header => header.includes('endereco') || header.includes('logradouro')
  );
  const idxLatitude = indexOfAlias(['latitude']);
  const idxLongitude = indexOfAlias(['longitude']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}
/**
 * Pulls a street number out of a free-text address.
 *
 * Road references such as `BR-101` or `KM 12`, and numbers sitting right
 * before a closing parenthesis, are blanked out first. The remaining text is
 * then searched, in order, for: a labelled number (`n`, `no`, `num`,
 * `numero`, ...), a number following a comma, and finally the last
 * standalone number.
 *
 * @param {*} address - Address text; falsy values count as empty.
 * @returns {string} The number found (with an optional letter suffix), or '1' when none.
 */
function extractAddressNumber(address) {
  const text = String(address || '').trim();
  if (text === '') return '1';

  const cleaned = text
    .replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ')
    .replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  const labeled = /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i.exec(cleaned);
  if (labeled !== null) return labeled[1];

  const afterComma = /,\s*(\d+[A-Z]?)\b/i.exec(cleaned);
  if (afterComma !== null) return afterComma[1];

  const candidates = cleaned.match(/\b\d+[A-Z]?\b/gi);
  if (candidates === null) return '1';
  return candidates[candidates.length - 1];
}
/**
 * Builds the `{ cep, numero }` lookup payload for one data row.
 *
 * The CEP is reduced to its digits. The number comes from the number column
 * when that cell is non-blank, otherwise it is extracted from the address
 * column.
 *
 * @param {string[]} cols - Cells of the row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes -
 *   Column positions; a negative value means the column is absent.
 * @returns {{cep: string, numero: string}|null} The payload, or null when the row has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const { idxCep, idxNumero, idxEndereco } = indexes;
  const cellAt = index => (index >= 0 ? cols[index] : '');

  const cep = String(cellAt(idxCep) || '').replace(/\D/g, '');
  if (!cep) return null;

  const numero = String(cellAt(idxNumero) || '').trim() || extractAddressNumber(cellAt(idxEndereco));
  return { cep, numero };
}
/**
 * Makes a value safe to place in one cell of the `;`-separated output.
 *
 * Line breaks and semicolons become spaces and double quotes become single
 * quotes; null/undefined become ''.
 *
 * @param {*} value - Value to write.
 * @returns {string} Sanitized single-line text.
 */
function cleanCsvValue(value) {
  const flattened = String(value ?? '').replace(/[\r\n;]/g, ' ');
  return flattened.replace(/"/g, "'");
}
// Counts the data rows (every row after the first) that can be turned into a lookup.
// For the 'cep' data type a row counts when buildCepPayload returns a payload; for
// 'geolocalizacao' it counts when both the latitude and longitude cells parse as numbers.
// Resolves to that count.
// NOTE(review): this span comes from a rendered diff, so superseded statements (the ones
// using the bare idxCep / idxNumero / idxLatitude / idxLongitude names) and an
// "@ -55,113 ..." hunk header are interleaved with the current code - confirm against
// the real file before relying on it.
async function countValidLines(inputPath) {
const dataType = await discoverDataType(inputPath);
const rows = readRows(inputPath);
// The first row is treated as the header row.
const headers = rows[0] || [];
const indexes = resolveColumnIndexes(headers);
let total = 0;
for (const cols of rows.slice(1)) {
if (dataType === 'cep') {
const cep = idxCep >= 0 ? String(cols[idxCep] || '').replace(/\D/g, '') : '';
const numero = idxNumero >= 0 ? cols[idxNumero] : '';
if (cep && numero) total++;
if (buildCepPayload(cols, indexes)) total++;
} else if (dataType === 'geolocalizacao') {
const latitude = idxLatitude >= 0 ? parseFloat(cols[idxLatitude]) : NaN;
const longitude = idxLongitude >= 0 ? parseFloat(cols[idxLongitude]) : NaN;
const latitude = indexes.idxLatitude >= 0 ? parseFloat(cols[indexes.idxLatitude]) : NaN;
const longitude = indexes.idxLongitude >= 0 ? parseFloat(cols[indexes.idxLongitude]) : NaN;
if (!isNaN(latitude) && !isNaN(longitude)) total++;
}
}
@ -55,113 +155,47 @@ async function countValidLines(inputPath) {
return total;
}
// nova função: processa CSV linha a linha, chama consultarViabilidade e gera CSV de saída
// Recebe jobId já criado no controller
async function processCsvFile(jobId, inputPath, originalName) {
const dataType = await discoverDataType(inputPath);
const total = await countValidLines(inputPath);
// Job já criado no controller
// const jobId = createJob(total);
const baseName = path.parse(inputPath).name;
const rows = readRows(inputPath);
const headers = rows[0] || [];
const indexes = resolveColumnIndexes(headers);
const baseName = path.parse(originalName || inputPath).name;
const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
const instream = fs.createReadStream(inputPath, { encoding: 'utf8' });
const rl = readline.createInterface({ input: instream, crlfDelay: Infinity });
const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
outStream.write('\uFEFF');
outStream.write(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers].join(';') + '\n');
let isHeader = true;
let headers = [];
let idxCep = -1;
let idxNumero = -1;
let idxLatitude = -1;
let idxLongitude = -1;
for await (const rawLine of rl) {
const line = rawLine.replace(/\r$/, ''); // normalize CRLF
if (!line.trim()) continue;
if (isHeader) {
headers = line
.split(';')
.map(h => h.trim())
.filter(h => h !== '');
const lower = headers.map(h => h.toLowerCase());
idxCep = lower.indexOf('cep');
idxNumero = lower.indexOf('numero');
idxLatitude = lower.indexOf('latitude');
idxLongitude = lower.indexOf('longitude');
// se não encontrar, tenta variações comuns
const idx = lower.indexOf('codigo postal');
if (idx !== -1) idxCep = idx;
const outHeaders = [...headers, 'Distancia', 'Endereco', 'Não Dedicado', 'Dedicado', 'Erro'];
outStream.write(outHeaders.join(';') + '\n');
isHeader = false;
continue;
}
const cols = line
.split(';')
.map(c => c.trim())
.filter(c => c !== '');
for (const cols of rows.slice(1)) {
let dataToSend = {};
if (dataType === 'cep') {
const cepRaw = (idxCep >= 0 && cols[idxCep]) ? cols[idxCep] : '';
const cep = String(cepRaw).replace(/\D/g, ''); // keep digits only
const numero = (idxNumero >= 0 && cols[idxNumero]) ? cols[idxNumero] : '';
if (!cep || !numero) {
continue; // pula linha inválida
}
dataToSend = { cep, numero };
dataToSend = buildCepPayload(cols, indexes);
if (!dataToSend) continue;
} else if (dataType === 'geolocalizacao') {
const latitude = (idxLatitude >= 0 && cols[idxLatitude]) ? parseFloat(cols[idxLatitude]) : NaN;
const longitude = (idxLongitude >= 0 && cols[idxLongitude]) ? parseFloat(cols[idxLongitude]) : NaN;
const latitude = indexes.idxLatitude >= 0 ? parseFloat(cols[indexes.idxLatitude]) : NaN;
const longitude = indexes.idxLongitude >= 0 ? parseFloat(cols[indexes.idxLongitude]) : NaN;
if (isNaN(latitude) || isNaN(longitude)) {
continue; // pula linha inválida
}
if (isNaN(latitude) || isNaN(longitude)) continue;
dataToSend = { latitude, longitude };
} else {
continue; // tipo desconhecido, pula
continue;
}
try {
const viab = await consultarViabilidade(dataToSend);
const distancia = viab.distancia ?? (viab.raw && (viab.raw.distancia || viab.raw.distance)) ?? '';
if (dataType === 'cep' && viab.cep) {
var endereco = `${viab.logradouro || ''}, ${viab.bairro || ''}, ${viab.cidade || ''}/${viab.estado || ''}, ${viab.cep || ''}`;
} else {
var endereco = viab.endereco;
}
const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
const error = viab.error ? cleanCsvValue(viab.error) : '';
if (viab.naoDedicado) {
var naoDedicado = "Viavel";
} else {
var naoDedicado = "Não Viavel";
}
if (viab.dedicado) {
var dedicado = "Viavel";
} else {
var dedicado = "Não Viavel";
}
const error = viab.error ? String(viab.error).replace(/[\r\n;]/g, ' ') : '';
const outCols = [...cols, distancia, endereco, naoDedicado, dedicado, error];
const outCols = [distancia, dedicado, naoDedicado, error, ...cols].map(cleanCsvValue);
outStream.write(outCols.join(';') + '\n');
incrementProcessed(jobId);
} catch (err) {
const errMsg = (err && (err.message || String(err))).replace(/[\r\n;]/g, ' ');
const outCols = [...cols, '', '', '', '', '', '', errMsg];
const errMsg = cleanCsvValue(err && (err.message || String(err)));
const outCols = ['', '', '', errMsg, ...cols].map(cleanCsvValue);
outStream.write(outCols.join(';') + '\n');
incrementErrors(jobId);
incrementProcessed(jobId);

View File

@ -1,8 +1,74 @@
const axios = require('axios');
const fs = require('fs');
const readline = require('readline');
const path = require('path');
const XLSX = require('xlsx');
const { apiConfig, apiViabilidadeUrl, apiUrlBase } = require('../config/apiConfig');
/**
 * Normalizes a header cell so that spelling variants compare equal.
 *
 * Lower-cases the text, strips combining diacritics, turns runs of `_`/`-`
 * into a single space and collapses whitespace.
 *
 * @param {*} value - Raw header cell; falsy values are treated as ''.
 * @returns {string} Normalized header with no leading or trailing spaces.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: separators at the edges ("_numero_") are turned into spaces
    // above and would otherwise defeat exact alias matching.
    .trim();
}
/**
 * Checks whether any header equals one of the given aliases.
 *
 * Aliases are normalized here; headers are compared as received.
 *
 * @param {string[]} headers - Header cells to search.
 * @param {string[]} aliases - Accepted spellings.
 * @returns {boolean} true when at least one header matches an alias.
 */
function hasHeader(headers, aliases) {
  const accepted = new Set(aliases.map(normalizeHeader));
  return headers.some(header => accepted.has(header));
}
/**
 * Tells whether a path names an Excel workbook, judging only by its extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} true when the extension is `.xls` or `.xlsx` (any case).
 */
function isExcelFile(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  return extension === '.xls' || extension === '.xlsx';
}
/**
 * Guesses the column delimiter of a line of delimited text.
 *
 * Candidates are tried in the order `;`, tab, `,`; the one that splits the
 * line into the most pieces wins, and on a tie the earlier candidate is kept.
 *
 * @param {string} line - Line to inspect (typically the header line).
 * @returns {string} The chosen delimiter; `;` when no candidate occurs.
 */
function detectDelimiter(line) {
  let chosen = ';';
  let mostPieces = -1;
  for (const candidate of [';', '\t', ',']) {
    const pieces = line.split(candidate).length;
    // Strictly greater: ties keep the earlier candidate.
    if (pieces > mostPieces) {
      chosen = candidate;
      mostPieces = pieces;
    }
  }
  return chosen;
}
/**
 * Reads the normalized header row of the first worksheet in a workbook.
 *
 * The sheet is converted with `header: 1`, blank rows skipped and missing
 * cells defaulted to ''; the first resulting row is taken as the header.
 *
 * @param {string} filePath - Path of the workbook.
 * @returns {string[]} Normalized header cells; [] when there is no sheet or no row.
 */
function readExcelHeaders(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const [firstSheetName] = workbook.SheetNames;
  if (!firstSheetName) return [];

  const sheet = workbook.Sheets[firstSheetName];
  const rows = XLSX.utils.sheet_to_json(sheet, {
    header: 1,
    blankrows: false,
    defval: ''
  });
  const headerRow = rows[0] || [];
  return headerRow.map(normalizeHeader);
}
/**
 * Reads the normalized header cells from the first non-blank line of a
 * delimited text file.
 *
 * A leading BOM and a trailing carriage return are removed from the line, the
 * delimiter is detected from it, and each cell has one pair of wrapping
 * double quotes removed before normalization so that `"Numero"` matches the
 * same aliases as `Numero`.
 *
 * @param {string} filePath - Path of the text file.
 * @returns {Promise<string[]>} Normalized header cells; [] when the file has no non-blank line.
 */
async function readDelimitedHeaders(filePath) {
  const instream = fs.createReadStream(filePath, { encoding: 'utf8' });
  const rl = readline.createInterface({ input: instream, crlfDelay: Infinity });

  try {
    for await (const rawLine of rl) {
      const line = rawLine.replace(/^\uFEFF/, '').replace(/\r$/, '');
      if (!line.trim()) continue;

      return line
        .split(detectDelimiter(line))
        .map(cell => normalizeHeader(cell.trim().replace(/^"(.*)"$/, '$1')));
    }
    return [];
  } finally {
    // Closing the interface does not close the file stream; destroy it so the
    // descriptor is released even though reading stops after the first line.
    rl.close();
    instream.destroy();
  }
}
/**
 * Checks whether any normalized header denotes a CEP (postal code) column.
 *
 * @param {string[]} headers - Normalized header cells.
 * @returns {boolean} true when a header contains the word `cep` or equals `codigo postal`.
 */
function hasCepHeader(headers) {
  for (const header of headers) {
    if (header === 'codigo postal' || /\bcep\b/.test(header)) return true;
  }
  return false;
}
/**
 * Checks whether any normalized header denotes a street-number or address column.
 *
 * @param {string[]} headers - Normalized header cells.
 * @returns {boolean} true when a header equals a number alias or contains
 *   `endereco` / `logradouro`.
 */
function hasAddressOrNumberHeader(headers) {
  const numberAliases = ['numero', 'num', 'nº', 'n°'];
  return headers.some(header => {
    if (numberAliases.includes(header)) return true;
    return header.includes('endereco') || header.includes('logradouro');
  });
}
async function consultarViabilidade(data) {
try {
@ -28,20 +94,12 @@ async function consultarViabilidade(data) {
// Preciso de uma função para verificar se os dados vindos são de CEP ou de geolocalização
async function discoverDataType(input) {
if (typeof input === 'string') {
// Trata como filePath
const instream = fs.createReadStream(input, { encoding: 'utf8' });
const rl = readline.createInterface({ input: instream, crlfDelay: Infinity });
const headers = isExcelFile(input)
? readExcelHeaders(input)
: await readDelimitedHeaders(input);
let headers = [];
for await (const rawLine of rl) {
const line = rawLine.replace(/\r$/, '');
if (!line.trim()) continue;
headers = line.split(';').map(h => h.trim().toLowerCase());
break;
}
rl.close();
if (headers.includes('cep') && headers.includes('numero')) {
const hasCepNumero = hasCepHeader(headers) && hasAddressOrNumberHeader(headers);
if (hasCepNumero) {
return 'cep';
} else if (headers.includes('latitude') && headers.includes('longitude')) {
return 'geolocalizacao';