viabiliza/service/csvService.js

402 lines
13 KiB
JavaScript

const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const XLSX = require('xlsx');
const { once } = require('events');
const {
incrementProcessed,
incrementErrors,
finishJob
} = require('./jobStore.service');
// Header labels of the result columns placed in front of the original columns
// in both the CSV and the Excel output.
const RESULT_HEADERS = ['Provedor', 'Distancia', 'Dedicado', 'Nao Dedicado', 'Erro'];
/**
 * Normalizes a header cell so headers can be compared loosely.
 *
 * Lowercases the text, strips diacritics (NFD decomposition followed by
 * removal of combining marks), turns runs of `_` / `-` into a single space,
 * collapses whitespace and trims the result.
 *
 * @param {*} value - Raw header cell; falsy values are treated as empty.
 * @returns {string} The normalized header text.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: a leading/trailing `_` or `-` has just become a space and
    // would otherwise break exact comparisons such as `header === 'numero'`.
    .trim();
}
/**
 * Tells whether a file should be read as a spreadsheet.
 *
 * A `.xls` / `.xlsx` extension is accepted immediately. Otherwise the first
 * 512 bytes are sniffed for a ZIP signature ("PK", used by .xlsx), the OLE
 * compound-document signature (legacy .xls), or HTML/table markup (HTML
 * exports saved as Excel files).
 *
 * Only the head of the file is read, so large uploads are not loaded into
 * memory just for detection.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} True when the file looks like an Excel-readable file.
 * @throws {Error} When the extension is not conclusive and the file cannot be opened or read.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  const headBuffer = Buffer.alloc(512);
  let bytesRead = 0;
  const fd = fs.openSync(filePath, 'r');
  try {
    bytesRead = fs.readSync(fd, headBuffer, 0, headBuffer.length, 0);
  } finally {
    // Always release the descriptor, even when the read fails.
    fs.closeSync(fd);
  }
  const fileStart = headBuffer.subarray(0, bytesRead);

  const isZipBasedXlsx = fileStart[0] === 0x50 && fileStart[1] === 0x4b;
  const oleSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
  const isOleBasedXls = fileStart.subarray(0, 8).equals(oleSignature);

  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}
/**
 * Guesses the field delimiter of a delimited text line.
 *
 * Candidates are `;`, tab and `,` (in that priority order); the one that
 * splits the line into the most pieces wins, and ties go to the earlier
 * candidate.
 *
 * @param {string} line - A sample line, typically the first non-blank one.
 * @returns {string} The chosen delimiter.
 */
function detectDelimiter(line) {
  let bestDelimiter = ';';
  let bestPieces = -1;
  for (const candidate of [';', '\t', ',']) {
    const pieces = line.split(candidate).length;
    // Strictly greater keeps the earlier candidate on ties.
    if (pieces > bestPieces) {
      bestDelimiter = candidate;
      bestPieces = pieces;
    }
  }
  return bestDelimiter;
}
/**
 * Splits one delimited line into trimmed column values.
 *
 * Double quotes delimit fields that may contain the delimiter. Inside a
 * quoted field, `""` is an escaped literal quote. Outside quotes, `""` is an
 * empty quoted field and yields an empty string.
 *
 * @param {string} line - The line to split (without its line terminator).
 * @param {string} delimiter - Single-character field delimiter.
 * @returns {string[]} Column values, each trimmed.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote: only meaningful while inside a quoted field.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }
  cols.push(current.trim());
  return cols;
}
/**
 * Reads a delimited text file (UTF-8) into rows of column values.
 *
 * A leading BOM is removed, blank lines are skipped, and the delimiter is
 * detected from the first non-blank line and applied to every line.
 *
 * @param {string} filePath - Path of the text file.
 * @returns {string[][]} One array of columns per non-blank line.
 */
function readDelimitedRows(filePath) {
  const text = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');
  const nonBlankLines = text.split(/\r?\n/).filter(candidate => candidate.trim());
  if (nonBlankLines.length === 0) {
    return [];
  }

  const [firstLine] = nonBlankLines;
  const delimiter = detectDelimiter(firstLine);
  const rows = [];
  for (const rawLine of nonBlankLines) {
    const withoutCarriageReturn = rawLine.replace(/\r$/, '');
    rows.push(splitDelimitedLine(withoutCarriageReturn, delimiter));
  }
  return rows;
}
/**
 * Reads the first sheet of a workbook into rows of trimmed string cells.
 *
 * Blank rows are kept and missing cells become empty strings.
 *
 * @param {string} filePath - Path of the workbook file.
 * @returns {string[][]} Rows of the first sheet, or an empty array when the workbook has no sheet.
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const [firstSheetName] = workbook.SheetNames;
  if (!firstSheetName) {
    return [];
  }

  const sheet = workbook.Sheets[firstSheetName];
  const rawRows = XLSX.utils.sheet_to_json(sheet, {
    header: 1,
    blankrows: true,
    defval: ''
  });
  const toTrimmedText = cell => String(cell ?? '').trim();
  return rawRows.map(cells => cells.map(toTrimmedText));
}
/**
 * Reads an input file into rows, using the Excel reader or the
 * delimited-text reader depending on what the file looks like.
 *
 * @param {string} filePath - Path of the input file.
 * @returns {string[][]} Rows of column values.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}
function findFirstHeaderIndex(headers, predicate) {
return headers.map(normalizeHeader).findIndex(predicate);
}
function hasHeaderAlias(headers, aliases) {
const normalizedAliases = aliases.map(normalizeHeader);
return headers.map(normalizeHeader).some(header => normalizedAliases.includes(header));
}
function hasCepHeader(headers) {
return headers.map(normalizeHeader).some(header => /\bcep\b/.test(header) || header === 'codigo postal');
}
function hasAddressOrNumberHeader(headers) {
return headers.map(normalizeHeader).some(header => ['numero', 'num', 'nº', 'n°'].includes(header)
|| header.includes('endereco')
|| header.includes('logradouro'));
}
/**
 * Checks whether a row contains both a latitude header and a longitude header.
 *
 * @param {Array} headers - Raw header cells.
 * @returns {boolean} True when both coordinate headers are present.
 */
function hasGeoHeaders(headers) {
  if (!hasHeaderAlias(headers, ['latitude', 'lat'])) {
    return false;
  }
  return hasHeaderAlias(headers, ['longitude', 'long', 'lng', 'lon']);
}
/**
 * Locates the header row: the first row that has either a CEP header plus a
 * number/address header, or both coordinate headers.
 *
 * @param {Array<Array>} rows - All rows of the input.
 * @returns {number} Index of the header row, or 0 when no row qualifies.
 */
function findHeaderRowIndex(rows) {
  for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
    const row = rows[rowIndex];
    const looksLikeCepHeader = hasCepHeader(row) && hasAddressOrNumberHeader(row);
    if (looksLikeCepHeader || hasGeoHeaders(row)) {
      return rowIndex;
    }
  }
  return 0;
}
/**
 * Resolves which column holds each field the lookup needs.
 *
 * CEP and address columns are matched by pattern / substring on the
 * normalized header; number, latitude and longitude are matched by exact
 * alias. Each index is -1 when the column is absent.
 *
 * @param {Array} headers - Raw header cells of the header row.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number, idxLatitude: number, idxLongitude: number}}
 */
function resolveColumnIndexes(headers) {
  const normalized = headers.map(normalizeHeader);

  const indexOfAlias = aliases => {
    const accepted = aliases.map(normalizeHeader);
    return normalized.findIndex(header => accepted.includes(header));
  };
  const isCepHeader = header => /\bcep\b/.test(header) || header === 'codigo postal';
  const isAddressHeader = header => header.includes('endereco') || header.includes('logradouro');

  const idxCep = normalized.findIndex(isCepHeader);
  const idxNumero = indexOfAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = normalized.findIndex(isAddressHeader);
  const idxLatitude = indexOfAlias(['latitude', 'lat']);
  const idxLongitude = indexOfAlias(['longitude', 'long', 'lng', 'lon']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}
/**
 * Extracts a house number from a free-text address.
 *
 * Road/kilometre markers (e.g. "BR-101", "KM 12") and numbers right before a
 * closing parenthesis are discarded first. Then, in order of preference: a
 * number introduced by a label ("n", "no", "num", "numero", "nº", ...), a
 * number following a comma, or the last standalone number in the text.
 *
 * @param {*} address - Address text; falsy values are treated as empty.
 * @returns {string} The number found (may carry one letter suffix), or '1' when none is found.
 */
function extractAddressNumber(address) {
  const text = String(address || '').trim();
  if (text === '') {
    return '1';
  }

  const cleaned = text
    .replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ')
    .replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  // Tried in priority order; the first pattern that matches wins.
  const capturePatterns = [
    /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i,
    /,\s*(\d+[A-Z]?)\b/i
  ];
  for (const pattern of capturePatterns) {
    const found = cleaned.match(pattern);
    if (found) {
      return found[1];
    }
  }

  const candidates = cleaned.match(/\b\d+[A-Z]?\b/gi);
  if (!candidates || candidates.length === 0) {
    return '1';
  }
  return candidates[candidates.length - 1];
}
/**
 * Builds the CEP-based lookup payload for one data row.
 *
 * The CEP is reduced to its digits. The number comes from the number column
 * when it is filled, otherwise it is extracted from the address column.
 *
 * @param {Array} cols - Column values of the row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes - Resolved column indexes (-1 when absent).
 * @returns {{cep: string, numero: string}|null} The payload, or null when the row has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const cellAt = columnIndex => (columnIndex >= 0 ? cols[columnIndex] : '');

  const cep = String(cellAt(indexes.idxCep) || '').replace(/\D/g, '');
  const explicitNumber = String(cellAt(indexes.idxNumero) || '').trim();
  const numero = explicitNumber || extractAddressNumber(cellAt(indexes.idxEndereco));

  return cep ? { cep, numero } : null;
}
/**
 * Parses a coordinate cell into a number, accepting a decimal comma.
 *
 * @param {*} value - Raw cell value.
 * @returns {number} The parsed finite number, or NaN when the value is not a finite number.
 */
function parseCoordinate(value) {
  const text = String(value ?? '').trim().replace(',', '.');
  const parsed = parseFloat(text);
  if (!Number.isFinite(parsed)) {
    return NaN;
  }
  return parsed;
}
/**
 * Builds the coordinate-based lookup payload for one data row.
 *
 * @param {Array} cols - Column values of the row.
 * @param {{idxLatitude: number, idxLongitude: number}} indexes - Resolved column indexes (-1 when absent).
 * @returns {{latitude: number, longitude: number}|null} The payload, or null when either coordinate is missing or not numeric.
 */
function buildGeoPayload(cols, indexes) {
  const readCoordinate = columnIndex => (columnIndex >= 0 ? parseCoordinate(cols[columnIndex]) : NaN);

  const latitude = readCoordinate(indexes.idxLatitude);
  const longitude = readCoordinate(indexes.idxLongitude);

  const isUsable = !Number.isNaN(latitude) && !Number.isNaN(longitude);
  return isUsable ? { latitude, longitude } : null;
}
/**
 * Queries viability using the coordinate payload first and the CEP payload
 * as a fallback.
 *
 * A payload counts as failed when the call throws or when the result carries
 * a truthy `error` field; the next payload is then tried.
 *
 * @param {Object|null} geoPayload - Coordinate payload, or null when unavailable.
 * @param {Object|null} cepPayload - CEP payload, or null when unavailable.
 * @returns {Promise<*>} The first result that does not carry an error.
 * @throws {Error} The last failure seen, or a generic error when no payload was provided.
 */
async function consultarComFallback(geoPayload, cepPayload) {
  let lastError = null;

  // Order matters: coordinates are tried before the CEP.
  for (const payload of [geoPayload, cepPayload]) {
    if (!payload) continue;
    try {
      const result = await consultarViabilidade(payload);
      const failed = Boolean(result && result.error);
      if (!failed) {
        return result;
      }
      lastError = new Error(result.error);
    } catch (err) {
      lastError = err;
    }
  }

  throw lastError || new Error('Linha sem latitude/longitude ou CEP valido');
}
/**
 * Makes a value safe to place in a `;`-separated output line: line breaks
 * and semicolons become spaces, and double quotes become single quotes.
 *
 * @param {*} value - Any value; null/undefined become an empty string.
 * @returns {string} The sanitized text.
 */
function cleanCsvValue(value) {
  const raw = value ?? '';
  return String(raw)
    .replace(/[\r\n;]/g, ' ')
    .replace(/"/g, "'");
}
/**
 * Turns a failed-request error into a human-readable message.
 *
 * When the error carries `response.data`, that body is preferred: a string
 * body is returned as is, otherwise its `error` field, then its `message`
 * field, then the whole body as JSON. Without a response body the error's
 * own message (or its string form) is used.
 *
 * The result is always a string: non-string `error` / `message` values are
 * serialized as JSON instead of being returned as objects, and a missing
 * error yields an empty string.
 *
 * @param {*} error - The caught error (typically an HTTP client error).
 * @returns {string} The message to report for the row.
 */
function formatApiErrorResponse(error) {
  const toText = value => {
    if (typeof value === 'string') return value;
    // JSON.stringify returns undefined for values it cannot represent.
    return JSON.stringify(value) ?? String(value);
  };

  const responseData = error && error.response && error.response.data;
  if (responseData !== undefined && responseData !== null) {
    if (typeof responseData === 'string') return responseData;
    if (responseData.error) return toText(responseData.error);
    if (responseData.message) return toText(responseData.message);
    return JSON.stringify(responseData);
  }

  if (!error) return '';
  return String(error.message || error);
}
/**
 * Builds the result columns (in RESULT_HEADERS order) for a lookup that
 * returned a result.
 *
 * The distance comes from `viab.distancia`, falling back to
 * `viab.raw.distancia` and then `viab.raw.distance`. Only missing values
 * (null, undefined or empty string) trigger the fallback, so a legitimate
 * distance of 0 is reported rather than dropped.
 *
 * @param {Object} viab - Lookup result.
 * @returns {Array} `[provedor, distancia, dedicado, naoDedicado, error]`.
 */
function buildSuccessResultColumns(viab) {
  const isPresent = value => value !== undefined && value !== null && value !== '';

  const provedor = viab.provedor ?? '';
  const raw = viab.raw || {};
  const rawDistancia = [raw.distancia, raw.distance].find(isPresent);
  const distancia = viab.distancia ?? rawDistancia ?? '';
  const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
  const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
  const error = viab.error ? cleanCsvValue(viab.error) : '';
  return [provedor, distancia, dedicado, naoDedicado, error];
}
/**
 * Builds the result columns (in RESULT_HEADERS order) for a failed lookup:
 * every column is empty except the last, which holds the sanitized error message.
 *
 * @param {*} err - The error caught while looking up the row.
 * @returns {string[]} Five columns, the last one being the error text.
 */
function buildErrorResultColumns(err) {
  const message = cleanCsvValue(formatApiErrorResponse(err));
  return ['', '', '', '', message];
}
/**
 * Moves an A1-style cell address a number of columns to the right.
 *
 * @param {string} address - Cell address such as "B3".
 * @param {number} colOffset - Number of columns to add.
 * @returns {string} The shifted cell address.
 */
function shiftCellAddress(address, colOffset) {
  const { r, c } = XLSX.utils.decode_cell(address);
  return XLSX.utils.encode_cell({ r, c: c + colOffset });
}
/**
 * Moves a range a number of columns to the right.
 *
 * @param {string|{s: {r: number, c: number}, e: {r: number, c: number}}} range - A1-style range string or decoded range.
 * @param {number} colOffset - Number of columns to add to both corners.
 * @returns {{s: {r: number, c: number}, e: {r: number, c: number}}} A new decoded range; the input is not modified.
 */
function shiftRange(range, colOffset) {
  let bounds = range;
  if (typeof range === 'string') {
    bounds = XLSX.utils.decode_range(range);
  }
  const moveCorner = ({ r, c }) => ({ r, c: c + colOffset });
  return { s: moveCorner(bounds.s), e: moveCorner(bounds.e) };
}
/**
 * Builds a copy of a worksheet with the result columns inserted on the left.
 *
 * Every existing cell is moved right by the number of result columns;
 * column widths, merges and the autofilter range are shifted accordingly.
 * Metadata that does not depend on column positions (`!rows`, `!margins`,
 * `!protect`) is carried over unchanged. The result headers are written on
 * the header row and each row result on its own row, all as text cells.
 *
 * @param {Object} worksheet - Source worksheet object (not modified).
 * @param {number} headerRowIndex - Zero-based sheet row of the header.
 * @param {Array<{rowIndex: number, values: Array}>} rowResults - Result values per zero-based sheet row.
 * @returns {Object} The new worksheet object.
 */
function prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults) {
  const colOffset = RESULT_HEADERS.length;
  const shiftedWorksheet = {};

  Object.keys(worksheet).forEach(key => {
    // Keys starting with "!" are sheet metadata, handled separately below.
    if (key[0] === '!') return;
    shiftedWorksheet[shiftCellAddress(key, colOffset)] = worksheet[key];
  });

  const originalRange = worksheet['!ref']
    ? XLSX.utils.decode_range(worksheet['!ref'])
    : { s: { r: 0, c: 0 }, e: { r: headerRowIndex, c: 0 } };
  // The declared range must cover every row written below, otherwise those
  // cells fall outside the sheet's range.
  const lastWrittenRow = rowResults.reduce(
    (maxRow, { rowIndex }) => Math.max(maxRow, rowIndex),
    headerRowIndex
  );
  shiftedWorksheet['!ref'] = XLSX.utils.encode_range({
    s: { r: Math.min(originalRange.s.r, headerRowIndex), c: 0 },
    e: { r: Math.max(originalRange.e.r, lastWrittenRow), c: originalRange.e.c + colOffset }
  });

  // Row properties, page margins and protection do not reference columns,
  // so they are valid as-is on the shifted sheet.
  ['!rows', '!margins', '!protect'].forEach(key => {
    if (worksheet[key] !== undefined) {
      shiftedWorksheet[key] = worksheet[key];
    }
  });

  if (worksheet['!cols']) {
    // One distinct width object per new column, so they can be edited independently.
    const resultCols = Array.from({ length: colOffset }, () => ({ wch: 16 }));
    shiftedWorksheet['!cols'] = resultCols.concat(worksheet['!cols']);
  }
  if (worksheet['!merges']) {
    shiftedWorksheet['!merges'] = worksheet['!merges'].map(merge => shiftRange(merge, colOffset));
  }
  if (worksheet['!autofilter'] && worksheet['!autofilter'].ref) {
    shiftedWorksheet['!autofilter'] = {
      ...worksheet['!autofilter'],
      ref: XLSX.utils.encode_range(shiftRange(worksheet['!autofilter'].ref, colOffset))
    };
  }

  RESULT_HEADERS.forEach((value, index) => {
    const address = XLSX.utils.encode_cell({ r: headerRowIndex, c: index });
    shiftedWorksheet[address] = { t: 's', v: value };
  });
  rowResults.forEach(({ rowIndex, values }) => {
    values.forEach((value, index) => {
      const address = XLSX.utils.encode_cell({ r: rowIndex, c: index });
      shiftedWorksheet[address] = { t: 's', v: String(value ?? '') };
    });
  });

  return shiftedWorksheet;
}
/**
 * Counts the data rows of an input file that can be looked up, i.e. rows
 * after the header that yield a coordinate payload or a CEP payload.
 *
 * @param {string} inputPath - Path of the uploaded file.
 * @returns {Promise<number>} Number of rows that will be processed.
 */
async function countValidLines(inputPath) {
  await discoverDataType(inputPath);

  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const indexes = resolveColumnIndexes(rows[headerRowIndex] || []);

  const dataRows = rows.slice(headerRowIndex + 1);
  const validRows = dataRows.filter(cols => {
    const geoPayload = buildGeoPayload(cols, indexes);
    const cepPayload = buildCepPayload(cols, indexes);
    return Boolean(geoPayload || cepPayload);
  });
  return validRows.length;
}
/**
 * Looks up one data row and returns its result columns, updating the job counters.
 *
 * @param {*} jobId - Identifier of the job whose counters are updated.
 * @param {Array} cols - Column values of the row.
 * @param {Object} indexes - Resolved column indexes for the file.
 * @returns {Promise<Array|null>} Result columns (success or error), or null
 *   when the row has neither coordinates nor a CEP and must be skipped.
 */
async function resolveRowResultColumns(jobId, cols, indexes) {
  const geoPayload = buildGeoPayload(cols, indexes);
  const cepPayload = buildCepPayload(cols, indexes);
  if (!geoPayload && !cepPayload) return null;

  try {
    const viab = await consultarComFallback(geoPayload, cepPayload);
    const values = buildSuccessResultColumns(viab);
    incrementProcessed(jobId);
    return values;
  } catch (err) {
    // A failed row still counts as processed; it is additionally counted as an error.
    incrementErrors(jobId);
    incrementProcessed(jobId);
    return buildErrorResultColumns(err);
  }
}

/**
 * Processes an uploaded file row by row and writes an output file with the
 * result columns placed in front of the original columns.
 *
 * Excel inputs produce an `.xlsx` copy of the workbook whose first sheet
 * gets the result columns; other inputs produce a `;`-separated UTF-8 CSV
 * (with BOM) containing the header and every row that could be looked up.
 * The job is marked as finished with the output file name.
 *
 * @param {*} jobId - Identifier of the job whose progress is tracked.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original upload name, used to name the output.
 * @returns {Promise<string>} Path of the generated output file.
 * @throws {Error} When the input cannot be read or the output cannot be written.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const headers = rows[headerRowIndex] || [];
  const indexes = resolveColumnIndexes(headers);
  const baseName = path.parse(originalName || inputPath).name;
  const isExcel = isExcelFile(inputPath);
  const outputFilename = `processed_${Date.now()}_${baseName}${isExcel ? '.xlsx' : '.csv'}`;
  const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });

  if (isExcel) {
    const workbook = XLSX.readFile(inputPath, { cellDates: false, raw: false, cellStyles: true });
    const firstSheetName = workbook.SheetNames[0];
    const worksheet = workbook.Sheets[firstSheetName];
    // `rows` was produced by sheet_to_json, whose first row is the first row
    // of the sheet's used range. Translate row indexes back to absolute
    // sheet rows so results land next to the data they belong to even when
    // the used range does not start on the first sheet row.
    const rowOffset = worksheet && worksheet['!ref']
      ? XLSX.utils.decode_range(worksheet['!ref']).s.r
      : 0;

    const rowResults = [];
    for (let rowIndex = headerRowIndex + 1; rowIndex < rows.length; rowIndex++) {
      const values = await resolveRowResultColumns(jobId, rows[rowIndex], indexes);
      if (values) {
        rowResults.push({ rowIndex: rowIndex + rowOffset, values });
      }
    }

    workbook.Sheets[firstSheetName] = prependResultColumnsToWorksheet(
      worksheet,
      headerRowIndex + rowOffset,
      rowResults
    );
    XLSX.writeFile(workbook, outputPath, { bookType: 'xlsx' });
    finishJob(jobId, path.basename(outputPath));
    return outputPath;
  }

  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
  // Without a listener, an 'error' emitted while rows are still being looked
  // up would be thrown as an uncaught exception. Record it and surface it
  // from this function instead.
  let streamError = null;
  outStream.on('error', err => {
    streamError = err;
  });
  const writeLine = cells => {
    if (streamError) throw streamError;
    outStream.write(`${cells.map(cleanCsvValue).join(';')}\n`);
  };

  outStream.write('\uFEFF');
  // Header cells are sanitized like data cells so a `;` or line break in a
  // header cannot shift the output columns.
  writeLine([...RESULT_HEADERS, ...headers]);
  for (const cols of rows.slice(headerRowIndex + 1)) {
    const values = await resolveRowResultColumns(jobId, cols, indexes);
    if (values) {
      writeLine([...values, ...cols]);
    }
  }

  if (streamError) throw streamError;
  outStream.end();
  await once(outStream, 'finish');
  finishJob(jobId, path.basename(outputPath));
  return outputPath;
}
// Public API of this module: the file processor and the valid-row counter.
module.exports = { processCsvFile, countValidLines };