// viabiliza/service/csvService.js

const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const XLSX = require('xlsx');
const { once } = require('events');
const {
  incrementProcessed,
  incrementErrors,
  finishJob
} = require('./jobStore.service');

/**
 * Canonicalises a header cell so headers can be compared loosely.
 *
 * The value is stringified (falsy values become an empty string), trimmed,
 * lower-cased, stripped of combining diacritical marks, and has runs of
 * underscores/hyphens turned into a single space before whitespace runs are
 * collapsed. Spaces introduced at the edges by that replacement are kept.
 *
 * @param {*} value - Raw header cell.
 * @returns {string} Normalised header text.
 */
function normalizeHeader(value) {
  const lowered = String(value || '').trim().toLowerCase();
  // Decompose accented letters, then drop the combining marks.
  const unaccented = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  const separatorsAsSpaces = unaccented.replace(/[_-]+/g, ' ');
  return separatorsAsSpaces.replace(/\s+/g, ' ');
}

/**
 * Tells whether a path has an Excel extension (`.xls` or `.xlsx`),
 * ignoring letter case.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} True when the extension is `.xls` or `.xlsx`.
 */
function isExcelFile(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  return extension === '.xls' || extension === '.xlsx';
}

/**
 * Guesses the column delimiter of a line by checking which candidate
 * (`;`, tab, `,`) splits it into the most pieces.
 *
 * Ties are resolved in favour of the earlier candidate in that order, so a
 * line containing none of them yields `;`. Quoting is not considered.
 *
 * @param {string} line - Sample line, typically the header row.
 * @returns {string} The chosen delimiter character.
 */
function detectDelimiter(line) {
  let bestDelimiter = ';';
  let bestPieces = -1;

  for (const candidate of [';', '\t', ',']) {
    const pieces = line.split(candidate).length;
    // Strictly greater: an equal count never displaces an earlier candidate.
    if (pieces > bestPieces) {
      bestDelimiter = candidate;
      bestPieces = pieces;
    }
  }

  return bestDelimiter;
}

/**
 * Splits one delimited line into trimmed column values, honouring
 * double-quoted fields.
 *
 * Inside a quoted field the delimiter is treated as data and a doubled quote
 * (`""`) stands for one literal quote. A doubled quote is only an escape
 * while a quoted field is open, so an empty quoted field (`""`) yields an
 * empty string rather than a stray quote character.
 *
 * @param {string} line - A single line without its line terminator.
 * @param {string} delimiter - Single-character column separator.
 * @returns {string[]} Column values; always at least one element.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: emit one quote, skip its pair.
        current += '"';
        i++;
      } else {
        // Opening or closing quote of a field.
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  // Flush the last column (also covers the empty-line case).
  cols.push(current.trim());
  return cols;
}

/**
 * Reads a delimited text file synchronously as UTF-8 and returns its rows.
 *
 * A leading U+FEFF is removed, blank (whitespace-only) lines are dropped and
 * the delimiter is detected from the first remaining line, then applied to
 * every line.
 *
 * @param {string} filePath - Path of the text file to read.
 * @returns {string[][]} One array of column values per non-blank line;
 *   empty when the file has no non-blank lines.
 */
function readDelimitedRows(filePath) {
  const text = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');

  const nonBlankLines = [];
  for (const rawLine of text.split(/\r?\n/)) {
    if (rawLine.trim()) nonBlankLines.push(rawLine);
  }
  if (nonBlankLines.length === 0) return [];

  const delimiter = detectDelimiter(nonBlankLines[0]);
  const rows = [];
  for (const rawLine of nonBlankLines) {
    rows.push(splitDelimitedLine(rawLine.replace(/\r$/, ''), delimiter));
  }
  return rows;
}

/**
 * Reads the first sheet of an Excel workbook and returns its rows with every
 * cell stringified and trimmed.
 *
 * The sheet is converted with `header: 1`, `blankrows: false` and
 * `defval: ''`; nullish cells become empty strings.
 *
 * @param {string} filePath - Path of the workbook to read.
 * @returns {string[][]} Rows of trimmed cell text; empty when the workbook
 *   has no first sheet name.
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });

  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) return [];

  const sheet = workbook.Sheets[firstSheetName];
  const conversionOptions = { header: 1, blankrows: false, defval: '' };
  const rawRows = XLSX.utils.sheet_to_json(sheet, conversionOptions);

  const toTrimmedText = cell => String(cell ?? '').trim();
  return rawRows.map(row => row.map(cell => toTrimmedText(cell)));
}

/**
 * Loads the rows of an input file, choosing the Excel reader for
 * `.xls`/`.xlsx` paths and the delimited-text reader for everything else.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string[][]} Rows as arrays of cell text.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}

/**
 * Finds the position of the first header whose normalised form satisfies
 * the given predicate.
 *
 * @param {Array} headers - Raw header cells.
 * @param {Function} predicate - Test applied to each normalised header.
 * @returns {number} Index of the first match, or -1 when none matches.
 */
function findFirstHeaderIndex(headers, predicate) {
  const normalized = headers.map(normalizeHeader);
  return normalized.findIndex(predicate);
}

/**
 * Locates the columns this module cares about in a header row.
 *
 * - `idxCep`: first header containing the word "cep" or equal to
 *   "codigo postal".
 * - `idxNumero`: first header exactly matching one of the number aliases.
 * - `idxEndereco`: first header containing "endereco" or "logradouro".
 * - `idxLatitude` / `idxLongitude`: headers exactly matching those names.
 *
 * All comparisons are made on normalised header text.
 *
 * @param {Array} headers - Raw header cells.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number,
 *   idxLatitude: number, idxLongitude: number}} Column positions, -1 for
 *   any column that was not found.
 */
function resolveColumnIndexes(headers) {
  const canonicalHeaders = headers.map(normalizeHeader);

  // Position of the first header equal to any of the (normalised) aliases.
  const indexOfAnyAlias = aliases => {
    const wanted = aliases.map(normalizeHeader);
    return canonicalHeaders.findIndex(name => wanted.includes(name));
  };

  const isCepHeader = header => /\bcep\b/.test(header) || header === 'codigo postal';
  const isAddressHeader = header => header.includes('endereco') || header.includes('logradouro');

  const idxCep = findFirstHeaderIndex(headers, isCepHeader);
  const idxNumero = indexOfAnyAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = findFirstHeaderIndex(headers, isAddressHeader);
  const idxLatitude = indexOfAnyAlias(['latitude']);
  const idxLongitude = indexOfAnyAlias(['longitude']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}

/**
 * Heuristically pulls a street number out of a free-text address.
 *
 * Road/kilometre markers (BR, SP, GO, MT, KM followed by digits) and numbers
 * sitting right before a closing parenthesis are blanked out first. The
 * result is then searched, in order, for: a number introduced by a label
 * (n, no, num, numero, número, nº, n°), a number following a comma, and
 * finally the last standalone number. Numbers may carry one trailing letter.
 *
 * @param {*} address - Address text; falsy values are treated as empty.
 * @returns {string} The extracted number, or '1' when nothing is found.
 */
function extractAddressNumber(address) {
  const text = String(address || '').trim();
  if (text === '') return '1';

  // Remove numbers that belong to highway names / km markers or that sit
  // just before a ")".
  const candidateText = text
    .replace(/\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi, ' ')
    .replace(/\b\d+\s*[A-Z]?\b\s*(?=\))/gi, ' ');

  const labeled = /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i.exec(candidateText);
  if (labeled !== null) return labeled[1];

  const afterComma = /,\s*(\d+[A-Z]?)\b/i.exec(candidateText);
  if (afterComma !== null) return afterComma[1];

  const looseNumbers = candidateText.match(/\b\d+[A-Z]?\b/gi);
  if (looseNumbers === null) return '1';
  return looseNumbers[looseNumbers.length - 1];
}

/**
 * Builds the `{ cep, numero }` lookup payload for one data row.
 *
 * The CEP is reduced to its digits. The number comes from the number column
 * when that cell is non-blank, otherwise it is extracted from the address
 * column text.
 *
 * @param {Array} cols - Cell values of the row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes -
 *   Column positions (-1 meaning "column absent").
 * @returns {{cep: string, numero: string}|null} The payload, or null when
 *   the row has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  // Cell at a resolved column position, or '' when the column is absent.
  const cellAt = index => (index >= 0 ? cols[index] : '');

  const cep = String(cellAt(indexes.idxCep) || '').replace(/\D/g, '');
  const explicitNumber = String(cellAt(indexes.idxNumero) || '').trim();
  const numero = explicitNumber || extractAddressNumber(cellAt(indexes.idxEndereco));

  return cep ? { cep, numero } : null;
}

/**
 * Makes a value safe to place in a semicolon-delimited output line.
 *
 * Nullish values become an empty string; carriage returns, line feeds and
 * semicolons are each replaced by a space, and double quotes by single
 * quotes.
 *
 * @param {*} value - Value to sanitise.
 * @returns {string} Sanitised text.
 */
function cleanCsvValue(value) {
  const singleLine = String(value ?? '').replace(/[\r\n;]/g, ' ');
  return singleLine.replace(/"/g, "'");
}

/**
 * Counts the data rows of an input file that would produce a lookup.
 *
 * The first row is taken as the header. For data type 'cep' a row counts
 * when a CEP payload can be built from it; for 'geolocalizacao' it counts
 * when both the latitude and longitude cells parse as numbers. Any other
 * data type yields 0.
 *
 * @param {string} inputPath - Path of the uploaded file.
 * @returns {Promise<number>} Number of usable data rows.
 */
async function countValidLines(inputPath) {
  const dataType = await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const indexes = resolveColumnIndexes(rows[0] || []);

  // Parsed coordinate for a column, NaN when the column is absent.
  const coordinateAt = (cols, index) => (index >= 0 ? parseFloat(cols[index]) : NaN);

  const isUsableRow = cols => {
    switch (dataType) {
      case 'cep':
        return Boolean(buildCepPayload(cols, indexes));
      case 'geolocalizacao': {
        const latitude = coordinateAt(cols, indexes.idxLatitude);
        const longitude = coordinateAt(cols, indexes.idxLongitude);
        return !isNaN(latitude) && !isNaN(longitude);
      }
      default:
        return false;
    }
  };

  return rows.slice(1).filter(cols => isUsableRow(cols)).length;
}

/**
 * Runs a viability lookup for every usable row of an input file and writes
 * the results to a semicolon-delimited CSV in the `outputs` directory next
 * to this module's parent folder.
 *
 * The output starts with U+FEFF and a header row made of the four result
 * columns followed by the input headers. Each processed input row is echoed
 * after its result columns. Rows without a CEP (data type 'cep'), rows with
 * unparsable coordinates (data type 'geolocalizacao') and every row of any
 * other data type are skipped and not counted.
 *
 * Every cell, including the header cells, goes through `cleanCsvValue`, and
 * writes wait for 'drain' when the stream signals backpressure so large
 * files do not pile up in memory.
 *
 * @param {*} jobId - Identifier passed to the job store counters.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original file name; its base name (or the
 *   input path's, when omitted) is used in the output file name.
 * @returns {Promise<string>} Absolute path of the generated CSV.
 * @throws Propagates stream and job-store errors; the output stream is
 *   destroyed before rethrowing.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  const dataType = await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headers = rows[0] || [];
  const indexes = resolveColumnIndexes(headers);
  const baseName = path.parse(originalName || inputPath).name;
  const outputFilename = `processed_${Date.now()}_${baseName}.csv`;
  const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });

  // write() returning false means the internal buffer is full; wait for
  // 'drain' before producing more output.
  const writeChunk = async chunk => {
    if (!outStream.write(chunk)) await once(outStream, 'drain');
  };
  const writeRow = cells => writeChunk(`${cells.map(cleanCsvValue).join(';')}\n`);

  try {
    await writeChunk('\uFEFF');
    // Header cells are sanitised like data cells so a ';' or quote in an
    // input header cannot shift the output columns.
    await writeRow(['Distancia', 'Dedicado', 'Nao Dedicado', 'Erro', ...headers]);

    for (const cols of rows.slice(1)) {
      let dataToSend = {};

      if (dataType === 'cep') {
        dataToSend = buildCepPayload(cols, indexes);
        if (!dataToSend) continue;
      } else if (dataType === 'geolocalizacao') {
        const latitude = indexes.idxLatitude >= 0 ? parseFloat(cols[indexes.idxLatitude]) : NaN;
        const longitude = indexes.idxLongitude >= 0 ? parseFloat(cols[indexes.idxLongitude]) : NaN;

        if (isNaN(latitude) || isNaN(longitude)) continue;
        dataToSend = { latitude, longitude };
      } else {
        continue;
      }

      // Only the lookup itself is guarded: a failed lookup becomes an error
      // row, while write/job-store failures propagate to the caller.
      let outCols;
      let lookupFailed = false;
      try {
        const viab = await consultarViabilidade(dataToSend);
        // NOTE(review): `||` skips a raw.distancia of 0 — confirm whether 0
        // is a meaningful distance for this service.
        const distancia = viab.distancia ?? (viab.raw && (viab.raw.distancia || viab.raw.distance)) ?? '';
        const dedicado = viab.dedicado ? 'Viavel' : 'Nao Viavel';
        const naoDedicado = viab.naoDedicado ? 'Viavel' : 'Nao Viavel';
        const error = viab.error ? viab.error : '';

        outCols = [distancia, dedicado, naoDedicado, error, ...cols];
      } catch (err) {
        const errMsg = err && (err.message || String(err));
        outCols = ['', '', '', errMsg, ...cols];
        lookupFailed = true;
      }

      await writeRow(outCols);
      if (lookupFailed) incrementErrors(jobId);
      incrementProcessed(jobId);
    }

    outStream.end();
    await once(outStream, 'finish');
  } catch (err) {
    // Release the file descriptor instead of leaving a half-open stream.
    outStream.destroy();
    throw err;
  }

  finishJob(jobId, path.basename(outputPath));

  return outputPath;
}

// Public API: the batch processor and the row counter; all other functions
// in this file stay module-private.
module.exports = { processCsvFile, countValidLines };