// viabiliza/service/csvService.js

const { consultarViabilidade, discoverDataType } = require('./viabilidadeService');
const fs = require('fs');
const path = require('path');
const XLSX = require('xlsx');
const ExcelJS = require('exceljs');
const { once } = require('events');
const {
  incrementProcessed,
  incrementErrors,
  finishJob
} = require('./jobStore.service');

// Titles of the result columns that this module places in front of the
// original columns in every generated output (CSV, XLS and XLSX paths).
const RESULT_HEADERS = ['Provedor', 'Distancia', 'Dedicado', 'Nao Dedicado', 'Erro'];

/**
 * Normalizes a header cell so it can be compared against known aliases.
 *
 * The value is lower-cased, stripped of combining diacritics, has runs of
 * `_` / `-` turned into a space, has whitespace collapsed and is finally
 * trimmed.
 *
 * @param {*} value - Raw header cell; falsy values are treated as ''.
 * @returns {string} Normalized header text.
 */
function normalizeHeader(value) {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: separators at the edges ("_cep_", "numero -") become spaces
    // above and would otherwise break exact alias comparisons.
    .trim();
}

/**
 * Tells whether a file should be read as a spreadsheet rather than as
 * delimited text.
 *
 * A `.xls` / `.xlsx` extension is accepted without touching the file.
 * Otherwise the first 512 bytes are inspected for a ZIP ("PK") signature,
 * the 8-byte OLE signature, or HTML/table markup.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} True when the file looks like an Excel file.
 * @throws {Error} Propagates file-system errors when the file cannot be opened or read.
 */
function isExcelFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (['.xls', '.xlsx'].includes(ext)) return true;

  // Only the head of the file is needed, so read just that instead of
  // loading the whole upload into memory.
  const head = Buffer.alloc(512);
  let bytesRead = 0;
  const fd = fs.openSync(filePath, 'r');
  try {
    bytesRead = fs.readSync(fd, head, 0, head.length, 0);
  } finally {
    fs.closeSync(fd);
  }
  const fileStart = head.subarray(0, bytesRead);

  const isZipBasedXlsx = fileStart[0] === 0x50 && fileStart[1] === 0x4b;
  const oleSignature = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
  const isOleBasedXls = oleSignature.every((byte, index) => fileStart[index] === byte);

  const startText = fileStart.toString('latin1').trimStart().toLowerCase();
  const isHtmlExcel = startText.startsWith('<html')
    || startText.startsWith('<!doctype html')
    || startText.includes('<table');

  return isZipBasedXlsx || isOleBasedXls || isHtmlExcel;
}

/**
 * Tells whether a file should be handled as an XLSX workbook.
 *
 * A `.xlsx` extension is accepted without touching the file; otherwise the
 * file must start with the two bytes "PK" (ZIP container signature).
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} True when the file looks like an XLSX file.
 * @throws {Error} Propagates file-system errors when the file cannot be opened or read.
 */
function isXlsxFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (ext === '.xlsx') return true;

  // Read only the two signature bytes instead of the whole file.
  const signature = Buffer.alloc(2);
  let bytesRead = 0;
  const fd = fs.openSync(filePath, 'r');
  try {
    bytesRead = fs.readSync(fd, signature, 0, signature.length, 0);
  } finally {
    fs.closeSync(fd);
  }

  return bytesRead === signature.length && signature[0] === 0x50 && signature[1] === 0x4b;
}

/**
 * Picks the delimiter that splits the given line into the most pieces.
 *
 * Candidates are tried in the order `;`, tab, `,`; on a tie the earlier
 * candidate wins.
 *
 * @param {string} line - Line used for detection.
 * @returns {string} The chosen delimiter.
 */
function detectDelimiter(line) {
  let winner;
  let winnerPieces = -1;

  for (const candidate of [';', '\t', ',']) {
    const pieces = line.split(candidate).length;
    // Strictly greater, so the first candidate keeps the lead on ties.
    if (pieces > winnerPieces) {
      winner = candidate;
      winnerPieces = pieces;
    }
  }

  return winner;
}

/**
 * Splits one line of delimited text into trimmed column values.
 *
 * Double quotes delimit a quoted field in which the delimiter is literal;
 * the quote characters themselves are not part of the value. A doubled
 * quote (`""`) is an escaped quote only inside a quoted field, so an empty
 * quoted field (`""`) yields an empty string.
 *
 * @param {string} line - The line to split.
 * @param {string} delimiter - Single-character column delimiter.
 * @returns {string[]} Column values, each trimmed.
 */
function splitDelimitedLine(line, delimiter) {
  const cols = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: emit one quote, skip its pair.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === delimiter && !inQuotes) {
      cols.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  cols.push(current.trim());
  return cols;
}

/**
 * Reads a delimited text file (UTF-8) into an array of rows.
 *
 * A leading BOM is removed, blank lines are dropped, and the delimiter is
 * detected from the first remaining line and applied to every line.
 *
 * @param {string} filePath - Path of the text file.
 * @returns {string[][]} Rows of column values; empty when the file has no content lines.
 */
function readDelimitedRows(filePath) {
  const text = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');
  const contentLines = text.split(/\r?\n/).filter(candidate => candidate.trim());

  if (contentLines.length === 0) {
    return [];
  }

  const delimiter = detectDelimiter(contentLines[0]);
  const rows = [];
  for (const contentLine of contentLines) {
    rows.push(splitDelimitedLine(contentLine.replace(/\r$/, ''), delimiter));
  }
  return rows;
}

/**
 * Reads the first sheet of a workbook into an array of rows of trimmed
 * strings.
 *
 * Blank rows are kept and the read always starts at the sheet's first row,
 * so that `rows[i]` corresponds to sheet row `i + 1`. Callers write results
 * back by row index, which requires that alignment.
 *
 * @param {string} filePath - Path of the workbook.
 * @returns {string[][]} Rows of cell text; empty when the workbook has no sheet.
 */
function readExcelRows(filePath) {
  const workbook = XLSX.readFile(filePath, { cellDates: false, raw: false });
  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) return [];

  const grid = XLSX.utils.sheet_to_json(workbook.Sheets[firstSheetName], {
    header: 1,
    // By default sheet_to_json starts at the first row of the used range
    // ('!ref'); a sheet whose data begins below row 1 would then yield array
    // indexes that are offset from the real sheet rows. Force row 0.
    range: 0,
    blankrows: true,
    defval: ''
  });

  return grid.map(row => row.map(cell => String(cell ?? '').trim()));
}

/**
 * Reads an input file into rows, using the spreadsheet reader when the file
 * is detected as Excel and the delimited-text reader otherwise.
 *
 * @param {string} filePath - Path of the input file.
 * @returns {string[][]} Rows of column values.
 */
function readRows(filePath) {
  if (isExcelFile(filePath)) {
    return readExcelRows(filePath);
  }
  return readDelimitedRows(filePath);
}

function findFirstHeaderIndex(headers, predicate) {
  return headers.map(normalizeHeader).findIndex(predicate);
}

function hasHeaderAlias(headers, aliases) {
  const normalizedAliases = aliases.map(normalizeHeader);
  return headers.map(normalizeHeader).some(header => normalizedAliases.includes(header));
}

/**
 * Checks whether any header names a CEP column: the normalized text
 * contains the whole word "cep" or is exactly "codigo postal".
 *
 * @param {Array} headers - Raw header cells.
 * @returns {boolean} True when a CEP-like header is present.
 */
function hasCepHeader(headers) {
  const isCepLike = header => /\bcep\b/.test(header) || header === 'codigo postal';
  return headers.some(rawHeader => isCepLike(normalizeHeader(rawHeader)));
}

/**
 * Checks whether any header names a street-number column (exact match on a
 * known label) or an address column (contains "endereco" or "logradouro").
 *
 * @param {Array} headers - Raw header cells.
 * @returns {boolean} True when such a header is present.
 */
function hasAddressOrNumberHeader(headers) {
  const numberLabels = ['numero', 'num', 'nº', 'n°'];

  for (const rawHeader of headers) {
    const header = normalizeHeader(rawHeader);
    if (numberLabels.includes(header)) return true;
    if (header.includes('endereco')) return true;
    if (header.includes('logradouro')) return true;
  }

  return false;
}

/**
 * Checks whether the headers include both a latitude and a longitude column.
 *
 * @param {Array} headers - Raw header cells.
 * @returns {boolean} True when both coordinate headers are present.
 */
function hasGeoHeaders(headers) {
  if (!hasHeaderAlias(headers, ['latitude', 'lat'])) {
    return false;
  }
  return hasHeaderAlias(headers, ['longitude', 'long', 'lng', 'lon']);
}

/**
 * Locates the header row: the first row that has either a CEP header plus
 * an address/number header, or both coordinate headers.
 *
 * @param {Array[]} rows - All rows of the input.
 * @returns {number} Index of the header row, or 0 when no row qualifies.
 */
function findHeaderRowIndex(rows) {
  const looksLikeHeaderRow = row => {
    const hasCepColumns = hasCepHeader(row) && hasAddressOrNumberHeader(row);
    return hasCepColumns || hasGeoHeaders(row);
  };

  const found = rows.findIndex(looksLikeHeaderRow);
  return found < 0 ? 0 : found;
}

/**
 * Resolves which column holds each field of interest.
 *
 * CEP and address columns are matched by pattern/substring on the
 * normalized header; number, latitude and longitude columns require an
 * exact match against a list of aliases.
 *
 * @param {Array} headers - Raw header cells of the header row.
 * @returns {{idxCep: number, idxNumero: number, idxEndereco: number, idxLatitude: number, idxLongitude: number}}
 *   Column index per field, -1 for fields that were not found.
 */
function resolveColumnIndexes(headers) {
  const normalized = headers.map(normalizeHeader);

  const indexWhere = predicate => normalized.findIndex(predicate);
  const indexOfAlias = aliases => {
    const wanted = aliases.map(normalizeHeader);
    return indexWhere(header => wanted.includes(header));
  };

  const idxCep = indexWhere(header => /\bcep\b/.test(header) || header === 'codigo postal');
  const idxNumero = indexOfAlias(['numero', 'número', 'num', 'nº', 'n°']);
  const idxEndereco = indexWhere(header => header.includes('endereco') || header.includes('logradouro'));
  const idxLatitude = indexOfAlias(['latitude', 'lat']);
  const idxLongitude = indexOfAlias(['longitude', 'long', 'lng', 'lon']);

  return { idxCep, idxNumero, idxEndereco, idxLatitude, idxLongitude };
}

/**
 * Extracts a street number from free-form address text.
 *
 * Road/kilometre markers (BR, SP, GO, MT, KM followed by digits) and
 * numbers directly before a closing parenthesis are blanked out first.
 * The number is then taken, in order of preference, from a labelled number
 * ("nº 45"), a number after a comma, or the last standalone number.
 *
 * @param {*} address - Address text; falsy values are treated as empty.
 * @returns {string} The extracted number, or '1' when none is found.
 */
function extractAddressNumber(address) {
  const fallbackNumber = '1';
  const text = String(address || '').trim();
  if (text === '') {
    return fallbackNumber;
  }

  const roadOrKmMarker = /\b(BR|SP|GO|MT|KM)\s*[-]?\s*\d+[A-Z]?\b/gi;
  const numberBeforeClosingParen = /\b\d+\s*[A-Z]?\b\s*(?=\))/gi;
  const candidateText = text
    .replace(roadOrKmMarker, ' ')
    .replace(numberBeforeClosingParen, ' ');

  const labeled = /\b(?:n|no|num|numero|número|nº|n°)\.?\s*[:,-]?\s*(\d+[A-Z]?)\b/i.exec(candidateText);
  if (labeled !== null) {
    return labeled[1];
  }

  const afterComma = /,\s*(\d+[A-Z]?)\b/i.exec(candidateText);
  if (afterComma !== null) {
    return afterComma[1];
  }

  const standalone = candidateText.match(/\b\d+[A-Z]?\b/gi);
  if (standalone === null) {
    return fallbackNumber;
  }
  return standalone[standalone.length - 1];
}

/**
 * Builds the CEP-based lookup payload for one row.
 *
 * The CEP is reduced to its digits. The number comes from the number column
 * when it has text, otherwise it is extracted from the address column.
 *
 * @param {Array} cols - Column values of the row.
 * @param {{idxCep: number, idxNumero: number, idxEndereco: number}} indexes - Resolved column indexes.
 * @returns {{cep: string, numero: string}|null} Payload, or null when the row has no CEP digits.
 */
function buildCepPayload(cols, indexes) {
  const cellAt = columnIndex => (columnIndex >= 0 ? cols[columnIndex] : '');

  const cep = String(cellAt(indexes.idxCep) || '').replace(/\D/g, '');
  if (!cep) {
    return null;
  }

  const explicitNumber = String(cellAt(indexes.idxNumero) || '').trim();
  const numero = explicitNumber || extractAddressNumber(cellAt(indexes.idxEndereco));
  return { cep, numero };
}

/**
 * Parses a coordinate cell into a number, accepting a decimal comma.
 *
 * @param {*} value - Raw cell value; null/undefined are treated as empty.
 * @returns {number} The parsed finite number, or NaN when parsing fails or
 *   the result is not finite.
 */
function parseCoordinate(value) {
  const text = String(value ?? '').trim();
  const number = parseFloat(text.replace(',', '.'));

  if (!Number.isFinite(number)) {
    return NaN;
  }
  return number;
}

/**
 * Builds the coordinate-based lookup payload for one row.
 *
 * @param {Array} cols - Column values of the row.
 * @param {{idxLatitude: number, idxLongitude: number}} indexes - Resolved column indexes.
 * @returns {{latitude: number, longitude: number}|null} Payload, or null
 *   when either coordinate column is missing or does not parse to a number.
 */
function buildGeoPayload(cols, indexes) {
  const { idxLatitude, idxLongitude } = indexes;

  let latitude = NaN;
  if (idxLatitude >= 0) {
    latitude = parseCoordinate(cols[idxLatitude]);
  }

  let longitude = NaN;
  if (idxLongitude >= 0) {
    longitude = parseCoordinate(cols[idxLongitude]);
  }

  const hasBothCoordinates = !Number.isNaN(latitude) && !Number.isNaN(longitude);
  return hasBothCoordinates ? { latitude, longitude } : null;
}

/**
 * Queries viability with the coordinate payload first and, if that attempt
 * throws or returns an object with an `error` field, retries with the CEP
 * payload.
 *
 * @param {object|null} geoPayload - Coordinate payload, or a falsy value to skip it.
 * @param {object|null} cepPayload - CEP payload, or a falsy value to skip it.
 * @returns {Promise<*>} The first result that is falsy or has no `error` field.
 * @throws {Error} The error of the last failed attempt, or a generic error
 *   when no payload was provided.
 */
async function consultarComFallback(geoPayload, cepPayload) {
  let lastError = null;

  // Attempt order matters: coordinates first, CEP as the fallback.
  for (const payload of [geoPayload, cepPayload]) {
    if (!payload) continue;

    try {
      const result = await consultarViabilidade(payload);
      if (!result || !result.error) return result;
      lastError = new Error(result.error);
    } catch (err) {
      lastError = err;
    }
  }

  throw lastError || new Error('Linha sem latitude/longitude ou CEP valido');
}

/**
 * Makes a value safe to place in a `;`-separated output line: line breaks
 * and semicolons become spaces and double quotes become single quotes.
 *
 * @param {*} value - Value to clean; null/undefined become ''.
 * @returns {string} The cleaned text.
 */
function cleanCsvValue(value) {
  const singleLine = String(value ?? '').replace(/[\r\n;]/g, ' ');
  return singleLine.replace(/"/g, "'");
}

/**
 * Produces a readable message from an error raised by an API call.
 *
 * When the error carries `response.data`, that body wins: a string body is
 * returned as is, otherwise its `error` field, then its `message` field,
 * then its JSON serialization. Without a body, the error's own message (or
 * its string form) is used.
 *
 * @param {*} error - The caught error.
 * @returns {*} The message; a falsy `error` is returned unchanged.
 */
function formatApiErrorResponse(error) {
  const body = error && error.response && error.response.data;
  const hasBody = body !== undefined && body !== null;

  if (!hasBody) {
    return error && (error.message || String(error));
  }

  if (typeof body === 'string') {
    return body;
  }
  return body.error || body.message || JSON.stringify(body);
}

/**
 * Builds the result-column values for a row whose lookup returned a result.
 *
 * @param {object} viab - Lookup result; reads `provedor`, `distancia`
 *   (falling back to `raw.distancia` / `raw.distance`), `dedicado`,
 *   `naoDedicado` and `error`.
 * @returns {Array} Values in the order provider, distance, dedicated,
 *   non-dedicated, error.
 */
function buildSuccessResultColumns(viab) {
  const toLabel = flag => (flag ? 'Viavel' : 'Nao Viavel');

  const columns = [];
  columns.push(viab.provedor ?? '');
  columns.push(viab.distancia ?? (viab.raw && (viab.raw.distancia || viab.raw.distance)) ?? '');
  columns.push(toLabel(viab.dedicado));
  columns.push(toLabel(viab.naoDedicado));
  columns.push(viab.error ? cleanCsvValue(viab.error) : '');
  return columns;
}

/**
 * Builds the result-column values for a row whose lookup failed: the first
 * four columns are blank and the last one carries the cleaned error message.
 *
 * @param {*} err - The caught error.
 * @returns {string[]} Five values, matching the result-column order.
 */
function buildErrorResultColumns(err) {
  const message = cleanCsvValue(formatApiErrorResponse(err));
  return ['', '', '', '', message];
}

/**
 * Copies the style-related properties of a cell into a new object so that
 * the result does not share its top-level style objects with the source.
 *
 * @param {object} cell - Cell exposing `numFmt`, `font`, `alignment`,
 *   `border`, `fill` and `protection`.
 * @returns {object} Style object; properties that are falsy on the source
 *   (other than `numFmt`) are `undefined`.
 */
function cloneCellStyle(cell) {
  const shallowCopy = part => (part ? { ...part } : undefined);
  const { numFmt, font, alignment, border, fill, protection } = cell;

  return {
    numFmt,
    font: shallowCopy(font),
    alignment: shallowCopy(alignment),
    border: shallowCopy(border),
    fill: shallowCopy(fill),
    protection: shallowCopy(protection)
  };
}

/**
 * Fills in the header cells of the inserted result columns and gives those
 * columns a width and header style based on the column right after them
 * (the first original column).
 *
 * @param {object} worksheet - ExcelJS worksheet that already has the result columns inserted.
 * @param {number} headerRowNumber - 1-based number of the header row.
 * @returns {void}
 */
function styleInsertedResultColumns(worksheet, headerRowNumber) {
  const templateColumnNumber = RESULT_HEADERS.length + 1;

  for (const [offset, title] of RESULT_HEADERS.entries()) {
    const resultColumnNumber = offset + 1;

    // Result columns are at least 16 wide, or as wide as the template column.
    const templateColumn = worksheet.getColumn(templateColumnNumber);
    const resultColumn = worksheet.getColumn(resultColumnNumber);
    resultColumn.width = Math.max(16, templateColumn.width || 0);

    const resultHeaderCell = worksheet.getRow(headerRowNumber).getCell(resultColumnNumber);
    const templateHeaderCell = worksheet.getRow(headerRowNumber).getCell(templateColumnNumber);
    resultHeaderCell.value = title;
    resultHeaderCell.style = cloneCellStyle(templateHeaderCell);
  }
}

/**
 * Processes an XLSX input: inserts the result columns at the left of the
 * first worksheet, looks up every row that yields a payload, writes the
 * outcome into that row and saves the workbook to `outputPath`.
 *
 * Every looked-up row is counted through `incrementProcessed`; failed rows
 * are additionally counted through `incrementErrors`.
 *
 * @param {*} jobId - Job identifier passed to the job-store counters.
 * @param {string} inputPath - Path of the XLSX file to read.
 * @param {string} outputPath - Path of the XLSX file to write.
 * @param {Array[]} rows - Rows already read from the input.
 * @param {number} headerRowIndex - 0-based index of the header row in `rows`.
 * @param {object} indexes - Resolved column indexes.
 * @returns {Promise<void>}
 */
async function processXlsxFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes) {
  const workbook = new ExcelJS.Workbook();
  await workbook.xlsx.readFile(inputPath);

  const worksheet = workbook.worksheets[0];
  const templateColumnNumber = RESULT_HEADERS.length + 1;
  worksheet.spliceColumns(1, 0, ...RESULT_HEADERS.map(() => []));
  styleInsertedResultColumns(worksheet, headerRowIndex + 1);

  // Writes one value per result column, styled like the row's first
  // original cell (now located right after the result columns).
  const writeResultCells = (sheetRow, values) => {
    values.forEach((value, offset) => {
      const target = sheetRow.getCell(offset + 1);
      target.value = value;
      target.style = cloneCellStyle(sheetRow.getCell(templateColumnNumber));
    });
  };

  for (let dataIndex = headerRowIndex + 1; dataIndex < rows.length; dataIndex++) {
    const cols = rows[dataIndex];
    const geoPayload = buildGeoPayload(cols, indexes);
    const cepPayload = buildCepPayload(cols, indexes);
    if (!geoPayload && !cepPayload) continue;

    // Worksheet rows are 1-based while `rows` is 0-based.
    const sheetRow = worksheet.getRow(dataIndex + 1);
    try {
      const viab = await consultarComFallback(geoPayload, cepPayload);
      writeResultCells(sheetRow, buildSuccessResultColumns(viab));
      incrementProcessed(jobId);
    } catch (err) {
      writeResultCells(sheetRow, buildErrorResultColumns(err));
      incrementErrors(jobId);
      incrementProcessed(jobId);
    }
    sheetRow.commit();
  }

  await workbook.xlsx.writeFile(outputPath);
}

/**
 * Moves an A1-style cell address horizontally by a number of columns.
 *
 * @param {string} address - Cell address such as "B3".
 * @param {number} colOffset - Number of columns to add.
 * @returns {string} The shifted cell address.
 */
function shiftCellAddress(address, colOffset) {
  const { r, c } = XLSX.utils.decode_cell(address);
  return XLSX.utils.encode_cell({ r, c: c + colOffset });
}

/**
 * Moves a range horizontally by a number of columns.
 *
 * @param {string|{s: {r: number, c: number}, e: {r: number, c: number}}} range -
 *   A1-style range string or decoded range object.
 * @param {number} colOffset - Number of columns to add to both corners.
 * @returns {{s: {r: number, c: number}, e: {r: number, c: number}}} A new
 *   decoded range; the input object is not modified.
 */
function shiftRange(range, colOffset) {
  const { s, e } = typeof range === 'string' ? XLSX.utils.decode_range(range) : range;
  const moveCorner = corner => ({ r: corner.r, c: corner.c + colOffset });

  return { s: moveCorner(s), e: moveCorner(e) };
}

/**
 * Builds a new SheetJS worksheet in which every original cell is moved
 * right by the number of result columns, and the freed columns hold the
 * result headers and the per-row results.
 *
 * Column widths, merges and the autofilter range are shifted along with
 * the cells; row properties ('!rows') are carried over unchanged because
 * rows do not move.
 *
 * @param {object} worksheet - Original SheetJS worksheet (not modified).
 * @param {number} headerRowIndex - 0-based sheet row that receives the result headers.
 * @param {Array<{rowIndex: number, values: Array}>} rowResults - Result
 *   values per 0-based sheet row.
 * @returns {object} The new worksheet object.
 */
function prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults) {
  const colOffset = RESULT_HEADERS.length;
  const shiftedWorksheet = {};

  for (const key of Object.keys(worksheet)) {
    // Keys starting with '!' are sheet-level metadata, not cell addresses.
    if (key[0] === '!') continue;
    shiftedWorksheet[shiftCellAddress(key, colOffset)] = worksheet[key];
  }

  const originalRange = worksheet['!ref']
    ? XLSX.utils.decode_range(worksheet['!ref'])
    : { s: { r: 0, c: 0 }, e: { r: headerRowIndex, c: 0 } };

  shiftedWorksheet['!ref'] = XLSX.utils.encode_range({
    s: { r: Math.min(originalRange.s.r, headerRowIndex), c: 0 },
    e: { r: originalRange.e.r, c: originalRange.e.c + colOffset }
  });

  if (worksheet['!cols']) {
    // One object per new column: Array.fill would share a single instance.
    const resultCols = Array.from({ length: colOffset }, () => ({ wch: 16 }));
    shiftedWorksheet['!cols'] = resultCols.concat(worksheet['!cols']);
  }

  if (worksheet['!rows']) {
    // Row heights / hidden flags are unaffected by a column shift.
    shiftedWorksheet['!rows'] = worksheet['!rows'];
  }

  if (worksheet['!merges']) {
    shiftedWorksheet['!merges'] = worksheet['!merges'].map(merge => shiftRange(merge, colOffset));
  }

  if (worksheet['!autofilter'] && worksheet['!autofilter'].ref) {
    shiftedWorksheet['!autofilter'] = {
      ...worksheet['!autofilter'],
      ref: XLSX.utils.encode_range(shiftRange(worksheet['!autofilter'].ref, colOffset))
    };
  }

  RESULT_HEADERS.forEach((value, index) => {
    const address = XLSX.utils.encode_cell({ r: headerRowIndex, c: index });
    shiftedWorksheet[address] = { t: 's', v: value };
  });

  for (const { rowIndex, values } of rowResults) {
    values.forEach((value, index) => {
      const address = XLSX.utils.encode_cell({ r: rowIndex, c: index });
      shiftedWorksheet[address] = { t: 's', v: String(value ?? '') };
    });
  }

  return shiftedWorksheet;
}

/**
 * Counts the data rows (those after the header row) from which a
 * coordinate payload or a CEP payload can be built.
 *
 * @param {string} inputPath - Path of the input file.
 * @returns {Promise<number>} Number of rows that would be looked up.
 */
async function countValidLines(inputPath) {
  await discoverDataType(inputPath);

  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const indexes = resolveColumnIndexes(rows[headerRowIndex] || []);

  const isQueryable = cols => Boolean(buildGeoPayload(cols, indexes) || buildCepPayload(cols, indexes));
  return rows.slice(headerRowIndex + 1).filter(isQueryable).length;
}

/**
 * Processes an uploaded file (delimited text, XLS or XLSX): looks up every
 * row that yields a coordinate or CEP payload and writes a copy of the
 * input with the result columns placed before the original columns.
 *
 * The output is written to the `outputs` directory next to this module's
 * parent directory, as `.xlsx` for Excel inputs and `.csv` otherwise. Every
 * looked-up row is counted through `incrementProcessed`, failed rows also
 * through `incrementErrors`, and `finishJob` receives the output file name
 * once the file is written.
 *
 * @param {*} jobId - Job identifier passed to the job-store functions.
 * @param {string} inputPath - Path of the uploaded file.
 * @param {string} [originalName] - Original file name, used for the output
 *   name; falls back to `inputPath`.
 * @returns {Promise<string>} Path of the generated output file.
 */
async function processCsvFile(jobId, inputPath, originalName) {
  await discoverDataType(inputPath);
  const rows = readRows(inputPath);
  const headerRowIndex = findHeaderRowIndex(rows);
  const headers = rows[headerRowIndex] || [];
  const indexes = resolveColumnIndexes(headers);
  // path.parse(...).name drops any directory part and the extension.
  const baseName = path.parse(originalName || inputPath).name;
  const isExcel = isExcelFile(inputPath);
  const outputFilename = `processed_${Date.now()}_${baseName}${isExcel ? '.xlsx' : '.csv'}`;
  const outputPath = path.join(__dirname, '..', 'outputs', outputFilename);
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });

  // XLSX inputs are edited in place with ExcelJS.
  if (isXlsxFile(inputPath)) {
    await processXlsxFile(jobId, inputPath, outputPath, rows, headerRowIndex, indexes);
    finishJob(jobId, path.basename(outputPath));

    return outputPath;
  }

  // Remaining Excel inputs are read with SheetJS and rewritten as XLSX.
  if (isExcel) {
    const workbook = XLSX.readFile(inputPath, { cellDates: false, raw: false, cellStyles: true });
    const firstSheetName = workbook.SheetNames[0];
    const worksheet = workbook.Sheets[firstSheetName];
    const rowResults = [];

    for (let rowIndex = headerRowIndex + 1; rowIndex < rows.length; rowIndex++) {
      const cols = rows[rowIndex];
      const geoPayload = buildGeoPayload(cols, indexes);
      const cepPayload = buildCepPayload(cols, indexes);
      if (!geoPayload && !cepPayload) continue;

      try {
        const viab = await consultarComFallback(geoPayload, cepPayload);
        rowResults.push({ rowIndex, values: buildSuccessResultColumns(viab) });
        incrementProcessed(jobId);
      } catch (err) {
        rowResults.push({ rowIndex, values: buildErrorResultColumns(err) });
        incrementErrors(jobId);
        incrementProcessed(jobId);
      }
    }

    workbook.Sheets[firstSheetName] = prependResultColumnsToWorksheet(worksheet, headerRowIndex, rowResults);
    XLSX.writeFile(workbook, outputPath, { bookType: 'xlsx' });
    finishJob(jobId, path.basename(outputPath));

    return outputPath;
  }

  // Delimited text: stream a ';'-separated file, starting with a BOM.
  const outStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
  outStream.write('\uFEFF');
  // Header cells go through the same cleaning as data cells; a header that
  // contains ';' or a quote would otherwise misalign the output columns.
  outStream.write([...RESULT_HEADERS, ...headers].map(cleanCsvValue).join(';') + '\n');

  for (const cols of rows.slice(headerRowIndex + 1)) {
    const geoPayload = buildGeoPayload(cols, indexes);
    const cepPayload = buildCepPayload(cols, indexes);
    if (!geoPayload && !cepPayload) continue;

    try {
      const viab = await consultarComFallback(geoPayload, cepPayload);
      const outCols = [...buildSuccessResultColumns(viab), ...cols].map(cleanCsvValue);
      outStream.write(outCols.join(';') + '\n');
      incrementProcessed(jobId);
    } catch (err) {
      const outCols = [...buildErrorResultColumns(err), ...cols].map(cleanCsvValue);
      outStream.write(outCols.join(';') + '\n');
      incrementErrors(jobId);
      incrementProcessed(jobId);
    }
  }

  outStream.end();
  // Wait until everything is flushed before reporting the job as finished.
  await once(outStream, 'finish');

  finishJob(jobId, path.basename(outputPath));

  return outputPath;
}

// Public API of this module: the file processor and the valid-row counter.
module.exports = { processCsvFile, countValidLines };