// src/api/Providers/OCRProvider.js
const { ProviderError } = require("./ErrorHandler");
const {
  calculateLevenshteinDistance,
  normalizeOCRText,
} = require("../utils/StringUtils");

/**
 * Uploads document images, runs OCR on them, persists the outcome and offers
 * helpers for validating the extracted text (field comparison and fuzzy
 * shipment-ID matching).
 *
 * Collaborators are injected through the constructor config:
 *   - ocrService:    uploadImage(file, entityId) and
 *                    processImage(uploadResult, entityId) are called on it
 *   - ocrRepository: create(record) is called on it
 *
 * NOTE(review): processDocument and extractDataFromImage call
 * this.extractStructuredData(ocrResults, documentType), which is not defined
 * in this class. Presumably a subclass or mixin supplies it; confirm. Both
 * methods now check for it before doing any I/O.
 */
class OCRProvider {
  /**
   * @param {Object} [config]
   * @param {Object} [config.ocrRepository] - Persistence collaborator.
   * @param {Object} [config.ocrService] - OCR/upload collaborator.
   */
  constructor(config = {}) {
    // Construction never throws on missing collaborators; each method checks
    // the ones it needs right before using them.
    const settings = config || {};
    this.ocrRepository = settings.ocrRepository || null;
    this.ocrService = settings.ocrService || null;
  }

  /**
   * Throws when `target` is absent or lacks one of the listed methods.
   *
   * @param {string} label - Human-readable name used in the error message.
   * @param {Object|null} target - Object expected to expose the methods.
   * @param {Array<string>} methodNames - Method names that must be functions.
   * @throws {ProviderError} code "ocr_dependency_missing".
   */
  assertHasMethods(label, target, methodNames) {
    const isUsable =
      target !== null &&
      target !== undefined &&
      methodNames.every((name) => typeof target[name] === "function");

    if (!isUsable) {
      throw new ProviderError(
        `${label} is not configured or does not implement: ${methodNames.join(
          ", "
        )}`,
        "ocr_dependency_missing"
      );
    }
  }

  /**
   * Uploads a document, runs OCR on it and stores the result.
   *
   * @param {*} verificationId - Stored as `verification_id` on the record.
   * @param {*} file - Passed through to ocrService.uploadImage.
   * @param {Object} [options]
   * @param {string} [options.documentType="unknown"]
   * @param {*} [options.entityId] - Forwarded to the OCR service calls.
   * @param {string} [options.provider="internal"]
   * @returns {Promise<Object>} `{ success, verificationDetail, extractedData, documentUrl }`.
   * @throws {ProviderError} Always with code "ocr_processing_failed"; the
   *   underlying error (including the inner "ocr_upload_failed" one) is passed
   *   along as `originalError`.
   */
  async processDocument(verificationId, file, options = {}) {
    try {
      const { documentType = "unknown", entityId } = options;

      // Check every collaborator before the first remote call so that a
      // misconfiguration cannot leave an uploaded-but-unrecorded document.
      this.assertHasMethods("OCR service", this.ocrService, [
        "uploadImage",
        "processImage",
      ]);
      this.assertHasMethods("OCR repository", this.ocrRepository, ["create"]);
      this.assertHasMethods("OCRProvider", this, ["extractStructuredData"]);

      // 1. Upload the document image
      const uploadResult = await this.ocrService.uploadImage(file, entityId);

      if (!uploadResult || !uploadResult.signedUrl) {
        throw new ProviderError(
          "Failed to upload document image",
          "ocr_upload_failed"
        );
      }

      // 2. Process the image with OCR
      const ocrResults = await this.ocrService.processImage(
        uploadResult,
        entityId
      );

      // 3. Save OCR verification details
      const ocrDetail = await this.ocrRepository.create({
        verification_id: verificationId,
        provider: options.provider || "internal",
        document_type: documentType,
        ocr_data: ocrResults,
        confidence_score: this.calculateConfidenceScore(ocrResults),
        extracted_data: this.extractStructuredData(ocrResults, documentType),
        raw_provider_data: ocrResults,
      });

      return {
        success: true,
        verificationDetail: ocrDetail,
        extractedData: ocrDetail.extracted_data,
        documentUrl: uploadResult.signedUrl,
      };
    } catch (error) {
      console.error("OCR processing error:", error);
      throw new ProviderError(
        `Failed to process document: ${error.message}`,
        "ocr_processing_failed",
        { originalError: error }
      );
    }
  }

  /**
   * Produces structured data either from an image URL (OCR is run first) or
   * from OCR results that were obtained earlier.
   *
   * @param {string|Object} imageData - A string is treated as a signed URL and
   *   sent to ocrService.processImage; anything else is used as OCR results.
   * @param {string} documentType - Forwarded to extractStructuredData.
   * @returns {Promise<Object>} `{ success, extractedData, rawData }`.
   * @throws {ProviderError} code "data_extraction_failed".
   */
  async extractDataFromImage(imageData, documentType) {
    try {
      this.assertHasMethods("OCRProvider", this, ["extractStructuredData"]);

      let ocrResults;
      if (typeof imageData === "string") {
        // Only the URL path needs the OCR service.
        this.assertHasMethods("OCR service", this.ocrService, ["processImage"]);
        ocrResults = await this.ocrService.processImage(
          { signedUrl: imageData },
          null
        );
      } else {
        // imageData already holds OCR results
        ocrResults = imageData;
      }

      const extractedData = this.extractStructuredData(
        ocrResults,
        documentType
      );

      return {
        success: true,
        extractedData,
        rawData: ocrResults,
      };
    } catch (error) {
      console.error("Data extraction error:", error);
      throw new ProviderError(
        `Failed to extract data from image: ${error.message}`,
        "data_extraction_failed",
        { originalError: error }
      );
    }
  }

  /**
   * Compares every field of `expectedData` with the extracted data.
   *
   * A field counts as missing when its extracted value is undefined, null or
   * an empty string. Missing fields are recorded as `false` in `matches`, so
   * `matchPercentage` is computed over all expected fields.
   *
   * @param {Object} results - Either `{ extractedData: {...} }` or the
   *   extracted-data object itself.
   * @param {Object} expectedData - Field name to expected value.
   * @returns {Promise<Object>} `{ valid, errors, matches, matchPercentage }`,
   *   or `{ valid: false, errors }` when an argument is missing.
   * @throws {ProviderError} code "validation_failed" on unexpected errors.
   */
  async validateOCRResults(results, expectedData) {
    try {
      if (!results || !expectedData) {
        return {
          valid: false,
          errors: ["Missing OCR results or expected data"],
        };
      }

      const extractedData = results.extractedData || results;
      const errors = [];
      const matches = {};

      for (const [field, expectedValue] of Object.entries(expectedData)) {
        const extractedValue = extractedData[field];
        // Explicit absence test: falsy-but-present values such as 0 must
        // still be compared instead of being reported as missing.
        const isMissing =
          extractedValue === undefined ||
          extractedValue === null ||
          extractedValue === "";

        if (isMissing) {
          matches[field] = false;
          errors.push(`Field '${field}' not found in extracted data`);
          continue;
        }

        const isMatch = this.compareValues(extractedValue, expectedValue);
        matches[field] = isMatch;

        if (!isMatch) {
          errors.push(
            `Field '${field}' value mismatch: expected '${expectedValue}', got '${extractedValue}'`
          );
        }
      }

      return {
        valid: errors.length === 0,
        errors,
        matches,
        matchPercentage: this.calculateMatchPercentage(matches),
      };
    } catch (error) {
      console.error("Validation error:", error);
      throw new ProviderError(
        `Failed to validate OCR results: ${error.message}`,
        "validation_failed",
        { originalError: error }
      );
    }
  }

  /**
   * Averages the per-word confidences found under
   * `ocrResults.microsoft.pages[].lines[].words[].confidence`.
   *
   * @param {Object} ocrResults
   * @returns {number|null} 0 when there are no results or no `microsoft`
   *   section, null when no numeric word confidence is present, otherwise the
   *   arithmetic mean.
   */
  calculateConfidenceScore(ocrResults) {
    if (!ocrResults || !ocrResults.microsoft) {
      return 0;
    }

    const { pages } = ocrResults.microsoft;
    if (!Array.isArray(pages)) {
      return null;
    }

    const confidences = [];
    for (const page of pages) {
      const lines = page && Array.isArray(page.lines) ? page.lines : [];
      for (const line of lines) {
        const words = line && Array.isArray(line.words) ? line.words : [];
        for (const word of words) {
          // A confidence of 0 is a real measurement and must be averaged in;
          // a truthiness test would silently drop it and inflate the score.
          if (word && Number.isFinite(word.confidence)) {
            confidences.push(word.confidence);
          }
        }
      }
    }

    if (confidences.length === 0) {
      return null;
    }

    const total = confidences.reduce((sum, value) => sum + value, 0);
    return total / confidences.length;
  }

  /**
   * Loose equality for OCR text: case and whitespace are ignored and either
   * value may be contained in the other.
   *
   * @param {string|number} extracted
   * @param {string|number} expected
   * @returns {boolean} false for any value that is neither a string nor a
   *   finite number.
   */
  compareValues(extracted, expected) {
    const toText = (value) => {
      if (typeof value === "string") {
        return value;
      }
      if (typeof value === "number" && Number.isFinite(value)) {
        return String(value);
      }
      return null;
    };

    const extractedText = toText(extracted);
    const expectedText = toText(expected);
    if (extractedText === null || expectedText === null) {
      return false;
    }

    const normalize = (str) => str.toLowerCase().replace(/\s+/g, "");
    const left = normalize(extractedText);
    const right = normalize(expectedText);

    // String.prototype.includes("") is always true, so an empty side would
    // otherwise match everything.
    if (left === "" || right === "") {
      return left === right;
    }

    return left.includes(right) || right.includes(left);
  }

  /**
   * @param {Object} matches - Field name to boolean match flag.
   * @returns {number} Percentage (0-100) of truthy flags; 0 for no fields.
   */
  calculateMatchPercentage(matches) {
    const flags = Object.values(matches);
    if (flags.length === 0) {
      return 0;
    }

    const matchingFields = flags.filter(Boolean).length;
    return (matchingFields / flags.length) * 100;
  }

  /**
   * Extracts all UUID-shaped substrings from the OCR text.
   *
   * The text is first passed through normalizeOCRText (imported helper; what
   * it rewrites is not visible here, presumably common OCR confusions such as
   * 'O' for '0'; confirm). The pattern is case-insensitive and its last group
   * accepts any letter or digit, so candidates are not guaranteed to be valid
   * hexadecimal UUIDs.
   *
   * @param {string} ocrText - The text extracted by the OCR process.
   * @returns {Array<string>} Candidate shipment IDs; empty for non-string input.
   */
  extractUuidCandidates(ocrText) {
    if (typeof ocrText !== "string") {
      return [];
    }

    const normalizedText = normalizeOCRText(ocrText);
    if (typeof normalizedText !== "string") {
      return [];
    }

    const uuidRegex =
      /\b[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9a-z]{12}\b/gi;

    const candidates = normalizedText.match(uuidRegex) || [];

    console.log("Extracted UUID candidates:", candidates);
    return candidates;
  }

  /**
   * Finds the candidate closest to the expected shipment ID by Levenshtein
   * distance. The comparison ignores letter case because the candidate
   * pattern is itself case-insensitive.
   *
   * @param {string} expectedShipmentId - The expected shipment ID.
   * @param {Array<string>} candidates - Candidate strings from the OCR text.
   * @param {number} [maxDistance=4] - Largest distance still accepted as a match.
   * @returns {Object} `{ bestMatch, isMatch, distance }`; `bestMatch` is the
   *   candidate as given, `distance` is Infinity when nothing was comparable.
   */
  findBestMatch(expectedShipmentId, candidates, maxDistance = 4) {
    let bestMatch = null;
    let lowestDistance = Infinity;

    if (typeof expectedShipmentId === "string" && Array.isArray(candidates)) {
      const expectedLower = expectedShipmentId.toLowerCase();

      for (const candidate of candidates) {
        if (typeof candidate !== "string") {
          continue;
        }

        const distance = calculateLevenshteinDistance(
          candidate.toLowerCase(),
          expectedLower
        );

        if (distance < lowestDistance) {
          lowestDistance = distance;
          bestMatch = candidate;
        }
      }
    }

    const isMatch = lowestDistance <= maxDistance;

    return { bestMatch, isMatch, distance: lowestDistance };
  }

  /**
   * Verifies if the expected shipment ID is present in the OCR-extracted text,
   * tolerating small OCR mistakes via normalization and Levenshtein distance.
   *
   * @param {string} ocrText - The text extracted from the image by OCR.
   * @param {string} expectedShipmentId - The shipment ID that should be present.
   * @returns {Object} `{ expected, match, bestMatch }`.
   */
  verifyShipmentId(ocrText, expectedShipmentId) {
    console.log("Verifying shipment ID");

    const candidates = this.extractUuidCandidates(ocrText);
    console.log("Extracted candidates:", candidates);

    if (candidates.length === 0) {
      return { expected: expectedShipmentId, match: false, bestMatch: null };
    }

    const { bestMatch, isMatch, distance } = this.findBestMatch(
      expectedShipmentId,
      candidates
    );

    console.log("The expectedShipmentId is", expectedShipmentId);
    console.log("The bestMatch is", bestMatch);
    console.log("The Levenshtein distance is", distance);
    console.log("The isMatch is", isMatch);

    return { expected: expectedShipmentId, match: isMatch, bestMatch };
  }
}

// CommonJS export: the OCRProvider class is this module's sole export.
module.exports = OCRProvider;
