"""
Canonical data contracts for the PDF import pipeline.

StagedTransaction and ParseError are the only types parsers ever return.
Parsers NEVER import from ORM models, Flask, or SQLAlchemy.

compute_dedup_hash() is the canonical hash function used by:
  - All PDF parsers (when building StagedTransaction instances)
  - The transactions blueprint (Story 3.3, manual entry dedup)
  - staging_pipeline.py (Story 9.1, pre-commit dedup check)
"""
from __future__ import annotations

import hashlib
from dataclasses import dataclass
from datetime import date
from decimal import Decimal


@dataclass
class StagedTransaction:
    """Canonical output of any PDF/CSV parser. Never touches the database.

    Each instance carries the fields of one staged transaction. This is a
    plain (mutable, non-frozen) dataclass with no default values: every field
    must be supplied, and the declaration order below is the positional
    argument order of the generated ``__init__``.
    """
    # Transaction date. Annotation-only, so the imported ``date`` type is not
    # shadowed inside the class body.
    date: date
    # Presumably the merchant string exactly as extracted from the statement
    # -- TODO confirm against the parsers.
    merchant_raw: str
    # Cleaned merchant name; presumably the value passed to
    # compute_dedup_hash() as ``normalized_merchant`` -- verify against callers.
    merchant_normalized: str
    # Transaction amount as a Decimal. NOTE(review): the sign convention
    # (always positive vs. signed) is not visible here -- confirm.
    amount: Decimal
    # Credit/debit flag. Presumably True for credits (refunds, payments)
    # -- confirm.
    is_credit: bool
    # Identifier of the card/bank issuer; the allowed values are not defined
    # in this module.
    issuer: str
    # Dedup key. Per the module docstring, parsers build this with
    # compute_dedup_hash().
    dedup_hash: str
    # Parser confidence for this row. NOTE(review): range/scale (e.g. 0.0-1.0)
    # is not established in this module -- confirm.
    confidence_score: float
    # Presumably the source text this transaction was parsed from, kept for
    # review/debugging -- TODO confirm.
    raw_text: str


@dataclass
class ParseError:
    """Structured parse failure — surfaced per-row, not raised as exception.

    Despite the name, this is a plain dataclass and does not derive from
    ``Exception``; it is a value object that parsers return. All fields are
    required and are accepted positionally in the order declared below.
    """
    # Page of the source document the failure relates to.
    # NOTE(review): 0- vs 1-based numbering is not visible here -- confirm.
    page_number: int
    # Presumably the text that could not be parsed -- TODO confirm.
    raw_text: str
    # Description of why parsing failed.
    reason: str
    # Identifier of the parser version that produced this error; the format
    # is not defined in this module.
    parser_version: str


def compute_dedup_hash(
    normalized_merchant: str,
    amount: Decimal,
    txn_date: date,
) -> str:
    """
    Return the canonical dedup key for a transaction.

    The key is the SHA-256 hex digest of the UTF-8 encoding of
    ``normalized_merchant + str(amount) + txn_date.isoformat()``.

    Caveats that follow directly from that definition:
      - The three parts are joined with no separator, so the field
        boundaries are not part of the hashed payload.
      - ``str(amount)`` keeps the Decimal's exponent, so ``Decimal("10.5")``
        and ``Decimal("10.50")`` yield different hashes. Callers that want
        them treated as equal must quantize before calling.

    Args:
        normalized_merchant: Cleaned merchant name (post-normalization).
        amount: Transaction amount as Decimal.
        txn_date: Transaction date.

    Returns:
        64-character lowercase hex digest.
    """
    # Same pieces, same order, no separator: this layout is the contract
    # shared by every producer and consumer of the hash.
    pieces = (normalized_merchant, str(amount), txn_date.isoformat())
    digest = hashlib.sha256()
    digest.update("".join(map(format, pieces)).encode("utf-8"))
    return digest.hexdigest()
