"""Discover credit card statement parser — Story 9.8."""
from __future__ import annotations

import re
from datetime import datetime
from decimal import Decimal

from app.services.pdf_parsers.base import StagedTransaction, ParseError, compute_dedup_hash
from app.services.merchant_normalizer import normalize

# Version string passed to every ParseError created in this module.
_PARSER_VERSION = "1.0.0"
# Discover format: MM/DD  Description  $Amount  or  MM/DD/YYYY  Description  Amount
# Capture groups:
#   1. date — MM/DD with an optional /YYYY suffix
#   2. description — matched lazily, so it ends before the trailing amount
#   3. amount — optional sign, 1-3 leading digits, comma-separated thousands
#      groups, exactly two decimals; an optional "$" may precede the sign
# NOTE(review): amounts of 1000 or more written without thousands separators
# (e.g. "1234.56") and a sign placed before the "$" (e.g. "-$5.00") do not
# match this pattern — confirm against real statements.
_TXN_RE = re.compile(
    r"^(\d{2}/\d{2}(?:/\d{4})?)\s+(.+?)\s+\$?([-+]?\d{1,3}(?:,\d{3})*\.\d{2})\s*$"
)
# Case-insensitive keywords; parse() flags a transaction as a credit when its
# description contains any of them. The match is a plain substring search, so
# a merchant name that merely contains one of these words is flagged as well.
_PAYMENT_RE = re.compile(r"(PAYMENT|CASHBACK|REWARDS|CREDIT)", re.I)


def parse(pdf_path: str) -> tuple[list[StagedTransaction], list[ParseError]]:
    """Extract staged transactions from a Discover statement PDF.

    Each text line of each page is stripped and matched against ``_TXN_RE``;
    lines that do not match are skipped silently. Dates written as ``MM/DD``
    are completed with the statement year, taken from the first standalone
    ``20xx`` number found in the first page's text, or the current year when
    none is found.

    Args:
        pdf_path: Path of the PDF, handed to ``pdfplumber.open``.

    Returns:
        A ``(transactions, errors)`` tuple. Amounts are stored as absolute
        values; the sign is carried by ``is_credit``, which is set for
        negative amounts and for descriptions matching ``_PAYMENT_RE``.
        Lines whose date or amount cannot be converted are reported in
        ``errors`` with their page number. Any other exception raised while
        opening or reading the PDF is reported as a single page-0 error, and
        the transactions collected up to that point are still returned.
    """
    try:
        import pdfplumber
    except ImportError:
        return [], [ParseError(0, "", "pdfplumber not installed", _PARSER_VERSION)]

    transactions: list[StagedTransaction] = []
    errors: list[ParseError] = []
    statement_year = datetime.now().year

    try:
        with pdfplumber.open(pdf_path) as pdf:
            pages = pdf.pages
            first_page_text = ""
            if pages:
                # Extract page 1 once; it is reused by the main loop below
                # instead of running the text extraction a second time.
                first_page_text = pages[0].extract_text() or ""
                ym = re.search(r"\b(20\d{2})\b", first_page_text)
                if ym:
                    statement_year = int(ym.group(1))

            for page_num, page in enumerate(pages, start=1):
                if page_num == 1:
                    text = first_page_text
                else:
                    text = page.extract_text() or ""

                for line in text.split("\n"):
                    line = line.strip()
                    m = _TXN_RE.match(line)
                    if not m:
                        continue
                    date_str, description, amount_str = m.group(1), m.group(2), m.group(3)
                    try:
                        if len(date_str) == 5:
                            # MM/DD only: borrow the year from the statement.
                            # NOTE(review): a statement spanning a year
                            # boundary (December lines on a January
                            # statement) gets the wrong year here — confirm
                            # whether callers correct for this.
                            txn_date = datetime.strptime(
                                f"{date_str}/{statement_year}", "%m/%d/%Y"
                            ).date()
                        else:
                            txn_date = datetime.strptime(date_str, "%m/%d/%Y").date()
                        amount = Decimal(amount_str.replace(",", ""))
                    except (ValueError, ArithmeticError) as exc:
                        # strptime raises ValueError (e.g. "13/45");
                        # decimal.InvalidOperation is an ArithmeticError.
                        errors.append(ParseError(page_num, line, str(exc), _PARSER_VERSION))
                        continue

                    merchant_raw = description.strip()
                    norm = normalize(merchant_raw)
                    abs_amount = abs(amount)
                    is_credit = amount < Decimal("0") or bool(_PAYMENT_RE.search(description))
                    transactions.append(StagedTransaction(
                        date=txn_date, merchant_raw=merchant_raw,
                        merchant_normalized=norm, amount=abs_amount, is_credit=is_credit,
                        issuer="discover", dedup_hash=compute_dedup_hash(norm, abs_amount, txn_date),
                        confidence_score=0.85, raw_text=line,
                    ))
    except Exception as exc:
        # Boundary handler: the parser reports failures instead of raising.
        # NOTE(review): this also catches failures after a successful open
        # (text extraction, normalisation), which are still labelled
        # "Failed to open PDF".
        errors.append(ParseError(0, str(exc), "Failed to open PDF", _PARSER_VERSION))

    return transactions, errors
