"""
Chase credit card statement parser — Story 9.7.

Tested against: Chase Amazon Prime Visa, account ending 4086, period 04/14/26–05/13/26.

Format (ACCOUNT ACTIVITY section):
  MM/DD  Description  Amount

Note: The "ACCOUNT ACTIVITY" header is in doubled-character format in some PDFs
(e.g. "AACCCCOOUUNNTT AACCTTIIVVIITTYY"). We detect sections by their normal-text
section headers (PAYMENTS AND OTHER CREDITS, PURCHASE, INTEREST CHARGED).

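State machine (headers are matched case-insensitively):
  any state → PAYMENTS  (on "PAYMENTS AND OTHER CREDITS")
  PAYMENTS → PURCHASES  (on "PURCHASE" or "PURCHASES"; ignored before the payments section)
  PAYMENTS/PURCHASES → DONE  (on "INTEREST CHARGED", "TOTAL INTEREST", "FEES CHARGED",
                              or any other end-of-activity header in _ENDS_TRANSACTIONS)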
"""
from __future__ import annotations

import re
from datetime import date
from decimal import Decimal

from app.services.pdf_parsers.base import StagedTransaction, ParseError, compute_dedup_hash
from app.services.merchant_normalizer import normalize

_PARSER_VERSION = "2.0.0"

# Transaction line: MM/DD  Description  Amount (positive or negative)
_TXN_RE = re.compile(
    r"^(\d{2}/\d{2})\s+(.+?)\s+([-]?\d{1,3}(?:,\d{3})*\.\d{2})\s*$"
)

# State transitions
_STARTS_PAYMENTS   = re.compile(r"^PAYMENTS AND OTHER CREDITS$", re.I)
_STARTS_PURCHASES  = re.compile(r"^PURCHASE[S]?$", re.I)
_ENDS_TRANSACTIONS = re.compile(r"^(INTEREST CHARGED|TOTAL INTEREST|FEES CHARGED|"
                                  r"2\d{3} Totals|QUALIFIED PROMOTIONAL|"
                                  r"IINNTTEERREESSTT|AACCCCOOUUNNTT(?! AACCTTIIVVIITTYY))", re.I)

# Lines that are continuations, headers, or summary rows — skip entirely
_SKIP_RE = re.compile(
    r"^(Order Number|Equal Pay applied|Date of|Transaction\s+Merchant|"
    r"\$ Amount|Manage your account|Page \d+ of|www\.chase|"
    r"SAYRE |0000001 |A50001)",
    re.I,
)

# Interest charges that appear as transaction-format lines — skip
_INTEREST_DESC_RE = re.compile(r"PURCHASE INTEREST CHARGE|CASH ADVANCE FEE", re.I)


# Statement-period header, e.g. "Opening/Closing Date 12/14/25 - 01/13/26".
# Captures the closing month (group 1) and closing year (group 2).
_CLOSING_DATE_RE = re.compile(
    r"Opening/Closing Date\s+\d{2}/\d{2}/\d{2,4}\s*[-–]\s*(\d{2})/\d{2}/(\d{2,4})",
    re.I,
)


def _extract_closing(lines: list[tuple[int, str]]) -> tuple[int, int] | None:
    """Return (closing_month, closing_year) from the statement-period line, or None."""
    for _, line in lines:
        m = _CLOSING_DATE_RE.search(line)
        if not m:
            continue
        month = int(m.group(1))
        if not 1 <= month <= 12:
            return None
        yr = m.group(2)
        return month, (int(yr) if len(yr) == 4 else 2000 + int(yr))
    return None


def _year_for_month(month: int, fallback_year: int, closing: tuple[int, int] | None) -> int:
    """Pick the calendar year for a MM/DD transaction given the statement's closing date."""
    if closing is None:
        return fallback_year
    closing_month, closing_year = closing
    # A month later than the closing month cannot belong to the closing year
    # (it would lie after the statement closed), so it is from the year before.
    return closing_year if month <= closing_month else closing_year - 1


def parse(pdf_path: str) -> tuple[list[StagedTransaction], list[ParseError]]:
    """Parse a Chase credit card statement PDF.

    Text is extracted page by page with pdfplumber, then fed line by line
    through a small state machine that only accepts transaction rows while
    inside the payments or purchases sections.

    Transaction rows carry only MM/DD. The year is resolved per transaction
    from the statement's closing date, so a statement spanning New Year
    (e.g. 12/14/25 - 01/13/26) dates December rows in the earlier year and
    January rows in the later one. If no period line is found, the single
    year returned by ``_extract_year`` is used for every row.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        A ``(transactions, errors)`` tuple. Rows whose date or amount cannot
        be converted are reported in ``errors`` and skipped. Any other
        exception raised while reading or parsing is caught and reported as a
        single page-0 error; transactions staged before it are still returned.
    """
    try:
        import pdfplumber
    except ImportError:
        return [], [ParseError(0, "", "pdfplumber not installed", _PARSER_VERSION)]

    transactions: list[StagedTransaction] = []
    errors: list[ParseError] = []

    try:
        with pdfplumber.open(pdf_path) as pdf:
            all_lines: list[tuple[int, str]] = []
            for page_num, page in enumerate(pdf.pages, start=1):
                # extract_text() may yield None for a page; treat it as empty.
                for line in (page.extract_text() or "").split("\n"):
                    all_lines.append((page_num, line.strip()))

        # ── Extract statement year / closing date ─────────────────────────────
        statement_year = _extract_year(all_lines)
        closing = _extract_closing(all_lines)

        # ── State-machine parse ───────────────────────────────────────────────
        # States: 'none' | 'payments' | 'purchases' | 'done'
        state = "none"

        for page_num, line in all_lines:
            if not line:
                continue

            # State transitions (header lines are consumed, never parsed as rows)
            if _STARTS_PAYMENTS.match(line):
                state = "payments"
                continue
            if _STARTS_PURCHASES.match(line):
                # Only honoured once the payments section has been seen.
                if state in ("payments", "purchases"):
                    state = "purchases"
                continue
            if _ENDS_TRANSACTIONS.match(line):
                if state in ("payments", "purchases"):
                    state = "done"
                continue

            if state not in ("payments", "purchases"):
                continue

            # Skip known non-transaction lines
            if _SKIP_RE.match(line):
                continue

            # Try to parse as transaction
            m = _TXN_RE.match(line)
            if not m:
                continue

            date_str = m.group(1)
            description = m.group(2).strip()
            amount_str = m.group(3)

            # Skip interest-type entries that look like transactions
            if _INTEREST_DESC_RE.search(description):
                continue

            try:
                month, day = int(date_str[:2]), int(date_str[3:])
                txn_year = _year_for_month(month, statement_year, closing)
                txn_date = date(txn_year, month, day)
            except ValueError as exc:
                errors.append(ParseError(page_num, line, str(exc), _PARSER_VERSION))
                continue

            try:
                amount = Decimal(amount_str.replace(",", ""))
            except Exception as exc:
                errors.append(ParseError(page_num, line, str(exc), _PARSER_VERSION))
                continue

            # Everything in the payments section is a credit, as is any
            # negative amount elsewhere; the stored amount is always unsigned.
            is_credit = amount < Decimal("0") or state == "payments"
            abs_amount = abs(amount)
            norm = normalize(description)
            dedup_hash = compute_dedup_hash(norm, abs_amount, txn_date)

            transactions.append(StagedTransaction(
                date=txn_date,
                merchant_raw=description,
                merchant_normalized=norm,
                amount=abs_amount,
                is_credit=is_credit,
                issuer="chase",
                dedup_hash=dedup_hash,
                confidence_score=0.90,
                raw_text=line,
            ))

    except Exception as exc:
        # Best-effort boundary: never raise to the caller, report instead.
        errors.append(ParseError(0, str(exc), "Failed to open or read PDF", _PARSER_VERSION))

    return transactions, errors


def _extract_year(lines: list[tuple[int, str]]) -> int:
    """Extract statement year from 'Opening/Closing Date MM/DD/YY - MM/DD/YY'."""
    import datetime
    for _, line in lines:
        m = re.search(r"Opening/Closing Date\s+\d{2}/\d{2}/(\d{2,4})", line, re.I)
        if m:
            yr = m.group(1)
            return int(yr) if len(yr) == 4 else 2000 + int(yr)
        # Also look for "Statement Date: MM/DD/YY"
        m2 = re.search(r"Statement Date:\s+\d{2}/\d{2}/(\d{2,4})", line, re.I)
        if m2:
            yr = m2.group(1)
            return int(yr) if len(yr) == 4 else 2000 + int(yr)
    return datetime.datetime.now().year