"""
Generic CSV/TSV transaction parser.

Supports tab-separated and comma-separated files with or without headers.
Auto-detects separator and column layout.

Tested against: Home Depot TSV export (05/26/2026, $839.20, THE HOME DEPOT LONDONDERRY  NH, purchase)
"""
from __future__ import annotations

import csv
import io
from dataclasses import dataclass
from datetime import datetime, date
from decimal import Decimal, InvalidOperation
from typing import IO

from app.services.pdf_parsers.base import StagedTransaction, ParseError, compute_dedup_hash
from app.services.merchant_normalizer import normalize

_PARSER_VERSION = "1.0.0"

# Date formats to try in order
_DATE_FORMATS = [
    "%m/%d/%Y",   # 05/26/2026
    "%m/%d/%y",   # 05/26/26
    "%Y-%m-%d",   # 2026-05-26
    "%m-%d-%Y",   # 05-26-2026
    "%d/%m/%Y",   # 26/05/2026 (UK format, lower priority)
]

# Column name patterns to detect auto-layout
_DATE_COLS    = {"date", "trans date", "transaction date", "posted date", "post date"}
_AMOUNT_COLS  = {"amount", "debit", "credit", "charge"}
_DESC_COLS    = {"description", "merchant", "payee", "name", "memo"}
_TYPE_COLS    = {"type", "transaction type", "category"}
_CREDIT_TYPES = {"credit", "payment", "refund", "return", "adjustment"}


@dataclass
class CsvParseResult:
    transactions: list[StagedTransaction]
    errors: list[ParseError]
    row_count: int


def parse_csv(
    file_obj: IO[bytes] | str,
    issuer: str = "csv_import",
) -> CsvParseResult:
    """
    Parse a CSV or TSV transaction file.

    Accepts either a file-like object or a file path string.
    Auto-detects separator (tab or comma) and column layout.
    """
    # Read content
    if isinstance(file_obj, str):
        with open(file_obj, "rb") as f:
            raw = f.read()
    else:
        raw = file_obj.read()

    # Decode, strip UTF-8 BOM
    try:
        content = raw.decode("utf-8-sig")
    except UnicodeDecodeError:
        content = raw.decode("latin-1")

    # Detect separator
    sep = _detect_separator(content)

    reader = csv.reader(io.StringIO(content), delimiter=sep)
    rows = list(reader)

    if not rows:
        return CsvParseResult([], [], 0)

    # Detect header row
    first_row = [c.strip().lower() for c in rows[0]]
    has_header = any(col in _DATE_COLS | _AMOUNT_COLS | _DESC_COLS for col in first_row)

    if has_header:
        headers = first_row
        data_rows = rows[1:]
    else:
        headers = None
        data_rows = rows

    # Detect column positions
    col_map = _detect_columns(headers, data_rows[0] if data_rows else [])

    transactions: list[StagedTransaction] = []
    errors: list[ParseError] = []

    for row_num, row in enumerate(data_rows, start=2 if has_header else 1):
        if not any(cell.strip() for cell in row):
            continue  # skip blank rows

        try:
            result = _parse_row(row, col_map, issuer, row_num)
            if result is not None:
                transactions.append(result)
        except Exception as exc:
            raw_text = sep.join(row)[:200]
            errors.append(ParseError(row_num, raw_text, str(exc), _PARSER_VERSION))

    return CsvParseResult(transactions=transactions, errors=errors, row_count=len(data_rows))


def _detect_separator(content: str) -> str:
    first_line = content.split("\n")[0]
    tabs = first_line.count("\t")
    commas = first_line.count(",")
    return "\t" if tabs > commas else ","


def _detect_columns(headers: list[str] | None, sample_row: list) -> dict:
    """Return {role: col_index} for date, amount, description, type."""
    if headers:
        col_map = {}
        for i, h in enumerate(headers):
            h_lower = h.strip().lower()
            if h_lower in _DATE_COLS and "date" not in col_map:
                col_map["date"] = i
            elif h_lower in _AMOUNT_COLS and "amount" not in col_map:
                col_map["amount"] = i
            elif h_lower in _DESC_COLS and "description" not in col_map:
                col_map["description"] = i
            elif h_lower in _TYPE_COLS and "type" not in col_map:
                col_map["type"] = i
        return col_map

    # No header — auto-detect by content type
    # Try each column: date-like, dollar-amount-like, text, type
    col_map = {}
    for i, val in enumerate(sample_row):
        val = val.strip()
        if "date" not in col_map and _looks_like_date(val):
            col_map["date"] = i
        elif "amount" not in col_map and _looks_like_amount(val):
            col_map["amount"] = i
        elif "type" not in col_map and val.lower() in _CREDIT_TYPES | {"purchase", "debit", "charge"}:
            col_map["type"] = i
        elif "description" not in col_map and len(val) > 5:
            col_map["description"] = i

    return col_map


def _looks_like_date(val: str) -> bool:
    for fmt in _DATE_FORMATS:
        try:
            datetime.strptime(val.strip(), fmt)
            return True
        except ValueError:
            continue
    return False


def _looks_like_amount(val: str) -> bool:
    cleaned = val.strip().lstrip("$").replace(",", "")
    try:
        Decimal(cleaned)
        return True
    except InvalidOperation:
        return False


def _parse_row(row: list, col_map: dict, issuer: str, row_num: int) -> StagedTransaction | None:
    def _get(key: str) -> str:
        idx = col_map.get(key)
        if idx is not None and idx < len(row):
            return row[idx].strip()
        return ""

    date_str   = _get("date")
    amount_str = _get("amount")
    desc       = _get("description")
    txn_type   = _get("type").lower()

    if not date_str or not amount_str:
        return None

    # Parse date
    txn_date = None
    for fmt in _DATE_FORMATS:
        try:
            txn_date = datetime.strptime(date_str, fmt).date()
            break
        except ValueError:
            continue
    if txn_date is None:
        raise ValueError(f"Cannot parse date: {date_str!r}")

    # Parse amount (strip leading $, handle negative)
    amount_clean = amount_str.lstrip("$").replace(",", "")
    try:
        amount = Decimal(amount_clean)
    except InvalidOperation:
        raise ValueError(f"Cannot parse amount: {amount_str!r}")

    if not desc:
        desc = f"Row {row_num}"

    is_credit = amount < Decimal("0") or txn_type in _CREDIT_TYPES
    abs_amount = abs(amount)
    norm = normalize(desc)
    dedup_hash = compute_dedup_hash(norm, abs_amount, txn_date)

    return StagedTransaction(
        date=txn_date,
        merchant_raw=desc,
        merchant_normalized=norm,
        amount=abs_amount,
        is_credit=is_credit,
        issuer=issuer,
        dedup_hash=dedup_hash,
        confidence_score=1.0,
        raw_text="\t".join(str(c) for c in row)[:200],
    )