"""
Evidence builder for AI categorization predictions.

Extracts evidence signals from a product record and its attributes, then
writes evidence rows to the database.  Each evidence record maps to a
source_type defined in the schema.
"""

import json
import re
from typing import List, Dict, Any, Optional


# ---------------------------------------------------------------------------
# Evidence source types.
# These string values are meant to mirror a constraint in the schema (not
# visible from this module), so they must not be changed independently of it.
# ---------------------------------------------------------------------------
# Used by build_evidence() for SKU/MPN prefix-rule signals.
SOURCE_PART_NUMBER_PATTERN = "part_number_pattern"
# Used by build_evidence() for manufacturer-category keyword signals.
SOURCE_MANUFACTURER_CATEGORY = "manufacturer_category"
# Used by build_evidence() for one signal per product attribute row.
SOURCE_ENRICHMENT_ATTRIBUTE = "enrichment_attribute"
# Not referenced by the code visible in this module.
SOURCE_PRODUCT_DESCRIPTION = "product_description"
# Used by fallback_signal().
SOURCE_CATEGORY_RULE = "category_rule"


# ---------------------------------------------------------------------------
# Simple category inference rules
# Extend these tables as more category patterns are learned.
# ---------------------------------------------------------------------------
# Part-number prefix rules.  Each row of the table below is
#     (regex pattern, category_code, human label, base signal_strength)
# and is expanded into a dict with the keys "pattern", "code", "label" and
# "strength".  Row order is significant: build_evidence() walks the list from
# the top and stops at the first pattern that matches.
_PART_NUMBER_PATTERNS: List[Dict[str, Any]] = [
    {"pattern": regex, "code": code, "label": label, "strength": strength}
    for regex, code, label, strength in (
        (r"^(CB|CAB)-", "cat_cables", "Cables & Connectors", 0.8),
        (r"^(R|RES)\d", "cat_resistors", "Resistors", 0.75),
        (r"^(C|CAP)\d", "cat_capacitors", "Capacitors", 0.75),
        (r"^(IC|U)\d", "cat_ics", "Integrated Circuits", 0.7),
        (r"^(SW|BTN)-", "cat_switches", "Switches & Buttons", 0.7),
        (r"^(PSU|PWR)-", "cat_power", "Power Supplies", 0.8),
        (r"^(LED|DIODE)-", "cat_diodes", "Diodes & LEDs", 0.75),
        (r"^(FAN|COOL)-", "cat_cooling", "Cooling & Thermal", 0.7),
    )
]

# Keyword -> category lookup for the free-text manufacturer category.
# Keys are lowercase keywords; each value is a dict with "code" and "label".
# build_evidence() looks for these keywords in the lowercased manufacturer
# category in insertion order and stops at the first hit, so the order of the
# rows below is significant.
_MANUFACTURER_CATEGORY_MAP: Dict[str, Dict[str, Any]] = {
    keyword: {"code": code, "label": label}
    for keyword, code, label in (
        ("cable", "cat_cables", "Cables & Connectors"),
        ("connector", "cat_cables", "Cables & Connectors"),
        ("resistor", "cat_resistors", "Resistors"),
        ("capacitor", "cat_capacitors", "Capacitors"),
        ("semiconductor", "cat_ics", "Integrated Circuits"),
        ("ic", "cat_ics", "Integrated Circuits"),
        ("switch", "cat_switches", "Switches & Buttons"),
        ("power", "cat_power", "Power Supplies"),
        ("led", "cat_diodes", "Diodes & LEDs"),
        ("diode", "cat_diodes", "Diodes & LEDs"),
        ("thermal", "cat_cooling", "Cooling & Thermal"),
        ("cooling", "cat_cooling", "Cooling & Thermal"),
    )
}


def _category_keyword_in(keyword: str, text: str) -> bool:
    """Return True when *keyword* occurs in *text* as a standalone word.

    A trailing plural ("s" / "es") is tolerated, but the keyword must not be
    embedded in a longer word, so "ic" does not fire on "electronic" and "led"
    does not fire on "sealed".  *text* is expected to be lowercased already.
    """
    pattern = rf"(?<![a-z]){re.escape(keyword)}(?:e?s)?(?![a-z])"
    return re.search(pattern, text) is not None


def build_evidence(product: Dict[str, Any], attributes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Build a list of evidence signal dicts for a product.

    Up to three kinds of signal are produced, in this order:

    1. Part number pattern (weight 0.4): the SKU, or the MPN when the SKU is
       empty, is matched against ``_PART_NUMBER_PATTERNS`` (case-insensitive,
       anchored at the start).  Only the first matching rule is used.
    2. Manufacturer category (weight 0.35): the lowercased
       ``manufacturer_category`` is searched for the keywords of
       ``_MANUFACTURER_CATEGORY_MAP`` as standalone words (plural allowed).
       Only the first matching keyword is used.
    3. Enrichment attributes (weight 0.15): one signal per attribute row that
       has both a non-empty name and a non-empty value.

    Each returned dict has:
        source_type (str), source_label (str), evidence_value (str),
        weight (float 0.0–1.0), signal_strength (float 0.0–1.0),
        suggested_category_code (str or None),
        suggested_category_label (str or None)

    The two category fields are None for enrichment-attribute signals, which
    carry no category suggestion.

    Args:
        product:    Dict of product DB row columns.  The keys read are
                    "sku", "mpn" and "manufacturer_category"; missing or
                    falsy values are treated as empty.
        attributes: List of product_attribute DB rows for this product.  The
                    keys read are "attribute_name" and "attribute_value".
                    None is accepted and treated as an empty list.

    Returns:
        List of evidence signal dicts (may be empty if no signals found).
    """
    signals: List[Dict[str, Any]] = []

    sku = str(product.get("sku") or "").strip()
    mpn = str(product.get("mpn") or "").strip()
    manufacturer_category = str(product.get("manufacturer_category") or "").strip()

    # --- Part number pattern matching ---
    # The SKU is preferred; the MPN is consulted only when there is no SKU.
    # Track which one was used so the evidence text names it correctly.
    if sku:
        id_kind, identifier = "SKU", sku
    else:
        id_kind, identifier = "MPN", mpn
    if identifier:
        for rule in _PART_NUMBER_PATTERNS:
            if re.match(rule["pattern"], identifier, re.IGNORECASE):
                signals.append({
                    "source_type":             SOURCE_PART_NUMBER_PATTERN,
                    "source_label":            "Part Number Pattern",
                    "evidence_value":          (
                        f"{id_kind} '{identifier}' matches the pattern for category "
                        f"'{rule['label']}'"
                    ),
                    "weight":                  0.4,
                    "signal_strength":         rule["strength"],
                    "suggested_category_code": rule["code"],
                    "suggested_category_label":rule["label"],
                })
                break  # use first matching pattern only

    # --- Manufacturer category mapping ---
    if manufacturer_category:
        mc_lower = manufacturer_category.lower()
        for keyword, mapping in _MANUFACTURER_CATEGORY_MAP.items():
            # Whole-word match: a plain substring test lets short keywords
            # such as "ic" or "led" hijack unrelated categories.
            if _category_keyword_in(keyword, mc_lower):
                signals.append({
                    "source_type":             SOURCE_MANUFACTURER_CATEGORY,
                    "source_label":            "Manufacturer Category",
                    "evidence_value":          (
                        f"Manufacturer category '{manufacturer_category}' "
                        f"maps to '{mapping['label']}'"
                    ),
                    "weight":                  0.35,
                    "signal_strength":         0.75,
                    "suggested_category_code": mapping["code"],
                    "suggested_category_label":mapping["label"],
                })
                break  # use first matching keyword only

    # --- Enrichment attributes ---
    for attr in attributes or ():
        attr_name = str(attr.get("attribute_name") or "").strip()
        # Only None counts as "missing" for the value, so legitimate falsy
        # values such as 0 or False are kept as evidence ("0", "False").
        raw_value = attr.get("attribute_value")
        attr_value = "" if raw_value is None else str(raw_value).strip()
        if not attr_name or not attr_value:
            continue
        signals.append({
            "source_type":             SOURCE_ENRICHMENT_ATTRIBUTE,
            "source_label":            attr_name,
            "evidence_value":          attr_value,
            "weight":                  0.15,
            "signal_strength":         0.5,
            "suggested_category_code": None,
            "suggested_category_label": None,
        })

    return signals


def pick_category(signals: List[Dict[str, Any]]) -> Dict[str, str]:
    """
    Choose the best suggested category from the evidence signals.

    Every signal that carries a ``suggested_category_code`` is scored as
    ``weight × signal_strength``.  For each category only its single
    highest-scoring signal is kept (scores are not summed across signals),
    and the category with the highest kept score wins.  On equal scores the
    category whose code appeared first in ``signals`` wins.

    Missing or None ``weight`` / ``signal_strength`` values count as 0.0, and
    a missing, None or empty ``suggested_category_label`` falls back to the
    category code so the returned label is always a string.

    Args:
        signals: Evidence signal dicts.  None is accepted and treated as an
                 empty list.

    Returns:
        {"code": str, "label": str} — falls back to
        {"code": "cat_uncategorized", "label": "Uncategorized"} when no
        signal carries a category suggestion.
    """
    best_per_code: Dict[str, Dict[str, Any]] = {}
    for signal in signals or ():
        code = signal.get("suggested_category_code")
        if not code:
            continue  # signal carries no category suggestion
        score = (signal.get("weight") or 0.0) * (signal.get("signal_strength") or 0.0)
        current = best_per_code.get(code)
        # Strict "<" keeps the earliest signal when scores tie within a code.
        if current is None or current["score"] < score:
            best_per_code[code] = {
                "score": score,
                "code":  code,
                "label": signal.get("suggested_category_label") or code,
            }

    if not best_per_code:
        return {"code": "cat_uncategorized", "label": "Uncategorized"}

    best = max(best_per_code.values(), key=lambda entry: entry["score"])
    return {"code": best["code"], "label": best["label"]}


def fallback_signal() -> Dict[str, Any]:
    """
    Build the standard fallback evidence signal (confidence floor).

    The signal has source type ``SOURCE_CATEGORY_RULE``, suggests the
    "cat_uncategorized" / "Uncategorized" category, and carries a weight of
    0.001 with a signal strength of 0.1.

    Returns:
        A newly created signal dict on every call, with the same keys as the
        signals produced by build_evidence().
    """
    explanation = "No specific evidence found; prediction based on default category rules"
    return dict(
        source_type=SOURCE_CATEGORY_RULE,
        source_label="Fallback",
        evidence_value=explanation,
        weight=0.001,
        signal_strength=0.1,
        suggested_category_code="cat_uncategorized",
        suggested_category_label="Uncategorized",
    )
