"""
TDD spec for duplicate_detector.flag_duplicates().

Written BEFORE the first Alembic migration (AR-2 compliance).
Tests use only plain Python — no DB, no Flask app context required.
"""
import pytest
from app.services.duplicate_detector import flag_duplicates


# Sample fingerprint strings passed to flag_duplicates() by the tests below.
HASH_A, HASH_B, HASH_C = "aaa111", "bbb222", "ccc333"


def test_exact_match_detected():
    """An incoming fingerprint already present in the existing set is flagged."""
    incoming = [HASH_A]
    existing = [HASH_A, HASH_B]
    assert flag_duplicates(incoming, existing) == [True]


def test_no_match():
    """An incoming fingerprint absent from the existing set is left unflagged."""
    incoming = [HASH_C]
    existing = [HASH_A, HASH_B]
    assert flag_duplicates(incoming, existing) == [False]


def test_empty_new_fingerprints_returns_empty_list():
    """With no incoming fingerprints, the result is an empty list."""
    existing = [HASH_A, HASH_B]
    flags = flag_duplicates([], existing)
    assert flags == []


def test_empty_existing_fingerprints_returns_all_false():
    """With nothing to compare against, every incoming fingerprint is unflagged."""
    incoming = [HASH_A, HASH_B]
    flags = flag_duplicates(incoming, [])
    assert flags == [False, False]


@pytest.mark.skip(reason="wired in Epic 9")
def test_staged_vs_main_dedup_path():
    """
    Placeholder: staged transactions must be deduplicated against BOTH the
    staging DB and the main DB.

    Planned implementation (Epic 9):
      - staging_pipeline.begin_import() gathers existing hashes from two sources:
          1. Rows already staged in the current import batch (intra-batch dupes)
          2. Main DB transactions dated within ±3 days of each new transaction
      - The two hash sets are merged before flag_duplicates() is called
      - Merging ensures overlapping statement periods across separate imports
        are still caught
    """
