#!/usr/bin/env bash
#
# AI Cats — per-batch categorization log cleanup.
#
# Each categorization batch spawns a Python worker that appends to
# logs/categorize_{batch_id}.log (see CategorizationService::spawnCategorizer
# and python/categorize.py). The file is written only while that batch runs;
# once the batch terminates, mtime stops advancing. That makes age-based
# deletion a clean fit — and it sidesteps the semantics problem that single-
# file logrotate hits, where logs/categorize_42.log.1 would be meaningless.
#
# Default retention: 30 days. Override by passing a whole number of days as an
# argument (any position; it takes precedence) or via the
# AI_CATS_BATCH_LOG_DAYS environment variable.
#
# Install (cron — adjust the user to match whoever owns the logs directory):
#   crontab -e
#   # Daily at 3:15 AM:
#   15 3 * * * /var/www/html/ai_cats/scripts/cleanup_batch_logs.sh >> /var/log/ai_cats_batch_log_cleanup.log 2>&1
#
# Dry run (no deletion, just lists what would be removed):
#   /var/www/html/ai_cats/scripts/cleanup_batch_logs.sh --dry-run
#
# Custom retention:
#   /var/www/html/ai_cats/scripts/cleanup_batch_logs.sh 7        # delete >7d
#   AI_CATS_BATCH_LOG_DAYS=60 /var/www/html/ai_cats/scripts/cleanup_batch_logs.sh

# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Locate the logs directory relative to this script's own location, so the
# result does not depend on the caller's working directory.
#
# CDPATH is cleared for the first `cd` and its stdout is discarded: when the
# caller has CDPATH set and the script is invoked through a relative path
# (e.g. scripts/cleanup_batch_logs.sh), `cd` may resolve the directory through
# CDPATH and echo the path it picked, which would otherwise be captured into
# SCRIPT_DIR together with the output of `pwd`. `--` keeps a path that starts
# with a dash from being parsed as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
# SCRIPT_DIR is absolute at this point, so CDPATH is not consulted here.
PROJECT_ROOT="$(cd -- "${SCRIPT_DIR}/.." > /dev/null && pwd)"
LOG_DIR="${PROJECT_ROOT}/logs"

#######################################
# Tell whether a value is usable as a retention period.
# Arguments: $1 - candidate value
# Returns:   0 if $1 is a non-empty string of decimal digits, 1 otherwise
#######################################
is_day_count() {
    case "${1-}" in
        '' | *[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# Defaults: real deletion, retention taken from the environment or 30 days
# when AI_CATS_BATCH_LOG_DAYS is unset or empty.
dry_run=0
days="${AI_CATS_BATCH_LOG_DAYS:-30}"

# Arguments may appear in any order. A numeric argument overrides the
# environment value; if several are given, the last one wins. Anything that
# is neither --dry-run nor a digit string is a usage error (exit 2).
for arg in "$@"; do
    case "${arg}" in
        --dry-run) dry_run=1 ;;
        *)
            if ! is_day_count "${arg}"; then
                echo "Usage: $0 [--dry-run] [days]" >&2
                exit 2
            fi
            days="${arg}"
            ;;
    esac
done

# Command-line values were validated above, so a bad value here can only have
# come from AI_CATS_BATCH_LOG_DAYS. Reject it now: passed through to
# `find -mtime` it would make find fail, and a find failure further down is
# easy to miss.
if ! is_day_count "${days}"; then
    echo "Invalid AI_CATS_BATCH_LOG_DAYS value '${days}': expected a whole number of days." >&2
    exit 2
fi

# Nothing to clean without a logs directory: report it on stderr with a
# timestamp and stop with exit status 1.
[[ -d "${LOG_DIR}" ]] || {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Log dir not found: ${LOG_DIR}" >&2
    exit 1
}

# ---------------------------------------------------------------------------
# Select expired batch logs, then list them (--dry-run) or delete them.
#
# Reads:   LOG_DIR, days, dry_run (set earlier in this script).
# Outputs: one timestamped summary line on stdout; in dry-run mode it is
#          followed by one matching path per line. Errors go to stderr.
# Exit:    0 on success, 1 if the scan fails or a file cannot be removed.
#
# find runs exactly once and its NUL-delimited result is stored in a temporary
# file. That way a find failure is detected instead of being hidden inside a
# process substitution, and the dry-run count and listing come from the same
# scan.
#
# Note: find -mtime +N counts whole 24-hour periods, so a file has to be at
# least N+1 days old to match.
# ---------------------------------------------------------------------------
ts="$(date '+%Y-%m-%d %H:%M:%S')"

expired_list="$(mktemp)"
trap 'rm -f -- "${expired_list}"' EXIT

if ! find "${LOG_DIR}" -maxdepth 1 -type f -name 'categorize_*.log' \
        -mtime "+${days}" -print0 > "${expired_list}"; then
    echo "[${ts}] find failed while scanning ${LOG_DIR}; nothing was removed." >&2
    exit 1
fi

if [[ "${dry_run}" -eq 1 ]]; then
    # Count first so the summary line precedes the listing.
    matches=0
    while IFS= read -r -d '' f; do
        matches=$((matches + 1))
    done < "${expired_list}"
    echo "[${ts}] DRY RUN — ${matches} categorize_*.log file(s) older than ${days} days would be removed."
    while IFS= read -r -d '' f; do
        printf '%s\n' "${f}"
    done < "${expired_list}"
    exit 0
fi

deleted=0
failed=0
while IFS= read -r -d '' f; do
    # Keep going after a failed removal so one bad file does not block the
    # rest; rm reports the reason on stderr itself.
    if rm -f -- "${f}"; then
        deleted=$((deleted + 1))
    else
        failed=$((failed + 1))
    fi
done < "${expired_list}"

echo "[${ts}] Removed ${deleted} categorize_*.log file(s) older than ${days} days from ${LOG_DIR}."

if [[ "${failed}" -gt 0 ]]; then
    echo "[${ts}] Failed to remove ${failed} categorize_*.log file(s) from ${LOG_DIR}." >&2
    exit 1
fi
