#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
DesignSystem-Pro-Max Core - BM25 search engine for design system specifications
"""

import csv
import re
from pathlib import Path
from math import log
from collections import defaultdict

# ============ CONFIGURATION ============
# CSV data lives in "data/", a sibling of the directory that contains this module.
DATA_DIR = Path(__file__).parent.parent / "data"
# Default cap on the number of hits returned by a search.
MAX_RESULTS = 3

# Per-domain search setup:
#   file        - CSV file name, relative to DATA_DIR
#   search_cols - columns whose text is indexed and matched against the query
#   output_cols - columns copied into each returned result
DOMAIN_CONFIG = {
    "component": {
        "file": "components.csv",
        "search_cols": [
            "Component",
            "Keywords",
            "Framework",
            "Category",
            "Description",
        ],
        "output_cols": [
            "Component",
            "Framework",
            "Category",
            "Props",
            "Styles",
            "Accessibility",
            "Docs URL",
            "Code Example",
        ],
    },
    "color": {
        "file": "colors.csv",
        "search_cols": ["Token", "Keywords", "Framework", "Usage"],
        "output_cols": [
            "Token",
            "Value",
            "Framework",
            "Usage",
            "CSS Var",
            "Notes",
        ],
    },
    "typography": {
        "file": "typography.csv",
        "search_cols": ["Token", "Keywords", "Type", "Font Family"],
        "output_cols": [
            "Token",
            "Font Family",
            "Size",
            "Weight",
            "Line Height",
            "CSS",
            "Framework",
        ],
    },
    "spacing": {
        "file": "spacing.csv",
        "search_cols": ["Token", "Keywords", "Type"],
        "output_cols": [
            "Token",
            "Value",
            "Type",
            "CSS",
            "Framework",
            "Usage",
        ],
    },
    "tokens": {
        "file": "tokens.csv",
        "search_cols": ["Token", "Keywords", "Category", "Framework"],
        "output_cols": [
            "Token",
            "Value",
            "Category",
            "Framework",
            "CSS Var",
            "Usage",
        ],
    },
}

# Stack-specific guideline files; each stack maps to "stacks/<stack name>.csv".
STACK_CONFIG = {
    stack_name: {"file": f"stacks/{stack_name}.csv"}
    for stack_name in (
        "react-antd",
        "material-ui",
        "figma-tokens",
        "bootstrap",
        "tailwind",
        "chakra",
        "elements",
    )
}

# Column layout shared by every stack file.
_STACK_COLS = {
    "search_cols": ["Category", "Guideline", "Description", "Do", "Don't"],
    "output_cols": [
        "Category",
        "Guideline",
        "Description",
        "Do",
        "Don't",
        "Code Good",
        "Code Bad",
        "Severity",
        "Docs URL",
    ],
}

# Names accepted as a stack / domain, in declaration order.
AVAILABLE_STACKS = list(STACK_CONFIG)
AVAILABLE_DOMAINS = list(DOMAIN_CONFIG)


# ============ BM25 IMPLEMENTATION ============
class BM25:
    """BM25 ranking algorithm for text search.

    Usage: call :meth:`fit` with the document strings, then :meth:`score`
    with a query string.  ``fit`` may be called again on the same instance;
    each call rebuilds the index from scratch.
    """

    # Compiled once at class level: tokenize() runs for every document and query.
    _PUNCT_RE = re.compile(r'[^\w\s-]')
    _SPACED_HYPHEN_RE = re.compile(r'\s+-\s+')

    def __init__(self, k1=1.5, b=0.75):
        """
        Initialize BM25 with ranking parameters.
        k1: Term frequency saturation parameter (1.0-2.0)
        b: Length normalization parameter (0.0-1.0)
        """
        self.k1 = k1
        self.b = b
        self.corpus = []                    # tokenized documents
        self.doc_lengths = []               # token count per document
        self.avgdl = 0                      # mean document length
        self.idf = {}                       # term -> inverse document frequency
        self.doc_freqs = defaultdict(int)   # term -> number of documents containing it
        self.N = 0                          # number of indexed documents
        self._term_freqs = []               # per-document {term: count}, built by fit()

    def tokenize(self, text):
        """Turn *text* into a list of lowercase tokens.

        Characters other than word characters, whitespace and hyphens become
        spaces; a hyphen surrounded by whitespace on both sides is collapsed
        so "foo - bar" becomes the single token "foo-bar"; tokens of one or
        two characters are dropped.
        """
        text = self._PUNCT_RE.sub(' ', str(text).lower())
        # Preserve framework names with hyphens (e.g., react-antd, material-ui)
        text = self._SPACED_HYPHEN_RE.sub('-', text)
        tokens = [w for w in text.split() if len(w) > 2]

        result = []
        for i, token in enumerate(tokens):
            # Skip a token that is exactly the previous token with a hyphen
            # inserted before its last character (e.g. "abcd" then "abc-d").
            if i > 0:
                prev = tokens[i - 1]
                if token == prev[:-1] + '-' + prev[-1:]:
                    continue
            result.append(token)
        return result

    def fit(self, documents):
        """Build the BM25 index from an iterable of document strings.

        All index state is rebuilt on every call, so re-fitting an instance
        never carries document frequencies over from a previous corpus.
        """
        self.corpus = [self.tokenize(doc) for doc in documents]
        self.N = len(self.corpus)

        # Reset everything derived from the corpus before recomputing it.
        self.doc_lengths = [len(doc) for doc in self.corpus]
        self.avgdl = sum(self.doc_lengths) / self.N if self.N else 0
        self.doc_freqs = defaultdict(int)
        self.idf = {}
        self._term_freqs = []

        for doc in self.corpus:
            # Term frequencies are query-independent, so count them once here
            # rather than on every score() call.
            counts = {}
            for word in doc:
                counts[word] = counts.get(word, 0) + 1
            self._term_freqs.append(counts)
            # Each distinct term contributes one to its document frequency.
            for word in counts:
                self.doc_freqs[word] += 1

        # IDF with smoothing: log((N - df + 0.5) / (df + 0.5) + 1)
        for word, freq in self.doc_freqs.items():
            self.idf[word] = log((self.N - freq + 0.5) / (freq + 0.5) + 1)

    def score(self, query):
        """Score every indexed document against *query*.

        Returns a list of ``(document_index, score)`` tuples sorted by score,
        highest first; documents with equal scores keep their corpus order.
        Query terms that never occur in the corpus contribute nothing.
        """
        # Only terms seen during fit() have an IDF; filter once, not per document.
        query_tokens = [t for t in self.tokenize(query) if t in self.idf]
        scores = []

        for idx, counts in enumerate(self._term_freqs):
            total = 0
            if query_tokens:
                # Length normalization depends on the document only.  A known
                # query term implies a non-empty corpus, so avgdl is non-zero.
                norm = self.k1 * (1 - self.b + self.b * self.doc_lengths[idx] / self.avgdl)
                for token in query_tokens:
                    tf = counts.get(token, 0)
                    numerator = tf * (self.k1 + 1)
                    denominator = tf + norm
                    total += self.idf[token] * numerator / denominator
            scores.append((idx, total))

        return sorted(scores, key=lambda x: x[1], reverse=True)


# ============ CSV LOADING ============
def _load_csv(filepath):
    """Load a CSV file and return its rows as a list of dicts.

    Args:
        filepath: Path to the CSV file (a ``Path`` or a plain string).

    Returns:
        One dict per data row, keyed by the header row.  An empty list is
        returned when the file does not exist, or when it cannot be read or
        parsed (in which case an error line is printed).
    """
    try:
        # utf-8-sig also reads plain UTF-8 but strips a leading BOM, which
        # would otherwise end up glued to the first column name.
        # newline='' lets the csv module handle line endings itself.
        with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
            return list(csv.DictReader(f))
    except (FileNotFoundError, NotADirectoryError):
        # A missing file is an expected condition; stay silent.
        return []
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"Error loading CSV {filepath}: {e}")
        return []


# ============ SEARCH FUNCTIONS ============
def _search_csv(filepath, search_cols, output_cols, query, max_results):
    """Rank the rows of one CSV file against *query* using BM25.

    Args:
        filepath: CSV file to search.
        search_cols: Columns whose text is concatenated into the indexed document.
        output_cols: Columns copied into each result (columns absent from the
            row are skipped).
        query: Free-text query.
        max_results: Maximum number of results to return.

    Returns:
        Up to ``max_results`` dicts, best match first, each holding the
        requested output columns plus ``"_score"`` (the BM25 score rounded to
        four decimals).  Rows scoring zero are never returned.  An empty list
        is returned when the file is missing or has no rows.
    """
    # _load_csv already returns [] for a missing or unreadable file.
    data = _load_csv(filepath)
    if not data:
        return []

    # Build documents from search columns.  DictReader fills the cells of a
    # short row with None; treat those as empty text so the literal word
    # "None" never gets indexed.
    documents = [
        " ".join(str(row.get(col) or "") for col in search_cols)
        for row in data
    ]

    # BM25 search
    bm25 = BM25()
    bm25.fit(documents)
    ranked = bm25.score(query)

    # Get top results with score > 0
    results = []
    for idx, score in ranked[:max_results]:
        if score <= 0:
            # ranked is sorted best-first, so nothing after this can match.
            break
        row = data[idx]
        result = {col: row[col] for col in output_cols if col in row}
        result["_score"] = round(score, 4)
        results.append(result)

    return results


def detect_domain(query):
    """Guess which search domain a free-text query belongs to.

    Every domain owns a list of trigger keywords.  A keyword counts as a hit
    when it occurs anywhere in the lower-cased query (plain substring match).
    The domain with the most hits wins, ties going to the domain listed
    first; with no hits at all the answer is "component".
    """
    needle = query.lower()

    triggers = (
        ("color", ["color", "palette", "hex", "#", "rgb", "hsl", "theme", "primary", "secondary"]),
        ("component", ["button", "input", "form", "modal", "dropdown", "table", "card", "navbar", "sidebar", "component"]),
        ("typography", ["font", "text", "typography", "heading", "body", "size", "weight", "line-height", "letter-spacing"]),
        ("spacing", ["spacing", "margin", "padding", "gap", "grid", "layout", "space", "size"]),
        ("tokens", ["token", "variable", "design token", "css var", "--", "alias", "semantic"]),
    )

    # Start from the fallback; only a strictly higher hit count replaces the
    # current winner, so the earliest domain keeps a tie.
    winner, winner_hits = "component", 0
    for name, keywords in triggers:
        hits = 0
        for keyword in keywords:
            if keyword in needle:
                hits += 1
        if hits > winner_hits:
            winner, winner_hits = name, hits
    return winner


def search(query, domain=None, max_results=MAX_RESULTS):
    """Search one design-system domain, auto-detecting it when not given.

    Args:
        query: Free-text query.
        domain: Key of DOMAIN_CONFIG.  ``None`` means "detect from the query";
            an unrecognized name falls back to "component".
        max_results: Maximum number of results to return.

    Returns:
        On success a dict with "domain", "query", "file", "count" and
        "results".  When the domain's CSV file is missing, a dict with
        "error" and "domain" instead.  "domain" always names the domain that
        was actually searched.
    """
    if domain is None:
        domain = detect_domain(query)

    if domain not in DOMAIN_CONFIG:
        # Unknown names use the component data; report that domain so the
        # "domain" and "file" fields of the result agree with each other.
        domain = "component"

    config = DOMAIN_CONFIG[domain]
    filepath = DATA_DIR / config["file"]

    if not filepath.exists():
        return {"error": f"File not found: {filepath}", "domain": domain}

    results = _search_csv(filepath, config["search_cols"], config["output_cols"], query, max_results)

    return {
        "domain": domain,
        "query": query,
        "file": config["file"],
        "count": len(results),
        "results": results
    }


def search_stack(query, stack, max_results=MAX_RESULTS):
    """Look up guidelines for one stack.

    Returns a result dict ("domain", "stack", "query", "file", "count",
    "results"), or a dict carrying an "error" message when the stack name is
    unknown or its CSV file does not exist.
    """
    if stack not in STACK_CONFIG:
        return {"error": f"Unknown stack: {stack}. Available: {', '.join(AVAILABLE_STACKS)}"}

    stack_file = STACK_CONFIG[stack]["file"]
    csv_path = DATA_DIR / stack_file

    if not csv_path.exists():
        return {"error": f"Stack file not found: {csv_path}", "stack": stack}

    # Every stack file shares the same column layout.
    hits = _search_csv(
        csv_path,
        _STACK_COLS["search_cols"],
        _STACK_COLS["output_cols"],
        query,
        max_results,
    )

    response = {"domain": "stack", "stack": stack, "query": query}
    response["file"] = stack_file
    response["count"] = len(hits)
    response["results"] = hits
    return response


def get_all_specs(domain=None, stack=None):
    """Get all specifications for a domain or stack (useful for code generation).

    Args:
        domain: Key of DOMAIN_CONFIG; used only when ``stack`` is not given.
        stack: Key of STACK_CONFIG; takes precedence over ``domain``.

    Returns:
        Every row of the matching CSV file as a list of dicts.  An empty list
        is returned when neither argument is given, when the name is unknown
        (for stacks as well as domains), or when the file is missing.
    """
    if stack:
        # .get() so an unknown stack yields [] just like an unknown domain,
        # instead of raising KeyError.
        config = STACK_CONFIG.get(stack)
    elif domain:
        config = DOMAIN_CONFIG.get(domain)
    else:
        config = None

    if not config:
        return []

    # _load_csv itself returns [] when the file does not exist.
    return _load_csv(DATA_DIR / config["file"])


def list_available_domains() -> list:
    """Return the list of available search domains.

    This is the module-level AVAILABLE_DOMAINS list itself, not a copy.
    """
    return AVAILABLE_DOMAINS


def list_available_stacks() -> list:
    """Return the list of available stacks.

    This is the module-level AVAILABLE_STACKS list itself, not a copy.
    """
    return AVAILABLE_STACKS


# ============ MAIN FOR TESTING ============
if __name__ == "__main__":
    # Simple smoke test: search the component domain for "button".
    result = search("button", domain="component")
    if "error" in result:
        # search() returns only "error" and "domain" when the data file is
        # missing, so there is no "count" or "results" to print.
        print(result["error"])
    else:
        print(f"Found {result['count']} results for 'button'")
        for r in result['results']:
            print(f"  - {r.get('Component', 'N/A')}: {r.get('Framework', 'N/A')}")
