#!/usr/bin/env python3
"""
Extract all scopes from git commit messages in the repository.

This script analyzes git history to extract all unique scopes used in
conventional commit messages, categorized by commit type.
"""

import argparse
import json
import re
import subprocess
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set


def get_git_log(repo_path: Path, since: str = None) -> str:
    """
    Return the commit subject lines reported by ``git log``.

    Runs ``git log --format=%s`` with ``repo_path`` as the working
    directory and returns its standard output, one subject per line.

    Args:
        repo_path: Directory in which git is executed.
        since: Optional value passed through as ``--since=<value>``
            (e.g. ``'1 month ago'`` or ``'2024-01-01'``). A falsy value
            adds no date filter.

    Returns:
        The decoded standard output of the git command.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If the git executable or ``repo_path`` cannot be accessed.
    """
    cmd = ["git", "log", "--format=%s"]
    if since:
        cmd.append(f"--since={since}")

    # Decode explicitly rather than with the locale's preferred encoding:
    # git emits log output as UTF-8 unless i18n.logOutputEncoding says
    # otherwise, and a non-UTF-8 locale would garble or reject it.
    # errors="replace" keeps one odd byte from aborting the whole run.
    result = subprocess.run(
        cmd,
        cwd=repo_path,
        capture_output=True,
        encoding="utf-8",
        errors="replace",
        check=True,
    )
    return result.stdout


# Conventional-commit subject: <type>[(<scope>)][!]: <subject>
#   group 1 - type: lowercase letter followed by lowercase letters, digits
#             or hyphens
#   group 2 - scope: optional, any non-empty text without ")"
# The optional "!" before the colon is the breaking-change marker.
_CONVENTIONAL_SUBJECT_RE = re.compile(
    r'^([a-z][a-z0-9-]*)(?:\(([^)]+)\))?!?:\s*(.+)$'
)


def parse_commit_message(message: str) -> tuple:
    """
    Parse a conventional commit subject line into its type and scope.

    Accepted formats:
        <type>(<scope>): <subject>
        <type>: <subject>              (no scope)
        <type>(<scope>)!: <subject>    (breaking change)
        <type>!: <subject>             (breaking change, no scope)

    Args:
        message: A single commit subject line.

    Returns:
        ``(commit_type, scope)`` for a conventional subject, where ``scope``
        is ``None`` when no scope is given; ``(None, None)`` when the
        subject does not follow the conventional format. The breaking-change
        marker is accepted but not reported.
    """
    match = _CONVENTIONAL_SUBJECT_RE.match(message)
    if match is None:
        return (None, None)

    # group(2) is None when the optional "(scope)" part is absent.
    return (match.group(1), match.group(2))


def extract_scopes(repo_path: Path, since: str = None) -> Dict[str, List[str]]:
    """
    Collect the scopes used in the repository's commit subjects.

    Each line of the git log is parsed as a conventional commit subject.
    Lines that do not parse are ignored. Commits that have a type but no
    scope are recorded under the placeholder scope ``"<no-scope>"``.

    Args:
        repo_path: Path of the git repository to analyze.
        since: Optional date filter forwarded to the git log query.

    Returns:
        Dictionary mapping each commit type to a sorted list of its unique
        scopes, with the types themselves in sorted order.
    """
    scopes_by_type = defaultdict(set)

    for line in get_git_log(repo_path, since).splitlines():
        commit_type, scope = parse_commit_message(line.strip())
        if not commit_type:
            continue  # not a conventional commit subject
        scopes_by_type[commit_type].add(scope or "<no-scope>")

    # Sets are turned into sorted lists so the result is deterministic and
    # JSON-serializable.
    return {
        commit_type: sorted(scopes_by_type[commit_type])
        for commit_type in sorted(scopes_by_type)
    }


def format_markdown(scopes_by_type: Dict[str, List[str]], repo_path: Path, since: str = None) -> str:
    """
    Format scopes as a markdown inventory.

    The document has a header (repository name, current date, source), a
    summary section, one section per commit type listing its scopes, and a
    fixed notes footer.

    Args:
        scopes_by_type: Mapping of commit type to its scopes. The
            placeholder scope ``"<no-scope>"`` is rendered as
            "*(no scope used)*" and is not counted as a scope.
        repo_path: Repository path; only its final component is shown.
        since: Date filter used for the analysis, echoed in the summary.
            A falsy value is reported as "All time".

    Returns:
        The markdown text, without a trailing newline.
    """
    from datetime import datetime

    # Resolve first so a relative path such as "." still yields a directory
    # name; fall back to the full path when there is no final component
    # (e.g. the filesystem root).
    resolved = repo_path.resolve()
    repo_name = resolved.name or str(resolved)

    # A scope used under several commit types is counted once, and the
    # "<no-scope>" placeholder is not a scope at all.
    unique_scopes = {
        scope
        for scopes in scopes_by_type.values()
        for scope in scopes
        if scope != "<no-scope>"
    }
    period = f"Since {since}" if since else "All time"

    lines = [
        "# Scopes Inventory",
        "",
        f"**Repository:** {repo_name}",
        f"**Last Updated:** {datetime.now().strftime('%Y-%m-%d')}",
        "**Source:** Git commit history analysis",
        "",
        "## Summary",
        "",
        f"- **Total Commit Types:** {len(scopes_by_type)}",
        f"- **Total Unique Scopes:** {len(unique_scopes)}",
        f"- **Analysis Period:** {period}",
        "",
        "## Scopes by Commit Type",
        "",
    ]

    for commit_type, scopes in scopes_by_type.items():
        lines.append(f"### `{commit_type}`")
        lines.append("")
        for scope in scopes:
            if scope == "<no-scope>":
                lines.append("- *(no scope used)*")
            else:
                lines.append(f"- {scope}")
        lines.append("")

    # NOTE(review): this function never marks a scope with `*`; the footer
    # presumably describes annotations added by hand afterwards - confirm.
    lines.extend([
        "---",
        "",
        "## Notes",
        "",
        "- Scopes marked with `*` are deprecated (see constitution for alternatives)",
        "- Scopes without descriptions need definition in constitution",
        "- Generated by: `python skills/git-commit-scope-constitution/scripts/extract_scopes.py`",
    ])

    return "\n".join(lines)


def format_json(scopes_by_type: Dict[str, List[str]]) -> str:
    """
    Serialize the type-to-scopes mapping as a JSON document.

    Args:
        scopes_by_type: Mapping of commit type to its list of scopes.

    Returns:
        JSON text indented by two spaces, with keys in the mapping's own
        order.
    """
    encoder = json.JSONEncoder(indent=2)
    return encoder.encode(scopes_by_type)


def main():
    """
    Command-line entry point.

    Parses the arguments, extracts the scopes from the repository's git
    history, formats them as markdown or JSON, and either prints the result
    or writes it to the file given by ``--output``.

    Raises:
        SystemExit: With an error message when the git history cannot be
            read (git failed, git is not installed, or ``--repo`` is not an
            accessible directory).
    """
    parser = argparse.ArgumentParser(
        description="Extract scopes from git commit messages"
    )
    parser.add_argument(
        "--repo",
        type=Path,
        default=Path.cwd(),
        help="Path to git repository (default: current directory)"
    )
    parser.add_argument(
        "--since",
        type=str,
        help="Only include commits since this date (e.g., '1 month ago', '2024-01-01')"
    )
    parser.add_argument(
        "--format",
        choices=["markdown", "json"],
        default="markdown",
        help="Output format (default: markdown)"
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Output file path (default: stdout)"
    )

    args = parser.parse_args()

    # Extract scopes; report git problems as a short message on stderr with
    # a non-zero exit status instead of a traceback.
    try:
        scopes_by_type = extract_scopes(args.repo, args.since)
    except subprocess.CalledProcessError as exc:
        detail = (exc.stderr or "").strip() or f"git exited with status {exc.returncode}"
        raise SystemExit(
            f"error: could not read git history in {args.repo}: {detail}"
        ) from exc
    except OSError as exc:
        raise SystemExit(
            f"error: could not run git in {args.repo}: {exc}"
        ) from exc

    # Format output
    if args.format == "markdown":
        output = format_markdown(scopes_by_type, args.repo, args.since)
    else:
        output = format_json(scopes_by_type)

    # Write output
    if args.output:
        # Fixed encoding so the file does not depend on the locale; the
        # trailing newline matches what print() emits on stdout.
        args.output.write_text(output + "\n", encoding="utf-8")
        print(f"Scope inventory written to: {args.output}")
    else:
        print(output)

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
