#!/usr/bin/env python3
"""
Sanitize a Claude Code skill for public distribution.

Usage:
    python sanitize_skill.py <skill_directory> [--dry-run] [--report]

Options:
    --dry-run    Show changes without applying them
    --report     Generate detailed report of all changes

Created by: Claude Code
Session ID: [to be filled by skill on use]
Date: 2025-11-13
"""

import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Sanitization patterns (from references/sanitization_patterns.md)
#
# Maps a regular expression (key) to its replacement template (value).
# SkillSanitizer.sanitize_file iterates this dict in insertion order and feeds
# each pair to re.finditer / re.sub, so keys use regex syntax and values may
# contain backreferences such as \1.
#
# NOTE(review): many entries below replace a pattern with the very text it
# matches (e.g. r'~': '~'), so they cannot change file content, yet every
# match is still counted and recorded by sanitize_file. Presumably the
# original private values were scrubbed from this table - confirm and either
# restore the intended patterns or remove the no-op entries.
SANITIZATION_PATTERNS = {
    # Absolute paths
    r'~/project': '~/project',
    r'~/projects/([^/\s]+)': r'~/projects/\1',
    r'~/\.claude': '~/.claude',
    # NOTE(review): matches every single '~' character in a file.
    r'~': '~',

    # Session directory patterns
    r'-Users-username-projects-project': '-Users-username-projects-project',
    r'-Users-username-projects-([^/\s]+)': r'-Users-username-projects-\1',

    # Client database IDs ("im_" + at least four digits + "_" + digits)
    r'\bim_\d{4,}_\d+': 'im_XXXX_XXX',

    # Workspace & Agency IDs (numeric values with four or more digits)
    r'workspace_id:\s*\d{4,}': 'workspace_id: YYYY',
    r'agency_id:\s*\d{4,}': 'agency_id: XXXX',

    # Client names in paths
    r'client_cases/Example_Client/': 'client_cases/Example_Client/',
    r'client_cases/ClientName/': 'client_cases/ClientName/',
    r'client_cases/Example_Company/': 'client_cases/Example_Company/',

    # Client names in text
    r'\bExampleClient\b': 'Example Client',
    # The negative lookahead leaves "Kaiser Permanente" untouched.
    r'\bKaiser\b(?! Permanente)': 'Example Company',

    # Internal Notion database IDs (specific known ones)
    r'DATABASE_ID': 'DATABASE_ID',
    r'USER_ID_1': 'USER_ID_1',
    r'USER_ID_2': 'USER_ID_2',

    # Python venv paths
    r'~/project/claude_venv': '~/project/venv',
    r'~/project/venv': '~/project/venv',
}

# Keywords that mark author/attribution context. SkillSanitizer.is_author_context
# looks for them as case-insensitive substrings of a line and its neighbours;
# the intent is that e-mail addresses found in such context are left alone.
AUTHOR_SECTIONS = [
    'author',
    'created by',
    'contact',
    'co-authored',
    'attribution',
    'generated by',
    'session id',
    'meta note',
]

class SkillSanitizer:
    """Apply regex sanitization rules to the text files of a skill directory.

    Every applied replacement is recorded in ``self.changes`` as a
    ``(relative_path, line_number, before, after)`` tuple (line numbers are
    1-based), both in normal and in dry-run mode.

    Args:
        skill_dir: Path of the skill directory to sanitize.
        dry_run: When true, files are analysed but never written.
        patterns: Mapping of regex pattern to replacement template, applied
            in iteration order. Defaults to the module-level
            ``SANITIZATION_PATTERNS``.
        author_sections: Lower-case keywords that mark author/attribution
            context. Defaults to the module-level ``AUTHOR_SECTIONS``.
    """

    # File suffixes treated as sanitizable text; everything else is skipped.
    TEXT_SUFFIXES = ('.md', '.py', '.sh', '.txt', '.yaml', '.yml')

    def __init__(self, skill_dir: str, dry_run: bool = False,
                 patterns: Optional[Dict[str, str]] = None,
                 author_sections: Optional[List[str]] = None):
        self.skill_dir = Path(skill_dir)
        self.dry_run = dry_run
        self.patterns = SANITIZATION_PATTERNS if patterns is None else patterns
        self.author_sections = (
            AUTHOR_SECTIONS if author_sections is None else author_sections
        )
        self.changes: List[Tuple[str, int, str, str]] = []

    def is_author_context(self, line: str, line_num: int, lines: List[str]) -> bool:
        """Check if line is in author/attribution context.

        Args:
            line: Text of the line being inspected.
            line_num: 0-based index of ``line`` within ``lines``.
            lines: All lines of the file.

        Returns:
            True when ``line`` itself, or the window made of the two lines
            before and the two lines after it, contains one of the author
            keywords (case-insensitive substring match).
        """
        # Check current line
        line_lower = line.lower()
        if any(section in line_lower for section in self.author_sections):
            return True

        # Check surrounding lines (2 before, 2 after)
        start = max(0, line_num - 2)
        end = min(len(lines), line_num + 3)
        context = ' '.join(lines[start:end]).lower()

        return any(section in context for section in self.author_sections)

    def _apply_pattern(self, content: str, pattern: str, replacement: str,
                       rel_path: str) -> Tuple[str, int]:
        """Apply one rule to ``content``; return (new_content, applied_count)."""
        # Split the text this pass actually searches, so line numbers stay
        # correct even if an earlier rule added or removed newlines.
        lines = content.split('\n')
        applied = 0

        def substitute(match):
            nonlocal applied
            before = match.group(0)
            after = match.expand(replacement)

            # A rule that maps text onto itself is not a change.
            if after == before:
                return before

            line_num = content.count('\n', 0, match.start())

            # Preserve e-mail addresses that sit in author/attribution context.
            if '@' in before and self.is_author_context(lines[line_num], line_num, lines):
                return before

            self.changes.append((rel_path, line_num + 1, before, after))
            applied += 1
            return after

        return re.sub(pattern, substitute, content), applied

    def sanitize_file(self, file_path: Path) -> int:
        """Sanitize a single file.

        Args:
            file_path: File to process. Paths that are not regular files, or
                whose suffix is not in ``TEXT_SUFFIXES``, are ignored.

        Returns:
            Number of replacements that actually change text. Matches that
            are preserved (author e-mails) or whose replacement equals the
            matched text are not counted.
        """
        if not file_path.is_file():
            return 0

        # Skip binary files
        if file_path.suffix not in self.TEXT_SUFFIXES:
            return 0

        try:
            # newline='' disables newline translation so the file's original
            # line endings survive the read/write round trip.
            with open(file_path, 'r', encoding='utf-8', newline='') as f:
                content = f.read()
        except UnicodeDecodeError:
            print(f"⚠️  Skipping binary file: {file_path}")
            return 0

        try:
            rel_path = str(file_path.relative_to(self.skill_dir))
        except ValueError:
            # File lies outside the skill directory; report it by full path.
            rel_path = str(file_path)

        original_content = content
        replacements = 0

        # Apply sanitization patterns
        for pattern, replacement in self.patterns.items():
            content, applied = self._apply_pattern(
                content, pattern, replacement, rel_path
            )
            replacements += applied

        # Write changes if not dry-run
        if not self.dry_run and content != original_content:
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.write(content)

        return replacements

    def sanitize_skill(self) -> Dict:
        """Sanitize entire skill directory.

        Returns:
            On success a dict with ``status`` ('success'),
            ``total_replacements``, ``files_processed`` and ``changes``.
            When the skill directory is missing (or is not a directory) a
            dict with ``status`` ('error') and ``message``.
        """
        if not self.skill_dir.is_dir():
            print(f"❌ Error: Skill directory not found: {self.skill_dir}")
            return {'status': 'error', 'message': 'Directory not found'}

        # Find all files to sanitize (sorted for reproducible output)
        files_to_process = []
        for suffix in self.TEXT_SUFFIXES:
            files_to_process.extend(self.skill_dir.rglob(f'*{suffix}'))
        files_to_process.sort()

        print(f"🔍 Scanning {len(files_to_process)} files in: {self.skill_dir.name}")
        print("━" * 60)

        total_replacements = 0
        for file_path in files_to_process:
            replacements = self.sanitize_file(file_path)
            if replacements > 0:
                total_replacements += replacements
                print(f"  {'[DRY-RUN]' if self.dry_run else '✓'} {file_path.relative_to(self.skill_dir)}: {replacements} changes")

        return {
            'status': 'success',
            'total_replacements': total_replacements,
            'files_processed': len(files_to_process),
            'changes': self.changes
        }

    def generate_report(self) -> str:
        """Generate detailed sanitization report.

        Returns:
            A multi-line text listing every recorded change grouped by file,
            or a one-line message when no change was recorded.
        """
        if not self.changes:
            return "✅ No changes needed - skill is already sanitized!"

        report = [
            "📝 SANITIZATION REPORT",
            "━" * 60,
            f"Skill: {self.skill_dir.name}",
            f"Total changes: {len(self.changes)}",
            "",
        ]

        # Group by file, keeping first-seen file order
        by_file: Dict[str, List[Tuple[int, str, str]]] = {}
        for file, line_num, before, after in self.changes:
            by_file.setdefault(file, []).append((line_num, before, after))

        for file, changes in by_file.items():
            report.append(f"📄 {file}")
            for line_num, before, after in changes:
                report.append(f"  Line {line_num}:")
                report.append(f"    BEFORE: {before}")
                report.append(f"    AFTER:  {after}")
            report.append("")

        return "\n".join(report)


def main():
    """Command-line entry point.

    Reads ``sys.argv``: the first argument that is not one of the known
    flags is the skill directory; ``--dry-run`` and ``--report`` may appear
    anywhere on the command line. Exits with status 0 on success and 1 on a
    usage error or when sanitization reports an error.
    """
    args = sys.argv[1:]
    known_flags = ('--dry-run', '--report')

    # Flags may precede the directory, so do not assume it is argv[1].
    positional = [arg for arg in args if arg not in known_flags]
    if not positional:
        print("Usage: python sanitize_skill.py <skill_directory> [--dry-run] [--report]")
        sys.exit(1)

    skill_dir = positional[0]
    dry_run = '--dry-run' in args
    show_report = '--report' in args

    sanitizer = SkillSanitizer(skill_dir, dry_run=dry_run)
    result = sanitizer.sanitize_skill()

    if result['status'] != 'success':
        print(f"❌ Error: {result['message']}")
        sys.exit(1)

    print("")
    print("━" * 60)
    if dry_run:
        print(f"🔍 DRY RUN: Found {result['total_replacements']} items to sanitize")
        print("   Run without --dry-run to apply changes")
    else:
        print(f"✅ Sanitization complete: {result['total_replacements']} replacements made")

    # A dry run always shows the detailed report.
    if show_report or dry_run:
        print("")
        print(sanitizer.generate_report())

    sys.exit(0)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
