#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.8"
# ///

import argparse
import hashlib
import os
import re
import signal
import subprocess
import sys
from pathlib import Path
from typing import Dict, Optional, Union, TextIO, List, Tuple


__version__ = "0.1.2"


def _try_import_tiktoken():
    try:
        import tiktoken  # type: ignore

        return tiktoken
    except Exception:
        return None


class TeeStream:
    """Mirror every write to a primary stream onto a secondary file handle.

    The primary stream is authoritative: its write count is returned and its
    errors propagate.  Failures on the secondary handle are swallowed so a
    broken log-file copy can never break normal stderr usage.
    """

    def __init__(self, primary: TextIO, secondary: TextIO) -> None:
        self.primary = primary
        self.secondary = secondary

    def write(self, data: str) -> int:
        count = self.primary.write(data)
        self.primary.flush()
        try:
            self.secondary.write(data)
            self.secondary.flush()
        except Exception:
            pass  # best-effort mirroring only
        return count

    def flush(self) -> None:
        self.primary.flush()
        try:
            self.secondary.flush()
        except Exception:
            pass  # secondary flush failures are ignored

    def isatty(self) -> bool:
        # Delegate to the primary stream when it supports the query.
        if hasattr(self.primary, "isatty"):
            return self.primary.isatty()
        return False

    def close(self) -> None:
        # Only the secondary (file) handle is owned by this object.
        try:
            self.secondary.close()
        except Exception:
            pass


def get_invocation_cwd() -> Path:
    """Return the caller's working directory, best-effort.

    Some launchers (e.g. ``uv --script``) may chdir to the script's location.
    The shell's original ``$PWD`` is preferred when it points at an existing
    directory; otherwise fall back to the process cwd.
    """
    try:
        env_pwd = os.environ.get("PWD")
        if env_pwd:
            candidate = Path(env_pwd).resolve()
            if candidate.exists() and candidate.is_dir():
                return candidate
    except Exception:
        pass  # any resolution failure falls through to Path.cwd()
    return Path.cwd()


def setup_tiktoken_cache() -> None:
    """Point tiktoken at a local cache and pre-seed the cl100k_base vocab.

    Sets TIKTOKEN_CACHE_DIR to ~/.cache/tiktoken and, if the encoding blob is
    not cached yet, attempts a short best-effort download.  Any download
    failure is silently ignored; token counting simply becomes unavailable.
    """
    cache_dir = Path.home() / ".cache" / "tiktoken"
    cache_dir.mkdir(parents=True, exist_ok=True)
    os.environ["TIKTOKEN_CACHE_DIR"] = str(cache_dir)

    # tiktoken names cache entries by the SHA-1 hex digest of the blob URL.
    blobpath = "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
    cache_key = hashlib.sha1(blobpath.encode()).hexdigest()
    cache_file = cache_dir / cache_key
    if cache_file.exists():
        return

    try:
        import urllib.request
        import ssl

        # NOTE: certificate verification is disabled on purpose so the
        # download still works behind TLS-intercepting corporate proxies.
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE

        with urllib.request.urlopen(blobpath, context=ssl_context, timeout=3) as response:
            cache_file.write_bytes(response.read())
    except Exception:
        pass  # proceed without a seeded cache


def count_tokens(text: str) -> Optional[int]:
    """Count tokens in *text* using tiktoken's cl100k_base encoding.

    Returns None when tiktoken is unavailable or counting fails; callers
    treat None as "token estimate unknown".
    """
    try:
        tiktoken = _try_import_tiktoken()
        if tiktoken is None:
            return None

        # Set up cache before trying to get encoding
        setup_tiktoken_cache()
        
        # Try to get the encoding
        encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text))
    except Exception as e:
        # If token counting fails, warn and return None
        print(f"Warning: Could not count tokens: {e}", file=sys.stderr)
        return None


def parse_args() -> argparse.Namespace:
    """Build the CLI argument parser and parse sys.argv.

    The detailed usage guide (output behavior, snippets, piping to LLMs)
    lives in the parser's epilog and is shown by --help.
    """
    parser = argparse.ArgumentParser(
        description=(
            f"repo_clipboard {__version__}\n"
            "\n"
            "Copy directory contents to Windows clipboard (default) or emit XML on stdout (with --llm).\n"
            "Always scans recursively from the current directory."
        ),
        prog="repo_clipboard",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "OUTPUT BEHAVIOR\n"
            "  Clipboard mode (default):\n"
            "    - XML is copied to the Windows clipboard (via clip.exe on WSL).\n"
            "    - A copy of the XML is also written to /tmp/repo_clipboard.stdout.\n"
            "    - Logs/warnings are printed to the terminal and mirrored to /tmp/repo_clipboard.stderr.\n"
            "\n"
            "  LLM mode (--llm):\n"
            "    - XML is printed on stdout (suitable for piping).\n"
            "    - Metadata (token estimate, file locations) is printed on stderr.\n"
            "    - Both streams are mirrored to /tmp (stdout → /tmp/repo_clipboard.stdout,\n"
            "      stderr → /tmp/repo_clipboard.stderr).\n"
            "\n"
            "SAVE TO A FILE\n"
            "  - Quickest: use --llm and redirect stdout:\n"
            "      repo_clipboard --llm -e py,md > snapshot.xml\n"
            "  - Or pick up the /tmp copy:\n"
            "      cp /tmp/repo_clipboard.stdout snapshot.xml\n"
            "\n"
            "PIPE TO AN LLM\n"
            "  - Example with the 'llm' CLI:\n"
            "      repo_clipboard --llm -e py,md | llm -m wbg:gpt-5-high -s \"Review focus...\" -x\n"
            "    The token estimate and file paths are printed to stderr so your stdout pipe stays clean.\n"
            "\n"
            "FILE LIST & TOKEN ESTIMATES\n"
            "  - Add --print-files to list included files with approximate token counts.\n"
            "  - In --llm mode, the list prints to stderr (to keep stdout clean).\n"
            "\n"
            "SNIPPETS\n"
            "  - Include file fragments with --snippet 'path:start[-end]'. Supported forms:\n"
            "      • Single line: path:N\n"
            "      • Open-end:   path:N-   (from N to EOF)\n"
            "      • Open-start: path:-M   (from start to M)\n"
            "      • Bounded:    path:N-M\n"
            "  - Repeat --snippet as needed, or use --snippets-file (one spec per line; '#' and blank lines ignored).\n"
            "  - Use --snippets-only to include only snippets and prevent inclusion of non-snippet files.\n"
            "  - Snippets appear as <file ... snippet_from=\"...\" start=\"N\" end=\"M\"> in the XML (no content headers).\n"
        ),
    )
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"repo_clipboard {__version__}",
    )
    parser.add_argument(
        "-e",
        "--extensions",
        default="",
        help="Comma-separated list of file extensions to include (no dots). Example: 'py,yaml,txt'. If empty, include all",
    )
    parser.add_argument(
        "--max_size",
        type=int,
        default=50,
        help="Maximum file size in KB to include (default 50KB)",
    )
    # NOTE: type=lambda parses a boolean-ish string; anything outside the
    # listed truthy values (including typos) silently becomes False.
    parser.add_argument(
        "--git-ignore",
        type=lambda x: x.lower() in ['true', '1', 'yes', 'y'],
        default=True,
        help="Respect .gitignore patterns (default: True). Use '--git-ignore False' to include all files. Ignored when --llm is used (always True)",
    )
    parser.add_argument(
        "--no-ignore-common",
        action="store_true",
        help="Include common development files/directories like .venv, __pycache__, node_modules, etc. (default: ignore them)",
    )
    parser.add_argument(
        "--ignore-list",
        type=str,
        help="Regex pattern to ignore files (applied to relative file paths). Example: '.*\\.log$|__pycache__'",
    )
    parser.add_argument(
        "--only-include",
        type=str,
        help="Regex pattern to include only matching files (applied to relative file paths). Example: '.*\\.py$' for Python files only",
    )
    parser.add_argument(
        "--llm",
        action="store_true",
        help=(
            "LLM mode: print XML to stdout (for piping), emit metadata on stderr,"
            " mirror both to /tmp/repo_clipboard.(stdout|stderr); forces --git-ignore True"
        ),
    )
    parser.add_argument(
        "--install",
        action="store_true",
        help="Install script into ~/.local/bin and add aliases to .bashrc and Windows PowerShell",
    )
    parser.add_argument(
        "--print-files",
        action="store_true",
        help="Print the list of included files (stderr in --llm mode)",
    )
    # Snippets are repeatable; action="append" collects them into a list.
    parser.add_argument(
        "--snippet",
        action="append",
        help=(
            "Include a file snippet specified as 'path:start[-end]'. Supports single line (N), open-end (N-),"
            " open-start (-M), and bounded (N-M) ranges. Repeatable."
        ),
    )
    parser.add_argument(
        "--snippets-file",
        type=str,
        help=(
            "Path to a file containing one snippet spec per line in the form 'path:start[-end]'."
            " Lines starting with '#' or blank lines are ignored."
        ),
    )
    parser.add_argument(
        "--snippets-only",
        action="store_true",
        help=(
            "Only include snippets specified via --snippet/--snippets-file; prevents inclusion of non-snippet files."
        ),
    )
    return parser.parse_args()


def install_script(script_path: Path) -> None:
    """Install this script into ~/.local/bin and wire up shell integration.

    Steps:
      1. Copy the script to ~/.local/bin and add execute bits.
      2. Ensure ~/.local/bin is on PATH via ~/.bashrc (idempotent).
      3. Best-effort: add a 'repo_clipboard' alias to the Windows PowerShell
         profile via WSL interop (powershell.exe / wslpath); any failure here
         is reported and otherwise non-fatal.
    """
    target_dir = Path.home() / ".local" / "bin"
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / script_path.name
    print(f"Copying script to {target}")
    target.write_bytes(script_path.read_bytes())
    # Add execute permission for user/group/other on top of existing mode.
    target.chmod(target.stat().st_mode | 0o111)

    bashrc = Path.home() / ".bashrc"
    path_line = "export PATH=\"$PATH:$HOME/.local/bin\""
    if bashrc.exists():
        bash_content = bashrc.read_text()
    else:
        bash_content = ""
    # Only append the PATH line if it is not already present (idempotent).
    if path_line not in bash_content:
        print("Adding ~/.local/bin to PATH in .bashrc")
        with bashrc.open("a") as fh:
            fh.write(f"\n# Added by repo_clipboard installer\n{path_line}\n")

    try:
        proc = subprocess.run(
            ["powershell.exe", "-NoProfile", "-Command", "$PROFILE"],
            capture_output=True,
            text=True,
            check=True,
        )
        profile_path_win = proc.stdout.strip()
        if not profile_path_win:
            raise RuntimeError("PowerShell returned empty $PROFILE")

        # Convert the Windows path to a WSL path so we don't accidentally create a
        # literal file named like 'C:\\Users\\...' in the current directory.
        profile_path_wsl = subprocess.run(
            ["wslpath", "-u", profile_path_win],
            capture_output=True,
            text=True,
            check=True,
        ).stdout.strip()
        if not profile_path_wsl:
            raise RuntimeError(f"wslpath returned empty output for: {profile_path_win}")

        profile_path = Path(profile_path_wsl)
        ps_content = profile_path.read_text() if profile_path.exists() else ""
        # The alias must point at the Windows-visible path of the target.
        win_path = subprocess.run(
            ["wslpath", "-w", str(target)],
            capture_output=True,
            text=True,
            check=True,
        ).stdout.strip()
        alias_line = f'Set-Alias repo_clipboard "{win_path}"'
        if alias_line not in ps_content:
            print(f"Adding alias to PowerShell profile at {profile_path_win}")
            profile_path.parent.mkdir(parents=True, exist_ok=True)
            with profile_path.open("a") as fh:
                fh.write(f"\n# Added by repo_clipboard installer\n{alias_line}\n")
    except Exception as e:
        print(f"Failed to modify PowerShell profile: {e}")

    print("Installation complete. Restart your shells to use 'repo_clipboard'.")


def is_git_repository(path: Path) -> bool:
    """Return True if *path* is inside a git repository.

    Fix: also returns False when the ``git`` executable is not installed
    (FileNotFoundError from subprocess) instead of letting the exception
    propagate and crash the scan.
    """
    try:
        subprocess.run(
            ["git", "rev-parse", "--git-dir"],
            capture_output=True,
            check=True,
            cwd=path,
        )
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False


def get_git_root(path: Path) -> Path:
    """Return the top-level directory of the git repository containing *path*.

    Raises subprocess.CalledProcessError if *path* is not inside a repository.
    """
    completed = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        capture_output=True,
        text=True,
        check=True,
        cwd=path,
    )
    return Path(completed.stdout.strip())


def load_file_list_from_current_dir(
    current_dir: Path, respect_gitignore: bool, verbose: bool = True
) -> list:
    """Collect all files under *current_dir*, recursively.

    When *respect_gitignore* is set and we are inside a git repository,
    ``git ls-files`` is run from the repository root (so paths resolve even
    from a subdirectory) and the result is narrowed to the *current_dir*
    subtree.  Otherwise — or if git fails — a plain filesystem walk is used.
    """
    if verbose:
        print(f"Loading files recursively from {current_dir}")

    use_git = respect_gitignore and is_git_repository(current_dir)
    if not use_git:
        if verbose and respect_gitignore:
            print("Not in a git repository or .gitignore disabled, scanning filesystem")
        return [p for p in current_dir.rglob('*') if p.is_file()]

    git_root = get_git_root(current_dir)
    try:
        # Tracked (-c) plus untracked-but-not-ignored (-o) files.
        result = subprocess.run(
            ["git", "ls-files", "-c", "-o", "--exclude-standard"],
            capture_output=True,
            text=True,
            check=True,
            cwd=git_root,
        )
    except subprocess.CalledProcessError as exc:
        if verbose:
            print(f"Warning: git ls-files failed: {exc}, falling back to filesystem scan")
        return [p for p in current_dir.rglob('*') if p.is_file()]

    files = []
    for raw_line in result.stdout.splitlines():
        name = raw_line.strip()
        if not name:
            continue
        candidate = git_root / name  # git paths are relative to the repo root
        try:
            candidate.relative_to(current_dir)
        except ValueError:
            continue  # outside the requested subtree
        if candidate.is_file():
            files.append(candidate)

    if verbose:
        print(
            f"Using git ls-files from {git_root}, found {len(files)} files under {current_dir} respecting .gitignore"
        )
    return files


def build_tree(paths: list, repo_root: Path) -> Dict[str, Union[dict, Path]]:
    """Arrange *paths* into a nested dict mirroring the directory layout.

    Intermediate keys map to sub-dicts (directories); leaf keys map to the
    original Path objects.  All paths must live under *repo_root*.
    """
    root: Dict[str, Union[dict, Path]] = {}
    for file_path in paths:
        parts = file_path.relative_to(repo_root).parts
        cursor = root
        for directory in parts[:-1]:
            cursor = cursor.setdefault(directory, {})  # type: ignore[assignment]
        cursor[parts[-1]] = file_path
    return root


def generate_xml(
    node: Dict[str, Union[dict, Path]],
    indent: int = 0,
    file_contents: Optional[Dict[Path, str]] = None,
    snippet_meta: Optional[Dict[Path, Tuple[str, int, int]]] = None,
) -> str:
    """Render a build_tree() mapping as indented pseudo-XML.

    Args:
        node: nested mapping of names to sub-dicts (directories) or Paths (files).
        indent: current indentation width in spaces.
        file_contents: optional pre-read contents keyed by Path; files missing
            from the mapping are read from disk (UTF-8, errors replaced).
        snippet_meta: optional (source, start, end) tuples, emitted as
            snippet_from/start/end attributes on matching <file> elements.

    Returns:
        The XML fragment.  NOTE: names and content are embedded verbatim with
        no XML escaping — the output is a raw snapshot, not strict XML.
    """
    lines = []
    for name, child in sorted(node.items()):
        prefix = " " * indent
        if isinstance(child, dict):
            lines.append(f"{prefix}<directory name=\"{name}\">")
            # Fix: forward snippet_meta through the recursion; previously it
            # was dropped, so snippets nested inside directories lost their
            # snippet_from/start/end attributes.
            lines.append(
                generate_xml(
                    child,
                    indent + 2,
                    file_contents=file_contents,
                    snippet_meta=snippet_meta,
                )
            )
            lines.append(f"{prefix}</directory>")
        else:
            content = ""
            if file_contents is not None and child in file_contents:
                content = file_contents[child]
            else:
                try:
                    content = child.read_text(encoding="utf-8", errors="replace")
                except Exception as e:
                    print(f"Could not read {child}: {e}", file=sys.stderr)
                    content = ""
            # Snippet entries carry provenance attributes instead of headers.
            if snippet_meta is not None and child in snippet_meta:
                src, s_start, s_end = snippet_meta[child]
                lines.append(
                    f"{prefix}<file name=\"{name}\" snippet_from=\"{src}\" start=\"{s_start}\" end=\"{s_end}\">{content}</file>"
                )
            else:
                lines.append(f"{prefix}<file name=\"{name}\">{content}</file>")
    return "\n".join(lines)


def get_common_ignore_patterns() -> list:
    """Return gitignore-style glob patterns for common development artifacts.

    Each directory entry appears twice ('dir/' and 'dir/**') so that, after
    conversion to regex by convert_gitignore_patterns_to_regex(), both the
    directory path itself and everything beneath it are matched.
    """
    return [
        # VCS / metadata
        '.git/', '.git/**',
        '.hg/', '.hg/**',
        '.svn/', '.svn/**',
        '.bzr/', '.bzr/**',

        # Environment managers / dev shells
        '.direnv/', '.direnv/**',
        '.env', '.env.*',

        # Editor/IDE
        '.idea/', '.idea/**',
        '.vscode/', '.vscode/**',
        '*.swp',
        '*.swo',
        '*~',

        # Agent / workflow tooling
        '.beads/', '.beads/**',
        '.claude/', '.claude/**',
        '.codex/', '.codex/**',
        '.aider*',
        '.cursor/', '.cursor/**',
        '.windsurf/', '.windsurf/**',

        # OS files
        '.DS_Store',
        '.DS_Store?',
        '._*',
        '__MACOSX/', '__MACOSX/**',
        'Thumbs.db',

        # Python virtual environments
        '.venv/', '.venv/**',
        'venv/', 'venv/**',
        'env/', 'env/**',
        '__pypackages__/', '__pypackages__/**',
        '.nox/', '.nox/**',

        # Python cache and compiled files
        '__pycache__/', '__pycache__/**',
        '*.pyc',
        '*.pyo',
        '*.pyd',
        '.ipynb_checkpoints/', '.ipynb_checkpoints/**',

        # Python testing and coverage
        '.pytest_cache/', '.pytest_cache/**',
        '.coverage',
        'coverage.xml',
        'htmlcov/', 'htmlcov/**',
        'coverage/', 'coverage/**',
        '.tox/', '.tox/**',

        # Python build artifacts
        'dist/', 'dist/**',
        'build/', 'build/**',
        '*.egg-info/', '*.egg-info/**',
        '*.egg',

        # Python tools caches
        '.mypy_cache/', '.mypy_cache/**',
        '.ruff_cache/', '.ruff_cache/**',
        '.cache/', '.cache/**',

        # Node.js
        'node_modules/', 'node_modules/**',
        '.npm/', '.npm/**',
        '.yarn/', '.yarn/**',
        '.pnp.cjs',
        '.pnp.loader.mjs',
        '.pnpm-store/', '.pnpm-store/**',
        '.eslintcache',
        '.stylelintcache',
        'npm-debug.log*',
        'pnpm-debug.log*',
        'yarn-debug.log*',
        'yarn-error.log*',

        # JavaScript framework caches
        '.next/', '.next/**',
        '.nuxt/', '.nuxt/**',
        '.svelte-kit/', '.svelte-kit/**',
        '.turbo/', '.turbo/**',
        '.parcel-cache/', '.parcel-cache/**',
        '.vite/', '.vite/**',

        # Rust
        'target/', 'target/**',

        # Go
        'vendor/', 'vendor/**',

        # .NET
        'bin/', 'bin/**',
        'obj/', 'obj/**',

        # Java / JVM
        '.gradle/', '.gradle/**',
        '*.class',
        '*.jar',
        '*.war',
        '*.ear',

        # Terraform
        '.terraform/', '.terraform/**',
        '.terraform.lock.hcl',

        # Log files
        '*.log',

        # Temporary files
        '*.tmp',
        '*.temp',
        '*.bak',
        '*.backup',
        '*.orig',
        '*.rej',
        'tmp/', 'tmp/**',
        'temp/', 'temp/**',

        # Common binary/data artifacts (usually not helpful for LLM snapshots)
        '*.pdf',
        '*.zip',
        '*.tar',
        '*.tgz',
        '*.gz',
        '*.bz2',
        '*.xz',
        '*.7z',
        '*.rar',
        '*.png',
        '*.jpg',
        '*.jpeg',
        '*.gif',
        '*.bmp',
        '*.ico',
        '*.mp4',
        '*.mov',
        '*.mp3',
        '*.wav',
        '*.ttf',
        '*.otf',
        '*.woff',
        '*.woff2',
        '*.db',
        '*.sqlite',
        '*.sqlite3',
        '*.duckdb',
        '*.parquet',
        '*.xlsx',
        '*.xls',
    ]


def _glob_fragment_to_regex(fragment: str) -> str:
    """Translate one glob fragment to regex: '**' → '.*', '*' → '[^/]*', '?' → '[^/]'.

    All other characters are regex-escaped.
    """
    out = []
    i = 0
    while i < len(fragment):
        c = fragment[i]
        if c == '*':
            if i + 1 < len(fragment) and fragment[i + 1] == '*':
                out.append('.*')
                i += 2
            else:
                out.append('[^/]*')
                i += 1
        elif c == '?':
            out.append('[^/]')
            i += 1
        else:
            out.append(re.escape(c))
            i += 1
    return ''.join(out)


def convert_gitignore_patterns_to_regex(patterns: list) -> list:
    """Convert gitignore-style patterns to regex patterns for compatibility.

    Blank lines, '#' comments, and '!' negation rules are skipped.  A leading
    '/' anchors the pattern to the root; a trailing '/' marks a directory
    pattern, which also matches everything beneath it.

    (Improvement: the glob-translation helper is now a module-level function
    instead of being re-defined on every loop iteration.)
    """
    regex_patterns = []
    for pattern in patterns:
        pattern = pattern.strip()
        if not pattern or pattern.startswith('#'):
            continue

        # Ignore negation rules for now (out of scope for "common ignores")
        if pattern.startswith('!'):
            continue

        anchored = pattern.startswith('/')
        if anchored:
            pattern = pattern[1:]

        is_dir = pattern.endswith('/')
        if is_dir:
            pattern = pattern[:-1]

        body = _glob_fragment_to_regex(pattern)
        # Unanchored patterns may match at any directory depth.
        prefix = '^' if anchored else r'(^|.*/)'

        if is_dir:
            regex_patterns.append(prefix + body + r'(/.*)?$')
        else:
            regex_patterns.append(prefix + body + r'$')

    return regex_patterns


def apply_regex_filters(
    files: list,
    root_dir: Path,
    ignore_regex: Optional[str] = None,
    include_regex: Optional[str] = None,
) -> list:
    """Filter *files* by regexes applied to their paths relative to *root_dir*.

    Fix: default annotations were ``str = None``; they are now ``Optional[str]``.

    Files outside *root_dir* are dropped.  The include filter (if any) runs
    first, then the ignore filter.  An invalid regex emits a warning on
    stderr and that filter is skipped rather than raising.
    """
    # Convert to relative paths for pattern matching.
    relative_files = []
    for f in files:
        try:
            rel_path = f.relative_to(root_dir)
            relative_files.append((f, str(rel_path)))
        except ValueError:
            # File is outside root_dir, skip
            continue

    # Apply include pattern first if specified.
    if include_regex:
        try:
            include_pattern = re.compile(include_regex)
            relative_files = [(f, rel) for f, rel in relative_files if include_pattern.search(rel)]
        except re.error as e:
            print(f"Warning: Invalid include regex pattern '{include_regex}': {e}", file=sys.stderr)

    # Apply ignore pattern.
    if ignore_regex:
        try:
            ignore_pattern = re.compile(ignore_regex)
            relative_files = [(f, rel) for f, rel in relative_files if not ignore_pattern.search(rel)]
        except re.error as e:
            print(f"Warning: Invalid ignore regex pattern '{ignore_regex}': {e}", file=sys.stderr)

    # Return only the file paths.
    return [f for f, rel in relative_files]


def apply_common_ignore_patterns(files: list, root_dir: Path) -> list:
    """Drop files matching the built-in common development ignore set.

    The gitignore-style patterns are converted to regexes and combined into a
    single alternation, then applied via apply_regex_filters().
    """
    regexes = convert_gitignore_patterns_to_regex(get_common_ignore_patterns())
    if not regexes:
        return files
    combined = '|'.join(f'({rx})' for rx in regexes)
    return apply_regex_filters(files, root_dir, ignore_regex=combined)


def main() -> None:
    # Handle broken pipe errors gracefully
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    
    args = parse_args()
    script_path = Path(__file__).resolve()

    original_stderr = sys.stderr
    stderr_handle: Optional[TextIO] = None
    # Always mirror to /tmp in both modes for agent interoperability
    stdout_path: Optional[Path] = Path("/tmp/repo_clipboard.stdout")
    stderr_path: Optional[Path] = Path("/tmp/repo_clipboard.stderr")

    try:
        if args.install:
            install_script(script_path)
            return

        # In --llm mode, enforce .gitignore and set up stderr tee; in non-llm mode,
        # still tee stderr to /tmp for easier debugging by agents.
        if args.llm:
            args.git_ignore = True
        try:
            if stderr_path is not None:
                stderr_handle = stderr_path.open("w", encoding="utf-8")
                sys.stderr = TeeStream(original_stderr, stderr_handle)
        except Exception as exc:
            print(
                f"Warning: Could not open {stderr_path} for stderr copy: {exc}",
                file=original_stderr,
            )
            stderr_handle = None
            stderr_path = None

        # Print version info (unless in LLM mode)
        if not args.llm:
            print(f"repo_clipboard {__version__}")

        # Always work from the caller's directory (prefer $PWD)
        current_dir = get_invocation_cwd()
        
        # Load file list from current directory
        files = load_file_list_from_current_dir(current_dir, args.git_ignore, verbose=not args.llm)

        allowed_extensions = {
            ext.strip().lstrip('.')
            for ext in args.extensions.split(',') if ext.strip()
        }
        if not allowed_extensions:
            allowed_extensions = None

        max_bytes = args.max_size * 1024

        # Apply common ignore patterns unless --no-ignore-common is set
        if not args.no_ignore_common:
            files = apply_common_ignore_patterns(files, current_dir)
            if not args.llm:
                print("Ignoring common development files (use --no-ignore-common to include all)")
        
        # Apply regex filters
        ignore_regex = args.ignore_list if args.ignore_list else None
        include_regex = args.only_include if args.only_include else None
        
        if ignore_regex or include_regex:
            files = apply_regex_filters(files, current_dir, ignore_regex, include_regex)

        selected: List[Path] = []
        if not args.snippets_only:
            for f in files:
                if allowed_extensions is not None and f.suffix.lstrip('.') not in allowed_extensions:
                    if not args.llm:
                        print(f"Skipping {f} due to extension")
                    continue
                try:
                    size = f.stat().st_size
                except FileNotFoundError:
                    continue
                if size > max_bytes:
                    print(f"Skipping {f} due to size {size} bytes")
                    continue
                selected.append(f)

        file_display_entries: List[Tuple[str, Path]] = []
        file_contents: Dict[Path, str] = {}
        file_token_estimates: Dict[Path, Optional[int]] = {}
        snippet_meta: Dict[Path, Tuple[str, int, int]] = {}

        for file_path in selected:
            try:
                display_path = str(file_path.relative_to(current_dir))
            except ValueError:
                display_path = str(file_path)
            file_display_entries.append((display_path, file_path))

            try:
                content = file_path.read_text(encoding="utf-8", errors="replace")
            except Exception as exc:
                content = ""
                file_token_estimates[file_path] = None
                print(f"Warning: Could not read {file_path}: {exc}", file=sys.stderr)
            else:
                raw_tokens = count_tokens(content)
                if raw_tokens is None:
                    file_token_estimates[file_path] = None
                else:
                    file_token_estimates[file_path] = int(raw_tokens * 1.2)
            file_contents[file_path] = content

        # Parse and include snippet entries
        def _parse_snippet_spec(spec: str) -> Optional[Tuple[Path, Optional[int], Optional[int]]]:
            try:
                if ':' not in spec:
                    print(f"Warning: invalid snippet spec (missing ':'): {spec}", file=sys.stderr)
                    return None
                p_str, rng = spec.split(':', 1)
                p_str = p_str.strip()
                rng = rng.strip()
                p = (current_dir / p_str) if not os.path.isabs(p_str) else Path(p_str)
                if not p.exists() or not p.is_file():
                    print(f"Warning: snippet source not found or not a file: {p_str}", file=sys.stderr)
                    return None
                # Forms: N, N-, -M, N-M
                start: Optional[int] = None
                end: Optional[int] = None
                if '-' in rng:
                    s_str, e_str = rng.split('-', 1)
                    s_str = s_str.strip()
                    e_str = e_str.strip()
                    if s_str:
                        if not s_str.isdigit():
                            print(f"Warning: invalid start in snippet: {spec}", file=sys.stderr)
                            return None
                        start = int(s_str)
                    if e_str:
                        if not e_str.isdigit():
                            print(f"Warning: invalid end in snippet: {spec}", file=sys.stderr)
                            return None
                        end = int(e_str)
                else:
                    if not rng.isdigit():
                        print(f"Warning: invalid single-line snippet: {spec}", file=sys.stderr)
                        return None
                    start = int(rng)
                    end = start
                return (p, start, end)
            except Exception as exc:
                print(f"Warning: failed parsing snippet spec '{spec}': {exc}", file=sys.stderr)
                return None

        # Gather snippet specs from repeated --snippet flags and, optionally,
        # from a file of one-spec-per-line entries (blank lines and '#'
        # comment lines are ignored).
        snippet_specs: List[str] = []
        if args.snippet:
            snippet_specs.extend(args.snippet)
        if args.snippets_file:
            try:
                for line in Path(args.snippets_file).read_text().splitlines():
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    snippet_specs.append(line)
            except Exception as exc:
                # Best-effort: a missing/unreadable specs file warns, never aborts.
                print(f"Warning: failed reading --snippets-file: {exc}", file=sys.stderr)

        # Materialize each snippet as a "virtual file" next to its source so
        # the downstream tree/XML generation treats it like a real selection.
        snippet_counter = 0
        for spec in snippet_specs:
            parsed = _parse_snippet_spec(spec)
            if not parsed:
                continue
            src_path, start_opt, end_opt = parsed
            try:
                lines = src_path.read_text(encoding="utf-8", errors="replace").splitlines(keepends=True)
            except Exception as exc:
                print(f"Warning: could not read snippet source {src_path}: {exc}", file=sys.stderr)
                continue
            total = len(lines)
            # Defaults: open-start → 1, open-end → EOF
            start = 1 if start_opt is None else start_opt
            end = total if end_opt is None else end_opt
            # Clamp out-of-range bounds rather than rejecting the spec.
            if start < 1:
                start = 1
            if end > total:
                end = total
            if start > end:
                print(f"Warning: snippet start > end in '{spec}' after clamping; skipping", file=sys.stderr)
                continue
            # start/end are 1-based and inclusive, hence the start-1 slice origin.
            snippet_text = ''.join(lines[start - 1:end])
            snippet_bytes = len(snippet_text.encode('utf-8'))
            if snippet_bytes > max_bytes:
                print(f"Warning: skipping snippet {spec} due to size {snippet_bytes} bytes (> {max_bytes})", file=sys.stderr)
                continue
            # Virtual path under the source's parent; unique leaf
            try:
                rel_src = str(src_path.relative_to(current_dir))
            except ValueError:
                # Source lies outside current_dir; fall back to its full path.
                rel_src = str(src_path)
            snippet_counter += 1
            leaf = f"{src_path.name}#snippet-{snippet_counter}"
            virt = (src_path.parent / leaf)
            selected.append(virt)
            file_contents[virt] = snippet_text
            file_token = count_tokens(snippet_text)
            # 1.2x safety margin on the estimate; None when no tokenizer is available.
            file_token_estimates[virt] = int(file_token * 1.2) if file_token is not None else None
            snippet_meta[virt] = (rel_src, start, end)
            # Display path
            try:
                display_path = str(virt.relative_to(current_dir))
            except ValueError:
                display_path = str(virt)
            file_display_entries.append((display_path, virt))

        if args.print_files:
            # Print the included-file inventory with per-file token estimates.
            # In --llm mode this goes to stderr so stdout stays a clean payload.
            try:
                sorted_entries = sorted(file_display_entries, key=lambda item: item[0])
                stream = sys.stderr if args.llm else sys.stdout
                print(f"Included {len(sorted_entries)} files:", file=stream)
                for display_path, original_path in sorted_entries:
                    token_value = file_token_estimates.get(original_path)
                    if token_value is None:
                        print(f"{display_path} (tokens=unknown)", file=stream)
                    else:
                        print(f"{display_path} (tokens={token_value})", file=stream)
            except Exception as e:
                # Inventory printing is informational only; never fatal.
                print(f"Warning: failed printing file list: {e}", file=sys.stderr)

        # Build the directory tree from the selection and render the XML payload.
        tree = build_tree(selected, current_dir)
        xml = generate_xml(tree, file_contents=file_contents, snippet_meta=snippet_meta)

        # Whole-payload token estimate with the same 1.2x safety margin used
        # for per-file estimates; stays None when no tokenizer is available.
        raw_token_total = count_tokens(xml)
        token_count: Optional[int] = None
        if raw_token_total is not None:
            token_count = int(raw_token_total * 1.2)
        stdout_written = False
        stdout_error: Optional[Exception] = None

        # In --llm mode, persist a snapshot of the payload to disk before
        # printing it, remembering any failure for the metadata line below.
        if args.llm and stdout_path is not None:
            try:
                stdout_path.write_text(xml, encoding="utf-8")
                stdout_written = True
            except Exception as exc:
                stdout_error = exc
                print(
                    f"Warning: Could not write stdout snapshot to {stdout_path}: {exc}",
                    file=sys.stderr,
                )

        if args.llm:
            # LLM mode: print metadata on stderr, XML payload on stdout
            location_details = []
            if stdout_written and stdout_path is not None:
                location_details.append(f"stdout={stdout_path}")
            elif stdout_path is not None and stdout_error is not None:
                location_details.append(f"stdout_failed={stdout_path}")
            if stderr_handle is not None and stderr_path is not None:
                location_details.append(f"stderr={stderr_path}")

            # Warn about likely context-window overflows. Both thresholds are
            # checked independently, so a very large payload emits both warnings.
            warning_messages: List[str] = []
            if token_count is None:
                metadata_parts = ["LLM_METADATA tokens=unknown"]
            else:
                metadata_parts = [f"LLM_METADATA tokens={token_count}"]
                if token_count > 900_000:
                    warning_messages.append(
                        "Warning - will likely exceed Gemini's input context window"
                    )
                if token_count > 270_000:
                    warning_messages.append(
                        "Warning - will likely exceed gpt-5's input context window"
                    )
            metadata_parts.extend(warning_messages)
            if location_details:
                metadata_parts.append("(" + ", ".join(location_details) + ")")
            print(" ".join(metadata_parts), file=sys.stderr, flush=True)
            try:
                print(xml)
                sys.stdout.flush()
            except (BrokenPipeError, IOError):
                # Handle broken pipe gracefully (e.g., when piping to head, grep, etc.)
                # This is normal when the receiving process terminates early
                try:
                    sys.stdout.close()
                except Exception:
                    pass
                try:
                    sys.stderr.close() 
                except Exception:
                    pass
        else:
            # Normal mode: write XML to /tmp and copy to clipboard
            if stdout_path is not None and not stdout_written:
                try:
                    stdout_path.write_text(xml, encoding="utf-8")
                except Exception as exc:
                    print(
                        f"Warning: Could not write snapshot to {stdout_path}: {exc}",
                        file=sys.stderr,
                    )
            # Clipboard: prefer clip.exe (Windows/WSL), fall back to pyperclip
            # if importable; exit non-zero only when both mechanisms fail.
            try:
                subprocess.run(["clip.exe"], input=xml, text=True, check=True)
                print("Copied content to Windows clipboard using clip.exe")
            except Exception as e:
                print(f"Failed to copy using clip.exe: {e}", file=sys.stderr)
                try:
                    import pyperclip  # type: ignore
                    pyperclip.copy(xml)
                    print("Copied content using pyperclip")
                except Exception as e2:
                    print(f"Also failed to use pyperclip: {e2}", file=sys.stderr)
                    sys.exit(1)

            print("Done")

        # Interactive-run summary (suppressed in --llm mode, where the token
        # count already went out on the LLM_METADATA stderr line).
        if token_count is not None and token_count > 0 and not args.llm:
            print(f"Token count: {token_count} tokens")
            if stdout_path is not None:
                print(f"Snapshot written to {stdout_path}")
    finally:
        # Restore stderr and close tee if used
        sys.stderr = original_stderr
        if stderr_handle is not None:
            try:
                stderr_handle.close()
            except Exception:
                pass


if __name__ == "__main__":
    # Script entry point. SystemExit(None) exits with status 0, matching the
    # original fall-through behavior; errors raised inside main() propagate.
    raise SystemExit(main())
