#!/usr/bin/env python3
#
# Git command to transform staged files according to a command that accepts file
# content on stdin and produces output on stdout. This command is useful in
# combination with `git add -p` which allows you to stage specific changes in
# a file. This command runs a formatter on the file with staged changes while
# ignoring unstaged changes.
#
# Usage: git-format-staged [OPTION]... [FILE]...
# Example: git-format-staged --formatter 'prettier --stdin-filepath "{}"' '*.js'
#
# Version 4.0 - Enhanced with pathspec support, config files, and debug output
#
# Tested with Python versions 3.8 - 3.13.
#
# Original author: Jesse Hallett <jesse@sitr.us>
# Enhanced by: Danny @SmoothBricks

from __future__ import print_function
import argparse
from gettext import gettext as _
import os
import re
import subprocess
import sys
import json
from pathlib import Path

# Lazy loading flags for optional config file parsers
HAS_YAML = None
HAS_TOML = None
yaml = None
toml = None
from typing import List, Dict, Tuple, Optional, Any
import logging

# Import pathspec for proper gitignore-style pattern matching
try:
    import pathspec
except ImportError:
    print("Error: pathspec library is required. Install with: pip install pathspec", file=sys.stderr)
    sys.exit(1)

def _ensure_yaml():
    """Import PyYAML on first use and report whether it is available.

    The outcome is cached in the module-level ``HAS_YAML`` flag, so the import
    is attempted at most once. On success the module is bound to the global
    name ``yaml``.
    """
    global HAS_YAML, yaml
    if HAS_YAML is not None:
        # Already resolved by an earlier call.
        return HAS_YAML
    try:
        import yaml as _yaml
    except ImportError:
        HAS_YAML = False
    else:
        yaml = _yaml
        HAS_YAML = True
    return HAS_YAML

def _ensure_toml():
    """Import a TOML parser on first use and report whether one is available.

    The third-party ``toml`` package is preferred. When it is not installed,
    the standard-library ``tomllib`` module (Python 3.11+) is used instead; it
    offers the same ``loads(str)`` entry point. The outcome is cached in the
    module-level ``HAS_TOML`` flag and the chosen module is bound to the global
    name ``toml``.
    """
    global HAS_TOML, toml
    if HAS_TOML is None:
        _toml = None
        try:
            import toml as _toml
        except ImportError:
            try:
                # Read-only stdlib parser; sufficient for loading config files.
                import tomllib as _toml
            except ImportError:
                _toml = None
        if _toml is not None:
            toml = _toml
        HAS_TOML = _toml is not None
    return HAS_TOML

# The string 1.0.1 is replaced during the publish process.
VERSION = '1.0.1'
# Name the script was invoked with; used as the prefix of warning messages.
PROG = sys.argv[0]

# Setup logging
# Only WARNING and above are emitted unless the level is lowered elsewhere
# (format_files_unified switches to DEBUG when debug mode is on).
logger = logging.getLogger('git-format-staged')
logger.setLevel(logging.WARNING)

# Console handler
# Log records go to stderr as "[LEVEL] message".
console_handler = logging.StreamHandler(sys.stderr)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

def info(msg):
    """Print an informational message to standard output."""
    print(msg)

def warn(msg):
    """Print a warning to stderr, prefixed with the program name."""
    # Written to stderr directly (not via the logger) to maintain v3 output format
    line = f'{PROG}: warning: {msg}'
    print(line, file=sys.stderr)

def fatal(msg):
    """Log an error message and terminate the process with exit status 1.

    Raises SystemExit, which ``except Exception`` handlers do not intercept.
    """
    logger.error(msg)
    # sys.exit is always available; the bare ``exit`` builtin is injected by
    # the ``site`` module and is missing under ``python -S`` or frozen builds.
    sys.exit(1)

def debug(msg):
    """Emit a DEBUG-level record through the module logger."""
    logger.log(logging.DEBUG, msg)

class Config:
    """Configuration holder for git-format-staged.

    Holds the formatter definitions, the named pattern sets they may extend,
    and a few global flags. Instances are normally built by ``from_file``.
    """
    def __init__(self):
        """Create an empty configuration with default settings."""
        # Maps formatter name -> dict with 'command', 'patterns', 'extends', 'readonly'
        self.formatters = {}
        self.pattern_sets = {}  # Store pattern sets
        self.debug = False
        self.update_working_tree = True
        self.show_commands = False

    def validate(self) -> List[str]:
        """Validate configuration and return list of errors.

        Returns an empty list when every formatter has a command, only extends
        pattern sets that exist, and has a flat list of string patterns.
        """
        errors = []

        for name, formatter in self.formatters.items():
            # Check required fields
            if not formatter.get('command'):
                errors.append(f"Formatter '{name}' is missing required 'command' field")

            # Check extends references
            extends = formatter.get('extends', [])
            if isinstance(extends, str):
                extends = [extends]
            for pattern_set_name in extends:
                if pattern_set_name not in self.pattern_sets:
                    errors.append(f"Formatter '{name}' extends undefined pattern set '{pattern_set_name}'")

            # Check patterns is a list
            patterns = formatter.get('patterns', [])
            if not isinstance(patterns, list):
                errors.append(f"Formatter '{name}' patterns must be a list, got {type(patterns).__name__}")
            else:
                # Validate each pattern (handle nested lists from YAML aliases)
                # Note: from_file flattens one level of nesting before calling
                # validate, so the nested-list branch mainly guards configs
                # that were assembled by other means.
                for i, pattern in enumerate(patterns):
                    if isinstance(pattern, list):
                        # This can happen with YAML aliases like - *alias
                        errors.append(f"Formatter '{name}' has nested list in patterns at index {i}. This usually happens when using '- *alias' syntax. Use 'patterns: *alias' instead.")
                    elif not isinstance(pattern, str):
                        errors.append(f"Formatter '{name}' pattern must be string, got {type(pattern).__name__}: {pattern}")

        return errors

    @classmethod
    def from_file(cls, config_file: Path) -> 'Config':
        """Load configuration from YAML or TOML file.

        A missing file yields a default (empty) configuration. Parse errors,
        an unsupported file suffix, structurally invalid top-level sections,
        and validation failures all terminate the process through ``fatal``.
        Individual malformed pattern sets or formatters are skipped with a
        warning instead.
        """
        config = cls()

        if not config_file.exists():
            return config

        content = config_file.read_text()

        # fatal() exits the process, so `data` is always bound when execution
        # continues past this try block.
        try:
            if config_file.suffix in ['.yml', '.yaml']:
                if not _ensure_yaml():
                    fatal(f"YAML config file found but PyYAML is not installed. Install with: pip install pyyaml")
                # Since YAML scanning happens before parsing, we need to preprocess
                # but let's be smart about it - only quote things that look like
                # file patterns, not actual YAML anchors/aliases

                def smart_quote_patterns(yaml_content):
                    """Quote file patterns that would conflict with YAML syntax.

                    List items starting with one of ``* ! & @ ``` are wrapped
                    in double quotes, unless they are an alias to an anchor
                    defined somewhere in the document or start with ``&``.
                    """
                    lines = yaml_content.split('\n')
                    result = []

                    # First pass: find all anchor definitions
                    anchors = set()
                    for line in lines:
                        # Match anchor definitions like "&anchor" or "key: &anchor"
                        # (only the first '&name' on each line is recorded)
                        anchor_match = re.search(r'&(\w+)', line)
                        if anchor_match:
                            anchors.add(anchor_match.group(1))

                    # Second pass: rewrite list items that look like patterns
                    for line in lines:
                        # Match list items that might be file patterns
                        match = re.match(r'^(\s*-\s+)([*!&@`].*)$', line)
                        if match:
                            indent, value = match.groups()

                            # Check if it's an alias reference
                            alias_match = re.match(r'^\*(\w+)$', value)
                            if alias_match and alias_match.group(1) in anchors:
                                # It's a valid alias reference, keep as-is
                                result.append(line)
                            elif value.startswith('&'):
                                # It's an anchor definition, keep as-is
                                result.append(line)
                            else:
                                # It's a file pattern, quote it
                                # (the value is inserted verbatim; embedded
                                # double quotes are not escaped)
                                result.append(f'{indent}"{value}"')
                        else:
                            result.append(line)

                    return '\n'.join(result)

                # Preprocess the content
                processed_content = smart_quote_patterns(content)
                data = yaml.safe_load(processed_content) or {}
            elif config_file.suffix == '.toml':
                if not _ensure_toml():
                    fatal(f"TOML config file found but toml is not installed. Install with: pip install toml")
                data = toml.loads(content) or {}
            else:
                fatal(f"Unsupported config file format: {config_file}")
        except Exception as e:
            fatal(f"Error parsing config file {config_file}: {e}")

        # Validate structure
        if not isinstance(data, dict):
            fatal(f"Config file must contain a dictionary/mapping, got {type(data).__name__}")

        # Parse pattern sets first
        # (formatters resolve their 'extends' entries against these)
        pattern_sets_data = data.get('pattern_sets', {})
        if not isinstance(pattern_sets_data, dict):
            fatal(f"'pattern_sets' must be a dictionary/mapping, got {type(pattern_sets_data).__name__}")

        for name, pattern_set in pattern_sets_data.items():
            if isinstance(pattern_set, list):
                # Direct list format: pattern_sets: name: [patterns]
                # YAML: pattern_sets:
                #         common: ["*.js", "*.ts"]
                # TOML: [pattern_sets]
                #       common = ["*.js", "*.ts"]
                config.pattern_sets[name] = pattern_set
            else:
                warn(f"Skipping pattern set '{name}': must be a list, got {type(pattern_set).__name__}")
                continue

        # Parse formatters
        formatters_data = data.get('formatters', {})
        if not isinstance(formatters_data, dict):
            fatal(f"'formatters' must be a dictionary/mapping, got {type(formatters_data).__name__}")

        for name, formatter_config in formatters_data.items():
            if isinstance(formatter_config, dict):
                # Get base patterns from extends
                extends = formatter_config.get('extends', [])
                if isinstance(extends, str):
                    extends = [extends]  # Normalize to list

                # Collect patterns from extended sets
                extended_patterns = []
                for pattern_set_name in extends:
                    if pattern_set_name in config.pattern_sets:
                        extended_patterns.extend(config.pattern_sets[pattern_set_name])
                    else:
                        warn(f"Formatter '{name}' extends undefined pattern set '{pattern_set_name}'")

                # Add formatter's own patterns
                formatter_patterns = formatter_config.get('patterns', [])
                if not isinstance(formatter_patterns, list):
                    warn(f"Formatter '{name}' patterns must be a list, got {type(formatter_patterns).__name__}")
                    continue

                # Flatten any nested lists (can happen with YAML aliases like - *alias)
                flattened_patterns = []
                for pattern in formatter_patterns:
                    if isinstance(pattern, list):
                        # Flatten nested list
                        flattened_patterns.extend(pattern)
                    else:
                        flattened_patterns.append(pattern)

                # Inherited patterns come first, the formatter's own after them
                all_patterns = extended_patterns + flattened_patterns

                config.formatters[name] = {
                    'command': formatter_config.get('command', ''),
                    'patterns': all_patterns,
                    'extends': extends,  # Keep for debugging
                    # 'no_write' is accepted as an alternative key for 'readonly'
                    'readonly': formatter_config.get('readonly', formatter_config.get('no_write', False))
                }
            else:
                warn(f"Skipping formatter '{name}': must be a dictionary, got {type(formatter_config).__name__}")

        # Parse global settings
        # (a non-mapping 'settings' value is ignored and defaults are kept)
        settings = data.get('settings', {})
        if isinstance(settings, dict):
            config.update_working_tree = settings.get('update_working_tree', True)
            config.show_commands = settings.get('show_commands', False)

        # Debug mode
        config.debug = data.get('debug', False)

        # Validate the loaded config
        # Each problem is reported as a warning before the final fatal exit.
        errors = config.validate()
        if errors:
            for error in errors:
                warn(f"Config validation: {error}")
            fatal(f"Invalid configuration in {config_file}")

        return config

def find_config_file() -> Optional[Path]:
    """Locate the nearest config file, searching upward from the working directory.

    Each directory from the current one up to the filesystem root is checked
    for ``.git-format-staged.yml``, ``.yaml`` and ``.toml`` (in that order).
    Returns the first existing path, or None when nothing is found.
    """
    candidate_names = (
        '.git-format-staged.yml',
        '.git-format-staged.yaml',
        '.git-format-staged.toml',
    )
    start = Path.cwd()

    # start.parents ends at the filesystem root, which is where the search stops.
    for directory in (start, *start.parents):
        for filename in candidate_names:
            candidate = directory / filename
            if candidate.exists():
                return candidate

    return None

class FormatterPatternSpec:
    """Decides whether a path belongs to a formatter.

    Patterns without a leading ``!`` select files. Patterns with a leading
    ``!`` exclude files unconditionally, no matter where they appear in the
    list. A lone ``!`` is treated as an ordinary selecting pattern.
    """
    def __init__(self, patterns: List[str]):
        included = []
        excluded = []
        for raw in patterns:
            is_exclusion = raw.startswith('!') and len(raw) > 1
            if is_exclusion:
                # Store the pattern without its '!' marker.
                excluded.append(raw[1:])
            else:
                included.append(raw)

        self.positive_spec = pathspec.PathSpec.from_lines('gitwildmatch', included)
        self.negative_spec = pathspec.PathSpec.from_lines('gitwildmatch', excluded)

    def match_file(self, path: str) -> bool:
        """Return whether *path* is selected and not excluded."""
        selected = self.positive_spec.match_file(path)
        if not selected:
            return selected
        return not self.negative_spec.match_file(path)

def create_pattern_spec(patterns: List[str]) -> FormatterPatternSpec:
    """Build the matcher for a formatter's pattern list (exclusions take precedence)."""
    spec = FormatterPatternSpec(patterns)
    return spec

def collect_git_files(include_staged=True, include_unstaged=False, specific_files=None, include_untracked=False):
    """Collect files from git based on what needs to be formatted.

    Args:
        include_staged: Include added/modified files from the index
            (symlinks and submodules are skipped).
        include_unstaged: Include working tree files with unstaged changes.
        specific_files: When non-empty, return exactly these paths as working
            tree files and ignore every other option.
        include_untracked: With include_unstaged, also include untracked files
            that are not ignored.

    Returns: List of (file_path, is_staged, diff_entry) tuples. diff_entry is
    the parsed diff-index record for staged files and None otherwise.
    """
    if specific_files:
        # Format specific files provided by user
        return [(file_path, False, None) for file_path in specific_files]

    files_to_format = []

    # Collect staged files
    if include_staged:
        # stderr is captured so that the "no commits yet" case can be told
        # apart from real failures; the text of CalledProcessError never
        # contains git's own error message.
        result = subprocess.run(
            [
                'git', 'diff-index',
                '--cached',
                '--diff-filter=AM',
                '--no-renames',
                'HEAD'
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

        if result.returncode != 0:
            stderr_text = result.stderr.decode('utf-8', errors='replace').strip()
            if "bad revision 'HEAD'" in stderr_text:
                warn("No HEAD commit found. Skipping staged file formatting.")
            else:
                fatal(stderr_text or f"git diff-index exited with status {result.returncode}")
        else:
            for line in result.stdout.splitlines():
                entry = parse_diff(line.decode('utf-8'))

                # Skip symlinks and submodules
                if entry['dst_mode'] in ['120000', '160000']:
                    continue

                files_to_format.append((entry['src_path'], True, entry))

    # Collect unstaged files
    if include_unstaged:
        unstaged_files = set()

        # Get modified files (best effort: a git failure just yields nothing)
        try:
            output = subprocess.check_output([
                'git', 'diff', '--name-only', '--diff-filter=AM'
            ])
            unstaged_files.update(output.decode('utf-8').splitlines())
        except subprocess.CalledProcessError:
            pass

        # Untracked files are opt-in. Formatting every untracked file by default
        # is surprising in worktrees that contain scratch directories or nested
        # external repo clones.
        if include_untracked:
            try:
                output = subprocess.check_output([
                    'git', 'ls-files', '--others', '--exclude-standard'
                ])
                unstaged_files.update(output.decode('utf-8').splitlines())
            except subprocess.CalledProcessError:
                pass

        # A file that is already queued as staged is not queued a second time.
        staged_paths = {path for path, is_staged, _entry in files_to_format if is_staged}

        # Sorted so that processing order does not depend on set iteration order.
        for file_path in sorted(unstaged_files):
            if file_path not in staged_paths:
                files_to_format.append((file_path, False, None))

    return files_to_format

def find_matching_formatters(formatters, relative_path, debug_mode=False):
    """Select the formatters whose pattern spec matches a path.

    Each formatter is a dict with a 'spec' matcher, a 'name' and an optional
    'readonly' flag. Matches keep their original relative order.

    Returns: (regular_formatters, readonly_formatters)
    """
    regular = []
    readonly = []

    for candidate in formatters:
        if not candidate['spec'].match_file(relative_path):
            continue
        if debug_mode:
            debug(f"  Formatter '{candidate['name']}' matches")
        # Route the match into the bucket that corresponds to its readonly flag.
        bucket = readonly if candidate.get('readonly', False) else regular
        bucket.append(candidate)

    return regular, readonly

def run_formatter_on_content(command, content, file_path):
    """Feed bytes to a shell command on stdin and capture what it prints.

    Args:
        command: Shell command line to execute.
        content: Bytes passed to the command's stdin.
        file_path: Accepted for interface compatibility; not used here.

    Returns: (success, output_content, error_message). On success the output
    is the command's stdout and the error is None. On a non-zero exit, or when
    the command cannot be run, the output is None and the error holds the
    decoded stderr or the exception text.
    """
    try:
        completed = subprocess.run(
            command,
            shell=True,
            input=content,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
    except Exception as exc:
        return False, None, str(exc)

    if completed.returncode == 0:
        return True, completed.stdout, None

    return False, None, completed.stderr.decode('utf-8', errors='replace')

def write_file_atomically(file_path, content):
    """Write content to file atomically using temp file + replace.

    The bytes are written to a temporary file in the destination's directory
    and then moved over the destination, so the destination never holds a
    partially written file. The destination's permission bits are copied to
    the new file when the destination already exists.

    Args:
        file_path: Path of the file to create or overwrite.
        content: Bytes to write.

    Raises:
        OSError: If writing or replacing fails. The temporary file is removed
            before the error propagates.
    """
    import tempfile
    fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(file_path))
    try:
        # The file object owns the descriptor: it is closed exactly once, and
        # write() keeps going until every byte has been written.
        with os.fdopen(fd, 'wb') as temp_file:
            temp_file.write(content)

        # Preserve file permissions
        try:
            st = os.stat(file_path)
            os.chmod(temp_path, st.st_mode)
        except OSError:
            pass  # New file, no permissions to preserve

        # Atomically replace (os.replace also overwrites on Windows)
        os.replace(temp_path, file_path)
    except BaseException:
        # Do not leave the temporary file behind, and do not let a cleanup
        # problem hide the original error.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise

def run_readonly_formatter(formatter, file_path, content_hash=None, content=None):
    """Run a check-only formatter and abort the program if it fails.

    The formatter's output is discarded; only its exit status matters.
    Pass content_hash to check a git object, or content (bytes) to check
    working tree data. content_hash wins when both are given.
    """
    shell_command = formatter['command'].replace('{}', file_path)

    try:
        if not content_hash:
            # Working tree content is fed to the command directly.
            ok, _output, details = run_formatter_on_content(shell_command, content, file_path)
            if not ok:
                fatal(f"Readonly formatter '{formatter['name']}' failed for {file_path}: {details}")
        else:
            # Git object: stream the blob from `git cat-file` into the command.
            blob_reader = subprocess.Popen(
                ['git', 'cat-file', '-p', content_hash],
                stdout=subprocess.PIPE
            )
            checker = subprocess.Popen(
                shell_command,
                shell=True,
                stdin=blob_reader.stdout,
                stdout=subprocess.PIPE,
                stderr=None
            )
            blob_reader.stdout.close()
            checker.communicate()

            if checker.returncode != 0:
                fatal(f"Readonly formatter '{formatter['name']}' failed for {file_path}")

    except Exception as e:
        fatal(f"Error running readonly formatter '{formatter['name']}': {str(e)}")

def format_files_unified(formatters: List[Dict[str, Any]], git_root: str,
                        include_staged=True, include_unstaged=False,
                        specific_files: Optional[List[str]] = None,
                        include_untracked=False,
                        update_working_tree=True, write=True,
                        verbose=False, debug_mode=False):
    """Unified function to format both staged and unstaged files.

    Args:
        formatters: Formatter dicts with 'name', 'command', 'patterns', 'spec'
            and an optional 'readonly' flag.
        git_root: Repository root; file paths are made relative to it before
            pattern matching.
        include_staged, include_unstaged, specific_files, include_untracked:
            Passed to collect_git_files to select the files.
        update_working_tree: After reformatting a staged file, also patch the
            working tree copy (failures there are only warnings).
        write: When False, staged files are only run through the formatters to
            detect failures, and working tree files are left alone.
        verbose: Print the commands being run.
        debug_mode: Enable DEBUG logging and print a summary.

    Staged files: regular formatters run as one pipeline on the index blob and
    readonly formatters then check the result. A formatter failure in the
    pipeline raises; a readonly or --no-write failure is fatal.

    Working tree files: regular formatters run in sequence on the file
    content; readonly formatters are not run. The file is rewritten only when
    every formatter succeeded, the result is non-empty, and the result differs
    from the original content.
    """

    if debug_mode:
        logger.setLevel(logging.DEBUG)
        debug("Unified formatting mode:")
        debug(f"  Include staged: {include_staged}")
        debug(f"  Include unstaged: {include_unstaged}")
        debug(f"  Include untracked: {include_untracked}")
        debug(f"  Specific files: {specific_files}")
        debug(f"  Number of formatters: {len(formatters)}")
        for fmt in formatters:
            debug(f"  Formatter '{fmt['name']}': {fmt['command']}")
            debug(f"    Patterns: {fmt['patterns']}")
            if fmt.get('readonly'):
                debug(f"    readonly: True")
        debug("")

    # Collect files to format
    files_to_format = collect_git_files(include_staged, include_unstaged, specific_files, include_untracked)

    files_processed = 0
    files_formatted = 0
    files_skipped = 0

    for file_path, is_staged, diff_entry in files_to_format:
        # Get relative path for pattern matching
        relative_path = os.path.relpath(file_path, git_root)

        if debug_mode:
            file_type = "staged" if is_staged else "working tree"
            debug(f"Processing {file_type} file: {file_path}")
            debug(f"  Relative path: {relative_path}")

        # Find all formatters that match this file
        regular_formatters, readonly_formatters = find_matching_formatters(
            formatters, relative_path, debug_mode
        )

        if not regular_formatters and not readonly_formatters:
            if debug_mode:
                debug(f"  No formatters match - skipping")
            files_skipped += 1
            continue

        files_processed += 1

        if is_staged and diff_entry:
            # Format staged file
            current_hash = diff_entry['dst_hash']

            # Run regular formatters in a pipeline
            if regular_formatters:
                if debug_mode:
                    debug(f"  Running {len(regular_formatters)} regular formatters")

                formatter_commands = []
                for formatter in regular_formatters:
                    command = formatter['command'].replace('{}', diff_entry['src_path'])
                    formatter_commands.append(command)
                    if debug_mode:
                        debug(f"  Adding to pipeline: {formatter['name']}")

                if write:
                    # Run the pipeline and update index
                    new_hash = format_object_piped(
                        formatter_commands,
                        current_hash,
                        diff_entry['src_path'],
                        verbose=verbose or debug_mode
                    )

                    # If formatters made changes, update the index
                    if new_hash != current_hash:
                        if debug_mode:
                            debug(f"  Formatters changed file (hash: {current_hash[:8]} -> {new_hash[:8]})")
                        # An empty result is never written back: the file is
                        # left as it was instead of being emptied.
                        if not object_is_empty(new_hash):
                            replace_file_in_index(diff_entry, new_hash)

                            if update_working_tree:
                                try:
                                    patch_working_file(diff_entry['src_path'], current_hash, new_hash)
                                except Exception as err:
                                    # Errors patching working tree files are not fatal
                                    warn(str(err))

                            files_formatted += 1
                            formatter_names = ', '.join(f['name'] for f in regular_formatters)
                            info(f"Reformatted {diff_entry['src_path']} with {formatter_names}")
                            # Readonly formatters below check the new content
                            current_hash = new_hash
                else:
                    # --no-write mode: just run formatters to check for errors
                    for formatter in regular_formatters:
                        command = formatter['command'].replace('{}', diff_entry['src_path'])

                        try:
                            get_content = subprocess.Popen(
                                ['git', 'cat-file', '-p', current_hash],
                                stdout=subprocess.PIPE
                            )
                            check_content = subprocess.Popen(
                                command,
                                shell=True,
                                stdin=get_content.stdout,
                                stdout=subprocess.PIPE,
                                stderr=None
                            )
                            get_content.stdout.close()
                            check_content.communicate()

                            if check_content.returncode != 0:
                                fatal(f"Formatter '{formatter['name']}' failed for {diff_entry['src_path']}")
                        except Exception as e:
                            fatal(f"Error running formatter '{formatter['name']}': {str(e)}")

            # Run readonly formatters
            for formatter in readonly_formatters:
                if debug_mode:
                    debug(f"  Running readonly formatter: {formatter['name']}")

                run_readonly_formatter(formatter, diff_entry['src_path'], content_hash=current_hash)

        else:
            # Format working tree file
            if not os.path.exists(file_path):
                warn(f"File not found: {file_path}")
                continue

            # Skip readonly formatters for working tree files
            if regular_formatters and write:
                try:
                    # Read original content
                    with open(file_path, 'rb') as f:
                        original_content = f.read()

                    content = original_content
                    pipeline_failed = False

                    # Apply all formatters in sequence
                    for formatter in regular_formatters:
                        command = formatter['command'].replace('{}', file_path)

                        if verbose or debug_mode:
                            info(f"Running: {command}")

                        success, formatted_content, error = run_formatter_on_content(command, content, file_path)

                        if not success:
                            warn(f"Formatter '{formatter['name']}' failed for {file_path}: {error}")
                            pipeline_failed = True
                            break

                        content = formatted_content

                    if pipeline_failed:
                        # Output of the formatters that ran before the failure
                        # is discarded, so the file is never left half-formatted
                        # while being reported as reformatted.
                        pass
                    elif not content:
                        # Same protection as for staged files: never replace a
                        # file with empty formatter output.
                        if original_content:
                            warn(f"Formatters produced no output for {file_path}; leaving file unchanged")
                    elif content != original_content:
                        write_file_atomically(file_path, content)
                        files_formatted += 1
                        formatter_names = ', '.join(f['name'] for f in regular_formatters)
                        info(f"Reformatted {file_path} with {formatter_names}")

                except Exception as e:
                    warn(f"Error formatting {file_path}: {str(e)}")

    if debug_mode:
        debug("")
        debug("Summary:")
        debug(f"  Files processed: {files_processed}")
        debug(f"  Files formatted: {files_formatted}")
        debug(f"  Files skipped: {files_skipped}")


def format_staged_files(file_patterns: List[str], formatter: str, git_root: str, 
                       update_working_tree=True, write=True, verbose=False, 
                       debug_mode=False, config: Optional[Config] = None):
    """Legacy entry point: run one formatter command over matching staged files.

    Wraps the command and patterns in a single formatter definition and
    delegates to format_files_unified with staged files only. The formatter's
    display name is the first word of the command. The config argument is
    accepted for compatibility and is not used.
    """
    display_name = formatter.split()[0] if formatter else 'formatter'

    # Convert to unified format
    single_formatter = dict(
        name=display_name,
        command=formatter,
        patterns=file_patterns,
        spec=create_pattern_spec(file_patterns),
        readonly=False,
    )

    unified_options = {
        'formatters': [single_formatter],
        'git_root': git_root,
        'include_staged': True,
        'include_unstaged': False,
        'specific_files': None,
        'include_untracked': False,
        'update_working_tree': update_working_tree,
        'write': write,
        'verbose': verbose,
        'debug_mode': debug_mode,
    }
    format_files_unified(**unified_options)


def format_working_tree_file(formatter_cmd: str, file_path: str, verbose=False) -> bool:
    """Run a formatter command on one working tree file.

    Returns True only when the file was rewritten with new content. A missing
    file, a failing formatter, empty formatter output, unchanged output, or
    any other error yields False (problems are reported as warnings).
    """
    if not os.path.exists(file_path):
        warn(f"File not found: {file_path}")
        return False

    try:
        with open(file_path, 'rb') as source:
            before = source.read()

        # Substitute the file path for the {} placeholder
        shell_command = formatter_cmd.replace('{}', file_path)
        if verbose:
            info(f"Running: {shell_command}")

        ok, after, details = run_formatter_on_content(shell_command, before, file_path)
        if not ok:
            warn(f"Formatter failed for {file_path}: {details}")
            return False

        # Nothing to do when the formatter printed nothing or made no change
        if not after or after == before:
            return False

        write_file_atomically(file_path, after)
        return True

    except Exception as e:
        warn(f"Error formatting {file_path}: {str(e)}")
        return False

def format_file_in_index(formatter, diff_entry, update_working_tree=True, write=True, verbose=False):
    """Run a formatter on a file's content in the git index.

    The formatted content is stored as a new git object and the index entry is
    pointed at it. Returns the hash of the new object when the index was
    updated, and None when writing is disabled, the formatter made no changes,
    or the formatter produced no output.
    """
    before_hash = diff_entry['dst_hash']
    after_hash = format_object(formatter, before_hash, diff_entry['src_path'], verbose=verbose)

    # Identical hashes mean the formatter did not change anything.
    if not write or after_hash == before_hash:
        return None

    # An empty object means the formatter printed nothing. Bail out instead of
    # replacing the file with an empty one.
    if object_is_empty(after_hash):
        return None

    replace_file_in_index(diff_entry, after_hash)

    if update_working_tree:
        try:
            patch_working_file(diff_entry['src_path'], before_hash, after_hash)
        except Exception as patch_error:
            # Failing to patch the working tree copy is not fatal
            warn(str(patch_error))

    return after_hash

# Matches the literal "{}" placeholder that stands for the file path in a
# formatter command.
file_path_placeholder = re.compile(r'\{\}')

def format_object_piped(formatter_commands: List[str], object_hash: str, file_path: str, verbose=False):
    """Run multiple formatters piped together on a git blob.

    Builds the pipeline ``git cat-file -p <hash> | cmd1 | cmd2 | ... |
    git hash-object -w --stdin`` and returns the hash printed by the last
    step. With no commands, object_hash is returned unchanged.

    Args:
        formatter_commands: Shell command lines, already containing the file
            path where needed.
        object_hash: Hash of the blob to read.
        file_path: Not used inside this function.
        verbose: Print each command as it is added to the pipeline.

    Raises:
        ValueError: If ``git cat-file`` exits with a non-zero status.
        Exception: If a formatter exits with a non-zero status, or the result
            cannot be written to the object database.
    """

    if not formatter_commands:
        return object_hash

    # Start the pipeline with git cat-file
    processes = []
    get_content = subprocess.Popen(
        ['git', 'cat-file', '-p', object_hash],
        stdout=subprocess.PIPE
    )
    processes.append(get_content)

    # Chain formatters
    # Each command reads the previous stage's stdout as its stdin.
    prev_stdout = get_content.stdout
    for i, command in enumerate(formatter_commands):
        if verbose:
            info(f"Piping through: {command}")

        proc = subprocess.Popen(
            command,
            shell=True,
            stdin=prev_stdout,
            stdout=subprocess.PIPE
        )
        processes.append(proc)

        # Close the previous stdout to allow it to receive SIGPIPE
        prev_stdout.close()
        prev_stdout = proc.stdout

    # Final process: git hash-object
    write_object = subprocess.Popen(
        ['git', 'hash-object', '-w', '--stdin'],
        stdin=prev_stdout,
        stdout=subprocess.PIPE
    )
    processes.append(write_object)
    prev_stdout.close()

    # Wait for all processes and check for errors
    # Index 0 is git cat-file; index i >= 1 is formatter_commands[i-1].
    for i, proc in enumerate(processes[:-1]):  # All but the last
        exit_code = proc.wait()
        # Ignore SIGPIPE (-13) which is expected when formatter doesn't read all input
        # For example, 'echo' doesn't read stdin, so git cat-file gets SIGPIPE when
        # trying to write to it. This is normal behavior and the pipeline still works.
        if exit_code != 0 and exit_code != -13:
            if i == 0:
                raise ValueError(f'unable to read file content from object database: {object_hash}')
            else:
                raise Exception(f'formatter {i} exited with non-zero status: {formatter_commands[i-1]}')

    # Get the new hash from the last process
    new_hash, err = write_object.communicate()

    if write_object.returncode != 0:
        raise Exception('unable to write formatted content to object database')

    return new_hash.decode('utf-8').rstrip()

def format_object(formatter, object_hash, file_path, verbose=False):
    """Run formatter on a git blob identified by its hash.

    The blob is streamed through the formatter command and the output is
    written to a new git blob.

    Args:
        formatter: Shell command line; every ``{}`` is replaced by file_path.
        object_hash: Hash of the blob to format.
        file_path: Path substituted for the ``{}`` placeholder.
        verbose: Print the command before running it.

    Returns:
        The hash of the new blob.

    Raises:
        ValueError: If the blob cannot be read from the object database.
        Exception: If the formatter exits with a non-zero status, or the
            result cannot be written to the object database.
    """
    get_content = subprocess.Popen(
            ['git', 'cat-file', '-p', object_hash],
            stdout=subprocess.PIPE
            )
    # Literal substitution: the path must not be treated as a regex
    # replacement template, where a backslash would be read as an escape.
    command = formatter.replace('{}', file_path)
    if verbose:
        info(command)
    format_content = subprocess.Popen(
            command,
            shell=True,
            stdin=get_content.stdout,
            stdout=subprocess.PIPE
            )
    write_object = subprocess.Popen(
            ['git', 'hash-object', '-w', '--stdin'],
            stdin=format_content.stdout,
            stdout=subprocess.PIPE
            )

    # Close our copies of the pipe ends so upstream processes can get SIGPIPE.
    get_content.stdout.close()
    format_content.stdout.close()

    get_exit_code = get_content.wait()
    # Ignore SIGPIPE (-13) which is expected when formatter doesn't read all input
    # For example, 'echo' doesn't read stdin, so git cat-file gets SIGPIPE when
    # trying to write to it. This is normal behavior and the pipeline still works.
    if get_exit_code != 0 and get_exit_code != -13:
        raise ValueError('unable to read file content from object database: ' + object_hash)

    if format_content.wait() != 0:
        raise Exception('formatter exited with non-zero status')

    new_hash, _err = write_object.communicate()

    if write_object.returncode != 0:
        raise Exception('unable to write formatted content to object database')

    return new_hash.decode('utf-8').rstrip()

def object_is_empty(object_hash):
    """Report whether the git object `object_hash` has zero-length content.

    The content is read with `git cat-file -p`.

    Raises:
        Exception: `git cat-file` exited with a non-zero status.
    """
    result = subprocess.run(
        ['git', 'cat-file', '-p', object_hash],
        stdout=subprocess.PIPE,
    )

    if result.returncode != 0:
        raise Exception('unable to verify content of formatted object')

    return len(result.stdout) == 0

def replace_file_in_index(diff_entry, new_object_hash):
    """Point the index entry for a diff entry's source path at a new object.

    Runs `git update-index --cacheinfo <dst_mode>,<new_object_hash>,<src_path>`
    using the `dst_mode` and `src_path` values of `diff_entry`.

    Raises:
        subprocess.CalledProcessError: `git update-index` exited non-zero.
    """
    cacheinfo = f"{diff_entry['dst_mode']},{new_object_hash},{diff_entry['src_path']}"
    subprocess.check_call(['git', 'update-index', '--cacheinfo', cacheinfo])

def patch_working_file(path, orig_object_hash, new_object_hash):
    """Apply the difference between two git objects to the working tree file.

    The diff between `orig_object_hash` and `new_object_hash` is produced with
    `git diff`, rewritten so that it names `path`, and fed to `git apply` on
    stdin.

    Raises:
        subprocess.CalledProcessError: `git diff` exited non-zero.
        Exception: `git apply` exited non-zero.
    """
    diff_command = [
        'git', 'diff', '--no-ext-diff', '--color=never',
        orig_object_hash, new_object_hash,
    ]
    raw_patch = subprocess.check_output(diff_command)

    # Substitute object hashes in patch header with path to working tree file
    target = path.encode()
    rewritten = raw_patch.replace(orig_object_hash.encode(), target)
    rewritten = rewritten.replace(new_object_hash.encode(), target)

    # stdout/stderr are captured (and discarded) so git's messages stay quiet.
    applied = subprocess.run(
        ['git', 'apply', '-'],
        input=rewritten,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    if applied.returncode != 0:
        raise Exception('could not apply formatting changes to working tree file {}'.format(path))

# Format: src_mode dst_mode src_hash dst_hash status/score? src_path dst_path?
# The score digits and the second (tab-separated) path are optional.
diff_pat = re.compile(r'^:(\d+) (\d+) ([a-f0-9]+) ([a-f0-9]+) ([A-Z])(\d+)?\t([^\t]+)(?:\t([^\t]+))?$')

# Parse output from `git diff-index`
def parse_diff(diff):
    """Turn one `git diff-index` line into a dict of its fields.

    Modes and hashes go through `unless_zeroed`; `score` becomes an int when
    present, otherwise None; `dst_path` is None when the line has one path.

    Raises:
        ValueError: the line does not match `diff_pat`.
    """
    matched = diff_pat.match(diff)
    if matched is None:
        raise ValueError('Failed to parse diff-index line: ' + diff)

    (src_mode, dst_mode, src_hash, dst_hash,
     status, score, src_path, dst_path) = matched.groups()

    entry = {}
    entry['src_mode'] = unless_zeroed(src_mode)
    entry['dst_mode'] = unless_zeroed(dst_mode)
    entry['src_hash'] = unless_zeroed(src_hash)
    entry['dst_hash'] = unless_zeroed(dst_hash)
    entry['status'] = status
    entry['score'] = int(score) if score else None
    entry['src_path'] = src_path
    entry['dst_path'] = dst_path
    return entry

zeroed_pat = re.compile(r'^0+$')

# Returns the argument unless the argument is a string of zeroes, in which case
# returns `None`
def unless_zeroed(s):
    """Return `s` unchanged, or None when `s` matches `zeroed_pat`."""
    if zeroed_pat.match(s):
        return None
    return s

def get_git_root():
    """Return the output of `git rev-parse --show-toplevel` as a string.

    The output is decoded as UTF-8 and trailing whitespace is removed.

    Raises:
        subprocess.CalledProcessError: the git command exited non-zero.
    """
    toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
    return toplevel.decode('utf-8').rstrip()

def normalize_path(p, relative_to=None):
    """Return the absolute form of `p`.

    When `relative_to` is truthy, `p` is first joined onto it; otherwise `p`
    is made absolute as-is (relative to the current working directory).
    """
    if relative_to:
        p = os.path.join(relative_to, p)
    return os.path.abspath(p)

class CustomArgumentParser(argparse.ArgumentParser):
    """ArgumentParser whose stray-argument error hints at quoting the formatter."""

    def parse_args(self, args=None, namespace=None):
        """Parse arguments, reporting leftovers with a formatter-quoting hint.

        Same contract as `ArgumentParser.parse_args`: returns the populated
        namespace, or calls `self.error` when unrecognized arguments remain.
        """
        args, argv = self.parse_known_args(args, namespace)
        if argv:
            # Use the file's own gettext import rather than the undocumented
            # `argparse._` alias, which is a private detail of the stdlib module.
            msg = _(
                    'unrecognized arguments: %s. Do you need to quote your formatter command?'
                    )
            self.error(msg % ' '.join(argv))
        return args

if __name__ == '__main__':
    # Command-line entry point: parse options, resolve the list of formatters
    # (from --formatter or from a config file) and hand off to
    # format_files_unified.
    # NOTE(review): fatal() is defined elsewhere in this file; the code below
    # relies on it not returning (e.g. Config.from_file runs right after the
    # "not found" check) — confirm.
    parser = CustomArgumentParser(
            description='Transform staged files using a formatting command that accepts content via stdin and produces a result via stdout.',
            epilog='Example: %(prog)s --formatter "prettier --stdin-filepath \'{}\'" "src/*.js" "test/*.js"'
            )
    parser.add_argument(
            '--formatter', '-f',
            help='Shell command to format files, will run once per file. Occurrences of the placeholder `{}` will be replaced with a path to the file being formatted. (Example: "prettier --stdin-filepath \'{}\'")'
            )
    parser.add_argument(
            '--config',
            help='Path to configuration file (.yml, .yaml, or .toml)'
            )
    parser.add_argument(
            '--no-update-working-tree',
            action='store_true',
            help='By default formatting changes made to staged file content will also be applied to working tree files via a patch. This option disables that behavior, leaving working tree files untouched.'
            )
    parser.add_argument(
            '--no-write',
            action='store_true',
            help='Prevents %(prog)s from modifying staged or working tree files. You can use this option to check staged changes with a linter instead of formatting. With this option stdout from the formatter command is ignored. Example: %(prog)s --no-write -f "eslint --stdin --stdin-filename \'{}\' >&2" "*.js"'
            )
    parser.add_argument(
            '--unstaged',
            action='store_true',
            help='Format only unstaged changes in working tree'
            )
    parser.add_argument(
            '--also-unstaged',
            action='store_true',
            help='Format both staged AND unstaged changes'
            )
    parser.add_argument(
            '--include-untracked',
            action='store_true',
            help='Include untracked files when formatting unstaged changes. By default only tracked changes are included.'
            )
    parser.add_argument(
            '--files',
            nargs='+',
            help='Format specific files regardless of git status',
            metavar='FILE'
            )
    parser.add_argument(
            '--debug',
            action='store_true',
            help='Show detailed debug output including pattern matching decisions'
            )
    parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Preview what would be formatted without making changes'
            )
    parser.add_argument(
            '--version',
            action='version',
            version='%(prog)s version {}'.format(VERSION),
            help='Display version of %(prog)s'
            )
    parser.add_argument(
            '--verbose',
            help='Show the formatting commands that are running',
            action='store_true'
            )
    parser.add_argument(
            'patterns',
            nargs='*',
            help='Patterns that specify files to format. The formatter will only transform staged files that are given here. Patterns use gitignore syntax and are matched using the pathspec library. Examples: "*.js" matches all .js files, "src/**/*.py" matches Python files in src/, "!tests/*" excludes the tests directory.'
            )

    args = parser.parse_args()

    # Load configuration: an explicit --config always wins; otherwise a config
    # file is searched for only when no --formatter was given.
    config = None
    if args.config:
        config_path = Path(args.config)
        if not config_path.exists():
            fatal(f"Config file not found: {args.config}")
        config = Config.from_file(config_path)
    elif not args.formatter:
        # Only search for config file if no formatter is explicitly provided
        config_path = find_config_file()
        if config_path:
            config = Config.from_file(config_path)
            if args.debug:
                debug(f"Found config file: {config_path}")

    git_root = get_git_root()

    # Determine what to format. --unstaged and --files both switch staged
    # content off; --also-unstaged keeps staged content and adds unstaged.
    include_staged = not (args.unstaged or args.files)
    include_unstaged = args.unstaged or args.also_unstaged or bool(args.files)

    # Build formatters list
    formatters = []

    if args.formatter:
        # Command line formatter
        if not args.patterns:
            fatal("No file patterns specified. Provide patterns to format.")

        # Extract formatter name from command for display. A whitespace-only
        # command has no first token, so fall back to a generic label instead
        # of raising IndexError.
        command_tokens = args.formatter.split()
        formatter_name = command_tokens[0] if command_tokens else 'formatter'
        formatters.append({
            'name': formatter_name,
            'command': args.formatter,
            'patterns': args.patterns,
            'spec': create_pattern_spec(args.patterns),
            'readonly': False
        })
    elif config and config.formatters:
        # Config file formatters
        if args.debug:
            debug("Using formatters from config file")

        for name, formatter_config in config.formatters.items():
            if args.debug:
                debug(f"Building formatter '{name}' from config: {formatter_config}")

            formatter_cmd = formatter_config.get('command', '')
            formatter_patterns = formatter_config.get('patterns', ['*'])

            # An entry without a command cannot run; warn and move on.
            if not formatter_cmd:
                warn(f"Formatter '{name}' has no command specified")
                continue

            readonly = formatter_config.get('readonly', False)

            # Skip readonly formatters when only formatting working tree files
            if include_unstaged and not include_staged and readonly:
                continue

            formatters.append({
                'name': name,
                'command': formatter_cmd,
                'patterns': formatter_patterns,
                'spec': create_pattern_spec(formatter_patterns),
                'readonly': readonly
            })
    else:
        fatal("No formatter specified. Use --formatter or create a config file.")

    # Every config entry may have been skipped above (no command, or readonly
    # in a working-tree-only run), leaving nothing to execute.
    if not formatters:
        if include_unstaged and not include_staged:
            fatal("No non-readonly formatters found in config file")
        else:
            fatal("No formatters found")

    # Use the unified formatter. Writing is disabled by either --no-write or
    # --dry-run; verbose/debug can also be switched on by the config file.
    format_files_unified(
        formatters=formatters,
        git_root=git_root,
        include_staged=include_staged,
        include_unstaged=include_unstaged,
        specific_files=args.files,
        include_untracked=args.include_untracked,
        update_working_tree=not args.no_update_working_tree,
        write=not args.no_write and not args.dry_run,
        verbose=args.verbose or (config.show_commands if config else False),
        debug_mode=args.debug or (config.debug if config else False)
    )
