import os
import subprocess


def run_phobius(
    input_file, output_dir, phobius_path="/data/gkargas/phobius/phobius.pl"
):
    """
    Run Phobius in short-output mode on one file and save what it prints.

    The result is written to ``<output_dir>/<basename of input_file>_phobius.txt``.
    Phobius is started directly from an argument list (no shell), so paths
    containing spaces or shell metacharacters are passed through unchanged.
    Anything Phobius writes to stderr is discarded.

    Args:
        input_file: Path to the file handed to Phobius.
        output_dir: Directory that receives the result file.
        phobius_path: Path to the Phobius executable script.

    Returns:
        True if Phobius was started and exited with status 0, otherwise
        False. On a non-zero exit the (possibly partial) result file is
        kept; if Phobius could not be started at all, the empty result
        file is removed.
    """
    # Get the base filename without the path
    base_filename = os.path.basename(input_file)

    # Create the output filename
    output_file = os.path.join(output_dir, f"{base_filename}_phobius.txt")

    # Argument list instead of a shell string: no quoting or injection issues
    command = [phobius_path, "-short", input_file]

    print(f"Running Phobius on: {base_filename}")
    try:
        # Stream stdout straight into the result file (replaces "> file")
        with open(output_file, "wb") as results:
            completed = subprocess.run(
                command,
                stdout=results,
                stderr=subprocess.DEVNULL,  # replaces "2>/dev/null"
                check=False,
            )
    except OSError as err:
        # Raised when the output file cannot be opened or the executable
        # is missing / not runnable.
        print(f"Could not run Phobius on {base_filename}: {err}")
        try:
            # Do not leave an empty placeholder that looks like a result
            if os.path.getsize(output_file) == 0:
                os.remove(output_file)
        except OSError:
            pass  # nothing was created, or it cannot be cleaned up
        return False

    if completed.returncode != 0:
        print(
            f"Phobius exited with status {completed.returncode} "
            f"for: {base_filename}"
        )
        return False

    print(f"Results saved to: {output_file}")
    return True


# Map each dataset label to the directory its FASTA files are read from
# ("input") and the directory its Phobius results are written to ("output").
# NOTE(review): the labels say "iORFs" while the paths say "sORFs" - confirm
# that the difference is intentional.
directories = {
    label: {"input": source_dir, "output": results_dir}
    for label, source_dir, results_dir in (
        (
            "iORFs_proteins",
            "/data/gkargas/Actinobacteria/sORFs_proteins",
            "/data/gkargas/Actinobacteria/phobius_results/sORFs_proteins",
        ),
        (
            "iORFs_shuffled_proteins",
            "/data/gkargas/Actinobacteria/sORFs_proteins_shuffled",
            "/data/gkargas/Actinobacteria/phobius_results/sORFs_proteins_shuffled",
        ),
    )
}

# Process each type: run Phobius on every FASTA file in its input directory
for key, dirs in directories.items():
    input_dir = dirs["input"]
    output_dir = dirs["output"]

    print(f"\nProcessing: {key}")
    print(f"Input directory: {input_dir}")
    print(f"Output directory: {output_dir}")

    # A missing input directory must not abort the remaining types
    if not os.path.isdir(input_dir):
        print(f"Input directory not found, skipping: {input_dir}")
        continue

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the
    # processing order (and therefore the log) is reproducible.
    for filename in sorted(os.listdir(input_dir)):
        # Include multiple FASTA extensions
        if not filename.endswith((".fa", ".fasta", ".faa")):
            continue

        input_file = os.path.join(input_dir, filename)

        # Ignore directories that merely carry a FASTA-like suffix
        if not os.path.isfile(input_file):
            continue

        run_phobius(input_file, output_dir)

print("\nAll processing completed!")

# Print a summary: how many Phobius result files each output directory holds.
# The number is a count of "*_phobius.txt" files (one per input file that
# produced a result file), not a count of protein sequences, and it includes
# any result files already present in the directory.
print("\nResults Summary:")
for key, dirs in directories.items():
    output_dir = dirs["output"]
    # isdir rather than exists: os.listdir would raise on a non-directory path
    if os.path.isdir(output_dir):
        results_count = sum(
            1 for f in os.listdir(output_dir) if f.endswith("_phobius.txt")
        )
        print(f"{key}: {results_count} result files")
