Mercurial > repos > estrain > lissero

diff lissero/variant4b.py @ 10:412d55f09755 draft
Uploaded
author: estrain
date: Thu, 08 Feb 2024 18:18:47 +0000
children: 40f397e29951
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lissero/variant4b.py	Thu Feb 08 18:18:47 2024 +0000
@@ -0,0 +1,48 @@
+import sys
+
+def identify_variants_with_genes(input_file_path, output_file_path):
+    """Relabel rows whose three marker genes are all "FULL" as "4b variant".
+
+    Reads the tab-separated table at input_file_path and writes a copy to
+    output_file_path with SEROTYPE set to "4b variant" on each matching row.
+    If the input has no data rows, prints a message and writes nothing.
+
+    Raises:
+        ValueError: if a gene column or SEROTYPE is missing from the header.
+    """
+    genes_of_interest = ['LMO0737', 'ORF2110', 'ORF2819']
+    with open(input_file_path, 'r') as file:
+        lines = file.readlines()
+    if len(lines) <= 1:
+        print("Input file does not contain enough data.")
+        return
+
+    # Remove only the line terminator: a bare strip() also eats leading and
+    # trailing tabs, which drops empty edge fields and misaligns columns.
+    headers = [name.strip() for name in lines[0].rstrip('\r\n').split('\t')]
+    gene_indices = [headers.index(gene) for gene in genes_of_interest]
+    serotype_index = headers.index('SEROTYPE')
+    last_needed = max(gene_indices + [serotype_index])
+
+    modified_lines = [lines[0]]  # header is copied through untouched
+    for line in lines[1:]:
+        data = line.rstrip('\r\n').split('\t')
+        # Too-short rows (e.g. blank lines) pass through instead of raising.
+        if len(data) > last_needed and all(data[i] == 'FULL' for i in gene_indices):
+            data[serotype_index] = "4b variant"
+        modified_lines.append('\t'.join(data) + '\n')
+
+    with open(output_file_path, 'w') as file:
+        file.writelines(modified_lines)
+
+    print(f'Results written to {output_file_path}')
+
+if __name__ == "__main__":
+    # Exactly two arguments are required: the input and the output path.
+    if len(sys.argv[1:]) != 2:
+        print("Usage: python script.py <input_file_path> <output_file_path>")
+        sys.exit(1)
+    input_file_path, output_file_path = sys.argv[1:]
+    # Rewrite the table from the first path into the second.
+    identify_variants_with_genes(input_file_path, output_file_path)
+
author	estrain
date	Thu, 08 Feb 2024 18:18:47 +0000
parents
children	40f397e29951