Mercurial > repos > estrain > lissero
comparison lissero/variant4b.py @ 10:412d55f09755 draft
Uploaded
author | estrain |
---|---|
date | Thu, 08 Feb 2024 18:18:47 +0000 |
parents | |
children | 40f397e29951 |
comparison
equal
deleted
inserted
replaced
9:57d1d335ce88 | 10:412d55f09755 |
---|---|
1 import sys | |
2 | |
def identify_variants_with_genes(input_file_path, output_file_path):
    """Relabel rows of a tab-separated table as "4b variant".

    Reads the table at ``input_file_path`` (first line is the header) and
    writes a copy to ``output_file_path``. In every data row where the
    ``LMO0737``, ``ORF2110`` and ``ORF2819`` columns all contain ``FULL``,
    the ``SEROTYPE`` column is replaced by ``"4b variant"``. All other rows
    are written through unchanged.

    Args:
        input_file_path: Path of the tab-separated input file.
        output_file_path: Path of the file to write the result to.

    Returns:
        None. If the input holds no data rows (header only, or empty), a
        message is printed and no output file is written.

    Raises:
        ValueError: If the header lacks ``SEROTYPE`` or any gene column.
    """
    # Define the genes of interest
    genes_of_interest = ['LMO0737', 'ORF2110', 'ORF2819']

    # Open the input file and read its lines
    with open(input_file_path, 'r') as file:
        lines = file.readlines()

    # Check if the file has more than just the header
    if len(lines) <= 1:
        print("Input file does not contain enough data.")
        return

    # Split on the raw line (minus its newline) so that empty leading or
    # trailing columns keep their position; strip each name individually so
    # stray whitespace around a header name does not prevent a match.
    headers = [name.strip() for name in lines[0].rstrip('\n').split('\t')]

    required_columns = genes_of_interest + ['SEROTYPE']
    missing_columns = [name for name in required_columns if name not in headers]
    if missing_columns:
        raise ValueError(
            "Input file header is missing required column(s): "
            + ", ".join(missing_columns)
        )

    gene_indices = [headers.index(gene) for gene in genes_of_interest]
    serotype_index = headers.index('SEROTYPE')
    # A row must have at least this many fields for every index to be valid.
    min_fields = max(gene_indices + [serotype_index]) + 1

    # Initialize a list to hold the modified lines
    modified_lines = [lines[0]]  # Start with the header

    # Process each data line in the input file
    for line in lines[1:]:
        # Only the newline is removed: str.strip() would also eat leading and
        # trailing tabs, shifting every cell in rows with empty edge fields.
        data = line.rstrip('\n').split('\t')
        # Blank or truncated rows cannot match and are passed through as-is
        # instead of raising IndexError.
        if len(data) >= min_fields and all(
            data[index].strip() == 'FULL' for index in gene_indices
        ):
            # Modify the SEROTYPE column to "4b variant"
            data[serotype_index] = "4b variant"
        # Rejoin the data into a single string and add it to the list
        modified_lines.append('\t'.join(data) + '\n')

    # Write the modified lines to the output file
    with open(output_file_path, 'w') as file:
        file.writelines(modified_lines)

    print(f'Results written to {output_file_path}')
39 | |
if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are
    # expected after the script name (input path, then output path).
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python script.py <input_file_path> <output_file_path>")
        sys.exit(1)

    input_file_path, output_file_path = cli_args
    identify_variants_with_genes(input_file_path, output_file_path)
48 |