Mercurial > repos > estrain > lissero
changeset 10:412d55f09755 draft
Uploaded
author | estrain |
---|---|
date | Thu, 08 Feb 2024 18:18:47 +0000 |
parents | 57d1d335ce88 |
children | 40f397e29951 |
files | lissero.xml lissero/lissero.xml lissero/variant4b.py |
diffstat | 3 files changed, 100 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/lissero.xml Tue Dec 13 18:43:42 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -<tool id="lissero" name="lissero" version="0.1.4+galaxy0" python_template_version="3.5" profile="21.05"> - <requirements> - <requirement type="package">lissero</requirement> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - lissero - #if $settings.advanced == "advanced" - --min_id $settings.min_id - --min_cov $settings.min_cov - #end if - '$input1' > '$output1'; - ]]></command> - <inputs> - <param type="data" name="input1" format="fasta" /> - <conditional name="settings"> - <param name="advanced" type="select" label="Specify advanced parameters"> - <option value="simple" selected="true">No, use program defaults.</option> - <option value="advanced">Yes, see full parameter list.</option> - </param> - <when value="simple"> - </when> - <when value="advanced"> - <param name="min_id" type="float" label="Minimum percent identity to accept a match" value="95.0" min="0" max="100" /> - <param name="min_cov" type="float" label="Minimum coverage of a gene to accept a match" value="95.0" min="0" max="100" /> - </when> - </conditional> - </inputs> - <outputs> - <data name="output1" format="txt" /> - </outputs> - <help><![CDATA[ - Usage: lissero [OPTIONS] FASTA... - - In silico serogroup prediction for L. monocytogenes. Alleles: lmo1118, - lmo0737, ORF2819, ORF2110, Prs - - References: - - Doumith et al. Differentiation of the major Listeria monocytogenes - serovars by multiplex PCR. J Clin Microbiol, 2004; 42:8; 3819-22 - -Options: - -h, --help Show this message and exit. - --min_id FLOAT Minimum percent identity to accept a match. [0-100][default=95.0] - --min_cov FLOAT Minimum coverage of the gene to accept a match. [0-100][default=95.0] - - ]]></help> - <citations> - <citation type="doi">10.1128/JCM.42.8.3819-3822.2004</citation> - </citations> -</tool>
<!--
  lissero/lissero.xml (added in this changeset; replaces the top-level lissero.xml)

  Galaxy wrapper for LisSero. The tool runs `lissero` on one FASTA input,
  redirects its report to `output1` in the job working directory, and then
  runs variant4b.py on that report to produce `output2.txt`, which is
  collected as the tool's single tabular output.
-->
<tool id="lissero" name="lissero" version="1.0.0+galaxy0" python_template_version="3.5" profile="21.05">
    <requirements>
        <requirement type="package">lissero</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## The two steps are chained with && (not ;) so that a failing LisSero
        ## run stops the job and its exit code is what detect_errors sees,
        ## instead of being masked by the exit status of the Python step.
        lissero
        #if $settings.advanced == "advanced"
            --min_id $settings.min_id
            --min_cov $settings.min_cov
        #end if
        '$input1' > output1 &&
        ## The script path is quoted in case the tool directory contains spaces.
        python '$__tool_directory__/variant4b.py' output1 output2.txt
    ]]></command>
    <inputs>
        <param type="data" name="input1" format="fasta" />
        <conditional name="settings">
            <param name="advanced" type="select" label="Specify advanced parameters">
                <option value="simple" selected="true">No, use program defaults.</option>
                <option value="advanced">Yes, see full parameter list.</option>
            </param>
            <when value="simple">
            </when>
            <when value="advanced">
                <param name="min_id" type="float" label="Minimum percent identity to accept a match" value="95.0" min="0" max="100" />
                <param name="min_cov" type="float" label="Minimum coverage of a gene to accept a match" value="95.0" min="0" max="100" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- output2.txt is the file variant4b.py writes; naming it explicitly
             avoids picking up any other *.txt file in the working directory.
             The output name is kept as-is so existing references keep working. -->
        <data format="tabular" name="lissero.tsv" label="${tool.name} on ${on_string}: LisSero" from_work_dir="output2.txt"/>
    </outputs>
    <help><![CDATA[
Usage: lissero [OPTIONS] FASTA...

In silico serogroup prediction for L. monocytogenes. Alleles: lmo1118,
lmo0737, ORF2819, ORF2110, Prs

After LisSero has run, its report is post-processed by variant4b.py: every
row whose LMO0737, ORF2110 and ORF2819 columns are all FULL has its SEROTYPE
value replaced with "4b variant".

References:

Doumith et al. Differentiation of the major Listeria monocytogenes
serovars by multiplex PCR. J Clin Microbiol, 2004; 42:8; 3819-22

Options::

    -h, --help       Show this message and exit.
    --min_id FLOAT   Minimum percent identity to accept a match. [0-100][default=95.0]
    --min_cov FLOAT  Minimum coverage of the gene to accept a match. [0-100][default=95.0]

    ]]></help>
    <citations>
        <citation type="doi">10.1128/JCM.42.8.3819-3822.2004</citation>
    </citations>
</tool>
"""Relabel rows of a tab-separated LisSero report as "4b variant".

Source path in the repository: lissero/variant4b.py (added in this changeset).
The Galaxy wrapper calls it as ``python variant4b.py output1 output2.txt``.
"""
import sys


def identify_variants_with_genes(input_file_path, output_file_path):
    """Copy a tab-separated report, relabelling rows that carry all three genes.

    The first line of the input is treated as the header. Every following row
    whose ``LMO0737``, ``ORF2110`` and ``ORF2819`` columns all equal ``FULL``
    gets its ``SEROTYPE`` column replaced with ``4b variant``; all other rows
    are written through with their fields unchanged.

    Args:
        input_file_path: Path of the tab-separated report to read.
        output_file_path: Path of the file to write.

    Returns:
        None. When the input holds no data rows (at most a header line) a
        message is printed and no output file is written.

    Raises:
        ValueError: If the header lacks one of the required columns.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Define the genes of interest
    genes_of_interest = ['LMO0737', 'ORF2110', 'ORF2819']

    # Open the input file and read its lines
    with open(input_file_path, 'r') as file:
        lines = file.readlines()

    # Check if the file has more than just the header
    if len(lines) <= 1:
        print("Input file does not contain enough data.")
        return

    # Only the line terminator is removed before splitting: a plain strip()
    # would also eat leading/trailing tabs and silently drop empty columns.
    headers = [name.strip() for name in lines[0].rstrip('\r\n').split('\t')]

    # Fail with a message that names the missing columns rather than the
    # bare "'X' is not in list" produced by list.index().
    missing = [
        name for name in genes_of_interest + ['SEROTYPE'] if name not in headers
    ]
    if missing:
        raise ValueError(
            f"Missing required column(s) in {input_file_path}: "
            f"{', '.join(missing)}"
        )

    gene_indices = [headers.index(gene) for gene in genes_of_interest]
    serotype_index = headers.index('SEROTYPE')
    # Smallest number of fields a row needs for every lookup below to be valid.
    min_width = max(gene_indices + [serotype_index]) + 1

    # Initialize a list to hold the modified lines
    modified_lines = [lines[0]]  # Start with the header

    # Process each data line in the input file
    for line in lines[1:]:
        data = line.rstrip('\r\n').split('\t')
        # Blank or truncated rows cannot carry all three genes; they are
        # written through untouched instead of raising IndexError.
        if len(data) >= min_width and all(
            data[index].strip() == 'FULL' for index in gene_indices
        ):
            # Modify the SEROTYPE column to "4b variant"
            data[serotype_index] = "4b variant"
        # Rejoin the fields; empty columns are preserved as-is.
        modified_lines.append('\t'.join(data) + '\n')

    # Write the modified lines to the output file
    with open(output_file_path, 'w') as file:
        file.writelines(modified_lines)

    print(f'Results written to {output_file_path}')


if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Usage errors go to stderr so they do not mix with normal output.
        print(
            "Usage: python script.py <input_file_path> <output_file_path>",
            file=sys.stderr,
        )
        sys.exit(1)

    input_file_path = sys.argv[1]
    output_file_path = sys.argv[2]
    identify_variants_with_genes(input_file_path, output_file_path)