changeset 10:412d55f09755 draft

Uploaded
author estrain
date Thu, 08 Feb 2024 18:18:47 +0000
parents 57d1d335ce88
children 40f397e29951
files lissero.xml lissero/lissero.xml lissero/variant4b.py
diffstat 3 files changed, 100 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/lissero.xml	Tue Dec 13 18:43:42 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-<tool id="lissero" name="lissero" version="0.1.4+galaxy0" python_template_version="3.5" profile="21.05">
-    <requirements>
-        <requirement type="package">lissero</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        lissero 
-        #if $settings.advanced == "advanced"
-          --min_id $settings.min_id
-          --min_cov $settings.min_cov
-        #end if
-        '$input1' > '$output1';
-    ]]></command>
-    <inputs>
-        <param type="data" name="input1" format="fasta" />
-        <conditional name="settings">
-            <param name="advanced" type="select" label="Specify advanced parameters">
-                <option value="simple" selected="true">No, use program defaults.</option>
-                <option value="advanced">Yes, see full parameter list.</option>
-            </param>
-            <when value="simple">
-            </when>
-            <when value="advanced">
-                <param name="min_id" type="float" label="Minimum percent identity to accept a match" value="95.0" min="0" max="100" /> 
-                <param name="min_cov" type="float" label="Minimum coverage of a gene to accept a match" value="95.0" min="0" max="100" /> 
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data name="output1" format="txt" />
-    </outputs>
-    <help><![CDATA[
-        Usage: lissero [OPTIONS] FASTA...
-
-  In silico serogroup prediction for L. monocytogenes. Alleles: lmo1118,
-  lmo0737, ORF2819, ORF2110, Prs
-
-  References:
-
-  Doumith et al. Differentiation of the major Listeria monocytogenes
-  serovars by multiplex PCR. J Clin Microbiol, 2004; 42:8; 3819-22
-
-Options:
-  -h, --help              Show this message and exit.
-  --min_id FLOAT          Minimum percent identity to accept a match. [0-100][default=95.0]
-  --min_cov FLOAT         Minimum coverage of the gene to accept a match. [0-100][default=95.0]
-
-    ]]></help>
-    <citations>
-      <citation type="doi">10.1128/JCM.42.8.3819-3822.2004</citation> 
-    </citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lissero/lissero.xml	Thu Feb 08 18:18:47 2024 +0000
@@ -0,0 +1,63 @@
+<tool id="lissero" name="lissero" version="1.0.0+galaxy0" python_template_version="3.5" profile="21.05">
+    <requirements>
+        <requirement type="package">lissero</requirement>
+    </requirements>
+    <!--
+        Runs lissero on the input FASTA, then post-processes its table with
+        variant4b.py. The two steps are chained with && so that a lissero
+        failure stops the job with a non-zero exit code instead of being
+        masked by the exit status of the post-processing script.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        lissero
+        #if $settings.advanced == "advanced"
+          --min_id $settings.min_id
+          --min_cov $settings.min_cov
+        #end if
+        '$input1' > output1
+        && python '$__tool_directory__/variant4b.py' output1 output2.txt
+    ]]></command>
+    <inputs>
+        <param type="data" name="input1" format="fasta" />
+        <conditional name="settings">
+            <param name="advanced" type="select" label="Specify advanced parameters">
+                <option value="simple" selected="true">No, use program defaults.</option>
+                <option value="advanced">Yes, see full parameter list.</option>
+            </param>
+            <when value="simple">
+            </when>
+            <when value="advanced">
+                <param name="min_id" type="float" label="Minimum percent identity to accept a match" value="95.0" min="0" max="100" />
+                <param name="min_cov" type="float" label="Minimum coverage of a gene to accept a match" value="95.0" min="0" max="100" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+      <!-- from_work_dir names the exact file written by variant4b.py; it is a relative path, not a glob. -->
+      <data format="tabular" name="lissero.tsv" label="${tool.name} on ${on_string}: LisSero" from_work_dir="output2.txt"/>
+    </outputs>
+    <help><![CDATA[
+        Usage: lissero [OPTIONS] FASTA...
+
+  In silico serogroup prediction for L. monocytogenes. Alleles: lmo1118,
+  lmo0737, ORF2819, ORF2110, Prs
+
+  After lissero runs, every row in which LMO0737, ORF2110 and ORF2819 are
+  all reported as FULL has its SEROTYPE column rewritten to "4b variant"
+  (see variant4b.py).
+
+  References:
+
+  Doumith et al. Differentiation of the major Listeria monocytogenes
+  serovars by multiplex PCR. J Clin Microbiol, 2004; 42:8; 3819-22
+
+Options:
+  -h, --help              Show this message and exit.
+  --min_id FLOAT          Minimum percent identity to accept a match. [0-100][default=95.0]
+  --min_cov FLOAT         Minimum coverage of the gene to accept a match. [0-100][default=95.0]
+
+    ]]></help>
+    <citations>
+      <citation type="doi">10.1128/JCM.42.8.3819-3822.2004</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lissero/variant4b.py	Thu Feb 08 18:18:47 2024 +0000
@@ -0,0 +1,78 @@
+"""Post-process a LisSero result table to flag "4b variant" isolates.
+
+A row is relabelled when the LMO0737, ORF2110 and ORF2819 columns all read
+"FULL"; every other row is passed through unchanged.
+"""
+import sys
+
+
+def identify_variants_with_genes(input_file_path, output_file_path):
+    """Copy a tab-separated table, rewriting SEROTYPE for 4b variant rows.
+
+    Args:
+        input_file_path: Path of the tab-separated table to read. Its first
+            line must be a header naming the SEROTYPE, LMO0737, ORF2110 and
+            ORF2819 columns.
+        output_file_path: Path the processed table is written to.
+
+    Raises:
+        ValueError: If a required column is missing from the header.
+    """
+    # Define the genes of interest
+    genes_of_interest = ['LMO0737', 'ORF2110', 'ORF2819']
+
+    with open(input_file_path, 'r') as file:
+        lines = file.readlines()
+
+    # Nothing to relabel without at least one data row. The input is still
+    # copied through so that the caller always finds an output file.
+    if len(lines) <= 1:
+        print("Input file does not contain enough data.")
+        with open(output_file_path, 'w') as file:
+            file.writelines(lines)
+        return
+
+    # Only the line terminator is removed: a plain strip() would also eat
+    # trailing tabs and silently drop empty columns at the end of a row.
+    headers = lines[0].rstrip('\n').split('\t')
+    required = genes_of_interest + ['SEROTYPE']
+    missing = [name for name in required if name not in headers]
+    if missing:
+        raise ValueError(
+            "Input file is missing required column(s): " + ", ".join(missing)
+        )
+    gene_indices = [headers.index(gene) for gene in genes_of_interest]
+    serotype_index = headers.index('SEROTYPE')
+    last_needed = max(gene_indices + [serotype_index])
+
+    modified_lines = [lines[0]]  # Start with the header
+
+    for line in lines[1:]:
+        data = line.rstrip('\n').split('\t')
+        # Blank or truncated rows cannot match; pass them through untouched
+        # instead of failing with an IndexError.
+        if len(data) > last_needed and all(
+            data[index] == 'FULL' for index in gene_indices
+        ):
+            data[serotype_index] = "4b variant"
+        modified_lines.append('\t'.join(data) + '\n')
+
+    with open(output_file_path, 'w') as file:
+        file.writelines(modified_lines)
+
+    print(f'Results written to {output_file_path}')
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: python script.py <input_file_path> <output_file_path>")
+        sys.exit(1)
+
+    input_file_path = sys.argv[1]
+    output_file_path = sys.argv[2]
+    try:
+        identify_variants_with_genes(input_file_path, output_file_path)
+    except ValueError as err:
+        # Exit non-zero with a short message rather than a traceback.
+        sys.exit(str(err))
+