Mercurial > repos > cpt > cpt_disruptin_table

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Disruptin_hydrophobicity_helicity_table_package.py	Mon Jun 05 02:41:05 2023 +0000
@@ -0,0 +1,122 @@
+"""
+This program is intended to create the output table for the disruptin finder workflow
+"""
+from Bio import SeqIO
+from Bio.SeqUtils.ProtParam import ProteinAnalysis
+from Bio.SeqUtils import ProtParamData
+import csv
+import argparse
+import sys
+
+
+# Symbol written in the "Charge" row for each residue class.
+_RESIDUE_CLASSES = (
+    ("+", "RK"),  # positively charged
+    ("-", "DE"),  # negatively charged
+    ("N", "AVMILPWFG"),  # nonpolar
+    ("P", "HSYTCQN"),  # polar
+)
+# Lookup from a one-letter residue code to its class symbol.
+_SYMBOL_OF = {aa: sym for sym, group in _RESIDUE_CLASSES for aa in group}
+
+
+def _read_secondary_structures(garnier_file):
+    """Return one secondary-structure string per garnier record, in order.
+
+    Only lines whose first character is H, E, T or C are kept; tab
+    separators and the line ending are removed from each.
+    """
+    structures = []
+    for row in garnier_file:
+        # Slicing (not row[0]) keeps an empty string from raising IndexError.
+        if row[:1] in ("H", "E", "T", "C"):
+            # Strip the line ending so it is never read as a structure state.
+            structures.append("".join(row.rstrip("\r\n").split("\t")))
+    return structures
+
+
+def disruptin_table(garnier_file, fasta_file):
+    """Collect the per-residue table rows for every candidate sequence.
+
+    garnier_file: iterable of text lines. Every line starting with H, E, T
+        or C is read as one record's secondary-structure string (tabs and
+        the line ending are dropped); records pair with the FASTA by order.
+    fasta_file: handle or path accepted by SeqIO.parse(..., "fasta").
+
+    Returns six parallel lists, one entry per FASTA record: record ids,
+    positions (1-based, as strings), residues, charge/polarity symbols,
+    hydrophobicity values and secondary-structure states. Every inner list
+    has one element per residue.
+
+    Raises IndexError when the garnier data has fewer records, or a shorter
+    structure string, than the FASTA file needs.
+    """
+    sec_struct = _read_secondary_structures(garnier_file)
+
+    record = []
+    p = []
+    r = []
+    c = []
+    h = []
+    s = []
+
+    # Parse the .fasta file; records are matched to sec_struct by index.
+    for record_number, rec in enumerate(SeqIO.parse(fasta_file, "fasta")):
+        sequence = str(rec.seq)
+        length = len(sequence)
+
+        # Hydrophobicity on the Kyte and Doolittle scale with a window of 9,
+        # computed once per record. The scale has no value for the first 4
+        # and last 4 residues, so those are set to 0 to keep each value
+        # centred on its residue; the slice keeps the row the same length as
+        # the sequence when it is shorter than the window.
+        scale = ProteinAnalysis(sequence).protein_scale(ProtParamData.kd, 9)
+        hydro = ([0] * 4 + scale + [0] * 4)[:length]
+
+        record.append(rec.id)
+        p.append([str(i) for i in range(1, length + 1)])
+        r.append(list(sequence))
+        # Residues outside the four classes (e.g. X or *) are marked "?" rather
+        # than inheriting whatever symbol the previous residue produced.
+        c.append([_SYMBOL_OF.get(aa.upper(), "?") for aa in sequence])
+        h.append(hydro)
+        s.append([sec_struct[record_number][i] for i in range(length)])
+
+    # Parallel lists: ids, positions, residues, symbols, hydro, structure.
+    return record, p, r, c, h, s
+
+
+if __name__ == "__main__":
+    # Command-line entry point. Both positional arguments are opened for
+    # reading by argparse, so disruptin_table receives open file handles.
+    cli = argparse.ArgumentParser(description="Disruptin Table Output")
+    cli.add_argument(
+        "garnier_file",
+        type=argparse.FileType("r"),
+        help="csv file from garnier reader",
+    )
+    cli.add_argument(
+        "fasta_file",
+        type=argparse.FileType("r"),
+        help="fasta file of disruptin candidates",
+    )
+    options = cli.parse_args()
+
+    # disruptin_table returns six parallel lists (ids, positions, residues,
+    # charge symbols, hydrophobicity, structure) with one entry per record.
+    columns = disruptin_table(
+        garnier_file=options.garnier_file,
+        fasta_file=options.fasta_file,
+    )
+
+    # Write the table to stdout: an id line followed by five tab-separated
+    # rows per record. zip(*columns) walks the parallel lists in lockstep,
+    # handing each record's id and rows to one loop iteration.
+    for iden, position, residue, charge, hydro, struct in zip(*columns):
+        print(str(iden))
+        print("Position \t " + "\t".join(position))
+        print("Residue \t" + "\t".join(residue))
+        print("Charge \t" + "\t".join(charge))
+        # Hydrophobicity values are rendered with three decimal places.
+        print("Hydrophobicity \t" + "\t".join(map("{:.3f}".format, hydro)))
+        print("Secondary Structure \t" + "\t".join(struct))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Disruptin_hydrophobicity_helicity_table_package.xml	Mon Jun 05 02:41:05 2023 +0000
@@ -0,0 +1,32 @@
+<tool id="edu.tamu.cpt2.phage.disruptin_table" name="Disruptin Table Output" version="1.0">
+  <description>makes table of disruptin candidates</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/> <!-- must supply biopython: the script imports Bio -->
+  <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/Disruptin_hydrophobicity_helicity_table_package.py'
+'$garnier_file'
+'$fasta_file'
+
+
+
+> '$output']]></command>
+  <inputs>
+    <param label="Garnier csv file" name="garnier_file" type="data" format="tabular"/>
+    <param label="Candidate fasta file" name="fasta_file" type="data" format="fasta"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output"/>
+  </outputs>
+  <help><![CDATA[
+**What it does**
+This program takes the parsed output from the garnier tool and the fasta file with disruptin candidate sequences
+and compiles information on each of the sequences into a table format. The table includes the sequence and the position for each residue
+as well as the charge, hydrophobicity (based on the Kyte Doolittle scale), and secondary structure prediction from
+the garnier tool.
+
+        ]]></help>
+  <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml	Mon Jun 05 02:41:05 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements"> <!-- Python 2.7 dependency set; Disruptin_hydrophobicity_helicity_table_package.xml expands "requirements", not this macro. -->
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+		<![CDATA[
+			cd '$__tool_directory__' && git rev-parse HEAD
+		]]>
+		</version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+    </xml>
+    <xml name="citations"> <!-- Expanded by Disruptin_hydrophobicity_helicity_table_package.xml. -->
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+        <yield/>
+    </xml>
+</macros>
--- a/cpt_disruptin_table/Disruptin_hydrophobicity_helicity_table_package.py	Fri Jun 17 12:33:22 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-"""
-This program is intended to create the output table for the disruptin finder workflow
-"""
-from Bio import SeqIO
-from Bio.SeqUtils.ProtParam import ProteinAnalysis
-from Bio.SeqUtils import ProtParamData
-import csv
-import argparse
-import sys
-
-
-def disruptin_table(garnier_file, fasta_file):
-    # Iterable variables
-    position = 1
-    net_charge = 0
-    charge_res = 0
-    record_number = 0
-
-    # loop structures
-    names = []
-    sec_struct = []
-
-    # reading the lines from the garnier csv file
-#    with open(garnier_file,'r') as csvfile:
-#        garnierreader = csv.reader(csvfile)
-    for row in garnier_file:
-        if row[0] == 'Sequence: ':
-            names += [row[1]]
-        elif row[0] in 'HETC':
-	    row = row.split('\t')
-            sec_struct += [''.join(row)]
-
-    record = []
-    p = []
-    r = []
-    c = []
-    h = []
-    s = []
-
-    # Parse the .fasta file and get the sequence
-    for rec in SeqIO.parse(fasta_file, "fasta"):
-        sequence = str(rec.seq)
-
-        # Set up the information vectors: for position #, residue, hydrophobic/charge/polar/nonpolar, and secondary
-        # structure
-        record += [rec.id]
-        position_vec = []
-        residue_vec = []
-        charge_sym_vec = []
-        sec_struct_vec = []
-
-        for aa in sequence:
-            position_vec += [str(position)]
-            residue_vec += [str(aa)]
-            sec_struct_vec += [str(sec_struct[record_number][position - 1])]
-
-            # For R and K residues a positive charge is given
-            if aa in "RK":
-                symbol = "+"
-            # For D and E residues a negative charge is given
-            elif aa in "DE":
-                symbol = "-"
-            elif aa in "AVMILPWFG":
-                symbol = "N"
-            elif aa in "HSYTCQN":
-                symbol = "P"
-            charge_sym_vec += symbol
-            position += 1
-
-            # Calculating hyrophobicity based on Kyte and Doolittle scale. Using binning value of 9. Since the binning
-            # is 9, the first 4 residues and last 4 residues as set blank so as to center the values to their
-            # approximate position on the sequence.
-            prot_ana_seq = ProteinAnalysis(sequence)
-            hydro = [0] * 4 + prot_ana_seq.protein_scale(ProtParamData.kd, 9) + [0] * 4
-
-        record_number += 1
-        position = 1
-
-        p += [position_vec]
-        r += [residue_vec]
-        c += [charge_sym_vec]
-        h += [hydro]
-        s += [sec_struct_vec]
-
-    # returns values for name of the sequence
-    return record, p, r, c, h, s
-
-
-if __name__ == "__main__":
-    # Grab all of the filters from our plugin loader
-    parser = argparse.ArgumentParser(description="Disruptin Table Output")
-    parser.add_argument(
-        "garnier_file", type=argparse.FileType("r"), help="csv file from garnier reader"
-    )
-    parser.add_argument(
-        "fasta_file",
-        type=argparse.FileType("r"),
-        help="fasta file of disruptin candidates",
-    )
-    args = parser.parse_args()
-
-    # Set up output location
-#    f = open(sys.stdout, 'w', newline='')
-#    writer1 = csv.writer(f)
-
-    iden, position, residue, charge, hydro, struct = disruptin_table(**vars(args))
-
-    for i in range(len(iden)):
-#		 writer1.writerow(['Protein ID']+[iden[i]])
-#        writer1.writerow(['Position'] + [format(x, 's') for x in position[i]])
-#        writer1.writerow(['Residue'] + [format(x, 's') for x in residue[i]])
-#		 writer1.writerow(['Charge'] + [format(x, 's') for x in charge[i]])
-#        writer1.writerow(['Hydrophobicity'] + [format(x, '.3f') for x in hydro[i]])
-#        writer1.writerow(['Secondary Structure'] + [format(x, 's') for x in struct[i]])
-#        writer1.writerow([''])
-
-        print(str(iden[i]))
-        print("Position \t " + "\t".join(position[i]))
-        print("Residue \t" + "\t".join(residue[i]))
-        print("Charge \t" + "\t".join(charge[i]))
-        print("Hydrophobicity \t" + "\t".join(format(x, ".3f") for x in hydro[i]))
-        print("Secondary Structure \t" + "\t".join(struct[i]))
--- a/cpt_disruptin_table/Disruptin_hydrophobicity_helicity_table_package.xml	Fri Jun 17 12:33:22 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt2.phage.disruptin_table" name="Disruptin Table Output" version="1.0">
-    <description>makes table of disruptin candidates</description>
-    <macros>
-		<import>macros.xml</import>
-		<import>cpt-macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-	<command detect_errors="aggressive"><![CDATA[
-python $__tool_directory__/Disruptin_hydrophobicity_helicity_table_package.py
-$garnier_file
-$fasta_file
-
-
-
->$output]]></command>
-    <inputs>
-        <param label="Garnier csv file" name="garnier_file" type="data" format="tabular" />
-		<param label="Candidate fasta file" name="fasta_file" type="data" format="fasta" />
-    </inputs>
-    <outputs>
-		<data format="tabular" name="output"/>
-    </outputs>
-    <help><![CDATA[
-**What it does**
-This program takes the parsed output from the garnier tool and the fasta file with disruptin candidate sequences
-and compiles information on each of the sequences into a table format. The table includes the sequence and the position for each residue
-as well as the charge, hydrophobicity (based on the Kyte Doolittle scale), and secondary structure prediction from
-the garnier tool.
-
-        ]]></help>
-		<expand macro="citations" />
-</tool>
--- a/cpt_disruptin_table/cpt-macros.xml	Fri Jun 17 12:33:22 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-	<xml name="gff_requirements">
-		<requirements>
-			<requirement type="package" version="2.7">python</requirement>
-			<requirement type="package" version="1.65">biopython</requirement>
-			<requirement type="package" version="2.12.1">requests</requirement>
-			<yield/>
-		</requirements>
-		<version_command>
-		<![CDATA[
-			cd $__tool_directory__ && git rev-parse HEAD
-		]]>
-		</version_command>
-	</xml>
-	<xml name="citation/mijalisrasche">
-		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-		<citation type="bibtex">@unpublished{galaxyTools,
-		author = {E. Mijalis, H. Rasche},
-		title = {CPT Galaxy Tools},
-		year = {2013-2017},
-		note = {https://github.com/tamu-cpt/galaxy-tools/}
-		}
-		</citation>
-	</xml>
-	<xml name="citations">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-		<yield/>
-		</citations>
-	</xml>
-    	<xml name="citations-crr">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Ross},
-				title = {CPT Galaxy Tools},
-				year = {2020-},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-		<yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020-AJC-solo">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-clm">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="sl-citations-clm">
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-	</xml>
-</macros>
--- a/cpt_disruptin_table/macros.xml	Fri Jun 17 12:33:22 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-  <xml name="requirements">
-    <requirements>
-		<requirement type="package" version="3.8.13">python</requirement>
-		<requirement type="package" version="1.79">biopython</requirement>
-		<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
-		<yield/>
-    </requirements>
-  </xml>
-  <xml name="genome_selector">
-	    <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
-  </xml>
-  <xml name="gff3_input">
-    <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
-  </xml>
-  <token name="@GENOME_SELECTOR_PRE@">
-		ln -s $genome_fasta genomeref.fa;
-	</token>
-	<token name="@GENOME_SELECTOR@">
-		genomeref.fa
-	</token>
-</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Jun 05 02:41:05 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <!-- The tool script imports Bio.SeqIO and Bio.SeqUtils (biopython). -->
+            <requirement type="package" version="3.8.13">python</requirement>
+            <requirement type="package" version="1.79">biopython</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+		'$xmfa'
+	</token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+		'$sequences'
+	</token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+	    '$gff3_data'
+	</token>
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+</macros>