# HG changeset patch
# User cpt
# Date 1685933529 0
# Node ID edd518e72c89af65745ed733f07c3d87de84e1e4
# Parent 0d2226e1c5f69adbd34edd7cdb6b21247188788b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
diff -r 0d2226e1c5f6 -r edd518e72c89 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:52:09 2023 +0000
@@ -0,0 +1,115 @@
+
+
+
+ python
+ biopython
+ requests
+ cpt_gffparser
+
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/cpt-macros.xml
--- a/cpt_read_garnier/cpt-macros.xml Fri Jun 17 13:12:20 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-
-
-
-
- python
- biopython
- requests
-
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/macros.xml
--- a/cpt_read_garnier/macros.xml Fri Jun 17 13:12:20 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-
-
-
-
- python
- biopython
- cpt_gffparser
-
-
-
-
- "$blast_tsv"
-
-
-
-
-
-
- "$blast_xml"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- "$gff3_data"
-
-
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
-
-
-#if $reference_genome.reference_genome_source == 'history':
- ln -s $reference_genome.genome_fasta genomeref.fa;
-#end if
-
-
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
-
-
-
-
-
-
- "$sequences"
-
-
-
-
-
diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/reading_garnier_output.py
--- a/cpt_read_garnier/reading_garnier_output.py Fri Jun 17 13:12:20 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,133 +0,0 @@
-#!/usr/bin/env python
-
-import csv
-import argparse
-
-# import sys
-
-# This function reads through the tagseq file and outputs a list of sequence names and the lengths of each sequence.
-def garnier_sequences(tagseq_file=None):
- # open the file and create blank lists
- f = tagseq_file # open(tagseq_file, 'r')
- f.seek(0)
- sequence = []
- lengths = []
-
- # for each line the in file, search for the words 'Sequence' and 'to' to find the sequence name and length,
- # respectively. Then add sequence names and lengths to the proper lists
- for line in f:
- words = line.split()
- if line.startswith("# Sequence:"):
- # if 'Sequence:' in line:
- # if words[1] == 'Sequence:':
- sequence += [words[words.index("Sequence:") + 1]]
- # if words[5] == 'to:':
- # lengths += [int(words[6])]
- if words.index("to:"):
- lengths += [int(words[words.index("to:") + 1])]
- # return the sequence names and lengths
- return sequence, lengths
-
-
-# This function extracts the helix, sheet, turn, and coil predictions from the file. The predictions for each type of
-# secondary structure are joined together in one string.
-def garnier_secondary_struct(tagseq_file=None):
- # opens the file and sets variables for the structural predictions
- f = tagseq_file # open(tagseq_file, 'r')
- helix = ""
- turns = ""
- coil = ""
- sheet = ""
-
- # if the first work in the line indicates a structural prediction, it adds the rest of the line to the right
- # prediction string.
- for line in f:
- words = line.split()
- if len(words) > 0:
- if words[0] in "helix":
- helix += str(line[6:]).rstrip("\n")
- elif words[0] in "sheet":
- sheet += str(line[6:]).rstrip("\n")
- elif words[0] in "turns":
- turns += str(line[6:]).rstrip("\n")
- elif words[0] in "coil":
- coil += str(line[6:]).rstrip("\n")
- # f.close()
- # returns the four structural prediction strings
- return helix, turns, coil, sheet
-
-
-# This functions cuts the strings based on the lengths of the original sequences. Lengths are given in a list.
-def vector_cutter(vector, lengths_to_cut):
- # sets up iteration variables
- start = 0
- end = lengths_to_cut[0]
- maximum = len(lengths_to_cut)
- # creates output list
- output = []
- # loops through the number of sequences based on the number of lengths
- for i in range(maximum):
- # outputs list of sequence strings
- output += [str(vector[start:end])]
- start = end
- if i + 1 != maximum:
- end += lengths_to_cut[i + 1]
- # returns list of strings. Each sequence has a string included in the list.
- return output
-
-
-# this function takes the helix, turn, sheet, and coil predictions for each sequence and creates a single structural
-# prediction string.
-def single_prediction(helix, sheet, turns, coil):
- # sets output list
- secondary_structure = []
- # checks to make sure each of the strings is the same length
- if len(helix) == len(sheet) == len(coil) == len(turns):
- # loops through the length of each sequence, and when the value is not a blank it is added to the output
- # prediction list.
- for j in range(len(helix)):
- if helix[j] != " ":
- secondary_structure += [str(helix[j])]
- elif sheet[j] != " ":
- secondary_structure += [str(sheet[j])]
- elif coil[j] != " ":
- secondary_structure += [str(coil[j])]
- else:
- secondary_structure += [str(turns[j])]
- # returns the output prediction list for the sequence
- return secondary_structure
-
-
-if __name__ == "__main__":
- # Grab all of the filters from our plugin loader
- parser = argparse.ArgumentParser(
- description="Read Garnier Secondary Structure Prediction"
- )
- parser.add_argument(
- "tagseq_file", type=argparse.FileType("r"), help="Tagseq file input"
- )
- args = parser.parse_args()
-
- # opens the tagseq file and prepares for writing csv
- # f = open(sys.stdout, 'w', newline='')
- # writer = csv.writer(f)
-
- # reads tagseq file for helix, turn, coil, and sheet sequences as well as for names and lengths of the sequences
- # summarized in the tagseq file#!/usr/bin/env python\r
- Hel, Tur, Coi, She = garnier_secondary_struct(**vars(args))
- names, gives = garnier_sequences(**vars(args))
-
- # cut each of the structural prediction strings so that they are individual sequences
- Helix = vector_cutter(Hel, gives)
- Sheet = vector_cutter(She, gives)
- Turns = vector_cutter(Tur, gives)
- Coil = vector_cutter(Coi, gives)
-
- # for each sequence compile the four types of structural predictions into a single prediction, and output the final
- # prediction in csv format and to the screen
- for i in range(len(Helix)):
- Final = single_prediction(Helix[i], Sheet[i], Turns[i], Coil[i])
- # csv.writerow(['Sequence: '] + [names[i]])
- # csv.writerow(Final)
- print("Sequence Name: " + "\t" + names[i])
- print("\t".join(Final))
diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/reading_garnier_output.xml
--- a/cpt_read_garnier/reading_garnier_output.xml Fri Jun 17 13:12:20 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-
-
- read garnier tool output
-
- macros.xml
- cpt-macros.xml
-
-
- $output]]>
-
-
-
-
-
-
-
-
-
diff -r 0d2226e1c5f6 -r edd518e72c89 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:52:09 2023 +0000
@@ -0,0 +1,74 @@
+
+
+
+ progressivemauve
+
+ bcbiogff
+
+
+
+ 2.4.0
+
+ 10.1371/journal.pone.0011147
+
+
+ 10.1093/bioinformatics/btm039
+
+
+ '$xmfa'
+
+
+
+
+
+ '$sequences'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '$gff3_data'
+
+
+ #if str($reference_genome.reference_genome_source) == 'cached':
+ '${reference_genome.fasta_indexes.fields.path}'
+ #else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+ #end if
+
+
+ #if $reference_genome.reference_genome_source == 'history':
+ ln -s '$reference_genome.genome_fasta' genomeref.fa;
+ #end if
+
+
+ #if str($reference_genome.reference_genome_source) == 'cached':
+ '${reference_genome.fasta_indexes.fields.path}'
+ #else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+ #end if
+
+
diff -r 0d2226e1c5f6 -r edd518e72c89 reading_garnier_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reading_garnier_output.py Mon Jun 05 02:52:09 2023 +0000
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+import csv
+import argparse
+
+# import sys
+
+# This function reads through the tagseq file and outputs a list of sequence names and the lengths of each sequence.
+def garnier_sequences(tagseq_file=None):
+ # use the already-open file handle, rewind it, and create blank lists
+ f = tagseq_file # open(tagseq_file, 'r')
+ f.seek(0)
+ sequence = []
+ lengths = []
+
+ # for each line in the file, search for the words 'Sequence:' and 'to:' to find the sequence name and length,
+ # respectively. Then add sequence names and lengths to the proper lists
+ for line in f:
+ words = line.split()
+ if line.startswith("# Sequence:"):
+ # if 'Sequence:' in line:
+ # if words[1] == 'Sequence:':
+ sequence += [words[words.index("Sequence:") + 1]]
+ # if words[5] == 'to:':
+ # lengths += [int(words[6])]
+ if words.index("to:"):
+ lengths += [int(words[words.index("to:") + 1])]
+ # return the sequence names and lengths
+ return sequence, lengths
+
+
+# This function extracts the helix, sheet, turn, and coil predictions from the file. The predictions for each type of
+# secondary structure are joined together in one string.
+def garnier_secondary_struct(tagseq_file=None):
+ # uses the already-open file handle and sets empty strings for the structural predictions
+ f = tagseq_file # open(tagseq_file, 'r')
+ helix = ""
+ turns = ""
+ coil = ""
+ sheet = ""
+
+ # if the first word in the line is contained in a structure label ('helix', 'sheet', 'turns', 'coil'), it adds the
+ # rest of the line (from column 6 onwards, without the trailing newline) to the matching prediction string.
+ for line in f:
+ words = line.split()
+ if len(words) > 0:
+ if words[0] in "helix":
+ helix += str(line[6:]).rstrip("\n")
+ elif words[0] in "sheet":
+ sheet += str(line[6:]).rstrip("\n")
+ elif words[0] in "turns":
+ turns += str(line[6:]).rstrip("\n")
+ elif words[0] in "coil":
+ coil += str(line[6:]).rstrip("\n")
+ # f.close()
+ # returns the four structural prediction strings
+ return helix, turns, coil, sheet
+
+
+# This function cuts the strings based on the lengths of the original sequences. Lengths are given in a list.
+def vector_cutter(vector, lengths_to_cut):
+ # sets up iteration variables
+ start = 0
+ end = lengths_to_cut[0]
+ maximum = len(lengths_to_cut)
+ # creates output list
+ output = []
+ # loops through the number of sequences based on the number of lengths
+ for i in range(maximum):
+ # outputs list of sequence strings
+ output += [str(vector[start:end])]
+ start = end
+ if i + 1 != maximum:
+ end += lengths_to_cut[i + 1]
+ # returns list of strings. Each sequence has a string included in the list.
+ return output
+
+
+# This function takes the helix, sheet, turn, and coil predictions for one sequence and merges them into a single
+# structural prediction, returned as a list of one-character strings.
+def single_prediction(helix, sheet, turns, coil):
+ # sets output list
+ secondary_structure = []
+ # checks to make sure each of the strings is the same length
+ if len(helix) == len(sheet) == len(coil) == len(turns):
+ # loops through the length of each sequence, and when the value is not a blank it is added to the output
+ # prediction list.
+ for j in range(len(helix)):
+ if helix[j] != " ":
+ secondary_structure += [str(helix[j])]
+ elif sheet[j] != " ":
+ secondary_structure += [str(sheet[j])]
+ elif coil[j] != " ":
+ secondary_structure += [str(coil[j])]
+ else:
+ secondary_structure += [str(turns[j])]
+ # returns the output prediction list for the sequence
+ return secondary_structure
+
+
+if __name__ == "__main__":
+ # Parse the command-line arguments: a single positional tagseq file, opened for reading
+ parser = argparse.ArgumentParser(
+ description="Read Garnier Secondary Structure Prediction"
+ )
+ parser.add_argument(
+ "tagseq_file", type=argparse.FileType("r"), help="Tagseq file input"
+ )
+ args = parser.parse_args()
+
+ # opens the tagseq file and prepares for writing csv
+ # f = open(sys.stdout, 'w', newline='')
+ # writer = csv.writer(f)
+
+ # reads tagseq file for helix, turn, coil, and sheet sequences as well as for names and lengths of the sequences
+ # summarized in the tagseq file
+ Hel, Tur, Coi, She = garnier_secondary_struct(**vars(args))
+ names, gives = garnier_sequences(**vars(args))
+
+ # cut each of the structural prediction strings so that they are individual sequences
+ Helix = vector_cutter(Hel, gives)
+ Sheet = vector_cutter(She, gives)
+ Turns = vector_cutter(Tur, gives)
+ Coil = vector_cutter(Coi, gives)
+
+ # for each sequence compile the four types of structural predictions into a single prediction, and print the
+ # sequence name and the tab-separated final prediction to standard output
+ for i in range(len(Helix)):
+ Final = single_prediction(Helix[i], Sheet[i], Turns[i], Coil[i])
+ # csv.writerow(['Sequence: '] + [names[i]])
+ # csv.writerow(Final)
+ print("Sequence Name: " + "\t" + names[i])
+ print("\t".join(Final))
diff -r 0d2226e1c5f6 -r edd518e72c89 reading_garnier_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reading_garnier_output.xml Mon Jun 05 02:52:09 2023 +0000
@@ -0,0 +1,27 @@
+
+ read garnier tool output
+
+ macros.xml
+ cpt-macros.xml
+
+
+ '$output']]>
+
+
+
+
+
+
+
+
+