# HG changeset patch # User cpt # Date 1685933529 0 # Node ID edd518e72c89af65745ed733f07c3d87de84e1e4 # Parent 0d2226e1c5f69adbd34edd7cdb6b21247188788b planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c diff -r 0d2226e1c5f6 -r edd518e72c89 cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt-macros.xml Mon Jun 05 02:52:09 2023 +0000 @@ -0,0 +1,115 @@ + + + + python + biopython + requests + cpt_gffparser + + + + + + + + 10.1371/journal.pcbi.1008214 + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/cpt-macros.xml --- a/cpt_read_garnier/cpt-macros.xml Fri Jun 17 13:12:20 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ - - - - - python - biopython - requests - - - - - - - - 10.1371/journal.pcbi.1008214 - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {C. Ross}, - title = {CPT Galaxy Tools}, - year = {2020-}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/macros.xml --- a/cpt_read_garnier/macros.xml Fri Jun 17 13:12:20 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ - - - - - python - biopython - cpt_gffparser - - - - - "$blast_tsv" - - - - - - - "$blast_xml" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - "$gff3_data" - - -#if str($reference_genome.reference_genome_source) == 'cached': - "${reference_genome.fasta_indexes.fields.path}" -#else if str($reference_genome.reference_genome_source) == 'history': - genomeref.fa -#end if - - -#if $reference_genome.reference_genome_source == 'history': - ln -s $reference_genome.genome_fasta genomeref.fa; -#end if - - -#if str($reference_genome.reference_genome_source) == 'cached': - "${reference_genome.fasta_indexes.fields.path}" -#else if str($reference_genome.reference_genome_source) == 'history': - genomeref.fa -#end if - - - - - - - "$sequences" - - - - - diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/reading_garnier_output.py --- a/cpt_read_garnier/reading_garnier_output.py Fri Jun 17 13:12:20 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,133 +0,0 @@ -#!/usr/bin/env python - -import csv -import argparse - -# import sys - -# This function reads through the tagseq file and outputs a list of sequence names and the lengths of each sequence. -def garnier_sequences(tagseq_file=None): - # open the file and create blank lists - f = tagseq_file # open(tagseq_file, 'r') - f.seek(0) - sequence = [] - lengths = [] - - # for each line the in file, search for the words 'Sequence' and 'to' to find the sequence name and length, - # respectively. Then add sequence names and lengths to the proper lists - for line in f: - words = line.split() - if line.startswith("# Sequence:"): - # if 'Sequence:' in line: - # if words[1] == 'Sequence:': - sequence += [words[words.index("Sequence:") + 1]] - # if words[5] == 'to:': - # lengths += [int(words[6])] - if words.index("to:"): - lengths += [int(words[words.index("to:") + 1])] - # return the sequence names and lengths - return sequence, lengths - - -# This function extracts the helix, sheet, turn, and coil predictions from the file. The predictions for each type of -# secondary structure are joined together in one string. -def garnier_secondary_struct(tagseq_file=None): - # opens the file and sets variables for the structural predictions - f = tagseq_file # open(tagseq_file, 'r') - helix = "" - turns = "" - coil = "" - sheet = "" - - # if the first work in the line indicates a structural prediction, it adds the rest of the line to the right - # prediction string. - for line in f: - words = line.split() - if len(words) > 0: - if words[0] in "helix": - helix += str(line[6:]).rstrip("\n") - elif words[0] in "sheet": - sheet += str(line[6:]).rstrip("\n") - elif words[0] in "turns": - turns += str(line[6:]).rstrip("\n") - elif words[0] in "coil": - coil += str(line[6:]).rstrip("\n") - # f.close() - # returns the four structural prediction strings - return helix, turns, coil, sheet - - -# This functions cuts the strings based on the lengths of the original sequences. Lengths are given in a list. -def vector_cutter(vector, lengths_to_cut): - # sets up iteration variables - start = 0 - end = lengths_to_cut[0] - maximum = len(lengths_to_cut) - # creates output list - output = [] - # loops through the number of sequences based on the number of lengths - for i in range(maximum): - # outputs list of sequence strings - output += [str(vector[start:end])] - start = end - if i + 1 != maximum: - end += lengths_to_cut[i + 1] - # returns list of strings. Each sequence has a string included in the list. - return output - - -# this function takes the helix, turn, sheet, and coil predictions for each sequence and creates a single structural -# prediction string. -def single_prediction(helix, sheet, turns, coil): - # sets output list - secondary_structure = [] - # checks to make sure each of the strings is the same length - if len(helix) == len(sheet) == len(coil) == len(turns): - # loops through the length of each sequence, and when the value is not a blank it is added to the output - # prediction list. - for j in range(len(helix)): - if helix[j] != " ": - secondary_structure += [str(helix[j])] - elif sheet[j] != " ": - secondary_structure += [str(sheet[j])] - elif coil[j] != " ": - secondary_structure += [str(coil[j])] - else: - secondary_structure += [str(turns[j])] - # returns the output prediction list for the sequence - return secondary_structure - - -if __name__ == "__main__": - # Grab all of the filters from our plugin loader - parser = argparse.ArgumentParser( - description="Read Garnier Secondary Structure Prediction" - ) - parser.add_argument( - "tagseq_file", type=argparse.FileType("r"), help="Tagseq file input" - ) - args = parser.parse_args() - - # opens the tagseq file and prepares for writing csv - # f = open(sys.stdout, 'w', newline='') - # writer = csv.writer(f) - - # reads tagseq file for helix, turn, coil, and sheet sequences as well as for names and lengths of the sequences - # summarized in the tagseq file#!/usr/bin/env python\r - Hel, Tur, Coi, She = garnier_secondary_struct(**vars(args)) - names, gives = garnier_sequences(**vars(args)) - - # cut each of the structural prediction strings so that they are individual sequences - Helix = vector_cutter(Hel, gives) - Sheet = vector_cutter(She, gives) - Turns = vector_cutter(Tur, gives) - Coil = vector_cutter(Coi, gives) - - # for each sequence compile the four types of structural predictions into a single prediction, and output the final - # prediction in csv format and to the screen - for i in range(len(Helix)): - Final = single_prediction(Helix[i], Sheet[i], Turns[i], Coil[i]) - # csv.writerow(['Sequence: '] + [names[i]]) - # csv.writerow(Final) - print("Sequence Name: " + "\t" + names[i]) - print("\t".join(Final)) diff -r 0d2226e1c5f6 -r edd518e72c89 cpt_read_garnier/reading_garnier_output.xml --- a/cpt_read_garnier/reading_garnier_output.xml Fri Jun 17 13:12:20 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ - - - read garnier tool output - - macros.xml - cpt-macros.xml - - - $output]]> - - - - - - - - - diff -r 0d2226e1c5f6 -r edd518e72c89 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jun 05 02:52:09 2023 +0000 @@ -0,0 +1,74 @@ + + + + progressivemauve + + bcbiogff + + + + 2.4.0 + + 10.1371/journal.pone.0011147 + + + 10.1093/bioinformatics/btm039 + + + '$xmfa' + + + + + + '$sequences' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + '$gff3_data' + + + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + + + #if $reference_genome.reference_genome_source == 'history': + ln -s '$reference_genome.genome_fasta' genomeref.fa; + #end if + + + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + + diff -r 0d2226e1c5f6 -r edd518e72c89 reading_garnier_output.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reading_garnier_output.py Mon Jun 05 02:52:09 2023 +0000 @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +import csv +import argparse + +# import sys + +# This function reads through the tagseq file and outputs a list of sequence names and the lengths of each sequence. +def garnier_sequences(tagseq_file=None): + # open the file and create blank lists + f = tagseq_file # open(tagseq_file, 'r') + f.seek(0) + sequence = [] + lengths = [] + + # for each line the in file, search for the words 'Sequence' and 'to' to find the sequence name and length, + # respectively. Then add sequence names and lengths to the proper lists + for line in f: + words = line.split() + if line.startswith("# Sequence:"): + # if 'Sequence:' in line: + # if words[1] == 'Sequence:': + sequence += [words[words.index("Sequence:") + 1]] + # if words[5] == 'to:': + # lengths += [int(words[6])] + if words.index("to:"): + lengths += [int(words[words.index("to:") + 1])] + # return the sequence names and lengths + return sequence, lengths + + +# This function extracts the helix, sheet, turn, and coil predictions from the file. The predictions for each type of +# secondary structure are joined together in one string. +def garnier_secondary_struct(tagseq_file=None): + # opens the file and sets variables for the structural predictions + f = tagseq_file # open(tagseq_file, 'r') + helix = "" + turns = "" + coil = "" + sheet = "" + + # if the first work in the line indicates a structural prediction, it adds the rest of the line to the right + # prediction string. + for line in f: + words = line.split() + if len(words) > 0: + if words[0] in "helix": + helix += str(line[6:]).rstrip("\n") + elif words[0] in "sheet": + sheet += str(line[6:]).rstrip("\n") + elif words[0] in "turns": + turns += str(line[6:]).rstrip("\n") + elif words[0] in "coil": + coil += str(line[6:]).rstrip("\n") + # f.close() + # returns the four structural prediction strings + return helix, turns, coil, sheet + + +# This functions cuts the strings based on the lengths of the original sequences. Lengths are given in a list. +def vector_cutter(vector, lengths_to_cut): + # sets up iteration variables + start = 0 + end = lengths_to_cut[0] + maximum = len(lengths_to_cut) + # creates output list + output = [] + # loops through the number of sequences based on the number of lengths + for i in range(maximum): + # outputs list of sequence strings + output += [str(vector[start:end])] + start = end + if i + 1 != maximum: + end += lengths_to_cut[i + 1] + # returns list of strings. Each sequence has a string included in the list. + return output + + +# this function takes the helix, turn, sheet, and coil predictions for each sequence and creates a single structural +# prediction string. +def single_prediction(helix, sheet, turns, coil): + # sets output list + secondary_structure = [] + # checks to make sure each of the strings is the same length + if len(helix) == len(sheet) == len(coil) == len(turns): + # loops through the length of each sequence, and when the value is not a blank it is added to the output + # prediction list. + for j in range(len(helix)): + if helix[j] != " ": + secondary_structure += [str(helix[j])] + elif sheet[j] != " ": + secondary_structure += [str(sheet[j])] + elif coil[j] != " ": + secondary_structure += [str(coil[j])] + else: + secondary_structure += [str(turns[j])] + # returns the output prediction list for the sequence + return secondary_structure + + +if __name__ == "__main__": + # Grab all of the filters from our plugin loader + parser = argparse.ArgumentParser( + description="Read Garnier Secondary Structure Prediction" + ) + parser.add_argument( + "tagseq_file", type=argparse.FileType("r"), help="Tagseq file input" + ) + args = parser.parse_args() + + # opens the tagseq file and prepares for writing csv + # f = open(sys.stdout, 'w', newline='') + # writer = csv.writer(f) + + # reads tagseq file for helix, turn, coil, and sheet sequences as well as for names and lengths of the sequences + # summarized in the tagseq file#!/usr/bin/env python\r + Hel, Tur, Coi, She = garnier_secondary_struct(**vars(args)) + names, gives = garnier_sequences(**vars(args)) + + # cut each of the structural prediction strings so that they are individual sequences + Helix = vector_cutter(Hel, gives) + Sheet = vector_cutter(She, gives) + Turns = vector_cutter(Tur, gives) + Coil = vector_cutter(Coi, gives) + + # for each sequence compile the four types of structural predictions into a single prediction, and output the final + # prediction in csv format and to the screen + for i in range(len(Helix)): + Final = single_prediction(Helix[i], Sheet[i], Turns[i], Coil[i]) + # csv.writerow(['Sequence: '] + [names[i]]) + # csv.writerow(Final) + print("Sequence Name: " + "\t" + names[i]) + print("\t".join(Final)) diff -r 0d2226e1c5f6 -r edd518e72c89 reading_garnier_output.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reading_garnier_output.xml Mon Jun 05 02:52:09 2023 +0000 @@ -0,0 +1,27 @@ + + read garnier tool output + + macros.xml + cpt-macros.xml + + + '$output']]> + + + + + + + + +