Mercurial > repos > abims-sbr > filter_assemblies
comparison filter_assembly.xml @ 0:7a813e633d1c draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
| author | abims-sbr |
|---|---|
| date | Fri, 01 Feb 2019 10:22:32 -0500 |
| parents | |
| children | a83562c0719f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7a813e633d1c |
|---|---|
| 1 <tool name="Filter assemblies" id="filter_assemblies" version="2.0.3"> | |
| 2 | |
| 3 <description> | |
| 4 Filter the outputs of Velvet or Trinity assemblies | |
| 5 </description> | |
| 6 | |
| 7 <macros> <!-- Pulls in macros.xml; presumably the source of the python_required macro and the @HELP_AUTHORS@ token used in this file (macros.xml is not shown here, confirm). --> | |
| 8 <import>macros.xml</import> | |
| 9 </macros> | |
| 10 | |
| 11 <requirements> <!-- Runtime dependencies: whatever the python_required macro expands to (defined outside this file), plus pinned fastx_toolkit 0.0.14 and cap3 10.2011 packages. --> | |
| 12 <expand macro="python_required" /> | |
| 13 <requirement type="package" version="0.0.14">fastx_toolkit</requirement> | |
| 14 <requirement type="package" version="10.2011">cap3</requirement> | |
| 15 </requirements> | |
| 16 | |
| 17 <command> <!-- Cheetah template: symlinks every input under its element identifier and builds a comma-separated list of those names, symlinks the helper scripts S02a to S05 into the working directory, then runs S01_script_to_choose.py with the list and the three cutoffs. Stdout is redirected to the log dataset, whose path is single-quoted like every other path here. --> | |
| 18 <![CDATA[ | |
| 19 #set $infiles = "" | |
| 20 #for $input in $inputs | |
| 21 ln -s '$input' '$input.element_identifier'; | |
| 22 #set $infiles = $infiles + $input.element_identifier + "," | |
| 23 #end for | |
| 24 #set $infiles = $infiles[:-1] | |
| 25 | |
| 26 ln -s '$__tool_directory__/scripts/S02a_remove_redondancy_from_velvet_oases.py' . && | |
| 27 ln -s '$__tool_directory__/scripts/S02b_format_fasta_name_trinity.py' . && | |
| 28 ln -s '$__tool_directory__/scripts/S03_choose_one_variants_per_locus_trinity.py' . && | |
| 29 ln -s '$__tool_directory__/scripts/S04_find_orf.py' . && | |
| 30 ln -s '$__tool_directory__/scripts/S05_filter.py' . && | |
| 31 | |
| 32 python '$__tool_directory__/scripts/S01_script_to_choose.py' | |
| 33 | |
| 34 '$infiles' | |
| 35 $length_seq_max | |
| 36 $percent_identity | |
| 37 $overlap_length | |
| 38 > '$log' | |
| 39 ]]> | |
| 40 </command> | |
| 41 | |
| 42 <inputs> <!-- One multi-select FASTA input plus three integer cutoffs. The min/max bounds on the two cap3 cutoffs enforce the ranges already stated in their help text (percent identity above 65 and at most 100, overlap length above 15), so out-of-range values are rejected by the form instead of reaching the job. --> | |
| 43 <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" /> | |
| 44 <param name="percent_identity" type="integer" value="100" min="66" max="100" label="Overlap percent identity cutoff" help="Cap3 parameter (-p N); minimum percent identity of an overlap. The specified value should be more than 65%." /> | |
| 45 <param name="overlap_length" type="integer" value="60" min="16" label="Overlap length cutoff" help="Cap3 parameter (-o N); minimum length of an overlap (in base pairs). The specified value should be more than 15 base pairs." /> | |
| 46 <param name="length_seq_max" type="integer" value="100" label="Minimum sequence length" help="Keep sequences which length is higher than the minimum sequence length " /> | |
| 47 </inputs> | |
| 48 | |
| 49 <outputs> <!-- Two outputs: a list collection whose elements are discovered from files in the "outputs" directory (element name and datatype taken from each file name by the __name_and_ext__ pattern), and the txt "log" dataset that receives the command's redirected stdout. --> | |
| 50 <collection name="output_fasta" type="list" label="Filter Assemblies outputs"> | |
| 51 <discover_datasets pattern="__name_and_ext__" directory="outputs" /> | |
| 52 </collection> | |
| 53 <data format="txt" name="log" label="Filter Assemblies Summary"/> | |
| 54 </outputs> | |
| 55 | |
| 56 <tests> <!-- Three functional tests using the same cutoffs (percent_identity 100, overlap_length 60, length_seq_max 100); they differ only in which assembler's files are supplied. --> | |
| 57 <test> <!-- Mixed run: four Trinity and three Velvet assemblies in a single invocation. --> | |
| 58 <param name="inputs" ftype="fasta" value="trinity/Pfiji_trinity.fasta,trinity/Apomp_trinity.fasta,trinity/Amphi_trinity.fasta,trinity/Acaud_trinity.fasta,velvet/Pg_transcriptome_90109.fasta,velvet/Ap_transcriptome_35099.fasta,velvet/Ac_transcriptome_25591.fasta" /> | |
| 59 <param name="percent_identity" value="100" /> | |
| 60 <param name="overlap_length" value="60" /> | |
| 61 <param name="length_seq_max" value="100" /> | |
| 62 <output name="log" value="trinity_and_velvet_up.output" /> | |
| 63 <output_collection name="output_fasta" type="list"> | |
| 64 <element name="AcAc_transcriptome_25591" value="velvet_out/AcAc_transcriptome_25591.fasta" /> | |
| 65 <element name="ApAp_transcriptome_35099" value="velvet_out/ApAp_transcriptome_35099.fasta" /> | |
| 66 <element name="PgPg_transcriptome_90109" value="velvet_out/PgPg_transcriptome_90109.fasta" /> | |
| 67 <element name="AcAcaud_trinity" value="trinity_out/AcAcaud_trinity.fasta" /> | |
| 68 <element name="AmAmphi_trinity" value="trinity_out/AmAmphi_trinity.fasta" /> | |
| 69 <element name="ApApomp_trinity" value="trinity_out/ApApomp_trinity.fasta" /> | |
| 70 <element name="PfPfiji_trinity" value="trinity_out/PfPfiji_trinity.fasta" /> | |
| 71 </output_collection> | |
| 72 </test> | |
| 73 <test> <!-- Trinity-only run. --> | |
| 74 <param name="inputs" ftype="fasta" value="trinity/Pfiji_trinity.fasta,trinity/Apomp_trinity.fasta,trinity/Amphi_trinity.fasta,trinity/Acaud_trinity.fasta" /> | |
| 75 <param name="percent_identity" value="100" /> | |
| 76 <param name="overlap_length" value="60" /> | |
| 77 <param name="length_seq_max" value="100" /> | |
| 78 <output name="log" value="trinity_up.output" /> | |
| 79 <output_collection name="output_fasta" type="list"> | |
| 80 <element name="AcAcaud_trinity" value="trinity_out/AcAcaud_trinity.fasta" /> | |
| 81 <element name="AmAmphi_trinity" value="trinity_out/AmAmphi_trinity.fasta" /> | |
| 82 <element name="ApApomp_trinity" value="trinity_out/ApApomp_trinity.fasta" /> | |
| 83 <element name="PfPfiji_trinity" value="trinity_out/PfPfiji_trinity.fasta" /> | |
| 84 </output_collection> | |
| 85 </test> | |
| 86 <test> <!-- Velvet-only run. --> | |
| 87 <param name="inputs" ftype="fasta" value="velvet/Pg_transcriptome_90109.fasta,velvet/Ap_transcriptome_35099.fasta,velvet/Ac_transcriptome_25591.fasta" /> | |
| 88 <param name="percent_identity" value="100" /> | |
| 89 <param name="overlap_length" value="60" /> | |
| 90 <param name="length_seq_max" value="100" /> | |
| 91 <output name="log" value="velvet_up.output" /> | |
| 92 <output_collection name="output_fasta" type="list"> | |
| 93 <element name="AcAc_transcriptome_25591" value="velvet_out/AcAc_transcriptome_25591.fasta" /> | |
| 94 <element name="ApAp_transcriptome_35099" value="velvet_out/ApAp_transcriptome_35099.fasta" /> | |
| 95 <element name="PgPg_transcriptome_90109" value="velvet_out/PgPg_transcriptome_90109.fasta" /> | |
| 96 </output_collection> | |
| 97 </test> | |
| 98 | |
| 99 </tests> | |
| 100 | |
| 101 <help> | |
| 102 | |
| 103 @HELP_AUTHORS@ | |
| 104 | |
| 105 <![CDATA[ | |
| 106 | |
| 107 **Description** | |
| 108 | |
| 109 This tool reformats Velvet Oases or Trinity assemblies for the AdaptSearch galaxy suite and selects only one variant per gene according to its length and quality check. | |
| 110 | |
| 111 --------- | |
| 112 | |
| 113 **Input format** | |
| 114 | |
| 115 (1) Sequences are in the sequential format: | |
| 116 | |
| 117 | >seqname1 | |
| 118 | AAAGAGAGACCACATGTCAGTAGC -on one or several lines - | |
| 119 | >seqname2 | |
| 120 | AAGGCCTGACCACATGAGTTAAGC -on one or several lines - | |
| 121 | etc ... | |
| 122 | | |
| 123 | |
| 124 (2) The file name should begin with a two-letter abbreviation of the species name (for instance, 'Ap' if the species is Alvinella pompejana). | |
| 125 | |
| 126 **For Velvet Oases assemblies input** | |
| 127 | |
| 128 The headers must be as follows: *>Locus_i_Transcript_i/j_Confidence_x.xxx_Length_N* where i is the locus number, j the transcript variant among all versions of the transcript, x.xxx the confidence value and N the length. | |
| 129 | |
| 130 **For Trinity assemblies inputs** | |
| 131 | |
| 132 The headers must be as follows: *>cj_gj_ij Len=j path=[j:0-j]* where all the j are integers (locus number, transcript variant, length, position...) | |
| 133 | |
| 134 **The tool handles the case if input files come from both assemblers (there is no need for input files to be exclusively from one or another assembler).** | |
| 135 | |
| 136 --------- | |
| 137 | |
| 138 **Parameters** | |
| 139 | |
| 140 - 'Input files' : a collection of fasta files (one file per species). | |
| 141 - 'Overlap percent identity cutoff' : cap3 -p parameter : minimum percent identity of an overlap. | |
| 142 must be > 65 ; default : 100. | |
| 143 - 'Overlap length cutoff' (integer) : cap3 -o parameter : minimum length of an overlap (in base pairs). | |
| 144 must be > 15 ; default : 60. | |
| 145 - 'Minimum sequence length' (integer) : only keep sequences which are longer than the specified value. | |
| 146 default : 100. | |
| 147 | |
| 148 --------- | |
| 149 | |
| 150 **Steps**: | |
| 151 | |
| 152 The tool: | |
| 153 1) Modifies the sequence name to add the species abbreviation, using the first 2 letters of the name of the transcriptome file: note that each species abbreviation must be unique | |
| 154 2) Selects one allelic sequence from each transcript (c or locus) using the length of the sequence and its level of confidence | |
| 155 3) Selects the best ORF from the sequence between two stop codons | |
| 156 4) Performs a CAP3 from the full set of ORFs to minimize redundancy | |
| 157 5) Retrieves the initial transcript sequences from the remaining set of processed ORF sequences | |
| 158 | |
| 159 **Outputs** | |
| 160 | |
| 161 - 'Filter Assemblies Summary' : the log file. | |
| 162 - 'Filter Assemblies outputs' : the main results. | |
| 163 | |
| 164 --------- | |
| 165 | |
| 166 **The AdaptSearch Pipeline** | |
| 167 | |
| 168 .. image:: adaptsearch_picture_helps.png | |
| 169 | |
| 170 --------- | |
| 171 | |
| 172 Changelog | |
| 173 --------- | |
| 174 | |
| 175 **Version 2.1 - 15/01/2018** | |
| 176 | |
| 177 - Input files can be a mix from files coming either from Trinity or Velvet Oases assemblers | |
| 178 | |
| 179 **Version 2.0 - 14/04/2017** | |
| 180 | |
| 181 - NEW: Replace the zip between tools by Dataset Collection | |
| 182 | |
| 183 **Version 1.0 - 13/04/2017** | |
| 184 | |
| 185 - TEST: Add functional test with planemo | |
| 186 - IMPROVEMENT: Use conda dependencies for cap3, fastaformatter and python | |
| 187 | |
| 188 ]]> | |
| 189 </help> | |
| 190 | |
| 191 </tool> |
