Mercurial > repos > iuc > funannotate_predict
comparison funannotate_predict.xml @ 0:40b87aef5241 draft
"planemo upload commit 9613152729099079c7465c3d5d42005ef22ca91e"
| author | iuc |
|---|---|
| date | Thu, 26 Aug 2021 06:55:33 +0000 |
| parents | |
| children | 1a59958c1f76 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:40b87aef5241 |
|---|---|
| 1 <tool id="funannotate_predict" name="Funannotate predict annotation" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
| 2 <description></description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <expand macro="requirements" /> | |
| 8 </requirements> | |
| 9 <version_command><![CDATA[funannotate check --show-versions]]></version_command> | |
| 10 <command><![CDATA[ | |
| 11 #if $genemark.genemark_license: | |
| 12 if [ -z "\$GENEMARK_PATH" ] ; then echo "GeneMark is not installed on this Galaxy server." >&2 ; exit 1 ; fi && | |
| 13 if [ ! -f "\$GENEMARK_PATH/gmes_petap.pl" ] ; then echo "GeneMark is not installed properly on this Galaxy server." >&2 ; exit 1 ; fi && | |
| 14 ## GeneMark only search for license in ~/.gm_key | |
| 15 cp '${genemark.genemark_license}' ~/.gm_key && | |
| 16 #end if | |
| 17 | |
| 18 #if $uglyTestingHack == "true": | |
| 19 ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager) | |
| 20 ## Need to copy too as the test_data is read only on CI | |
| 21 cp -r '${database.fields.path}' './hacked_database' && | |
| 22 sed -i.bak 's|/tmp/prout|'`pwd`'/hacked_database|' './hacked_database/trained_species/fly/info.json' && | |
| 23 #end if | |
| 24 | |
| 25 funannotate predict | |
| 26 --input '${input}' | |
| 27 --out output | |
| 28 | |
| 29 #if $uglyTestingHack == "true": | |
| 30 --database `pwd`'/hacked_database' | |
| 31 #else | |
| 32 --database '$database.fields.path' | |
| 33 #end if | |
| 34 | |
| 35 --species '${organism.species}' | |
| 36 --isolate '${organism.isolate}' | |
| 37 --strain '${organism.strain}' | |
| 38 --organism '${organism.organism}' | |
| 39 --ploidy ${organism.ploidy} | |
| 40 --SeqCenter '${organism.SeqCenter}' | |
| 41 --SeqAccession '${organism.SeqAccession}' | |
| 42 --name '${organism.name}' | |
| 43 --numbering ${organism.numbering} | |
| 44 | |
| 45 #if $parameters: | |
| 46 --parameters '${parameters}' | |
| 47 #end if | |
| 48 | |
| 49 #if $evidences.rna_bam: | |
| 50 --rna_bam '${evidences.rna_bam}' | |
| 51 #end if | |
| 52 | |
| 53 #set est_list = "" | |
| 54 #if len($evidences.transcript_evidence) > 0: | |
| 55 #for $estev in $evidences.transcript_evidence: | |
| 56 #if $estev: | |
| 57 #set est_list += " '" + str($estev) + "'" | |
| 58 #end if | |
| 59 #end for | |
| 60 #end if | |
| 61 #if $est_list: | |
| 62 --transcript_evidence $est_list | |
| 63 #end if | |
| 64 | |
| 65 #if $evidences.prot_evidence.prot_evidence_source == 'custom': | |
| 66 --protein_evidence | |
| 67 #for $protev in $evidences.prot_evidence.protein_evidence: | |
| 68 '${protev}' | |
| 69 #end for | |
| 70 #end if | |
| 71 --p2g_pident ${evidences.p2g_pident} | |
| 72 --p2g_prefilter ${evidences.p2g_prefilter} | |
| 73 | |
| 74 #if $augustus.augustus_species != 'none': | |
| 75 --augustus_species '${augustus.augustus_species}' | |
| 76 #end if | |
| 77 --min_training_models ${augustus.min_training_models} | |
| 78 ${augustus.optimize_augustus} | |
| 79 | |
| 80 #if $genemark.genemark_license: | |
| 81 --genemark_mode '${genemark.genemark_mode}' | |
| 82 #if $genemark.genemark_mod: | |
| 83 --genemark_mod '${genemark.genemark_mod}' | |
| 84 #end if | |
| 85 --soft_mask ${genemark.soft_mask} | |
| 86 #end if | |
| 87 | |
| 88 --busco_seed_species '${busco.busco_seed_species}' | |
| 89 --busco_db '${busco.busco_db}' | |
| 90 | |
| 91 $evm.repeats2evm | |
| 92 #if $evm.evm_partitioning.evm_partition == "yes": | |
| 93 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval} | |
| 94 #else: | |
| 95 --no-evm-partitions | |
| 96 #end if | |
| 97 #if $evm.weights: | |
| 98 --weights '${evm.weights}' | |
| 99 #end if | |
| 100 | |
| 101 #if $other_predictors.stringtie: | |
| 102 --stringtie '${other_predictors.stringtie}' | |
| 103 #end if | |
| 104 #if $other_predictors.maker_gff: | |
| 105 --maker_gff '${other_predictors.maker_gff}' | |
| 106 #end if | |
| 107 #if $other_predictors.pasa_gff: | |
| 108 --pasa_gff '${other_predictors.pasa_gff}:${other_predictors.pasa_gff_weight}' | |
| 109 #end if | |
| 110 #if $other_predictors.other_gff: | |
| 111 --other_gff '${other_predictors.other_gff}:${other_predictors.other_gff_weight}' | |
| 112 #end if | |
| 113 | |
| 114 --min_intronlen ${filtering.min_intronlen} | |
| 115 --max_intronlen ${filtering.max_intronlen} | |
| 116 --min_protlen ${filtering.min_protlen} | |
| 117 ${filtering.keep_no_stops} | |
| 118 --repeat_filter ${filtering.repeat_filter} | |
| 119 | |
| 120 --cpus \${GALAXY_SLOTS:-2} | |
| 121 | |
| 122 && | |
| 123 | |
| 124 mv output/predict_results/*.gbk out.gbk && | |
| 125 mv output/predict_results/*.tbl out.tbl && | |
| 126 mv output/predict_results/*.gff3 out.gff3 && | |
| 127 mv output/predict_results/*.proteins.fa out.proteins.fa && | |
| 128 mv output/predict_results/*.mrna-transcripts.fa out.mrna-transcripts.fa && | |
| 129 mv output/predict_results/*.cds-transcripts.fa out.cds-transcripts.fa && | |
| 130 mv output/predict_results/*.discrepency.report.txt out.discrepency.report.txt && | |
| 131 mv output/predict_results/*.error.summary.txt out.error.summary.txt && | |
| 132 mv output/predict_results/*.validation.txt out.validation.txt && | |
| 133 mv output/predict_results/*.stats.json out.stats.json | |
| 134 ]]></command> | |
| 135 <inputs> | |
| 136 <param argument="--input" type="data" format="fasta" label="Assembly to annotate" help="The assembly should be soft-masked (with RepeatMasker for example)" /> | |
| 137 | |
| 138 <param name="database" label="Funannotate database" type="select"> | |
| 139 <options from_data_table="funannotate"> | |
| 140 <column name="value" index="0" /> | |
| 141 <column name="name" index="1" /> | |
| 142 <column name="path" index="3" /> | |
| 143 <filter type="sort_by" column="0" /> | |
| 144 <filter type="static_value" column="2" value="1.0" /> | |
| 145 </options> | |
| 146 </param> | |
| 147 | |
| 148 <section name="organism" expanded="true" title="Organism"> | |
| 149 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> | |
| 150 <validator type="empty_field" /> | |
| 151 </param> | |
| 152 <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" /> | |
| 153 <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" /> | |
| 154 <param argument="--organism" type="boolean" checked="false" truevalue="fungus" falsevalue="other" label="Is it a fungus species?" /> | |
| 155 <param argument="--ploidy" type="integer" value="1" label="Ploidy of assembly" /> | |
| 156 <param argument="--SeqCenter" type="text" value="CFMR" label="Sequencing facility for NCBI tbl file" /> | |
| 157 <param argument="--SeqAccession" type="text" value="12345" label="Sequence accession number for NCBI tbl file" /> | |
| 158 <param argument="--name" type="text" value="FUN_" label="Locus tag prefix" help="Will prefix all the gene names" /> | |
| 159 <param argument="--numbering" type="integer" value="1" label="Specify where gene numbering starts" /> | |
| 160 </section> | |
| 161 | |
| 162 <section name="evidences" expanded="true" title="Evidences"> | |
| 163 <param argument="--rna_bam" type="data" format="bam" optional="true" label="RNA-seq mapped to genome to train Augustus/GeneMark-ET" /> | |
| 164 <param argument="--transcript_evidence" type="data" format="fasta" multiple="true" optional="true" label="mRNA/ESTs to align to genome" /> | |
| 165 <conditional name="prot_evidence"> | |
| 166 <param name="prot_evidence_source" type="select" label="Select protein evidences"> | |
| 167 <option value="uniprot" selected="True">Use UniProtKb/SwissProt (from selected Funannotate database)</option> | |
| 168 <option value="custom">Custom protein sequences</option> | |
| 169 </param> | |
| 170 <when value="uniprot"/> | |
| 171 <when value="custom"> | |
| 172 <param argument="--protein_evidence" type="data" format="fasta" multiple="true" label="Proteins to map to genome" /> | |
| 173 </when> | |
| 174 </conditional> | |
| 175 <param argument="--p2g_pident" type="integer" value="80" label="Exonerate percent identity (for proteins)" /> | |
| 176 <param argument="--p2g_prefilter" type="select" label="Prefilter hits with (for proteins)"> | |
| 177 <option value="diamond" selected="True">Diamond</option> | |
| 178 <option value="tblastn">tblastn (slower)</option> | |
| 179 </param> | |
| 180 </section> | |
| 181 | |
| 182 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will over-rule any other training presets based on species selection." /> | |
| 183 | |
| 184 <section name="other_predictors" expanded="false" title="Other annotations"> | |
| 185 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" /> | |
| 186 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" /> | |
| 187 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" /> | |
| 188 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" /> | |
| 189 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" /> | |
| 190 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" /> | |
| 191 </section> | |
| 192 | |
| 193 <section name="augustus" expanded="true" title="Augustus settings"> | |
| 194 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list"> | |
| 195 <option value="none" selected="True">No corresponding species, train from scratch</option> | |
| 196 <expand macro="augustus_species"/> | |
| 197 </param> | |
| 198 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" /> | |
| 199 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" /> | |
| 200 </section> | |
| 201 | |
| 202 <section name="genemark" expanded="false" title="GeneMark settings"> | |
| 203 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." /> | |
| 204 <param argument="--genemark_mode" type="select" label="GeneMark mode"> | |
| 205 <option value="ES" selected="True">ES</option> | |
| 206 <option value="ET">ET</option> | |
| 207 </param> | |
| 208 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" /> | |
| 209 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" /> | |
| 210 </section> | |
| 211 | |
| 212 <section name="busco" expanded="true" title="BUSCO settings"> | |
| 213 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Select the closest species. BUSCO will only be used if no RNASeq (bam) data is given as evidence."> | |
| 214 <expand macro="augustus_species"/> | |
| 215 </param> | |
| 216 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will only be used if no RNASeq (bam) data is given as evidence."> | |
| 217 <expand macro="busco_species"/> | |
| 218 </param> | |
| 219 </section> | |
| 220 | |
| 221 <section name="evm" expanded="false" title="EVM settings"> | |
| 222 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." /> | |
| 223 <conditional name="evm_partitioning"> | |
| 224 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs in smaller overlapping chunks to reduce memory usage and parallelize"> | |
| 225 <option value="yes" selected="True">Yes</option> | |
| 226 <option value="no">No</option> | |
| 227 </param> | |
| 228 <when value="yes"> | |
| 229 <param argument="--evm-partition-interval" type="integer" value="1500" label="Min length between genes to make a partition" /> | |
| 230 </when> | |
| 231 <when value="no"/> | |
| 232 </conditional> | |
| 233 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10"> | |
| 234 <validator type="regex" message="Weights may only contain letters, digits, underscores, colons and spaces (e.g. augustus:2 pasa:10)">^[\w: ]+$</validator> | |
| 235 </param> | |
| 236 </section> | |
| 237 | |
| 238 <section name="filtering" expanded="true" title="Filtering"> | |
| 239 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" /> | |
| 240 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" /> | |
| 241 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" /> | |
| 242 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" /> | |
| 243 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons"> | |
| 244 <option value="overlap blast" selected="True">overlap + blast</option> | |
| 245 <option value="overlap">overlap</option> | |
| 246 <option value="blast">blast</option> | |
| 247 <option value="none">none</option> | |
| 248 </param> | |
| 249 </section> | |
| 250 | |
| 251 <!-- Need this to change path in the test funannotate_db --> | |
| 252 <param type="hidden" name="uglyTestingHack" value="" /> | |
| 253 </inputs> | |
| 254 <outputs> | |
| 255 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk" /> | |
| 256 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl" /> | |
| 257 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3" /> | |
| 258 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa" /> | |
| 259 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa" /> | |
| 260 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa" /> | |
| 261 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt" /> | |
| 262 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt" /> | |
| 263 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt" /> | |
| 264 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json" /> | |
| 265 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs | |
| 266 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) --> | |
| 267 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /--> | |
| 268 </outputs> | |
| 269 <tests> | |
| 270 <!-- training from scratch --> | |
| 271 <test> | |
| 272 <param name="input" value="genome_masked.fa" /> | |
| 273 <param name="database" value="2021-07-20-120000" /> | |
| 274 <section name="organism"> | |
| 275 <param name="species" value="Genus species" /> | |
| 276 </section> | |
| 277 <section name="augustus"> | |
| 278 <param name="min_training_models" value="3" /> | |
| 279 </section> | |
| 280 <section name="busco"> | |
| 281 <param name="busco_seed_species" value="fly" /> | |
| 282 <param name="busco_db" value="insecta" /> | |
| 283 </section> | |
| 284 <!-- non deterministic results, so can't be more precise here --> | |
| 285 <output name="annot_gbk"> | |
| 286 <assert_contents> | |
| 287 <has_text text=" TITLE Direct Submission" /> | |
| 288 <has_text text="/locus_tag="FUN_000001"" /> | |
| 289 </assert_contents> | |
| 290 </output> | |
| 291 <output name="annot_tbl"> | |
| 292 <assert_contents> | |
| 293 <has_text text=">Feature sample" /> | |
| 294 <has_text text="gnl|ncbi|FUN_000001-T1_mrna" /> | |
| 295 </assert_contents> | |
| 296 </output> | |
| 297 <output name="annot_gff3"> | |
| 298 <assert_contents> | |
| 299 <has_text text="##gff-version 3" /> | |
| 300 <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" /> | |
| 301 </assert_contents> | |
| 302 </output> | |
| 303 <output name="fasta_proteins"> | |
| 304 <assert_contents> | |
| 305 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 306 </assert_contents> | |
| 307 </output> | |
| 308 <output name="fasta_transcripts_mrna"> | |
| 309 <assert_contents> | |
| 310 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 311 </assert_contents> | |
| 312 </output> | |
| 313 <output name="fasta_transcripts_cds"> | |
| 314 <assert_contents> | |
| 315 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 316 </assert_contents> | |
| 317 </output> | |
| 318 <!--output name="abinitio" file="predict_scratch/fly.parameters.json" compare="sim_size" /--> | |
| 319 <output name="tbl2asn_report" file="predict_scratch/Genus_species.discrepency.report.txt" compare="sim_size" /> | |
| 320 <output name="tbl2asn_error" file="predict_scratch/Genus_species.error.summary.txt" compare="sim_size" delta="500" /> | |
| 321 <output name="tbl2asn_validation" file="predict_scratch/Genus_species.validation.txt" compare="sim_size" delta="500" /> | |
| 322 <output name="stats" file="predict_scratch/Genus_species.stats.json" compare="sim_size" /> | |
| 323 <assert_stderr> | |
| 324 <has_text text="augustus busco"/> | |
| 325 <has_text text="glimmerhmm busco"/> | |
| 326 <has_text text="snap busco"/> | |
| 327 <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/> | |
| 328 <has_text text="Skipping CodingQuarry as no --rna_bam passed"/> | |
| 329 <has_text text="Running Augustus gene prediction using genus_species parameters"/> | |
| 330 <not_has_text text="Aligning transcript evidence to genome with minimap2"/> | |
| 331 <not_has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/> | |
| 332 <not_has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | |
| 333 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | |
| 334 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | |
| 335 </assert_stderr> | |
| 336 </test> | |
| 337 | |
| 338 <!-- pre-trained augustus --> | |
| 339 <test> | |
| 340 <param name="input" value="genome_masked.fa" /> | |
| 341 <param name="database" value="2021-07-20-120000" /> | |
| 342 <section name="organism"> | |
| 343 <param name="species" value="Genus species" /> | |
| 344 </section> | |
| 345 <section name="augustus"> | |
| 346 <param name="augustus_species" value="fly" /> | |
| 347 </section> | |
| 348 <section name="busco"> | |
| 349 <param name="busco_seed_species" value="fly" /> | |
| 350 <param name="busco_db" value="insecta" /> | |
| 351 </section> | |
| 352 <param name="uglyTestingHack" value="true" /> | |
| 353 <!-- non deterministic results, so can't be more precise here --> | |
| 354 <output name="annot_gbk"> | |
| 355 <assert_contents> | |
| 356 <has_text text=" TITLE Direct Submission" /> | |
| 357 <has_text text="/locus_tag="FUN_000001"" /> | |
| 358 </assert_contents> | |
| 359 </output> | |
| 360 <output name="annot_tbl"> | |
| 361 <assert_contents> | |
| 362 <has_text text=">Feature sample" /> | |
| 363 <has_text text="gnl|ncbi|FUN_000001-T1_mrna" /> | |
| 364 </assert_contents> | |
| 365 </output> | |
| 366 <output name="annot_gff3"> | |
| 367 <assert_contents> | |
| 368 <has_text text="##gff-version 3" /> | |
| 369 <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" /> | |
| 370 </assert_contents> | |
| 371 </output> | |
| 372 <output name="fasta_proteins"> | |
| 373 <assert_contents> | |
| 374 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 375 </assert_contents> | |
| 376 </output> | |
| 377 <output name="fasta_transcripts_mrna"> | |
| 378 <assert_contents> | |
| 379 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 380 </assert_contents> | |
| 381 </output> | |
| 382 <output name="fasta_transcripts_cds"> | |
| 383 <assert_contents> | |
| 384 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 385 </assert_contents> | |
| 386 </output> | |
| 387 <assert_stderr> | |
| 388 <has_text text="augustus pretrained"/> | |
| 389 <has_text text="glimmerhmm busco"/> | |
| 390 <has_text text="snap busco"/> | |
| 391 <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/> | |
| 392 <has_text text="Skipping CodingQuarry as no --rna_bam passed"/> | |
| 393 <has_text text="Running Augustus gene prediction using fly parameters"/> | |
| 394 <not_has_text text="Aligning transcript evidence to genome with minimap2"/> | |
| 395 <not_has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/> | |
| 396 <not_has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | |
| 397 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | |
| 398 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | |
| 399 </assert_stderr> | |
| 400 </test> | |
| 401 | |
| 402 <!-- bam --> | |
| 403 <test> | |
| 404 <param name="input" value="genome_masked.fa" /> | |
| 405 <param name="database" value="2021-07-20-120000" /> | |
| 406 <section name="organism"> | |
| 407 <param name="species" value="Genus species" /> | |
| 408 </section> | |
| 409 <section name="evidences"> | |
| 410 <param name="rna_bam" value="SRR7458692.bam" /> | |
| 411 <param name="transcript_evidence" value="predict_scratch/Genus_species.mrna-transcripts.fa" /> | |
| 412 <conditional name="prot_evidence"> | |
| 413 <param name="prot_evidence_source" value="custom" /> | |
| 414 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> | |
| 415 </conditional> | |
| 416 </section> | |
| 417 <section name="augustus"> | |
| 418 <param name="min_training_models" value="3" /> | |
| 419 </section> | |
| 420 <section name="busco"> | |
| 421 <param name="busco_seed_species" value="fly" /> | |
| 422 <param name="busco_db" value="insecta" /> | |
| 423 </section> | |
| 424 <!-- non deterministic results, so can't be more precise here --> | |
| 425 <output name="annot_gbk"> | |
| 426 <assert_contents> | |
| 427 <has_text text=" TITLE Direct Submission" /> | |
| 428 <has_text text="/locus_tag="FUN_000001"" /> | |
| 429 </assert_contents> | |
| 430 </output> | |
| 431 <output name="annot_tbl"> | |
| 432 <assert_contents> | |
| 433 <has_text text=">Feature sample" /> | |
| 434 <has_text text="gnl|ncbi|FUN_000001-T1_mrna" /> | |
| 435 </assert_contents> | |
| 436 </output> | |
| 437 <output name="annot_gff3"> | |
| 438 <assert_contents> | |
| 439 <has_text text="##gff-version 3" /> | |
| 440 <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" /> | |
| 441 </assert_contents> | |
| 442 </output> | |
| 443 <output name="fasta_proteins"> | |
| 444 <assert_contents> | |
| 445 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 446 </assert_contents> | |
| 447 </output> | |
| 448 <output name="fasta_transcripts_mrna"> | |
| 449 <assert_contents> | |
| 450 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 451 </assert_contents> | |
| 452 </output> | |
| 453 <output name="fasta_transcripts_cds"> | |
| 454 <assert_contents> | |
| 455 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 456 </assert_contents> | |
| 457 </output> | |
| 458 <assert_stderr> | |
| 459 <has_text text="augustus busco"/> | |
| 460 <has_text text="glimmerhmm busco"/> | |
| 461 <has_text text="snap busco"/> | |
| 462 <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/> | |
| 463 <not_has_text text="Skipping CodingQuarry as no --rna_bam passed"/> | |
| 464 <has_text text="Running Augustus gene prediction using genus_species parameters"/> | |
| 465 <has_text text="Training Augustus using BUSCO gene models"/> | |
| 466 <has_text text="Aligning transcript evidence to genome with minimap2"/> | |
| 467 <has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/> | |
| 468 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | |
| 469 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | |
| 470 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | |
| 471 </assert_stderr> | |
| 472 </test> | |
| 473 </tests> | |
| 474 <help><![CDATA[ | |
| 475 Funannotate_ predict | |
| 476 -------------------- | |
| 477 | |
| 478 Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes). | |
| 479 | |
| 480 Script takes genome multi-fasta file and a variety of inputs to do a comprehensive whole | |
| 481 genome gene prediction. Uses AUGUSTUS, GeneMark, Snap, GlimmerHMM, BUSCO, EVidence Modeler, | |
| 482 tbl2asn, tRNAScan-SE, Exonerate, minimap2. | |
| 483 | |
| 484 .. _Funannotate: http://funannotate.readthedocs.io | |
| 485 ]]></help> | |
| 486 <expand macro="citations" /> | |
| 487 </tool> |
