Mercurial > repos > iuc > funannotate_predict
comparison funannotate_predict.xml @ 1:1a59958c1f76 draft
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
| author | iuc |
|---|---|
| date | Mon, 04 Oct 2021 19:37:44 +0000 |
| parents | 40b87aef5241 |
| children | 33092577d65d |
comparison
equal
deleted
inserted
replaced
| 0:40b87aef5241 | 1:1a59958c1f76 |
|---|---|
| 30 --database `pwd`'/hacked_database' | 30 --database `pwd`'/hacked_database' |
| 31 #else | 31 #else |
| 32 --database '$database.fields.path' | 32 --database '$database.fields.path' |
| 33 #end if | 33 #end if |
| 34 | 34 |
| 35 $force | |
| 36 | |
| 35 --species '${organism.species}' | 37 --species '${organism.species}' |
| 36 --isolate '${organism.isolate}' | 38 --isolate '${organism.isolate}' |
| 37 --strain '${organism.strain}' | 39 --strain '${organism.strain}' |
| 38 --organism '${organism.organism}' | 40 --organism '${organism.organism}' |
| 39 --ploidy ${organism.ploidy} | 41 --ploidy ${organism.ploidy} |
| 44 | 46 |
| 45 #if $parameters: | 47 #if $parameters: |
| 46 --parameters '${parameters}' | 48 --parameters '${parameters}' |
| 47 #end if | 49 #end if |
| 48 | 50 |
| 49 #if $evidences.rna_bam: | 51 #if $evidences.rna_bam |
| 50 --rna_bam ${evidences.rna_bam} | 52 --rna_bam ${evidences.rna_bam} |
| 51 #end if | 53 #end if |
| 52 | 54 |
| 53 #set est_list = "" | 55 #set est_list = "" |
| 54 #if len($evidences.transcript_evidence) > 0: | 56 #if len($evidences.transcript_evidence) > 0: |
| 69 #end for | 71 #end for |
| 70 #end if | 72 #end if |
| 71 --p2g_pident ${evidences.p2g_pident} | 73 --p2g_pident ${evidences.p2g_pident} |
| 72 --p2g_prefilter ${evidences.p2g_prefilter} | 74 --p2g_prefilter ${evidences.p2g_prefilter} |
| 73 | 75 |
| 76 --busco_seed_species '${busco.busco_seed_species}' | |
| 77 --busco_db '${busco.busco_db}' | |
| 78 | |
| 74 #if $augustus.augustus_species != 'none': | 79 #if $augustus.augustus_species != 'none': |
| 75 --augustus_species '${augustus.augustus_species}' | 80 --augustus_species '${augustus.augustus_species}' |
| 76 #end if | 81 #end if |
| 77 --min_training_models ${augustus.min_training_models} | 82 --min_training_models ${augustus.min_training_models} |
| 78 ${augustus.optimize_augustus} | 83 ${augustus.optimize_augustus} |
| 82 #if $genemark.genemark_mod: | 87 #if $genemark.genemark_mod: |
| 83 --genemark_mod '${genemark.genemark_mod}' | 88 --genemark_mod '${genemark.genemark_mod}' |
| 84 #end if | 89 #end if |
| 85 --soft_mask ${genemark.soft_mask} | 90 --soft_mask ${genemark.soft_mask} |
| 86 #end if | 91 #end if |
| 87 | |
| 88 --busco_seed_species '${busco.busco_seed_species}' | |
| 89 --busco_db '${busco.busco_db}' | |
| 90 | 92 |
| 91 $evm.repeats2evm | 93 $evm.repeats2evm |
| 92 #if $evm.evm_partitioning.evm_partition == "yes": | 94 #if $evm.evm_partitioning.evm_partition == "yes": |
| 93 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval} | 95 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval} |
| 94 #else: | 96 #else: |
| 142 <column name="path" index="3" /> | 144 <column name="path" index="3" /> |
| 143 <filter type="sort_by" column="0" /> | 145 <filter type="sort_by" column="0" /> |
| 144 <filter type="static_value" column="2" value="1.0" /> | 146 <filter type="static_value" column="2" value="1.0" /> |
| 145 </options> | 147 </options> |
| 146 </param> | 148 </param> |
| 149 | |
| 150 <param argument="--force" type="boolean" checked="true" truevalue="" falsevalue="--force" label="Check the genome sequence" help="Disable at your own risk if you want to ignore problems in the genome sequence reported by Funannotate" /> | |
| 147 | 151 |
| 148 <section name="organism" expanded="true" title="Organism"> | 152 <section name="organism" expanded="true" title="Organism"> |
| 149 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> | 153 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> |
| 150 <validator type="empty_field" /> | 154 <validator type="empty_field" /> |
| 151 </param> | 155 </param> |
| 177 <option value="diamond" selected="True">Diamond</option> | 181 <option value="diamond" selected="True">Diamond</option> |
| 178 <option value="tblastn">tblastn (slower)</option> | 182 <option value="tblastn">tblastn (slower)</option> |
| 179 </param> | 183 </param> |
| 180 </section> | 184 </section> |
| 181 | 185 |
| 186 <section name="busco" expanded="true" title="Busco"> | |
| 187 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will be used to perform initial training of ab initio predictors (e.g. Augustus)."> | |
| 188 <expand macro="busco_species"/> | |
| 189 </param> | |
| 190 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Used when BUSCO runs Augustus internally."> | |
| 191 <expand macro="augustus_species"/> | |
| 192 </param> | |
| 193 </section> | |
| 194 | |
| 195 <section name="filtering" expanded="true" title="Filtering"> | |
| 196 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" /> | |
| 197 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" /> | |
| 198 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" /> | |
| 199 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" /> | |
| 200 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons"> | |
| 201 <option value="overlap blast" selected="True">overlap + blast</option> | |
| 202 <option value="overlap">overlap</option> | |
| 203 <option value="blast">blast</option> | |
| 204 <option value="none">none</option> | |
| 205 </param> | |
| 206 </section> | |
| 207 | |
| 182 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will over-rule any other training presets based on species selection." /> | 208 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will over-rule any other training presets based on species selection." /> |
| 183 | 209 |
| 184 <section name="other_predictors" expanded="false" title="Other annotations"> | 210 <section name="augustus" expanded="false" title="Augustus settings (advanced)"> |
| 211 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list"> | |
| 212 <option value="none" selected="True">No corresponding species, train from scratch</option> | |
| 213 <expand macro="augustus_species"/> | |
| 214 </param> | |
| 215 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" /> | |
| 216 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" /> | |
| 217 </section> | |
| 218 | |
| 219 <section name="genemark" expanded="false" title="GeneMark settings (advanced)"> | |
| 220 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." /> | |
| 221 <param argument="--genemark_mode" type="select" label="GeneMark mode"> | |
| 222 <option value="ES" selected="True">ES</option> | |
| 223 <option value="ET">ET</option> | |
| 224 </param> | |
| 225 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" /> | |
| 226 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" /> | |
| 227 </section> | |
| 228 | |
| 229 <section name="other_predictors" expanded="false" title="Other annotations (advanced)"> | |
| 185 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" /> | 230 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" /> |
| 186 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" /> | 231 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" /> |
| 187 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" /> | 232 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" /> |
| 188 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" /> | 233 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" /> |
| 189 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" /> | 234 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" /> |
| 190 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" /> | 235 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" /> |
| 191 </section> | 236 </section> |
| 192 | 237 |
| 193 <section name="augustus" expanded="true" title="Augustus settings"> | 238 <section name="evm" expanded="false" title="EVM settings (advanced)"> |
| 194 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list"> | |
| 195 <option value="none" selected="True">No corresponding species, train from scratch</option> | |
| 196 <expand macro="augustus_species"/> | |
| 197 </param> | |
| 198 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" /> | |
| 199 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" /> | |
| 200 </section> | |
| 201 | |
| 202 <section name="genemark" expanded="false" title="GeneMark settings"> | |
| 203 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." /> | |
| 204 <param argument="--genemark_mode" type="select" label="GeneMark mode"> | |
| 205 <option value="ES" selected="True">ES</option> | |
| 206 <option value="ET">ET</option> | |
| 207 </param> | |
| 208 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" /> | |
| 209 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" /> | |
| 210 </section> | |
| 211 | |
| 212 <section name="busco" expanded="true" title="BUSCO settings"> | |
| 213 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Select the closest species. BUSCO will only be used if no RNASeq (bam) data is given as evidence."> | |
| 214 <expand macro="augustus_species"/> | |
| 215 </param> | |
| 216 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will only be used if no RNASeq (bam) data is given as evidence."> | |
| 217 <expand macro="busco_species"/> | |
| 218 </param> | |
| 219 </section> | |
| 220 | |
| 221 <section name="evm" expanded="false" title="EVM settings"> | |
| 222 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." /> | 239 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." /> |
| 223 <conditional name="evm_partitioning"> | 240 <conditional name="evm_partitioning"> |
| 224 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs in smaller overlapping chunks to reduce memory usage and parallelize"> | 241 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs in smaller overlapping chunks to reduce memory usage and parallelize"> |
| 225 <option value="yes" selected="True">Yes</option> | 242 <option value="yes" selected="True">Yes</option> |
| 226 <option value="no">No</option> | 243 <option value="no">No</option> |
| 233 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10"> | 250 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10"> |
| 234 <validator type="regex" message="Key must consist of alphanumeric characters only, possibly separated by the period character ('.')">^[\w: ]+$</validator> | 251 <validator type="regex" message="Key must consist of alphanumeric characters only, possibly separated by the period character ('.')">^[\w: ]+$</validator> |
| 235 </param> | 252 </param> |
| 236 </section> | 253 </section> |
| 237 | 254 |
| 238 <section name="filtering" expanded="true" title="Filtering"> | 255 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> |
| 239 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" /> | 256 <option value="gbk" selected="true">Annotated genome (genbank)</option> |
| 240 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" /> | 257 <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option> |
| 241 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" /> | 258 <option value="gff3">Annotation in GFF3 format</option> |
| 242 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" /> | 259 <option value="proteins_fa">Multi-fasta file of protein coding genes</option> |
| 243 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons"> | 260 <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option> |
| 244 <option value="overlap blast" selected="True">overlap + blast</option> | 261 <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option> |
| 245 <option value="overlap">overlap</option> | 262 <option value="tbl2asn_report">tbl2asn summary report of annotated genome</option> |
| 246 <option value="blast">blast</option> | 263 <option value="tbl2asn_error">tbl2asn error summary report</option> |
| 247 <option value="none">none</option> | 264 <option value="tbl2asn_validation">tbl2asn genome validation report</option> |
| 248 </param> | 265 <option value="stats">statistics</option> |
| 249 </section> | 266 </param> |
| 250 | 267 |
| 251 <!-- Need this to change path in the test funannotate_db --> | 268 <!-- Need this to change path in the test funannotate_db --> |
| 252 <param type="hidden" name="uglyTestingHack" value="" /> | 269 <param type="hidden" name="uglyTestingHack" value="" /> |
| 253 </inputs> | 270 </inputs> |
| 254 <outputs> | 271 <outputs> |
| 255 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk" /> | 272 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk"> |
| 256 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl" /> | 273 <filter>outputs and 'gbk' in outputs</filter> |
| 257 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3" /> | 274 </data> |
| 258 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa" /> | 275 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl"> |
| 259 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa" /> | 276 <filter>outputs and 'tbl' in outputs</filter> |
| 260 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa" /> | 277 </data> |
| 261 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt" /> | 278 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3"> |
| 262 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt" /> | 279 <filter>outputs and 'gff3' in outputs</filter> |
| 263 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt" /> | 280 </data> |
| 264 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json" /> | 281 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa"> |
| 282 <filter>outputs and 'proteins_fa' in outputs</filter> | |
| 283 </data> | |
| 284 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa"> | |
| 285 <filter>outputs and 'mrna_transcripts_fa' in outputs</filter> | |
| 286 </data> | |
| 287 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa"> | |
| 288 <filter>outputs and 'cds_transcripts_fa' in outputs</filter> | |
| 289 </data> | |
| 290 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt"> | |
| 291 <filter>outputs and 'tbl2asn_report' in outputs</filter> | |
| 292 </data> | |
| 293 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt"> | |
| 294 <filter>outputs and 'tbl2asn_error' in outputs</filter> | |
| 295 </data> | |
| 296 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt"> | |
| 297 <filter>outputs and 'tbl2asn_validation' in outputs</filter> | |
| 298 </data> | |
| 299 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json"> | |
| 300 <filter>outputs and 'stats' in outputs</filter> | |
| 301 </data> | |
| 265 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs | 302 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs |
| 266 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) --> | 303 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) --> |
| 267 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /--> | 304 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /--> |
| 268 </outputs> | 305 </outputs> |
| 269 <tests> | 306 <tests> |
| 270 <!-- training from scratch --> | 307 <!-- training from scratch --> |
| 271 <test> | 308 <test> |
| 272 <param name="input" value="genome_masked.fa" /> | 309 <param name="input" value="genome_masked.fa" /> |
| 273 <param name="database" value="2021-07-20-120000" /> | 310 <param name="database" value="2021-07-20-120000" /> |
| 274 <section name="organism"> | |
| 275 <param name="species" value="Genus species" /> | |
| 276 </section> | |
| 277 <section name="augustus"> | |
| 278 <param name="min_training_models" value="3" /> | |
| 279 </section> | |
| 280 <section name="busco"> | 311 <section name="busco"> |
| 281 <param name="busco_seed_species" value="fly" /> | 312 <param name="busco_seed_species" value="fly" /> |
| 282 <param name="busco_db" value="insecta" /> | 313 <param name="busco_db" value="insecta" /> |
| 283 </section> | 314 </section> |
| 315 <section name="organism"> | |
| 316 <param name="species" value="Genus species" /> | |
| 317 </section> | |
| 318 <section name="augustus"> | |
| 319 <param name="min_training_models" value="3" /> | |
| 320 </section> | |
| 321 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
| 284 <!-- non deterministic results, so can't be more precise here --> | 322 <!-- non deterministic results, so can't be more precise here --> |
| 285 <output name="annot_gbk"> | 323 <output name="annot_gbk"> |
| 286 <assert_contents> | 324 <assert_contents> |
| 287 <has_text text=" TITLE Direct Submission" /> | 325 <has_text text=" TITLE Direct Submission" /> |
| 288 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> | 326 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> |
| 348 <section name="busco"> | 386 <section name="busco"> |
| 349 <param name="busco_seed_species" value="fly" /> | 387 <param name="busco_seed_species" value="fly" /> |
| 350 <param name="busco_db" value="insecta" /> | 388 <param name="busco_db" value="insecta" /> |
| 351 </section> | 389 </section> |
| 352 <param name="uglyTestingHack" value="true" /> | 390 <param name="uglyTestingHack" value="true" /> |
| 391 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
| 353 <!-- non deterministic results, so can't be more precise here --> | 392 <!-- non deterministic results, so can't be more precise here --> |
| 354 <output name="annot_gbk"> | 393 <output name="annot_gbk"> |
| 355 <assert_contents> | 394 <assert_contents> |
| 356 <has_text text=" TITLE Direct Submission" /> | 395 <has_text text=" TITLE Direct Submission" /> |
| 357 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> | 396 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> |
| 397 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | 436 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> |
| 398 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | 437 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> |
| 399 </assert_stderr> | 438 </assert_stderr> |
| 400 </test> | 439 </test> |
| 401 | 440 |
| 402 <!-- bam --> | 441 <!-- bam and transcripts and proteins --> |
| 403 <test> | 442 <test> |
| 404 <param name="input" value="genome_masked.fa" /> | 443 <param name="input" value="genome_masked.fa" /> |
| 405 <param name="database" value="2021-07-20-120000" /> | 444 <param name="database" value="2021-07-20-120000" /> |
| 406 <section name="organism"> | 445 <section name="organism"> |
| 407 <param name="species" value="Genus species" /> | 446 <param name="species" value="Genus species" /> |
| 412 <conditional name="prot_evidence"> | 451 <conditional name="prot_evidence"> |
| 413 <param name="prot_evidence_source" value="custom" /> | 452 <param name="prot_evidence_source" value="custom" /> |
| 414 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> | 453 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> |
| 415 </conditional> | 454 </conditional> |
| 416 </section> | 455 </section> |
| 417 <section name="augustus"> | |
| 418 <param name="min_training_models" value="3" /> | |
| 419 </section> | |
| 420 <section name="busco"> | 456 <section name="busco"> |
| 421 <param name="busco_seed_species" value="fly" /> | 457 <param name="busco_seed_species" value="fly" /> |
| 422 <param name="busco_db" value="insecta" /> | 458 <param name="busco_db" value="insecta" /> |
| 423 </section> | 459 </section> |
| 460 <section name="augustus"> | |
| 461 <param name="min_training_models" value="3" /> | |
| 462 </section> | |
| 463 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
| 424 <!-- non deterministic results, so can't be more precise here --> | 464 <!-- non deterministic results, so can't be more precise here --> |
| 425 <output name="annot_gbk"> | 465 <output name="annot_gbk"> |
| 426 <assert_contents> | 466 <assert_contents> |
| 427 <has_text text=" TITLE Direct Submission" /> | 467 <has_text text=" TITLE Direct Submission" /> |
| 428 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> | 468 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> |
| 468 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | 508 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> |
| 469 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | 509 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> |
| 470 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | 510 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> |
| 471 </assert_stderr> | 511 </assert_stderr> |
| 472 </test> | 512 </test> |
| 513 | |
| 514 <!-- proteins --> | |
| 515 <test> | |
| 516 <param name="input" value="genome_masked.fa" /> | |
| 517 <param name="database" value="2021-07-20-120000" /> | |
| 518 <section name="organism"> | |
| 519 <param name="species" value="Genus species" /> | |
| 520 </section> | |
| 521 <section name="evidences"> | |
| 522 <conditional name="prot_evidence"> | |
| 523 <param name="prot_evidence_source" value="custom" /> | |
| 524 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> | |
| 525 </conditional> | |
| 526 </section> | |
| 527 <section name="busco"> | |
| 528 <param name="busco_seed_species" value="fly" /> | |
| 529 <param name="busco_db" value="insecta" /> | |
| 530 </section> | |
| 531 <section name="augustus"> | |
| 532 <param name="min_training_models" value="3" /> | |
| 533 </section> | |
| 534 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
| 535 <!-- non deterministic results, so can't be more precise here --> | |
| 536 <output name="annot_gbk"> | |
| 537 <assert_contents> | |
| 538 <has_text text=" TITLE Direct Submission" /> | |
| 539 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> | |
| 540 </assert_contents> | |
| 541 </output> | |
| 542 <output name="annot_tbl"> | |
| 543 <assert_contents> | |
| 544 <has_text text=">Feature sample" /> | |
| 545 <has_text text="gnl|ncbi|FUN_000001-T1_mrna" /> | |
| 546 </assert_contents> | |
| 547 </output> | |
| 548 <output name="annot_gff3"> | |
| 549 <assert_contents> | |
| 550 <has_text text="##gff-version 3" /> | |
| 551 <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" /> | |
| 552 </assert_contents> | |
| 553 </output> | |
| 554 <output name="fasta_proteins"> | |
| 555 <assert_contents> | |
| 556 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 557 </assert_contents> | |
| 558 </output> | |
| 559 <output name="fasta_transcripts_mrna"> | |
| 560 <assert_contents> | |
| 561 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 562 </assert_contents> | |
| 563 </output> | |
| 564 <output name="fasta_transcripts_cds"> | |
| 565 <assert_contents> | |
| 566 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 567 </assert_contents> | |
| 568 </output> | |
| 569 <assert_stderr> | |
| 570 <has_text text="augustus busco"/> | |
| 571 <has_text text="glimmerhmm busco"/> | |
| 572 <has_text text="snap busco"/> | |
| 573 <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/> | |
| 574 <has_text text="Skipping CodingQuarry as no --rna_bam passed"/> | |
| 575 <has_text text="Running Augustus gene prediction using genus_species parameters"/> | |
| 576 <has_text text="Training Augustus using BUSCO gene models"/> | |
| 577 <not_has_text text="Aligning transcript evidence to genome with minimap2"/> | |
| 578 <not_has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/> | |
| 579 <not_has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | |
| 580 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | |
| 581 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | |
| 582 </assert_stderr> | |
| 583 </test> | |
| 473 </tests> | 584 </tests> |
| 474 <help><