diff maker.xml @ 5:5201ec38c01f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maker commit 5bef07276e14b38cca31ef724d0b4d2f55809715"
author iuc
date Mon, 28 Dec 2020 23:19:04 +0000
parents 5e96efe6e6c6
children d46d803ca6cc
line wrap: on
line diff
--- a/maker.xml	Thu Oct 10 02:56:46 2019 -0400
+++ b/maker.xml	Mon Dec 28 23:19:04 2020 +0000
@@ -1,19 +1,23 @@
 <?xml version="1.0"?>
-<tool id="maker" name="Maker" profile="16.04" version="@VERSION@+galaxy1">
+<tool id="maker" name="Maker" profile="16.04" version="@VERSION@">
     <description>genome annotation pipeline</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
     <command><![CDATA[
-        RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries &&
-        mkdir lib &&
-        export REPEATMASKER_LIB_DIR=\$(pwd)/lib &&
-          for file in \$(ls \$RM_LIB_PATH) ; do  ln -s \$RM_LIB_PATH/\$file lib/\$file ; done &&
-        #if $repeat_masking.repeat_source.source_type == "repbase":
-          cp '${repeat_masking.repeat_source.repbase_file}' 'lib/${repeat_masking.repeat_source.repbase_file_name}' &&
+        RM_PATH=\$(which RepeatMasker) &&
+        if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi &&
+
+        LIBDIR=\$(dirname "\$RM_PATH")/../share/RepeatMasker/Libraries &&
+        #if $repeat_masking.repeat_source.source_type == "dfam_up":
+          mkdir lib/ &&
+          ln -s '${repeat_masking.repeat_source.dfam_lib}' lib/RepeatMaskerLib.h5 &&
+          LIBDIR=\$(pwd)/lib &&
         #end if
 
+        export LIBDIR &&
+
         maker -CTL
 
         &&
@@ -35,7 +39,12 @@
             export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ &&
         #end if
 
-        mpiexec -n \${GALAXY_SLOTS:-4} maker --ignore_nfs_tmp maker_opts.ctl maker_bopts.ctl maker_exe.ctl < /dev/null
+        MPI_CMD="mpiexec -n \${GALAXY_SLOTS:-4}" &&
+        if [ "\$MAKER_NO_MPI" = "1" ]; then
+            MPI_CMD="";
+        fi &&
+
+        \${MPI_CMD} maker --ignore_nfs_tmp maker_opts.ctl maker_bopts.ctl maker_exe.ctl < /dev/null
 
         &&
 
@@ -112,7 +121,7 @@
 #end if
 
 #-----Repeat Masking (leave values blank to skip repeat masking)
-#if $repeat_masking.repeat_source.source_type == 'repbase'
+#if $repeat_masking.repeat_source.source_type == 'dfam'
 
 #if $repeat_masking.repeat_source.species_source.species_from_list == 'yes'
 model_org=${repeat_masking.repeat_source.species_source.species_list}
@@ -123,6 +132,11 @@
 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
 softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
 
+#else if $repeat_masking.repeat_source.source_type == 'dfam_up'
+model_org=${repeat_masking.repeat_source.species_name}
+rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
+softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
+
 #else if $repeat_masking.repeat_source.source_type == 'library'
 model_org= # select a model organism for RepBase masking in RepeatMasker
 #if $repeat_masking.repeat_source.rmlib
@@ -385,14 +399,13 @@
 
         <section name="repeat_masking" title="Repeat masking" expanded="True">
             <conditional name="repeat_source">
-              <param label="Repeat library source" name="source_type" type="select">
-                <option selected="true" value="repbase">RepBase</option>
+              <param label="Repeat library source" name="source_type" type="select" help="To use RepBase, choose 'Custom library of repeats' and select a fasta version of this non-free database.">
+                <option selected="true" value="dfam">DFam (curated only, bundled with RepeatMasker)</option>
+                <option value="dfam_up">DFam (full/specific version)</option>
                 <option value="library">Custom library of repeats</option>
                 <option value="no">Disable repeat masking (not recommended)</option>
               </param>
-              <when value="repbase">
-                <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
-                <param name="repbase_file_name" type="hidden" value="RMRBSeqs.embl"/> <!-- This is an ugly hack to allow testing with a fake repbase -->
+              <when value="dfam">
                 <conditional name="species_source">
                   <param label="Select species name from a list?" name="species_from_list" type="select">
                     <option value="yes" selected="true">Yes</option>
@@ -400,41 +413,28 @@
                   </param>
                   <when value="yes">
                     <param name="species_list" type="select" label="Species">
-                      <option value="anopheles" selected="true">anopheles</option>
-                      <option value="arabidopsis">arabidopsis</option>
-                      <option value="artiodactyl">artiodactyl</option>
-                      <option value="aspergillus">aspergillus</option>
-                      <option value="carnivore">carnivore</option>
-                      <option value="cat">cat</option>
-                      <option value="chicken">chicken</option>
-                      <option value="ciona intestinalis">ciona intestinalis</option>
-                      <option value="ciona savignyi">ciona savignyi</option>
-                      <option value="cow">cow</option>
-                      <option value="danio">danio</option>
-                      <option value="diatoaea">diatomea</option>
-                      <option value="dog">dog</option>
-                      <option value="drosophila">drosophila</option>
-                      <option value="elegans">elegans</option>
-                      <option value="fugu">fugu</option>
-                      <option value="fungi" selected="true">fungi</option>
-                      <option value="human">human</option>
-                      <option value="maize">maize</option>
-                      <option value="mammal">mammal</option>
-                      <option value="mouse">mouse</option>
-                      <option value="pig">pig</option>
-                      <option value="rat">rat</option>
-                      <option value="rice">rice</option>
-                      <option value="rodentia">rodentia</option>
-                      <option value="ruminantia">ruminantia</option>
-                      <option value="wheat">wheat</option>
+                        <option value="vertebrate">Vertebrate (other than below)</option>
+                        <option value="mammal">Mammal (other than below)</option>
+                        <option value="human" selected="true">Human</option>
+                        <option value="rodent">Rodent</option>
+                        <option value="mouse">Mouse</option>
+                        <option value="rat">Rat</option>
+                        <option value="danio">Danio (zebra fish)</option>
+                        <option value="drosophila">Fruit fly (Drosophila melanogaster)</option>
+                        <option value="elegans">Caenorhabditis elegans (nematode)</option>
                     </param>
                   </when>
                   <when value="no">
-                    <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" />
+                    <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
                   </when>
                 </conditional>
                 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
               </when>
+              <when value="dfam_up">
+                  <param name="dfam_lib" type="data" format="h5" label="DFam library" help="The full DFam library can be downloaded from https://www.dfam.org/releases/current/families/Dfam.h5.gz" />
+                  <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
+                  <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
+              </when>
               <when value="library">
                 <param name="rmlib" type="data" format="fasta" label="Transposable element sequences for RepeatRunner" optional="True" />
                 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner" optional="True" />
@@ -548,13 +548,21 @@
             <param name="genome" value="genome.fasta"/>
             <param name="est_evidences|est" value="est.fasta"/>
             <param name="est_evidences|est2genome" value="1"/>
-            <param name="repeat_masking|repeat_source|source_type" value="repbase"/>
-            <param name="repeat_masking|repeat_source|repbase_file" value="fake_repbase.embl" />
-            <param name="repeat_masking|repeat_source|repbase_file_name" value="fake.embl" />
-            <param name="repeat_masking|repeat_source|species_list" value="anopheles" />
-            <output name="output_gff" file="annot_repbase.gff3"/>
+            <param name="repeat_masking|repeat_source|source_type" value="dfam"/>
+            <param name="repeat_masking|repeat_source|species_list" value="drosophila" />
+            <output name="output_gff" file="annot_dfam.gff3"/>
             <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/>
         </test>
+        <test>
+            <param name="genome" value="genome.fasta"/>
+            <param name="est_evidences|est" value="est.fasta"/>
+            <param name="est_evidences|est2genome" value="1"/>
+            <param name="repeat_masking|repeat_source|source_type" value="dfam_up"/>
+            <param name="repeat_masking|repeat_source|dfam_lib" value="Dfam_partial_test.h5" ftype="h5" />
+            <param name="repeat_masking|repeat_source|species_name" value="rodent" />
+            <output name="output_gff" file="annot_dfam_up.gff3"/>
+            <output name="output_evidences" file="evidences_norm_dfam_up.gff3" compare="sim_size"/>
+        </test>
     </tests>
     <help><![CDATA[
         MAKER is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources.