diff stacks_denovomap.xml @ 8:39d2b6c1c030 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit dc23703c260d004a28fe24a2a7c00cb4371bc32e
author iuc
date Thu, 27 Apr 2017 04:17:51 -0400
parents 34000c653ae5
children 9f9d39f582f3
line wrap: on
line diff
--- a/stacks_denovomap.xml	Fri Apr 07 11:47:48 2017 -0400
+++ b/stacks_denovomap.xml	Thu Apr 27 04:17:51 2017 -0400
@@ -6,54 +6,47 @@
     <expand macro="requirements"/>
     <expand macro="stdio"/>
     <command><![CDATA[
-        #from os.path import splitext
-        #import re
 
-        #if str( $options_usage.rad_analysis_type ) == "genetic":
-            #for $input_parent in $options_usage.parent_sequences:
+        @CLEAN_EXT@
 
-                #if $input_parent.is_of_type('fastqsanger'):
-                    #set $data_path = splitext($input_parent.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".fq"
-                #else:
-                    #set $data_path = splitext($input_parent.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".fa"
+        #if str( $options_usage.rad_analysis_type ) == "genetic"
+            #for $input_parent in $options_usage.parent_sequences
+                #if $input_parent.is_of_type('fastqsanger')
+                    #set $data_path = $clean_ext($input_parent.element_identifier) + ".fq"
+                #else if $input_parent.is_of_type('fastqsanger.gz')
+                    #set $data_path = $clean_ext($input_parent.element_identifier) + ".fq.gz"
+                #else
+                    #set $data_path = $clean_ext($input_parent.element_identifier) + ".fa"
                 #end if
 
-                ln -s "${input_parent}" "${data_path}" &&
+                ln -s '${input_parent}' '${data_path}' &&
             #end for
 
-            #for $input_progeny in $options_usage.progeny_sequences:
-                #if $input_progeny:
-                    #if $input_progeny.is_of_type('fastqsanger'):
-                        #set $data_path = splitext($input_progeny.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".fq"
-                    #else:
-                        #set $data_path = splitext($input_progeny.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".fa"
+            #for $input_progeny in $options_usage.progeny_sequences
+                #if $input_progeny
+                    #if $input_progeny.is_of_type('fastqsanger')
+                        #set $data_path = $clean_ext($input_progeny.element_identifier) + ".fq"
+                    #else if $input_progeny.is_of_type('fastqsanger.gz')
+                        #set $data_path = $clean_ext($input_progeny.element_identifier) + ".fq.gz"
+                    #else
+                        #set $data_path = $clean_ext($input_progeny.element_identifier) + ".fa"
                     #end if
 
-                    ln -s "${input_progeny}" "${data_path}" &&
+                    ln -s '${input_progeny}' '${data_path}' &&
                 #end if
             #end for
-        #else:
-            #for $input_indiv in $options_usage.individual_sample:
+        #else
+            #for $input_indiv in $options_usage.individual_sample
 
-                #if $input_indiv.is_of_type('fastqsanger'):
-                    #set $data_path = splitext($input_indiv.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".fq"
-                #else:
-                    #set $data_path = splitext($input_indiv.element_identifier)[0]
-                    #set $data_path = re.sub(r'\.1$', '', $data_path)
-                    #set $data_path = $data_path + ".fa"
+                #if $input_indiv.is_of_type('fastqsanger')
+                    #set $data_path = $clean_ext($input_indiv.element_identifier) + ".fq"
+                #else if $input_indiv.is_of_type('fastqsanger.gz')
+                    #set $data_path = $clean_ext($input_indiv.element_identifier) + ".fq.gz"
+                #else
+                    #set $data_path = $clean_ext($input_indiv.element_identifier) + ".fa"
                 #end if
 
-                ln -s "${input_indiv}" "${data_path}" &&
+                ln -s '${input_indiv}' '${data_path}' &&
             #end for
         #end if
 
@@ -65,64 +58,58 @@
 
             -T \${GALAXY_SLOTS:-1}
 
-            #if str( $options_usage.rad_analysis_type ) == "genetic":
-                #for $input_parent in $options_usage.parent_sequences:
-                    #if $input_parent.is_of_type('fastqsanger'):
-                        #set $data_path = splitext($input_parent.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".fq"
-                    #else:
-                        #set $data_path = splitext($input_parent.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".fa"
+            #if str( $options_usage.rad_analysis_type ) == "genetic"
+                #for $input_parent in $options_usage.parent_sequences
+                    #if $input_parent.is_of_type('fastqsanger')
+                        #set $data_path = $clean_ext($input_parent.element_identifier) + ".fq"
+                    #else if $input_parent.is_of_type('fastqsanger.gz')
+                        #set $data_path = $clean_ext($input_parent.element_identifier) + ".fq.gz"
+                    #else
+                        #set $data_path = $clean_ext($input_parent.element_identifier) + ".fa"
                     #end if
 
-                    -p "${data_path}"
+                    -p '${data_path}'
                 #end for
 
                 -A $options_usage.cross_type
 
-                #for $input_progeny in $options_usage.progeny_sequences:
-                    #if $input_progeny:
-                        #if $input_progeny.is_of_type('fastqsanger'):
-                            #set $data_path = splitext($input_progeny.element_identifier)[0]
-                            #set $data_path = re.sub(r'\.1$', '', $data_path)
-                            #set $data_path = $data_path + ".fq"
-                        #else:
-                            #set $data_path = splitext($input_progeny.element_identifier)[0]
-                            #set $data_path = re.sub(r'\.1$', '', $data_path)
-                            #set $data_path = $data_path + ".fa"
+                #for $input_progeny in $options_usage.progeny_sequences
+                    #if $input_progeny
+                        #if $input_progeny.is_of_type('fastqsanger')
+                            #set $data_path = $clean_ext($input_progeny.element_identifier) + ".fq"
+                        #else if $input_progeny.is_of_type('fastqsanger.gz')
+                            #set $data_path = $clean_ext($input_progeny.element_identifier) + ".fq.gz"
+                        #else
+                            #set $data_path = $clean_ext($input_progeny.element_identifier) + ".fa"
                         #end if
 
-                        -r "${data_path}"
+                        -r '${data_path}'
                     #end if
                 #end for
 
-                #if str($assembly_options.P):
+                #if str($assembly_options.P)
                     -P $assembly_options.P
                 #end if
-            #else:
-                #for $i_indiv, $input_indiv in enumerate($options_usage.individual_sample):
+            #else
+                #for $i_indiv, $input_indiv in enumerate($options_usage.individual_sample)
 
-                    #if $input_indiv.is_of_type('fastqsanger'):
-                        #set $data_path = splitext($input_indiv.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".fq"
-                    #else:
-                        #set $data_path = splitext($input_indiv.element_identifier)[0]
-                        #set $data_path = re.sub(r'\.1$', '', $data_path)
-                        #set $data_path = $data_path + ".fa"
+                    #if $input_indiv.is_of_type('fastqsanger')
+                        #set $data_path = $clean_ext($input_indiv.element_identifier) + ".fq"
+                    #else if $input_indiv.is_of_type('fastqsanger.gz')
+                        #set $data_path = $clean_ext($input_indiv.element_identifier) + ".fq.gz"
+                    #else
+                        #set $data_path = $clean_ext($input_indiv.element_identifier) + ".fa"
                     #end if
 
-                    -s "${data_path}"
+                    -s '${data_path}'
                 #end for
-                -O "$options_usage.popmap"
+                -O '$options_usage.popmap'
             #end if
 
-            #if str($assembly_options.m):
+            #if str($assembly_options.m)
                 -m $assembly_options.m
             #end if
-            #if str($assembly_options.N):
+            #if str($assembly_options.N)
                 -N $assembly_options.N
             #end if
             -M $assembly_options.M
@@ -137,19 +124,29 @@
             -S
 
             ## snp_model
-            #if str( $snp_options.select_model.model_type) == "bounded":
+            #if str( $snp_options.select_model.model_type) == "bounded"
                 --bound_low $snp_options.select_model.bound_low
                 --bound_high $snp_options.select_model.bound_high
                 --alpha $snp_options.select_model.alpha
-            #else if str( $snp_options.select_model.model_type) == "snp":
+            #else if str( $snp_options.select_model.model_type) == "snp"
                 --alpha $snp_options.select_model.alpha
             #end if
 
             -o stacks_outputs
 
-            #if str( $options_usage.rad_analysis_type ) == "genetic":
+            #if str( $options_usage.rad_analysis_type ) == "genetic"
                 @NORM_GENOTYPES_OUTPUT_LIGHT@
             #end if
+
+           ## If the inputs are gzipped, stacks writes gzipped output files too (it has no option to control this), so decompress any *.gz found in stacks_outputs
+           && if ls stacks_outputs/*.gz > /dev/null 2>&1; then gunzip stacks_outputs/*.gz; fi
+
+            &&
+
+            stacks_summary.py --stacks-prog denovo_map.pl --res-dir stacks_outputs --logfile stacks_outputs/denovo_map.log --summary stacks_outputs/summary.html
+            #if str( $options_usage.rad_analysis_type ) == "population"
+                --pop-map '$options_usage.popmap'
+            #end if
     ]]></command>
 
     <inputs>
@@ -159,16 +156,16 @@
                 <option value="population">Population</option>
             </param>
             <when value="genetic">
-                <param name="parent_sequences" argument="-p" format="fastqsanger,fasta" type="data" multiple="true" label="Files containing parent sequences" help="Files containing parent sequences from a mapping cross (only R1 reads). Dataset names will be used as sample name (no space allowed)." />
+                <param name="parent_sequences" argument="-p" format="fastqsanger,fastqsanger.gz,fasta" type="data" multiple="true" label="Files containing parent sequences" help="Files containing parent sequences from a mapping cross (only R1 reads). Dataset names will be used as sample name (no space allowed)." />
 
-                <param name="progeny_sequences" argument="-r" format="fastqsanger,fasta" type="data" multiple="true" optional="true" label="Files containing progeny sequences" help="files containing progeny sequences from a mapping cross (only R1 reads). Dataset names will be used as sample name (no space allowed)." />
+                <param name="progeny_sequences" argument="-r" format="fastqsanger,fastqsanger.gz,fasta" type="data" multiple="true" optional="true" label="Files containing progeny sequences" help="files containing progeny sequences from a mapping cross (only R1 reads). Dataset names will be used as sample name (no space allowed)." />
 
                 <param name="cross_type" argument="-A" type="select" label="Cross type">
                     <expand macro="cross_types"/>
                 </param>
             </when>
             <when value="population">
-                <param name="individual_sample" argument="-s" format="fastqsanger,fasta" type="data" multiple="true" label="Files containing an individual sample from a population" help="files containing an individual sample from a population (only R1 reads). Dataset names will be used as sample name (no space allowed)." />
+                <param name="individual_sample" argument="-s" format="fastqsanger,fastqsanger.gz,fasta" type="data" multiple="true" label="Files containing an individual sample from a population" help="files containing an individual sample from a population (only R1 reads). Dataset names will be used as sample name (no space allowed)." />
                 <param name="popmap" argument="-O" type="data" format="tabular,txt" label="Specify a population map" />
             </when>
         </conditional>
@@ -193,6 +190,8 @@
     <outputs>
         <data format="txt" name="output_log" label="denovo_map.log with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/denovo_map.log" />
 
+        <data format="html" name="output_summary" label="Summary from ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/summary.html" />
+
         <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" />
         <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" />
         <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" />
@@ -232,6 +231,11 @@
                     <has_text text="denovo_map.pl completed" />
                 </assert_contents>
             </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>
 
             <!-- catalog -->
             <output name="catalogtags">
@@ -312,6 +316,11 @@
                     <has_text text="denovo_map.pl completed" />
                 </assert_contents>
             </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>
 
             <!-- catalog -->
             <output name="catalogtags">
@@ -392,6 +401,11 @@
                     <has_text text="denovo_map.pl completed" />
                 </assert_contents>
             </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>
 
             <!-- catalog -->
             <output name="catalogtags">
@@ -467,6 +481,89 @@
                 </element>
             </output_collection>
         </test>
+        <test>
+            <param name="options_usage|rad_analysis_type" value="genetic"/>
+            <param name="options_usage|parent_sequences" value="demultiplexed/PopA_01.1.fq.gzip" ftype="fastqsanger.gz" />
+            <output name="output_log">
+                <assert_contents>
+                    <has_text text="denovo_map.pl completed" />
+                </assert_contents>
+            </output>
+            <output name="output_summary">
+                <assert_contents>
+                    <has_text text="Stacks Statistics" />
+                </assert_contents>
+            </output>
+
+            <!-- catalog -->
+            <output name="catalogtags">
+                <assert_contents>
+                    <has_text text="catalog generated on" />
+                </assert_contents>
+            </output>
+            <output name="catalogsnps">
+                <assert_contents>
+                    <has_text text="catalog generated on" />
+                </assert_contents>
+            </output>
+            <output name="catalogalleles">
+                <assert_contents>
+                    <has_text text="catalog generated on" />
+                </assert_contents>
+            </output>
+
+            <!-- genotypes -->
+            <output name="out_generic_haplo">
+                <assert_contents>
+                    <has_text text="Catalog ID" />
+                </assert_contents>
+            </output>
+            <output name="out_sql_markers">
+                <assert_contents>
+                    <has_text text="Total Genotypes" />
+                </assert_contents>
+            </output>
+            <output name="out_joinmap">
+                <assert_contents>
+                    <has_text text="batch_1.genotypes_" />
+                </assert_contents>
+            </output>
+            <output name="out_sql_genotypes">
+                <assert_contents>
+                    <has_text text="SQL ID" />
+                </assert_contents>
+            </output>
+
+            <!-- samples -->
+            <output_collection name="tags">
+                <element name="PopA_01.tags">
+                    <assert_contents>
+                        <has_text text="generated on " />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="snps">
+                <element name="PopA_01.snps">
+                    <assert_contents>
+                        <has_text text="generated on " />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="alleles">
+                <element name="PopA_01.alleles">
+                    <assert_contents>
+                        <has_text text="generated on " />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="matches">
+                <element name="PopA_01.matches">
+                    <assert_contents>
+                        <has_text text="generated on " />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
     </tests>
 
     <help>