diff moabs.xml @ 0:26d7ec4af119 draft

"planemo upload for repository https://github.com/sunnyisgalaxy/moabs commit fca680a439f168971afc9944ccbbdd9b3b65c845"
author iuc
date Fri, 06 Sep 2019 09:54:27 -0400
parents
children 8c8cc81b34cd
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moabs.xml	Fri Sep 06 09:54:27 2019 -0400
@@ -0,0 +1,465 @@
+<tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@">
+    <description>MOdel based Analysis of Bisulfite Sequencing data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+        <![CDATA[
+        #if str( $mcomp_advanced.doComp.compare_selector ) == "0":
+            cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt &&
+        #end if
+        moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' &&
+        #if "1" in $output_selector:
+            cp -f dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr '$output1' &&
+        #end if
+        #if "2" in $output_selector:
+            cp -f comp.g1.vs.g2.txt '$output2' &&
+        #end if
+        echo Done
+        ]]>
+    </command>
+    <configfiles>
+        <configfile name="cfg_file">
+            #if str( $reference_source.reference_source_selector ) == "history":
+                #set $reference_fasta_filename = $reference_source.ref_file
+            #else:
+                #set $reference_fasta_filename = $reference_source.ref_file.fields.path
+            #end if
+            [INPUT]
+            #for $i, $s in enumerate( $g1_fastq )
+                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
+                    s1_r${i+1}_1='$s.fastq_input.fastq_input1'
+                    s1_r${i+1}_2='$s.fastq_input.fastq_input2'
+                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
+                    s1_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
+                    s1_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
+                #else:
+                    s1_r${i+1}='$s.fastq_input.fastq_input1'
+                #end if
+            #end for
+
+            #for $i, $s in enumerate( $g2_fastq )
+                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
+                    s2_r${i+1}_1='$s.fastq_input.fastq_input1'
+                    s2_r${i+1}_2='$s.fastq_input.fastq_input2'
+                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
+                    s2_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
+                    s2_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
+                #else:
+                    s2_r${i+1}='$s.fastq_input.fastq_input1'
+                #end if
+            #end for
+
+            [TASK]
+            Program=MMAP
+            Label=g1,g2
+            Parallel=NONE
+
+            [MMAP]
+            Path=bsmap
+            d='${reference_fasta_filename}'
+            #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0":
+                v=$bsmap_advanced.bsmap_mismatch.v
+            #end if
+            n=$bsmap_advanced.n
+            r=$bsmap_advanced.r
+            R=''
+
+            [MCALL]
+            Path=mcall
+            r='${reference_fasta_filename}'
+
+            [MCOMP]
+            Path=mcomp
+            reference='${reference_fasta_filename}'
+            doComp=$mcomp_advanced.doComp.compare_selector
+        </configfile>
+    </configfiles>
+
+    <inputs>
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a cache FASTA?" help="Cached FASTA">
+                <option value="cached">Use a cached genome FASTA</option>
+                <option value="history">Use a genome FASTA from history</option>
+            </param>
+            <when value="cached">
+                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No genome FASTA are available" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
+            </when>
+        </conditional>
+        <repeat name="g1_fastq" title="Group1: fastq files" min="1">
+            <conditional name="fastq_input">
+                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
+                    <option value="single">Single</option>
+                    <option value="paired">Paired</option>
+                    <option value="paired_collection">Paired Collection</option>
+                </param>
+                <when value="paired">
+                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
+                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+                </when>
+                <when value="single">
+                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
+                </when>
+                <when value="paired_collection">
+                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+                </when>
+            </conditional>
+        </repeat>
+        <repeat name="g2_fastq" title="Group2: fastq files" min="1">
+            <conditional name="fastq_input">
+                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
+                    <option value="single">Single</option>
+                    <option value="paired">Paired</option>
+                    <option value="paired_collection">Paired Collection</option>
+                </param>
+                <when value="paired">
+                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
+                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+                </when>
+                <when value="single">
+                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
+                </when>
+                <when value="paired_collection">
+                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+                </when>
+            </conditional>
+        </repeat>
+        <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False">
+            <conditional name="bsmap_mismatch">
+                <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help="">
+                    <option value="0">Do not set</option>
+                    <option value="1">Set the mismatch rate</option>
+                    <option value="2">Set the mismatch number</option>
+                </param>
+                <when value="1">
+                    <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t to the read length"/>
+                </when>
+                <when value="2">
+                    <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/>
+                </when>
+            </conditional>
+            <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+)"/>
+            <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow)">
+                <option value="0" selected="true">0</option>
+                <option value="1">1</option>
+                <option value="2">2</option>
+            </param>
+        </section>
+        <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False">
+            <conditional name="doComp">
+                <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`">
+                    <option value="1">Yes</option>
+                    <option value="0">No</option>
+                </param>
+                <when value="0">
+                    <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/>
+                </when>
+            </conditional>
+        </section>
+        <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help="">
+            <option value="1"> dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr </option>
+            <option value="2"> comp.g1.vs.g2.txt </option>
+            <option value="3"> BAM files </option>
+            <option value="4"> Methylation calling BED files </option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr">
+            <filter> "1" in output_selector </filter>
+        </data>
+        <data name="output2" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt">
+            <filter> "2" in output_selector </filter>
+        </data>
+        <collection name="output_collection_bam" type="list" label="BAM files">
+            <filter> "3" in output_selector </filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext='bam'/>
+        </collection>
+        <collection name="output_collection_bed" type="list" label="Methylation calling BED files">
+            <filter> "4" in output_selector </filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext='interval'/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <!-- test single-end reads -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" ftype="fasta" value="chr11.fa"/>
+            <repeat name="g1_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g1_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g2_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g2_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <conditional name="doComp">
+                <param name="compare_selector" value="0"/>
+                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
+            </conditional>
+            <!--
+            <conditional name="doComp">
+                <param name="compare_selector" value="1"/>
+            </conditional>
+            -->
+            <param name="output_selector" value="1,2,3,4"/>
+            <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
+            <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
+            <output_collection name="output_collection_bam" count="4">
+                <element name="g1_r1.bam" file="SE_g1_r1.bam" compare="sim_size"/>
+                <element name="g1_r2.bam" file="SE_g1_r2.bam" compare="sim_size"/>
+                <element name="g2_r1.bam" file="SE_g2_r1.bam" compare="sim_size"/>
+                <element name="g2_r2.bam" file="SE_g2_r2.bam" compare="sim_size"/>
+            </output_collection>
+            <output_collection name="output_collection_bed" count="2">
+                <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/>
+                <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/>
+            </output_collection>
+        </test>
+        <test>
+            <!-- test paired-end reads -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" ftype="fasta" value="seg.fa"/>
+            <repeat name="g1_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="paired"/>
+                    <param name="fastq_input1" value="6_all_1.fq.gz"/>
+                    <param name="fastq_input2" value="6_all_2.fq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g2_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="paired"/>
+                    <param name="fastq_input1" value="8_all_1.fq.gz"/>
+                    <param name="fastq_input2" value="8_all_2.fq.gz"/>
+                </conditional>
+            </repeat>
+            <conditional name="doComp">
+                <param name="compare_selector" value="0"/>
+                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
+            </conditional>
+            <!--
+            <conditional name="doComp">
+                <param name="compare_selector" value="1"/>
+            </conditional>
+            -->
+            <param name="output_selector" value="1,2"/>
+            <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
+            <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
+        </test>
+        <test>
+            <!-- test paired collection -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" ftype="fasta" value="seg.fa"/>
+            <repeat name="g1_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="paired_collection"/>
+                    <param name="fastq_input1">
+                        <collection type="paired">
+                            <element name="forward" value="6_all_1.fq.gz" />
+                            <element name="reverse" value="6_all_2.fq.gz" />
+                        </collection>
+                    </param>
+                </conditional>
+            </repeat>
+            <repeat name="g2_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="paired_collection"/>
+                    <param name="fastq_input1">
+                        <collection type="paired">
+                            <element name="forward" value="8_all_1.fq.gz" />
+                            <element name="reverse" value="8_all_2.fq.gz" />
+                        </collection>
+                    </param>
+                </conditional>
+            </repeat>
+            <conditional name="doComp">
+                <param name="compare_selector" value="0"/>
+                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
+            </conditional>
+            <!--
+            <conditional name="doComp">
+                <param name="compare_selector" value="1"/>
+            </conditional>
+            -->
+            <param name="output_selector" value="1,2"/>
+            <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
+            <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
+        </test>
+        <test>
+            <!-- test data table reference -->
+            <param name="reference_source_selector" value="cached"/>
+            <param name="ref_file" value="chr11"/>
+            <repeat name="g1_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g1_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g2_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <repeat name="g2_fastq">
+                <conditional name="fastq_input">
+                    <param name="fastq_input_selector" value="single"/>
+                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
+                </conditional>
+            </repeat>
+            <conditional name="doComp">
+                <param name="compare_selector" value="0"/>
+                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
+            </conditional>
+            <!--
+            <conditional name="doComp">
+                <param name="compare_selector" value="1"/>
+            </conditional>
+            -->
+            <param name="output_selector" value="1,2"/>
+            <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
+            <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**MOABS: MOdel based Analysis of Bisulfite Sequencing data**
+
+MOABS is a comprehensive, accurate and efficient solution for analysis of large
+scale base-resolution DNA methylation data, bisulfite sequencing or single
+molecule direct sequencing.
+
+MOABS seamlessly integrates alignment, methylation calling, identification of
+hypomethylation for one sample and differential methylation for multiple
+samples, and other downstream analysis.
+
+For more information, check https://github.com/sunnyisgalaxy/moabs.
+
+-----
+
+**Input files**
+
+MOABS needs to input Bisulfite sequencing reads in two groups of interest, e.g.
+KO vs WT. Each group of reads may have combined sequencing library, i.e.
+single-end reads and/or paired-end reads. Multiple replicates can be specified in each group.
+
+**Outputs**
+
+Four output files can be selected to report, namely
+
+    1. **DMR region file** - the major result file
+    2. **Comparison file between two groups** - the intermediate comparion result
+    3. **BAM files** - intermediate BAM files
+    4. **Methylation BED files** - intermediate methylation BED files
+
+-----
+
+MOABS will detect differential methylated regions (DMRs) using the input BS-Seq
+reads. The output file is a tab-delimited text file (not strictly a BED
+format), representing DMRs. It has 8 columns as below.
+
+chrom<TAB>start<TAB>end<TAB>methylation_state<TAB>CpGsites<TAB>DMCcount<TAB>nonDMCcount<TAB>hidden_state
+
+    1. **chrom** - The chromosome of the region.
+    2. **start** - The start genomic locus of the region.
+    3. **end** - The end genomic locus of the region.
+    4. **methylation_state** - The methylation state of the region, "+"/"-" representing hyper- or hypo-methylation regions.
+    5. **CpGsites** - Total number of CpG sites in the region.
+    6. **DMCcount** - The number of differential methylated CpG sites (DMCs) in the region.
+    7. **nonDMCcount** - The number of non-DMCs in the region.
+    8. **hidden_state** - The hidden state prediced by Hidden Markov Model (HMM), "1"/"-1" representing hyper- or hypo-methylation states.
+
+For example, six DMRs are identified in the following format.
+
+@DMRExample@
+
+-----
+
+The intermediate comparison file summarizes methylation ratio comparison
+results on CpG sites. It has 19 columns as below.
+
+    1. **chrom** - The chromosome of the GpG site.
+    2. **start** - The start position of the site.
+    3. **end** - The end position of the site.
+    4. **single** - The next two columns are attributes for the single position.
+    5. **totalC_0** - Total number of Cs in the first group.
+    6. **nominalRatio_0** - Nominal methylation ratio in the first group.
+    7. **ratioCI_0** - The confidence interval of the methylation ratio in the first group.
+    8. **single** - The next two columns are attributes for the single position.
+    9. **totalC_1** - Total number of Cs in the second group.
+    10. **nominalRatio_1** - Nominal methylation ratio in the second group.
+    11. **ratioCI_1** - The confidence interval of the methylation ratio in the second group.
+    12. **pair** - The next three columns are attributes for pairs of groups.
+    13. **nominalDif_1-0** - Nominal difference of methylation ratio between group 1 and group 0.
+    14. **credibleDif_1-0** - Credible methylation difference between group 1 and group 0.
+    15. **difCI_1-0** - Difference of confidence intervals between group 1 and group 0.
+    16. **p_sim** - The next column is the simulation p-value.
+    17. **p_sim_1_v_0** - Simulation p-value between group 1 and group 0.
+    18. **p_fet** - The next column is the FET p-value.
+    19. **p_fet_1_v_0** - FET p-value between group 1 and group 0.
+
+The comparison result file can be reused for DMR calling.
+
+-----
+
+BAM files are intermediate mapping results of input reads to the referene
+genome. These BAM files can be reused in downstream methylation analysis.
+
+-----
+
+Methylation calling BED files are intermediate methylation calling results of
+Cs in two groups of input reads. These methyation calling results can be easily
+reused in downstream DMR calling and visualization. The BED file has 15 columns
+as below.
+
+    1. **chrom** - The chromosome of the site.
+    2. **start** - The start position of the site.
+    3. **end** - The end position of the site.
+    4. **ratio** - Methylation ratio in the site
+    5. **totalC** - Total number of reads in current Cs.
+    6. **methC** - Methylated Cs.
+    7. **strand** - The strand information for prevous three columns.
+    8. **next** - The next base.
+    9. **Plus** - Next two columns are for forward strand.
+    10. **totalC** - Total number of Cs.
+    11. **methC** - Methylated Cs.
+    12. **Minus** - Next two columns are for reverse strand.
+    13. **totalC** - Total number of Cs.
+    14. **methC** - Methylated Cs.
+    15. **localSeq** - Local sequences.
+
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>