view moabs.xml @ 0:26d7ec4af119 draft

"planemo upload for repository https://github.com/sunnyisgalaxy/moabs commit fca680a439f168971afc9944ccbbdd9b3b65c845"
author iuc
date Fri, 06 Sep 2019 09:54:27 -0400
parents
children 8c8cc81b34cd
line wrap: on
line source

<tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@">
    <description>MOdel based Analysis of Bisulfite Sequencing data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
        <![CDATA[
        ## When the comparison step is skipped, stage the user-supplied comparison
        ## file under the fixed name comp.g1.vs.g2.txt (the same name that is
        ## copied to output 2 below).
        #if str( $mcomp_advanced.doComp.compare_selector ) == "0":
            cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt &&
        #end if
        ## output_selector is an optional multi-select, so it may hold no value at
        ## all. Convert it to a plain list of the selected option values once, so
        ## the membership tests below are exact matches and cannot fail when
        ## nothing is selected.
        #set $selected_outputs = str( $output_selector ).split( ',' )
        ## Run the pipeline; every stage gets the number of slots granted by Galaxy
        ## (falling back to 4 when GALAXY_SLOTS is unset).
        moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' &&
        ## Copy only the requested result files to their Galaxy datasets. BAM and
        ## BED collections (options 3 and 4) are collected by dataset discovery.
        #if "1" in $selected_outputs:
            cp -f dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr '$output1' &&
        #end if
        #if "2" in $selected_outputs:
            cp -f comp.g1.vs.g2.txt '$output2' &&
        #end if
        ## Terminates the trailing "&&" chain.
        echo Done
        ]]>
    </command>
    <configfiles>
        <!--
            cfg_file: the MOABS configuration file handed to "moabs" via its cf option.

            * The reference FASTA path comes either from the selected history
              dataset or from the "path" field of the selected data table entry.
            * [INPUT] gets one key per replicate. Group 1 replicates are written
              as s1_r<N> and group 2 replicates as s2_r<N>, where N is the
              1-based position in the repeat. Paired data (two datasets or a
              paired collection) is written as two keys with the suffixes _1
              (forward) and _2 (reverse).
            * [MMAP] only contains "v" when a mismatch rate or number was
              chosen; "n" and "r" always come from the BSMAP advanced section.
            * [MCOMP] "doComp" mirrors the "Run the comparison or not" selector.

            The template text is emitted verbatim, so indentation and blank
            lines inside the configfile element end up in the generated file.
        -->
        <configfile name="cfg_file">
            #if str( $reference_source.reference_source_selector ) == "history":
                #set $reference_fasta_filename = $reference_source.ref_file
            #else:
                #set $reference_fasta_filename = $reference_source.ref_file.fields.path
            #end if
            [INPUT]
            #for $i, $s in enumerate( $g1_fastq )
                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
                    s1_r${i+1}_1='$s.fastq_input.fastq_input1'
                    s1_r${i+1}_2='$s.fastq_input.fastq_input2'
                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
                    s1_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
                    s1_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
                #else:
                    s1_r${i+1}='$s.fastq_input.fastq_input1'
                #end if
            #end for

            #for $i, $s in enumerate( $g2_fastq )
                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
                    s2_r${i+1}_1='$s.fastq_input.fastq_input1'
                    s2_r${i+1}_2='$s.fastq_input.fastq_input2'
                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
                    s2_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
                    s2_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
                #else:
                    s2_r${i+1}='$s.fastq_input.fastq_input1'
                #end if
            #end for

            [TASK]
            Program=MMAP
            Label=g1,g2
            Parallel=NONE

            [MMAP]
            Path=bsmap
            d='${reference_fasta_filename}'
            #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0":
                v=$bsmap_advanced.bsmap_mismatch.v
            #end if
            n=$bsmap_advanced.n
            r=$bsmap_advanced.r
            R=''

            [MCALL]
            Path=mcall
            r='${reference_fasta_filename}'

            [MCOMP]
            Path=mcomp
            reference='${reference_fasta_filename}'
            doComp=$mcomp_advanced.doComp.compare_selector
        </configfile>
    </configfiles>

    <inputs>
        <!-- Reference genome: either an entry of the all_fasta data table or a FASTA dataset from the history. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a cached FASTA?" help="Cached genomes are the ones listed in the all_fasta data table; otherwise choose a FASTA dataset from your history">
                <option value="cached">Use a cached genome FASTA</option>
                <option value="history">Use a genome FASTA from history</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No genome FASTA are available" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>
        <!-- Group 1 reads: one repeat instance per replicate; each replicate is single-end, paired, or a paired collection. -->
        <repeat name="g1_fastq" title="Group1: fastq files" min="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                    <option value="single">Single</option>
                    <option value="paired">Paired</option>
                    <option value="paired_collection">Paired Collection</option>
                </param>
                <when value="paired">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                </when>
                <when value="single">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
                </when>
                <when value="paired_collection">
                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                </when>
            </conditional>
        </repeat>
        <!-- Group 2 reads: same structure as group 1. -->
        <repeat name="g2_fastq" title="Group2: fastq files" min="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                    <option value="single">Single</option>
                    <option value="paired">Paired</option>
                    <option value="paired_collection">Paired Collection</option>
                </param>
                <when value="paired">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                </when>
                <when value="single">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
                </when>
                <when value="paired_collection">
                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                </when>
            </conditional>
        </repeat>
        <!-- BSMAP options written to the [MMAP] section of the configuration file. -->
        <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False">
            <conditional name="bsmap_mismatch">
                <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help="">
                    <option value="0">Do not set</option>
                    <option value="1">Set the mismatch rate</option>
                    <option value="2">Set the mismatch number</option>
                </param>
                <when value="1">
                    <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t to the read length"/>
                </when>
                <when value="2">
                    <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/>
                </when>
            </conditional>
            <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+)"/>
            <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow)">
                <option value="0" selected="true">0</option>
                <option value="1">1</option>
                <option value="2">2</option>
            </param>
        </section>
        <!-- MCOMP options: run the group comparison, or reuse a comparison file from the history. -->
        <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False">
            <conditional name="doComp">
                <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`">
                    <option value="1">Yes</option>
                    <option value="0">No</option>
                </param>
                <when value="0">
                    <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/>
                </when>
            </conditional>
        </section>
        <!-- Outputs are opt-in. The DMR file is preselected so that a run with default settings reports a result. -->
        <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help="">
            <option value="1" selected="true"> dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr </option>
            <option value="2"> comp.g1.vs.g2.txt </option>
            <option value="3"> BAM files </option>
            <option value="4"> Methylation calling BED files </option>
        </param>
    </inputs>
    <outputs>
        <!--
            Every output is created only when its option is ticked in the
            optional multi-select "output_selector". The selector holds no value
            when nothing is ticked, so each filter first checks that a value
            exists; a bare membership test would raise on the empty selection
            instead of evaluating to false.
        -->
        <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr">
            <filter>output_selector and "1" in output_selector</filter>
        </data>
        <data name="output2" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt">
            <filter>output_selector and "2" in output_selector</filter>
        </data>
        <!-- Collected from the working directory: every file whose name ends in .bam. -->
        <collection name="output_collection_bam" type="list" label="${tool.name} on ${on_string} : BAM files">
            <filter>output_selector and "3" in output_selector</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext="bam"/>
        </collection>
        <!-- Collected from the working directory: file names ending in g1.G.bed or g2.G.bed. -->
        <collection name="output_collection_bed" type="list" label="${tool.name} on ${on_string} : Methylation calling BED files">
            <filter>output_selector and "4" in output_selector</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext="interval"/>
        </collection>
    </outputs>
    <tests>
        <test>
            <!--
                Single-end reads: two replicates per group, mapped against a
                FASTA reference taken from the history (chr11.fa). The comparison
                step is switched off (compare_selector=0) and a precomputed
                comparison file is supplied instead. All four output kinds are
                requested, so both datasets and both collections are checked;
                BAM files are compared by size only.
            -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="chr11.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison step (compare_selector=1) instead of reusing a precomputed comparison file. -->
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2,3,4"/>
            <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
            <output_collection name="output_collection_bam" count="4">
                <element name="g1_r1.bam" file="SE_g1_r1.bam" compare="sim_size"/>
                <element name="g1_r2.bam" file="SE_g1_r2.bam" compare="sim_size"/>
                <element name="g2_r1.bam" file="SE_g2_r1.bam" compare="sim_size"/>
                <element name="g2_r2.bam" file="SE_g2_r2.bam" compare="sim_size"/>
            </output_collection>
            <output_collection name="output_collection_bed" count="2">
                <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/>
                <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/>
            </output_collection>
        </test>
        <test>
            <!--
                Paired-end reads given as two separate datasets: one replicate
                per group, mapped against a FASTA reference taken from the
                history (seg.fa). The comparison step is switched off and a
                precomputed comparison file is supplied. Only the DMR file and
                the comparison file are requested and checked.
            -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="seg.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired"/>
                    <param name="fastq_input1" value="6_all_1.fq.gz"/>
                    <param name="fastq_input2" value="6_all_2.fq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired"/>
                    <param name="fastq_input1" value="8_all_1.fq.gz"/>
                    <param name="fastq_input2" value="8_all_2.fq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison step (compare_selector=1) instead of reusing a precomputed comparison file. -->
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2"/>
            <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
        <test>
            <!--
                Paired-end reads given as a paired collection: the same read
                files and expected results as the paired-end dataset test, but
                each replicate is passed as one collection with a forward and a
                reverse element.
            -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="seg.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired_collection"/>
                    <param name="fastq_input1">
                        <collection type="paired">
                            <element name="forward" value="6_all_1.fq.gz" />
                            <element name="reverse" value="6_all_2.fq.gz" />
                        </collection>
                    </param>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired_collection"/>
                    <param name="fastq_input1">
                        <collection type="paired">
                            <element name="forward" value="8_all_1.fq.gz" />
                            <element name="reverse" value="8_all_2.fq.gz" />
                        </collection>
                    </param>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison step (compare_selector=1) instead of reusing a precomputed comparison file. -->
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2"/>
            <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
        <test>
            <!--
                Cached reference: the same single-end reads and expected results
                as the single-end test, but the reference is chosen by value
                ("chr11") from the cached genome list rather than uploaded to
                the history. Only the DMR file and the comparison file are
                requested and checked.
            -->
            <param name="reference_source_selector" value="cached"/>
            <param name="ref_file" value="chr11"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison step (compare_selector=1) instead of reusing a precomputed comparison file. -->
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2"/>
            <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**MOABS: MOdel based Analysis of Bisulfite Sequencing data**

MOABS is a comprehensive, accurate and efficient solution for analysis of large
scale base-resolution DNA methylation data, bisulfite sequencing or single
molecule direct sequencing.

MOABS seamlessly integrates alignment, methylation calling, identification of
hypomethylation for one sample and differential methylation for multiple
samples, and other downstream analysis.

For more information, check https://github.com/sunnyisgalaxy/moabs.

-----

**Input files**

MOABS takes bisulfite sequencing reads from two groups of interest, e.g.
KO vs WT. Each group may combine sequencing library types, i.e.
single-end reads and/or paired-end reads. Multiple replicates can be specified in each group.

**Outputs**

Four output files can be selected to report, namely

    1. **DMR region file** - the major result file
    2. **Comparison file between two groups** - the intermediate comparison result
    3. **BAM files** - intermediate BAM files
    4. **Methylation BED files** - intermediate methylation BED files

-----

MOABS will detect differentially methylated regions (DMRs) using the input BS-Seq
reads. The output file is a tab-delimited text file (not strictly a BED
format), representing DMRs. It has 8 columns as below.

chrom<TAB>start<TAB>end<TAB>methylation_state<TAB>CpGsites<TAB>DMCcount<TAB>nonDMCcount<TAB>hidden_state

    1. **chrom** - The chromosome of the region.
    2. **start** - The start genomic locus of the region.
    3. **end** - The end genomic locus of the region.
    4. **methylation_state** - The methylation state of the region, "+"/"-" representing hyper- or hypo-methylation regions.
    5. **CpGsites** - Total number of CpG sites in the region.
    6. **DMCcount** - The number of differentially methylated CpG sites (DMCs) in the region.
    7. **nonDMCcount** - The number of non-DMCs in the region.
    8. **hidden_state** - The hidden state predicted by the Hidden Markov Model (HMM), "1"/"-1" representing hyper- or hypo-methylation states.

For example, six DMRs are identified in the following format.

@DMRExample@

-----

The intermediate comparison file summarizes methylation ratio comparison
results on CpG sites. It has 19 columns as below.

    1. **chrom** - The chromosome of the CpG site.
    2. **start** - The start position of the site.
    3. **end** - The end position of the site.
    4. **single** - The next two columns are attributes for the single position.
    5. **totalC_0** - Total number of Cs in the first group.
    6. **nominalRatio_0** - Nominal methylation ratio in the first group.
    7. **ratioCI_0** - The confidence interval of the methylation ratio in the first group.
    8. **single** - The next two columns are attributes for the single position.
    9. **totalC_1** - Total number of Cs in the second group.
    10. **nominalRatio_1** - Nominal methylation ratio in the second group.
    11. **ratioCI_1** - The confidence interval of the methylation ratio in the second group.
    12. **pair** - The next three columns are attributes for pairs of groups.
    13. **nominalDif_1-0** - Nominal difference of methylation ratio between group 1 and group 0.
    14. **credibleDif_1-0** - Credible methylation difference between group 1 and group 0.
    15. **difCI_1-0** - Difference of confidence intervals between group 1 and group 0.
    16. **p_sim** - The next column is the simulation p-value.
    17. **p_sim_1_v_0** - Simulation p-value between group 1 and group 0.
    18. **p_fet** - The next column is the FET p-value.
    19. **p_fet_1_v_0** - FET p-value between group 1 and group 0.

The comparison result file can be reused for DMR calling.

-----

BAM files are the intermediate results of mapping the input reads to the reference
genome. These BAM files can be reused in downstream methylation analysis.

-----

Methylation calling BED files are intermediate methylation calling results of
Cs in two groups of input reads. These methylation calling results can be easily
reused in downstream DMR calling and visualization. The BED file has 15 columns
as below.

    1. **chrom** - The chromosome of the site.
    2. **start** - The start position of the site.
    3. **end** - The end position of the site.
    4. **ratio** - Methylation ratio in the site
    5. **totalC** - Total number of reads in current Cs.
    6. **methC** - Methylated Cs.
    7. **strand** - The strand information for the previous three columns.
    8. **next** - The next base.
    9. **Plus** - Next two columns are for forward strand.
    10. **totalC** - Total number of Cs.
    11. **methC** - Methylated Cs.
    12. **Minus** - Next two columns are for reverse strand.
    13. **totalC** - Total number of Cs.
    14. **methC** - Methylated Cs.
    15. **localSeq** - Local sequences.

]]>
    </help>
    <expand macro="citations"/>
</tool>