<!-- Mercurial > repos > iuc > moabs -->

<tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@+galaxy1">
    <description>MOdel based Analysis of Bisulfite Sequencing data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command template (Cheetah), rendered into a single shell command whose steps are
        chained with the shell AND operator:
          1. If the comparison step is disabled (compare_selector is "0"), stage the
             user-supplied comparison file under the name comp.g1.vs.g2.txt.
          2. Run moabs with the generated config file; the "p" setting of the MMAP, MCALL
             and MCOMP stages is taken from GALAXY_SLOTS (falling back to 4).
          3. Copy each selected result file onto its Galaxy output dataset.
        The trailing "echo Done" terminates the chain.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        ## output_selector is an optional multi-select. Turn it into a plain list of the
        ## selected values once: the string form of an empty selection contains no option
        ## value, so the membership tests below are simply False instead of failing, and
        ## each test matches a whole value rather than a substring.
        #set $selected_outputs = str( $output_selector ).split(",")
        #if str( $mcomp_advanced.doComp.compare_selector ) == "0":
            cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt &&
        #end if
        moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' &&
        #if "1" in $selected_outputs:
            cp -f dmc_M2_g1.G.bed_vs_g2.G.bed.txt '$output1' &&
        #end if
        #if "2" in $selected_outputs:
            cp -f dmr_M2_g1.G.bed_vs_g2.G.bed.txt '$output2' &&
        #end if
        #if "3" in $selected_outputs:
            cp -f comp.g1.vs.g2.txt '$output3' &&
        #end if
        echo Done
        ]]>
    </command>
    <configfiles>
        <!--
            Cheetah template of the config file handed to moabs by the command.
            Sections written:
              [INPUT]  one key per replicate: s1_rN for group 1 and s2_rN for group 2
                       (N counts from 1), with _1/_2 suffixes for the two mates of paired
                       input (two datasets, or the forward/reverse elements of a paired
                       collection).
              [TASK]   fixed task settings (Program, Label, Parallel).
              [MMAP]   BSMAP options from the bsmap_advanced section; v is written only when
                       a mismatch rate/number was chosen, D and A only when non-empty, and
                       R and u only when enabled.
              [MCALL]  options from the mcall_advanced section.
              [MCOMP]  options from the mcomp_advanced section, including the doComp switch.
            The reference FASTA is the history dataset itself or, for a cached genome, the
            path field of the selected data table entry.
        -->
        <configfile name="cfg_file">
            #if str( $reference_source.reference_source_selector ) == "history":
                #set $reference_fasta_filename = $reference_source.ref_file
            #else:
                #set $reference_fasta_filename = $reference_source.ref_file.fields.path
            #end if
            [INPUT]
            #for $i, $s in enumerate( $g1_fastq )
                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
                    s1_r${i+1}_1='$s.fastq_input.fastq_input1'
                    s1_r${i+1}_2='$s.fastq_input.fastq_input2'
                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
                    s1_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
                    s1_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
                #else:
                    s1_r${i+1}='$s.fastq_input.fastq_input1'
                #end if
            #end for

            #for $i, $s in enumerate( $g2_fastq )
                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
                    s2_r${i+1}_1='$s.fastq_input.fastq_input1'
                    s2_r${i+1}_2='$s.fastq_input.fastq_input2'
                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
                    s2_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
                    s2_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
                #else:
                    s2_r${i+1}='$s.fastq_input.fastq_input1'
                #end if
            #end for

            [TASK]
            Program=MMAP
            Label=g1,g2
            Parallel=NONE

            [MMAP]
            Path=bsmap
            d='${reference_fasta_filename}'
            #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0":
                v=$bsmap_advanced.bsmap_mismatch.v
            #end if
            s=$bsmap_advanced.s
            w=$bsmap_advanced.w
            #if $bsmap_advanced.D:
                D=$bsmap_advanced.D
            #end if
            S=$bsmap_advanced.S
            n=$bsmap_advanced.n
            q=$bsmap_advanced.q
            z=$bsmap_advanced.z
            f=$bsmap_advanced.f
            #if $bsmap_advanced.A:
                A=$bsmap_advanced.A
            #end if
            r=$bsmap_advanced.r
            #if str( $bsmap_advanced.R ) == "1":
                R=''
            #end if
            #if str( $bsmap_advanced.u ) == "1":
                u=''
            #end if
            m=$bsmap_advanced.m
            x=$bsmap_advanced.x

            [MCALL]
            Path=mcall
            r='${reference_fasta_filename}'
            cytosineMinScore=$mcall_advanced.cytosineMinScore
            nextBaseMinScore=$mcall_advanced.nextBaseMinScore
            qualityScoreBase=$mcall_advanced.qualityScoreBase
            trimWGBSEndRepairPE2Seq=$mcall_advanced.trimWGBSEndRepairPE2Seq
            trimWGBSEndRepairPE1Seq=$mcall_advanced.trimWGBSEndRepairPE1Seq
            processPEOverlapSeq=$mcall_advanced.processPEOverlapSeq
            trimRRBSEndRepairSeq=$mcall_advanced.trimRRBSEndRepairSeq
            minFragSize=$mcall_advanced.minFragSize
            minMMFragSize=$mcall_advanced.minMMFragSize
            reportCpX=$mcall_advanced.reportCpX

            [MCOMP]
            Path=mcomp
            doComp=$mcomp_advanced.doComp.compare_selector
            d=$mcomp_advanced.d
            filterCredibleDif=$mcomp_advanced.filterCredibleDif
            pFetDmc=$mcomp_advanced.pFetDmc
            pFetDmr=$mcomp_advanced.pFetDmr
            minNominalDif=$mcomp_advanced.minNominalDif
            minCredibleDif=$mcomp_advanced.minCredibleDif
            minDmcsInDmr=$mcomp_advanced.minDmcsInDmr
            maxDistConsDmcs=$mcomp_advanced.maxDistConsDmcs
        </configfile>
    </configfiles>

    <inputs>
        <!-- Reference genome: either an entry of the all_fasta data table ("cached") or a
             FASTA dataset from the history. Both branches name their parameter ref_file;
             the config file template turns it into a path depending on the branch. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a cached FASTA?" help="Cached FASTA">
                <option value="cached">Use a cached genome FASTA</option>
                <option value="history">Use a genome FASTA from history</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No genome FASTA are available" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>
        <!-- Group 1 reads: one repeat entry per replicate (at least one is required). Each
             entry is either a single dataset, two paired datasets, or one paired collection;
             the config file template writes the entries as s1_rN keys in its [INPUT] section. -->
        <repeat name="g1_fastq" title="Group1: fastq files" min="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                    <option value="single">Single</option>
                    <option value="paired">Paired</option>
                    <option value="paired_collection">Paired Collection</option>
                </param>
                <when value="paired">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                </when>
                <when value="single">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
                </when>
                <when value="paired_collection">
                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                </when>
            </conditional>
        </repeat>
        <!-- Group 2 reads: same structure as the group 1 repeat (one entry per replicate, at
             least one required); the config file template writes the entries as s2_rN keys
             in its [INPUT] section. -->
        <repeat name="g2_fastq" title="Group2: fastq files" min="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                    <option value="single">Single</option>
                    <option value="paired">Paired</option>
                    <option value="paired_collection">Paired Collection</option>
                </param>
                <when value="paired">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                </when>
                <when value="single">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
                </when>
                <when value="paired_collection">
                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                </when>
            </conditional>
        </repeat>
        <!-- BSMAP (mapping) options. Each value is referenced by the config file template and
             written to its [MMAP] section under the same one-letter key. The mismatch
             setting v exists only when a rate or a number is chosen in bsmap_mismatch;
             D and A are free text restricted by the regex validators below. -->
        <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False">
            <param argument="-s" type="integer" value="16" min="8" max="16" label="Seed size" help="The seed size for the HASH table. BSMAP implements a seeding algorithm by indexing reference for all possible k-mers, i.e. seeds. As for the seed size, i.e. the length of k-mers, 16 is suggested for the WGBS mode, and 12 is suggested for the RRBS mode. Min=8, max=16."/>
            <conditional name="bsmap_mismatch">
                <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help="When `Do not set` selected, BSMAP will allow a default suggested 8% mismatch rate. Otherwise, a customized mismatch can be controlled by specifying a mismatch rate or a mismatch number.">
                    <option value="0" selected="true">Do not set</option>
                    <option value="1">Set the mismatch rate</option>
                    <option value="2">Set the mismatch number</option>
                </param>
                <when value="1">
                    <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t. the read length"/>
                </when>
                <when value="2">
                    <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/>
                </when>
            </conditional>
            <param argument="-w" type="integer" value="1000" min="0" max="1000" label="Maximum number of equal best hits to count" help="Maximum number of equal best hits to count. When multiple mapping occurs for a read, it should control the number of records to report. Default: 1000."/>
            <param argument="-D" type="text" label="Restriction enzyme digestion sites for RRBS mode" help="For RRBS data, this option activates RRBS mapping mode and set restriction enzyme digestion sites. Digestion position marked by '-', example: -D C-CGG for MspI digestion.  default: none (WGBS mode).">
              <validator type="regex" message="Use A/T/C/G/- for restriction enzyme digestion sites">^[ATCG-]*$</validator>
            </param>
            <param argument="-S" type="integer" value="0" label="Random seed" help="Seed for random number generation used in selecting multiple hits. Other seed values generate pseudo random number based on read index number, to allow reproducible mapping results. Default=0 (get seed from system clock, mapping results not reproducible)."/>
            <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+). For example, for a traditional library construction, two forward strands, ++ and -+, are sufficient for alignments. However, a Pico library construction needs all four-strand mappings."/>
            <param argument="-q" type="integer" value="0" min="0" max="40" label="Quality threshold in trimming" help="The quality threshold to trim read bases. To obtain an accurate mapping, low-quality bases should be skipped beforehand. Min=0, max=40. Default=0 (no trim)."/>
            <param argument="-z" type="select" label="Base quality" help="Base quality for sequencing reads, Illumina or Sanger.">
                <option value="33" selected="true">Sanger</option>
                <option value="64">Illumina</option>
            </param>
            <param argument="-f" type="integer" value="5" min="0" label="Maximum number of Ns in a read to filter out" help="To filter out low-quality reads containing >n Ns. Default=5."/>
            <param argument="-A" type="text" label="3' adapter sequence to trim" help="To trim 3' adapter sequence. Default: none (no trim).">
              <validator type="regex" message="Use A/T/C/G for adapter sequences, and the length should be greater equal to 12 bases.">^[ATCG]{12,}$|^$</validator>
            </param>
            <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow). When input reads coverage is high, it is suggested to report only unique hits (r=0). For a low-depth library, a random one from multiple mappings (r=1), or all multiple mappings (r=2) can be specified to increase read coverage, yet be cautious about bias caused by ambiguous mappings.">
                <option value="0" selected="true">0</option>
                <option value="1">1</option>
                <option value="2">2</option>
            </param>
            <param argument="-R" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Print corresponding reference sequences?" help="Yes: print corresponding reference sequences in mapping records, a `RS:` tag will be added in record attributes; No: do not print reference sequences."/>
            <param argument="-u" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Report unmapped reads?" help="Yes: print unmapped reads; No: do not print unmapped reads."/>
            <param argument="-m" type="integer" value="28" min="0" label="Minimal insert size allowed in paired-end mapping" help="For paired-end reads, the minimal insert size allowed in two end mapping. Default=28."/>
            <param argument="-x" type="integer" value="1000" min="0" label="Maximal insert size allowed in paired-end mapping" help="For paired-end reads, the maximal insert size allowed in two end mapping. Default=1000."/>
        </section>
        <!-- MCALL (methylation calling) options. Each value is referenced by the config file
             template and written to its [MCALL] section under a key of the same name. -->
        <section name="mcall_advanced" title="Advanced options for MCALL" expanded="False">
            <param argument="--cytosineMinScore" type="integer" value="20" min="0" label="Threshold for cytosine quality score" help="Threshold for cytosine quality score. Discard the base if threshold is not reached. Default=20."/>
            <param argument="--nextBaseMinScore" type="integer" value="3" min="-1" label="Threshold for the next base quality score" help="Threshold for the next base quality score. Possible values: -1 makes the program not to check if next base matches reference; any positive integer or zero makes the program to check if next base matches reference and reaches this score threshold; default=3, i.e., better than 'B' or '#'."/>
            <param argument="--qualityScoreBase" type="select" label="Specify the quality score system" help="Sanger, Solexa, or Illumina. See wiki FASTQ_format for details. Default: auto-detection.">
                <option value="0" selected="true">Auto-detection</option>
                <option value="33">Sanger</option>
                <option value="59">Solexa</option>
                <option value="64">Illumina</option>
            </param>
            <param argument="--trimWGBSEndRepairPE2Seq" type="integer" value="3" min="0" label="Bases to trim end-repair sequences from +-/--" help="To trim end-repair sequence from begin of +-/-- reads from Pair End WGBS Sequencing. 0: no trim; n (positive integer): trim n bases from begin of +-/-- reads. Default: trim 3 bases."/>
            <param argument="--trimWGBSEndRepairPE1Seq" type="integer" value="3" min="0" label="Bases to trim end-repair sequences from ++/-+" help="To trim end-repair sequence from end of ++/-+ reads from Pair End WGBS Sequencing. 0: no trim; n (positive integer): trim n bases from end of ++/-+ reads. Default: trim 3 bases."/>
            <param argument="--processPEOverlapSeq" type="select" label="Count once or twice the overlap sequence of two pairs" help="Two ends of paired-end reads may be overlapped in mapping. The overlap sequence will be counted once or twice for cytosine methylation measurements. Default: once.">
                <option value="1" selected="true">Once</option>
                <option value="0">Twice</option>
            </param>
            <param argument="--trimRRBSEndRepairSeq" type="select" label="How to trim end-repair sequence for RRBS reads?" help="To trim end-repair sequence for RRBS reads. 0: no trim; 2: trim the last CG at exactly end of ++/-+ reads and trim the first CG at exactly begin of +-/-- reads like the WGBS situation. Default=2.">
                <option value="2" selected="true">2</option>
                <option value="0">0</option>
            </param>
            <param argument="--minFragSize" type="integer" value="0" min="0" label="Minimal fragment size for properly mapped reads" help="To retain properly mapped and large enough fragment sizes. The 9th field in the BAM file is the fragment size of the mapping, and non-properly-paired read has 0 at the 9th field. This option is set to require properly paired and large enough fragment size. Default=0 for all records."/>
            <param argument="--minMMFragSize" type="integer" value="0" min="0" label="Minimal fragment size for multiply matched read" help="Same as --minFragSize but this option is only applicable to reads with flag 0x100 set as 1, i.e., reads multiply mapped. Default=0 for all records."/>
            <param argument="--reportCpX" type="select" label="Generates CpG/A/C/T methylation?" help="To generate methylation for CpG, or CpA/CpC/CpT. Default=CpG.">
                <option value="G" selected="true">CpG</option>
                <option value="C">CpC</option>
                <option value="A">CpA</option>
                <option value="T">CpT</option>
            </param>
        </section>
        <!-- MCOMP (comparison) options. Each value is referenced by the config file template
             and written to its [MCOMP] section. When the comparison is switched off
             (compare_selector value 0) a previously generated comparison file must be
             supplied; the command stages it as comp.g1.vs.g2.txt before moabs runs. -->
        <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False">
            <conditional name="doComp">
                <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`">
                    <option value="1" selected="true">Yes</option>
                    <option value="0">No</option>
                </param>
                <when value="0">
                    <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/>
                </when>
            </conditional>
            <param argument="-d" type="integer" value="3" min="0" label="Minimum depth for a site coverage" help="If a site has depth less than `d`, this site is ignored for statistical tests. This option affects much of nominal ratios but none of credible ratios. This option may be reset during later DMC/DMR rescan to filter sites with depth less than `d`. Default=3."/>
            <param argument="--filterCredibleDif" type="float" value="-10" label="Minimum absolute credible methylation difference (CDIF)" help="If absolute value of CDIF for a site less than filterCredibleDif, this site is ignored for regional calculation. Use a small value, such as 0.01, to filter all sites with no difference; use 0.20 (for example) to select DMCs. Any negative number means no filter. Default=-10."/>
            <param argument="--pFetDmc" type="float" value="0.05" min="0" max="1" label="Cutoff of Pvalue from Fisher Exact Test for DMC scan" help="Cutoff of P value from Fisher Exact Test for DMC scan. Default=`0.05`."/>
            <param argument="--pFetDmr" type="float" value="0.05" min="0" max="1" label="Cutoff of Pvalue from Fisher Exact Test for DMR scan" help="Cutoff of P value from Fisher Exact Test for DMR scan. Default=`0.05`."/>
            <param argument="--minNominalDif" type="float" value="0.33333" min="0" max="1" label="Minimal nominal methylation difference for DMC and DMR calling" help="Minimal nominal methylation difference for DMC and DMR. Default=`0.3333`."/>
            <param argument="--minCredibleDif" type="float" value="0.2" min="0" max="1" label="Minimal credible methylation difference for DMC calling" help="Minimal CDIF for DMC calling. Default=`0.2`."/>
            <param argument="--minDmcsInDmr" type="integer" value="3" min="1" label="Minimum number of DMCs in a DMR" help="Minimum number of DMCs in a DMR. Default=3."/>
            <param argument="--maxDistConsDmcs" type="integer" value="300" min="1" label="Maximum distance between two consecutive DMCs for a DMR" help="Maximum distance between two consecutive DMCs for a DMR. Default=300."/>
        </section>
        <!-- Which results to keep. The option values 1 to 5 are the keys tested by the output
             filters and, for values 1 to 3, by the copy steps of the command. All options are
             pre-selected; the parameter is optional, so the selection may also be empty. -->
        <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help="">
            <option value="1" selected="true">dmc_M2_g1.G.bed_vs_g2.G.bed.txt</option>
            <option value="2" selected="true">dmr_M2_g1.G.bed_vs_g2.G.bed.txt</option>
            <option value="3" selected="true">comp.g1.vs.g2.txt</option>
            <option value="4" selected="true">BAM files</option>
            <option value="5" selected="true">Methylation calling BED files</option>
        </param>
    </inputs>
    <outputs>
        <!-- Each output exists only when its value is selected in output_selector. The leading
             "output_selector and" guard keeps every filter well-defined when the optional
             multi-select is left empty (presumably None at this point, on which a bare "in"
             test raises instead of evaluating to False). -->
        <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmc_M2_g1.G.bed_vs_g2.G.bed.txt">
            <filter>output_selector and "1" in output_selector</filter>
        </data>
        <data name="output2" format="interval" label="${tool.name} on ${on_string} : dmr_M2_g1.G.bed_vs_g2.G.bed.txt">
            <filter>output_selector and "2" in output_selector</filter>
        </data>
        <data name="output3" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt">
            <filter>output_selector and "3" in output_selector</filter>
        </data>
        <!-- The two collections are filled by file discovery: every file whose name ends in
             .bam, and the files g1.G.bed / g2.G.bed; the matched file name becomes the
             element designation. -->
        <collection name="output_collection_bam" type="list" label="BAM files">
            <filter>output_selector and "4" in output_selector</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext='bam'/>
        </collection>
        <collection name="output_collection_bed" type="list" label="Methylation calling BED files">
            <filter>output_selector and "5" in output_selector</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext='interval'/>
        </collection>
    </outputs>
    <tests>
        <test expect_num_outputs="5">
            <!-- Single-end reads: two replicates per group, reference FASTA taken from the
                 history. The comparison step is switched off (compare_selector=0) and a
                 precomputed comparison file is supplied instead. All five outputs are
                 requested: three datasets plus the BAM and BED collections. -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="chr11.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison (compare_selector=1) instead of
                 supplying a precomputed file. NOTE(review): the reason for disabling it is
                 not recorded here.
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2,3,4,5"/>
            <output name="output1" file="SE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output2" file="SE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output3" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
            <output_collection name="output_collection_bam" count="4">
                <element name="g1_r1.bam" file="SE_g1_r1.bam" ftype="bam" lines_diff="2"/>
                <element name="g1_r2.bam" file="SE_g1_r2.bam" ftype="bam" lines_diff="2"/>
                <element name="g2_r1.bam" file="SE_g2_r1.bam" ftype="bam" lines_diff="2"/>
                <element name="g2_r2.bam" file="SE_g2_r2.bam" ftype="bam" lines_diff="2"/>
            </output_collection>
            <output_collection name="output_collection_bed" count="2">
                <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/>
                <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/>
            </output_collection>
        </test>
        <test expect_num_outputs="3">
            <!-- Paired-end reads given as two separate datasets, one replicate per group,
                 reference FASTA from the history. The comparison step is switched off and a
                 precomputed comparison file is supplied; only the three dataset outputs are
                 requested. -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="seg.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired"/>
                    <param name="fastq_input1" value="6_all_1.fq.gz"/>
                    <param name="fastq_input2" value="6_all_2.fq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired"/>
                    <param name="fastq_input1" value="8_all_1.fq.gz"/>
                    <param name="fastq_input2" value="8_all_2.fq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison (compare_selector=1) instead of
                 supplying a precomputed file. NOTE(review): the reason for disabling it is
                 not recorded here.
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2,3"/>
            <output name="output1" file="PE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output2" file="PE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output3" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
        <test expect_num_outputs="3">
            <!-- Paired-end reads given as paired collections (forward/reverse elements), one
                 replicate per group. Uses the same read files, reference and expected result
                 files as the paired-dataset test; only the three dataset outputs are
                 requested. -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="seg.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired_collection"/>
                    <param name="fastq_input1">
                        <collection type="paired">
                            <element name="forward" value="6_all_1.fq.gz" />
                            <element name="reverse" value="6_all_2.fq.gz" />
                        </collection>
                    </param>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired_collection"/>
                    <param name="fastq_input1">
                        <collection type="paired">
                            <element name="forward" value="8_all_1.fq.gz" />
                            <element name="reverse" value="8_all_2.fq.gz" />
                        </collection>
                    </param>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison (compare_selector=1) instead of
                 supplying a precomputed file. NOTE(review): the reason for disabling it is
                 not recorded here.
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2,3"/>
            <output name="output1" file="PE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output2" file="PE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output3" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
        <test expect_num_outputs="3">
            <!-- Reference selected from the data table ("cached", entry chr11) rather than
                 from the history. Uses the same single-end reads and expected result files as
                 the single-end test; only the three dataset outputs are requested. -->
            <param name="reference_source_selector" value="cached"/>
            <param name="ref_file" value="chr11"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!-- Disabled alternative: run the comparison (compare_selector=1) instead of
                 supplying a precomputed file. NOTE(review): the reason for disabling it is
                 not recorded here.
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2,3"/>
            <output name="output1" file="SE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output2" file="SE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
            <output name="output3" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**MOABS: MOdel based Analysis of Bisulfite Sequencing data**

MOABS is a comprehensive, accurate and efficient solution for analysis of large
scale base-resolution DNA methylation data, bisulfite sequencing or single
molecule direct sequencing.

MOABS seamlessly integrates alignment, methylation calling, identification of
hypomethylation for one sample and differential methylation for multiple
samples, and other downstream analysis.

For more information, check https://github.com/sunnyisgalaxy/moabs.

-----

**Input files**

MOABS takes bisulfite sequencing reads from two groups of interest, e.g.
KO vs WT. Each group may combine different library types, i.e.
single-end reads and/or paired-end reads. Multiple replicates can be specified in each group.

**Outputs**

Five output files can be selected to report, namely

    1. **DMC site file** - the major DMC result file
    2. **DMR region file** - the major DMR result file
    3. **Comparison file between two groups** - the intermediate comparison result
    4. **BAM files** - intermediate BAM files
    5. **Methylation BED files** - intermediate methylation BED files

-----

MOABS detects differentially methylated cytosines (DMCs) and differentially
methylated regions (DMRs) using the input BS-Seq reads. The output DMC and DMR
files are tab-delimited text files (not strictly a BED format), representing
DMCs and DMRs.

A DMC site file has 15 columns as below.

chrom<TAB>start<TAB>end<TAB>totalC_0<TAB>nominalRatio_0<TAB>ratioCI_0<TAB>totalC_1<TAB>nominalRatio_1<TAB>ratioCI_1<TAB>nominalDif_1-0<TAB>credibleDif_1-0<TAB>difCI_1-0<TAB>p_sim_1_v_0<TAB>p_fet_1_v_0<TAB>class

    1. **chrom** - The chromosome of the CpG site.
    2. **start** - The start genomic locus of the CpG site.
    3. **end** - The end genomic locus of the CpG site.
    4. **totalC_0** - The total number of CpG read coverage in group 0.
    5. **nominalRatio_0** - The nominal methylation ratio of the CpG in group 0.
    6. **ratioCI_0** - The confidence interval (CI) of the nominal methylation ratio at the CpG site in group 0.
    7. **totalC_1** - The total number of CpG read coverage in group 1.
    8. **nominalRatio_1** - The nominal methylation ratio of the CpG in group 1.
    9. **ratioCI_1** - The confidence interval (CI) of the nominal methylation ratio at the CpG site in group 1.
    10. **nominalDif_1-0** - The nominal methylation difference between the group 1 and the group 0.
    11. **credibleDif_1-0** - The credible methylation difference (CDIF) between the group 1 and the group 0.
    12. **difCI_1-0** - The difference of ratio CIs between the group 1 and the group 0.
    13. **p_sim_1_v_0** - P-value according to the similarity probabilities.
    14. **p_fet_1_v_0** - P-value according to the Fisher exact test.
    15. **class** - 5-state class labels by methylation differences and p-values.

For example, CpGs in the DMC file are recorded in the following format.

@DMCExample@

A DMR result file has 12 columns as below.

chrom<TAB>start<TAB>end<TAB>meanRatio_0<TAB>totalC_0<TAB>cSites_0<TAB>meanRatio_1<TAB>totalC_1<TAB>cSites_1<TAB>methDif_1-0<TAB>p_1_v_0<TAB>class_1_v_0

    1. **chrom** - The chromosome of the region.
    2. **start** - The start genomic locus of the region.
    3. **end** - The end genomic locus of the region.
    4. **meanRatio_0** - Mean methylation ratio of the region in group 0.
    5. **totalC_0** - Total cytosine coverage of the region in group 0.
    6. **cSites_0** - The number of CpG sites of the region in group 0.
    7. **meanRatio_1** - Mean methylation ratio of the region in group 1.
    8. **totalC_1** - Total cytosine coverage of the region in group 1.
    9. **cSites_1** - The number of CpG sites of the region in group 1.
    10. **methDif_1-0** - Average methylation difference of the region between group 1 and group 0.
    11. **p_1_v_0** - P-value from Fisher exact test of the region between group 1 and group 0.
    12. **class_1_v_0** - 5-state class labels for the DMR.

For example, four DMRs are identified in the following format.

@DMRExample@

-----

The intermediate comparison file summarizes methylation ratio comparison
results on CpG sites. It has 19 columns as below.

    1. **chrom** - The chromosome of the CpG site.
    2. **start** - The start position of the site.
    3. **end** - The end position of the site.
    4. **single** - The next three columns are attributes for the single position in the first group.
    5. **totalC_0** - Total number of Cs in the first group.
    6. **nominalRatio_0** - Nominal methylation ratio in the first group.
    7. **ratioCI_0** - The confidence interval of the methylation ratio in the first group.
    8. **single** - The next three columns are attributes for the single position in the second group.
    9. **totalC_1** - Total number of Cs in the second group.
    10. **nominalRatio_1** - Nominal methylation ratio in the second group.
    11. **ratioCI_1** - The confidence interval of the methylation ratio in the second group.
    12. **pair** - The next three columns are attributes for pairs of groups.
    13. **nominalDif_1-0** - Nominal difference of methylation ratio between group 1 and group 0.
    14. **credibleDif_1-0** - Credible methylation difference between group 1 and group 0.
    15. **difCI_1-0** - Difference of confidence intervals between group 1 and group 0.
    16. **p_sim** - The next column is the simulation p-value.
    17. **p_sim_1_v_0** - Simulation p-value between group 1 and group 0.
    18. **p_fet** - The next column is the FET p-value.
    19. **p_fet_1_v_0** - FET p-value between group 1 and group 0.

The comparison result file can be reused for DMR calling.

-----

BAM files are intermediate mapping results of input reads to the reference
genome. These BAM files can be reused in downstream methylation analysis.

-----

Methylation calling BED files are intermediate methylation calling results of
Cs in two groups of input reads. These methylation calling results can be easily
reused in downstream DMR calling and visualization. The BED file has 15 columns
as below.

    1. **chrom** - The chromosome of the site.
    2. **start** - The start position of the site.
    3. **end** - The end position of the site.
    4. **ratio** - Methylation ratio at the site.
    5. **totalC** - Total number of reads in current Cs.
    6. **methC** - Methylated Cs.
    7. **strand** - The strand information for previous three columns.
    8. **next** - The next base.
    9. **Plus** - Next two columns are for forward strand.
    10. **totalC** - Total number of Cs.
    11. **methC** - Methylated Cs.
    12. **Minus** - Next two columns are for reverse strand.
    13. **totalC** - Total number of Cs.
    14. **methC** - Methylated Cs.
    15. **localSeq** - Local sequences.

]]>
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Thu, 26 Aug 2021 11:45:34 +0000
parents	8c8cc81b34cd
children