Mercurial > repos > iuc > moabs
diff moabs.xml @ 0:26d7ec4af119 draft
"planemo upload for repository https://github.com/sunnyisgalaxy/moabs commit fca680a439f168971afc9944ccbbdd9b3b65c845"
author | iuc |
---|---|
date | Fri, 06 Sep 2019 09:54:27 -0400 |
parents | |
children | 8c8cc81b34cd |
line wrap: on
line diff
<tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@">
    <description>MOdel based Analysis of Bisulfite Sequencing data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command outline:
          1. When the comparison step is disabled (compare_selector == "0"), the
             user-supplied comparison file is copied to comp.g1.vs.g2.txt.
          2. moabs runs with the generated config file; the thread count of the
             MMAP, MCALL and MCOMP stages is taken from GALAXY_SLOTS (default 4).
          3. The selected result files are copied to their Galaxy outputs.
        output_selector is an optional multi-select, so membership is tested on
        its comma-separated string form, which is also safe when nothing is selected.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        #if str( $mcomp_advanced.doComp.compare_selector ) == "0":
            cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt &&
        #end if
        moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' &&
        #if "1" in str( $output_selector ).split(","):
            cp -f dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr '$output1' &&
        #end if
        #if "2" in str( $output_selector ).split(","):
            cp -f comp.g1.vs.g2.txt '$output2' &&
        #end if
        echo Done
        ]]>
    </command>
    <!--
        Generated moabs configuration:
          [INPUT]  one s1_rN / s2_rN entry per replicate of group 1 / group 2
                   (with _1 and _2 suffixes for paired reads);
          [TASK]   labels the two groups g1 and g2;
          [MMAP], [MCALL], [MCOMP]  per-stage settings, all pointing at the same
                   reference FASTA (history dataset or data-table path).
    -->
    <configfiles>
        <configfile name="cfg_file">
            #if str( $reference_source.reference_source_selector ) == "history":
                #set $reference_fasta_filename = $reference_source.ref_file
            #else:
                #set $reference_fasta_filename = $reference_source.ref_file.fields.path
            #end if
            [INPUT]
            #for $i, $s in enumerate( $g1_fastq )
                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
                    s1_r${i+1}_1='$s.fastq_input.fastq_input1'
                    s1_r${i+1}_2='$s.fastq_input.fastq_input2'
                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
                    s1_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
                    s1_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
                #else:
                    s1_r${i+1}='$s.fastq_input.fastq_input1'
                #end if
            #end for

            #for $i, $s in enumerate( $g2_fastq )
                #if str( $s.fastq_input.fastq_input_selector ) == "paired":
                    s2_r${i+1}_1='$s.fastq_input.fastq_input1'
                    s2_r${i+1}_2='$s.fastq_input.fastq_input2'
                #elif str( $s.fastq_input.fastq_input_selector ) == "paired_collection":
                    s2_r${i+1}_1='$s.fastq_input.fastq_input1.forward'
                    s2_r${i+1}_2='$s.fastq_input.fastq_input1.reverse'
                #else:
                    s2_r${i+1}='$s.fastq_input.fastq_input1'
                #end if
            #end for

            [TASK]
            Program=MMAP
            Label=g1,g2
            Parallel=NONE

            [MMAP]
            Path=bsmap
            d='${reference_fasta_filename}'
            #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0":
                v=$bsmap_advanced.bsmap_mismatch.v
            #end if
            n=$bsmap_advanced.n
            r=$bsmap_advanced.r
            R=''

            [MCALL]
            Path=mcall
            r='${reference_fasta_filename}'

            [MCOMP]
            Path=mcomp
            reference='${reference_fasta_filename}'
            doComp=$mcomp_advanced.doComp.compare_selector
        </configfile>
    </configfiles>

    <inputs>
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a cache FASTA?" help="Cached FASTA">
                <option value="cached">Use a cached genome FASTA</option>
                <option value="history">Use a genome FASTA from history</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No genome FASTA are available" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>
        <repeat name="g1_fastq" title="Group1: fastq files" min="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                    <option value="single">Single</option>
                    <option value="paired">Paired</option>
                    <option value="paired_collection">Paired Collection</option>
                </param>
                <when value="paired">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                </when>
                <when value="single">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
                </when>
                <when value="paired_collection">
                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                </when>
            </conditional>
        </repeat>
        <repeat name="g2_fastq" title="Group2: fastq files" min="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                    <option value="single">Single</option>
                    <option value="paired">Paired</option>
                    <option value="paired_collection">Paired Collection</option>
                </param>
                <when value="paired">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
                    <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                </when>
                <when value="single">
                    <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz" label="Select fastq dataset" help="Specify dataset with single reads"/>
                </when>
                <when value="paired_collection">
                    <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                </when>
            </conditional>
        </repeat>
        <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False">
            <conditional name="bsmap_mismatch">
                <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help="">
                    <option value="0">Do not set</option>
                    <option value="1">Set the mismatch rate</option>
                    <option value="2">Set the mismatch number</option>
                </param>
                <when value="1">
                    <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t to the read length"/>
                </when>
                <when value="2">
                    <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/>
                </when>
            </conditional>
            <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+)"/>
            <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow)">
                <option value="0" selected="true">0</option>
                <option value="1">1</option>
                <option value="2">2</option>
            </param>
        </section>
        <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False">
            <conditional name="doComp">
                <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`">
                    <option value="1">Yes</option>
                    <option value="0">No</option>
                </param>
                <when value="0">
                    <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/>
                </when>
            </conditional>
        </section>
        <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help="">
            <option value="1"> dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr </option>
            <option value="2"> comp.g1.vs.g2.txt </option>
            <option value="3"> BAM files </option>
            <option value="4"> Methylation calling BED files </option>
        </param>
    </inputs>
    <!--
        Each output is created only when its number is selected in output_selector.
        The selector is optional, so every filter first checks that a selection
        exists before testing membership.
        The named group in the discovery patterns is written with XML entities
        because a literal less-than sign is not allowed inside an attribute value.
    -->
    <outputs>
        <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr">
            <filter>output_selector and "1" in output_selector</filter>
        </data>
        <data name="output2" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt">
            <filter>output_selector and "2" in output_selector</filter>
        </data>
        <collection name="output_collection_bam" type="list" label="BAM files">
            <filter>output_selector and "3" in output_selector</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext="bam"/>
        </collection>
        <collection name="output_collection_bed" type="list" label="Methylation calling BED files">
            <filter>output_selector and "4" in output_selector</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext="interval"/>
        </collection>
    </outputs>
    <!--
        All tests supply a precomputed comparison file (compare_selector = 0);
        the variant that runs the comparison is kept commented out in each test.
    -->
    <tests>
        <test>
            <!-- test single-end reads -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="chr11.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2,3,4"/>
            <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
            <output_collection name="output_collection_bam" count="4">
                <element name="g1_r1.bam" file="SE_g1_r1.bam" compare="sim_size"/>
                <element name="g1_r2.bam" file="SE_g1_r2.bam" compare="sim_size"/>
                <element name="g2_r1.bam" file="SE_g2_r1.bam" compare="sim_size"/>
                <element name="g2_r2.bam" file="SE_g2_r2.bam" compare="sim_size"/>
            </output_collection>
            <output_collection name="output_collection_bed" count="2">
                <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/>
                <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/>
            </output_collection>
        </test>
        <test>
            <!-- test paired-end reads -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="seg.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired"/>
                    <param name="fastq_input1" value="6_all_1.fq.gz"/>
                    <param name="fastq_input2" value="6_all_2.fq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired"/>
                    <param name="fastq_input1" value="8_all_1.fq.gz"/>
                    <param name="fastq_input2" value="8_all_2.fq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2"/>
            <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
        <test>
            <!-- test paired collection -->
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="seg.fa"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired_collection"/>
                    <param name="fastq_input1">
                        <collection type="paired">
                            <element name="forward" value="6_all_1.fq.gz" />
                            <element name="reverse" value="6_all_2.fq.gz" />
                        </collection>
                    </param>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="paired_collection"/>
                    <param name="fastq_input1">
                        <collection type="paired">
                            <element name="forward" value="8_all_1.fq.gz" />
                            <element name="reverse" value="8_all_2.fq.gz" />
                        </collection>
                    </param>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="PE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2"/>
            <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
        <test>
            <!-- test data table reference -->
            <param name="reference_source_selector" value="cached"/>
            <param name="ref_file" value="chr11"/>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g1_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="WTPE2.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE1.fastq.gz"/>
                </conditional>
            </repeat>
            <repeat name="g2_fastq">
                <conditional name="fastq_input">
                    <param name="fastq_input_selector" value="single"/>
                    <param name="fastq_input1" value="TKO2PE2.fastq.gz"/>
                </conditional>
            </repeat>
            <conditional name="doComp">
                <param name="compare_selector" value="0"/>
                <param name="compFile" value="SE_comp.g1.vs.g2.txt"/>
            </conditional>
            <!--
            <conditional name="doComp">
                <param name="compare_selector" value="1"/>
            </conditional>
            -->
            <param name="output_selector" value="1,2"/>
            <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/>
            <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**MOABS: MOdel based Analysis of Bisulfite Sequencing data**

MOABS is a comprehensive, accurate and efficient solution for analysis of large
scale base-resolution DNA methylation data, bisulfite sequencing or single
molecule direct sequencing.

MOABS seamlessly integrates alignment, methylation calling, identification of
hypomethylation for one sample and differential methylation for multiple
samples, and other downstream analysis.

For more information, check https://github.com/sunnyisgalaxy/moabs.

-----

**Input files**

MOABS needs to input Bisulfite sequencing reads in two groups of interest, e.g.
KO vs WT. Each group of reads may have combined sequencing library, i.e.
single-end reads and/or paired-end reads. Multiple replicates can be specified in each group.

**Outputs**

Four output files can be selected to report, namely

  1. **DMR region file** - the major result file
  2. **Comparison file between two groups** - the intermediate comparison result
  3. **BAM files** - intermediate BAM files
  4. **Methylation BED files** - intermediate methylation BED files

-----

MOABS will detect differential methylated regions (DMRs) using the input BS-Seq
reads. The output file is a tab-delimited text file (not strictly a BED
format), representing DMRs. It has 8 columns as below.

chrom<TAB>start<TAB>end<TAB>methylation_state<TAB>CpGsites<TAB>DMCcount<TAB>nonDMCcount<TAB>hidden_state

  1. **chrom** - The chromosome of the region.
  2. **start** - The start genomic locus of the region.
  3. **end** - The end genomic locus of the region.
  4. **methylation_state** - The methylation state of the region, "+"/"-" representing hyper- or hypo-methylation regions.
  5. **CpGsites** - Total number of CpG sites in the region.
  6. **DMCcount** - The number of differential methylated CpG sites (DMCs) in the region.
  7. **nonDMCcount** - The number of non-DMCs in the region.
  8. **hidden_state** - The hidden state predicted by Hidden Markov Model (HMM), "1"/"-1" representing hyper- or hypo-methylation states.

For example, six DMRs are identified in the following format.

@DMRExample@

-----

The intermediate comparison file summarizes methylation ratio comparison
results on CpG sites. It has 19 columns as below.

  1. **chrom** - The chromosome of the CpG site.
  2. **start** - The start position of the site.
  3. **end** - The end position of the site.
  4. **single** - The next two columns are attributes for the single position.
  5. **totalC_0** - Total number of Cs in the first group.
  6. **nominalRatio_0** - Nominal methylation ratio in the first group.
  7. **ratioCI_0** - The confidence interval of the methylation ratio in the first group.
  8. **single** - The next two columns are attributes for the single position.
  9. **totalC_1** - Total number of Cs in the second group.
  10. **nominalRatio_1** - Nominal methylation ratio in the second group.
  11. **ratioCI_1** - The confidence interval of the methylation ratio in the second group.
  12. **pair** - The next three columns are attributes for pairs of groups.
  13. **nominalDif_1-0** - Nominal difference of methylation ratio between group 1 and group 0.
  14. **credibleDif_1-0** - Credible methylation difference between group 1 and group 0.
  15. **difCI_1-0** - Difference of confidence intervals between group 1 and group 0.
  16. **p_sim** - The next column is the simulation p-value.
  17. **p_sim_1_v_0** - Simulation p-value between group 1 and group 0.
  18. **p_fet** - The next column is the FET p-value.
  19. **p_fet_1_v_0** - FET p-value between group 1 and group 0.

The comparison result file can be reused for DMR calling.

-----

BAM files are intermediate mapping results of input reads to the reference
genome. These BAM files can be reused in downstream methylation analysis.

-----

Methylation calling BED files are intermediate methylation calling results of
Cs in two groups of input reads. These methylation calling results can be easily
reused in downstream DMR calling and visualization. The BED file has 15 columns
as below.

  1. **chrom** - The chromosome of the site.
  2. **start** - The start position of the site.
  3. **end** - The end position of the site.
  4. **ratio** - Methylation ratio in the site
  5. **totalC** - Total number of reads in current Cs.
  6. **methC** - Methylated Cs.
  7. **strand** - The strand information for previous three columns.
  8. **next** - The next base.
  9. **Plus** - Next two columns are for forward strand.
  10. **totalC** - Total number of Cs.
  11. **methC** - Methylated Cs.
  12. **Minus** - Next two columns are for reverse strand.
  13. **totalC** - Total number of Cs.
  14. **methC** - Methylated Cs.
  15. **localSeq** - Local sequences.

]]>
    </help>
    <expand macro="citations"/>
</tool>