comparison damidseq_core.xml @ 0:eb3a145c4962 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damidseq_core commit b'33637968e1e32c02d7765a6701e930a0ea0dd903\n'
author mvdbeek
date Wed, 22 Mar 2017 09:56:09 -0400
parents
children 0d1514ecd757
comparison
equal deleted inserted replaced
-1:000000000000 0:eb3a145c4962
1 <tool id="damidseq_core" name="damidseq" version="0.1.0">
2 <description>align, extend and normalize a DAMID-seq experiment</description>
3 <requirements>
4 <requirement type="package" version="1.4">damidseq_pipeline</requirement>
5 </requirements>
6 <version_command><![CDATA[damidseq_pipeline --help 2>&1| grep damidseq_pipeline]]></version_command>
7 <command detect_errors="aggressive"><![CDATA[
8 export HOME="\$PWD" &&
9 ln -f -s '$dam' A001.$dam.ext &&
10 ln -f -s '$dam_fusion' A002.$dam_fusion.ext &&
11 ln -f -s '$index' index.txt &&
12 damidseq_pipeline
13 --bins=$bins
14 --bowtie=1
15 --bowtie2_genome_dir='$reference_index.fields.path'
16 --extend_reads=$extend_reads
17 --extension_method='$extension_method'
18 $full_data_files
19 --gatc_frag_file='$gatc_frag_file'
20 --len=$len
21 --max_norm_value='$max_norm_value'
22 $method_subtract
23 --min_norm_value='$min_norm_value'
24 --norm_method=$norm_method
25 --norm_steps=$norm_steps
26 --output_format=$output_format
27 --q=$q
28 --qscore1max=$qscore1max
29 --qscore1min=$qscore1min
30 --qscore2max=$qscore2max
31 --threads=\${GALAXY_SLOTS:-4} &&
32 mv Fusion-vs-Dam.*.$output_format fusion.output
33 ]]></command>
34 <configfiles>
35 <configfile name="index">A1 Dam
36 A2 Fusion</configfile>
37 </configfiles>
38 <inputs>
39 <param argument="--dam" type="data" format="fastq,fastq.gz" label="Control DAM reads (FASTQ)"/> <!-- raw reads: alignment is done by the pipeline itself (bowtie=1 in the command) -->
40 <param name="dam_fusion" type="data" format="fastq,fastq.gz" label="DAM fusion reads (FASTQ)"/>
41 <param name="reference_index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
42 <options from_data_table="bowtie2_indexes">
43 <filter type="sort_by" column="2"/>
44 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
45 </options>
46 </param>
47 <param argument="--gatc_frag_file" type="data" format="gff" label="GFF file with all GATC locations"/>
48 <param name="output_format" type="select" label="Select the output format for the peaks">
49 <option value="bedgraph">Bedgraph</option>
50 <option value="gff">GFF</option>
51 </param>
52 <param argument="--extend_reads" type="boolean" truevalue="1" falsevalue="0" checked="True" label="Perform read extension?"/>
53 <param argument="--extension_method" type="select" label="Select the read extension method" help="Select Full to extend all reads or GATC to extend reads to --len or to the next GATC site, whichever is shorter. Using this option increases peak resolution (default).">
54 <option value="gatc">To nearest GATC site</option>
55 <option value="full">Full</option>
56 </param>
57 <param argument="--full_data_files" type="boolean" truevalue="--full_data_files" falsevalue="" label="Output full binned ratio files (not only GATC array)"/> <!-- truevalue is pasted verbatim into the command line, so it must spell the switch exactly -->
58 <param argument="--len" type="integer" min="50" value="300" label="Length to extend reads to"/>
59 <param argument="--bins" type="integer" min="10" value="75" label="Width of bins to use for mapping reads"/>
60 <param argument="--min_norm_value" type="float" value="-5.0" label="Minimum log2 value to limit normalisation search at"/>
61 <param argument="--max_norm_value" type="float" value="5.0" label="Maximum log2 value to limit normalisation search at"/>
62 <param argument="--method_subtract" type="boolean" truevalue="--method_subtract" falsevalue="" label="Subtract DAM control values from DAM-fusion values instead of using the log2 ratio?"/>
63 <param argument="--norm_method" type="select" label="Select normalization method">
64 <option value="kde">kernel density estimation of log2 GATC fragment ratio (recommended)</option>
65 <option value="rpm">readcounts per million reads (not recommended for most use cases)</option>
66 </param>
67 <param argument="--norm_steps" type="integer" min="1" value="300" label="Number of points in normalisation routine"/>
68 <param argument="--q" type="integer" value="30" min="0" label="Cutoff average Q score for aligned reads"/>
69 <param argument="--qscore1min" type="float" min="0.0" value="0.4" max="1.0" label="min decile for normalising from Dam array"/>
70 <param argument="--qscore1max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from Dam array"/>
71 <param argument="--qscore2max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from fusion-protein array"/>
72 </inputs>
73 <outputs>
74 <data name="output_ratio" format="bedgraph" from_work_dir="fusion.output" label="DAM-fusion vs Dam-only ratio">
75 <change_format>
76 <when input="output_format" value="gff" format="gff" />
77 </change_format>
78 </data>
79 <data name="control_output" format="bam" from_work_dir="Dam-ext300.bam" label="DAM-only alignment"/> <!-- NOTE(review): "ext300" is fixed here and on the fusion output below, while the len input (default 300) is user-settable; presumably the BAM name follows len, so other values may leave these outputs uncollected. TODO confirm against the pipeline's file naming -->
80 <data name="fusion_output" format="bam" from_work_dir="Fusion-ext300.bam" label="DAM-fusion alignment"/>
81 </outputs>
82 <tests>
83 <test>
84 <param name="dam" value="A001.fastq"/>
85 <param name="dam_fusion" value="A002.fastq"/>
86 <param name="gatc_frag_file" value="dm6.GATC.gff"/>
87 <param name="reference_index" value="dm6"/> <!-- must match the name of the reference genome select input -->
88 <param name="norm_method" value="rpm"/>
89 <output name="output_ratio" file="output_ratio.bedgraph"/>
90 <output name="control_output" file="control.bam"/>
91 <output name="fusion_output" file="fusion.bam"/>
92 </test>
93 </tests>
94 <help><![CDATA[
95
96 Processing DamID-seq data involves extending single-end reads, aligning
97 the reads to the genome and determining the coverage, similar to
98 processing regular ChIP-seq datasets. However, as DamID data is
99 represented as a log2 ratio of (Dam-fusion/Dam), normalisation of the
100 sample and Dam-only control is necessary and adding pseudocounts to
101 mitigate the effect of background counts is highly recommended.
102
103 damidseq_pipeline is a single script that automatically handles
104 sequence alignment, read extension, binned counts, normalisation,
105 pseudocount addition and final ratio file generation. The script uses
106 FASTQ or BAM files as input, and outputs the final log2 ratio files in
107 bedGraph (or optionally GFF) format.
108
109 The output ratio files can easily be converted to TDF for viewing in IGV using
110 igvtools. The files can be processed for peak calling using find_peaks or, if
111 using RNA pol II DamID, transcribed genes can be determined using
112 polii.gene.call.
113
114 ]]></help>
115 <citations>
116 <citation type="doi">10.1093/bioinformatics/btv386</citation>
117 </citations>
118 </tool>