comparison computeGCBias.xml @ 13:b4c5dd45778a draft

Uploaded
author bgruening
date Mon, 16 Dec 2013 04:36:19 -0500
parents 135f3bae5c56
children 16310f8b24d5
comparison
equal deleted inserted replaced
12:aea72b24dea0 13:b4c5dd45778a
8 <command> 8 <command>
9 ln -s $bamInput local_bamInput.bam; 9 ln -s $bamInput local_bamInput.bam;
10 ln -s $bamInput.metadata.bam_index local_bamInput.bam.bai; 10 ln -s $bamInput.metadata.bam_index local_bamInput.bam.bai;
11 11
12 computeGCBias 12 computeGCBias
13
14 @THREADS@ 13 @THREADS@
15 14
16 --bamfile 'local_bamInput.bam' 15 --bamfile 'local_bamInput.bam'
17 --GCbiasFrequenciesFile $outFileName 16 --GCbiasFrequenciesFile $outFileName
18 --fragmentLength $fragmentLength 17 --fragmentLength $fragmentLength
40 #if $advancedOpt.extraSampling: 39 #if $advancedOpt.extraSampling:
41 --extraSampling $advancedOpt.extraSampling 40 --extraSampling $advancedOpt.extraSampling
42 #end if 41 #end if
43 #end if 42 #end if
44 43
45 #if $saveBiasPlot: 44 #if str($image_format) != 'none':
46 --biasPlot $biasPlot 45 --biasPlot $outImageName
46 --plotFileFormat $image_format
47 #end if 47 #end if
48
49 ## #if $output.showOutputSettings == "yes"
50 ## #if $output.saveBiasPlot:
51 ## --biasPlot biasPlot.png ;
52 ## mv biasPlot.png $biasPlot
53 ## #end if
54 ## #end if
55
56 </command> 48 </command>
57 <inputs> 49 <inputs>
58
59 <param name="bamInput" format="bam" type="data" label="BAM file" 50 <param name="bamInput" format="bam" type="data" label="BAM file"
60 help="The BAM file must be sorted."/> 51 help="The BAM file must be sorted."/>
61 52
62 <expand macro="reference_genome_source" /> 53 <expand macro="reference_genome_source" />
63 <expand macro="effectiveGenomeSize" /> 54 <expand macro="effectiveGenomeSize" />
64 55
65 <param name="fragmentLength" type="integer" value="300" min="1" 56 <param name="fragmentLength" type="integer" value="300" min="1"
66 label="Fragment length used for the sequencing" 57 label="Fragment length used for the sequencing"
67 help ="If paired-end reads are used, the fragment length is computed from the BAM file."/> 58 help ="If paired-end reads are used, the fragment length is computed from the BAM file."/>
59 <conditional name="advancedOpt">
60 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
61 <option value="no" selected="true">no</option>
62 <option value="yes">yes</option>
63 </param>
64 <when value="no" />
65 <when value="yes">
66 <param name="region" type="text" value=""
67 label="Region of the genome to limit the operation to"
68 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />
68 69
69 <conditional name="advancedOpt"> 70 <param name="sampleSize" type="integer" value="50000000" min="1"
70 <param name="showAdvancedOpt" type="select" label="Show advanced options" > 71 label="Number of sampling points to be considered" />
71 <option value="no" selected="true">no</option> 72
72 <option value="yes">yes</option> 73 <param name="regionSize" type="integer" value="300" min="1"
74 label="Region size"
75 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size for many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/>
76
77 <param name="filterOut" type="data" format="bed" optional="true"
78 label="BED file containing genomic regions to be excluded from the estimation of the correction"
79 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." />
80 <param name="extraSampling" type="data" format="bed" optional="true"
81 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
82 help="" />
83 </when>
84 </conditional>
85 <param name="image_format" type="select" label="GC bias plot" help="If given, a diagnostic image summarizing the GC bias found on the sample will be created.">
86 <option value="none" selected="true">No image</option>
87 <option value="png">Image in png format</option>
88 <option value="pdf">Image in pdf format</option>
89 <option value="svg">Image in svg format</option>
90 <option value="eps">Image in eps format</option>
91 <option value="emf">Image in emf format</option>
73 </param> 92 </param>
74 <when value="no" /> 93 </inputs>
75 <when value="yes"> 94 <outputs>
76 <param name="region" type="text" value="" 95 <data format="tabular" name="outFileName" />
77 label="Region of the genome to limit the operation to" 96 <data format="png" name="outImageName" label="${tool.name} GC-bias Plot">
78 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" /> 97 <filter>
79 98 ((
80 <param name="sampleSize" type="integer" value="50000000" min="1" 99 image_format != 'none'
81 label="Number of sampling points to be considered" /> 100 ))
82 101 </filter>
83 <param name="regionSize" type="integer" value="300" min="1" 102 <change_format>
84 label="Region size" 103 <when input="image_format" value="pdf" format="pdf" />
85 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size for many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/> 104 <when input="image_format" value="svg" format="svg" />
86 105 <when input="image_format" value="eps" format="eps" />
87 <param name="filterOut" type="data" format="bed" optional="true" 106 <when input="image_format" value="emf" format="emf" />
88 label="BED file containing genomic regions to be excluded from the estimation of the correction" 107 </change_format>
89 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." /> 108 </data>
90 <param name="extraSampling" type="data" format="bed" optional="true" 109 </outputs>
91 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome" 110 <help>
92 help="" />
93 </when>
94 </conditional>
95
96 <param name="saveBiasPlot" type="boolean" truevalue="--biasPlot" falsevalue="" checked="True" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
97 <!--
98 <conditional name="output" >
99 <param name="showOutputSettings" type="select" label="Show additional output options" >
100 <option value="no" selected="true">no</option>
101 <option value="yes">yes</option>
102 </param>
103 <when value="no" />
104 <when value="yes">
105 <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
106 </when>
107 </conditional>
108 -->
109 </inputs>
110 <outputs>
111 <data format="tabular" name="outFileName" />
112 <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot">
113 <filter>saveBiasPlot is True</filter>
114 <!--<filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter>-->
115 </data>
116 </outputs>
117 <help>
118 111
119 **What it does** 112 **What it does**
120 113
121 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012). Nucleic Acids Res. (see below for more explanations) 114 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012). Nucleic Acids Res. (see below for more explanations)
122 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias. 115 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.