comparison alignr.xml @ 20:16ba480adf96

Uploaded
author xuebing
date Sat, 31 Mar 2012 08:31:22 -0400
parents b11a21c704ec
children
comparison
equal deleted inserted replaced
19:d325683ec368 20:16ba480adf96
1 <tool id="alignr" name="align">
2 <description>two interval sets</description>
<!-- Command line for alignr.py (Cheetah template).
     Dataset paths and free-text values (output label, ylim) are double-quoted so that
     a value containing spaces stays a single argument. $stranded is left unquoted on
     purpose: its false value is the empty string and must not become an empty argument.
     Nested parameters are addressed through their enclosing conditional.
     The -b argument is either the history dataset (source "user") or the "value" column
     of the selected .loc entry; -f BAM is appended only when the first input is BAM. -->
3 <command interpreter="python"> alignr.py -a "$inputa_format.inputa" -w $windowsize -n $nbins -o "$output_data" -v "$output_plot" $stranded -q -l "$outputlabel" --ylim="$ylim" --span $span
4 #if $inputb_source_type.inputb_select == "user":
5 -b "$inputb_source_type.inputb"
6 #else:
7 -b "${inputb_source_type.selectedb.fields.value}"
8 #end if
9 #if $inputa_format.inputa_select == "BAM":
10 -f BAM
11 #end if
12 </command>
13 <inputs>
<!-- First interval set (-a): the user picks the format, which decides what kind of
     history dataset may be chosen: "interval" for the BED-like option, "bam" for BAM.
     The command template reads inputa_select to decide whether to add "-f BAM". -->
14 <conditional name="inputa_format">
15 <param name="inputa_select" type="select" label="Select your first input format" >
16 <option value="BED" selected="true">BED-like (chrNum start end ...) </option>
17 <option value="BAM"> BAM</option>
18 </param>
19 <when value="BED">
20 <param name="inputa" type="data" format="interval" label="Input file for the first interval set (-a)"/>
21 </when>
22 <when value="BAM">
23 <param name="inputa" type="data" format="bam" label="Input file for the first interval set (-a)"/>
24 </when>
25 </conditional>
<!-- Second interval set (-b): either a dataset from the user's history (value "user",
     format "interval") or one of six built-in annotation sets (mm9/hg18 x
     ucsc/refseq/ensembl). Each built-in branch fills the "selectedb" select from its own
     .loc file, using column 0 as the displayed name and column 1 as the value that the
     command template passes to -b. -->
26 <conditional name="inputb_source_type">
27 <param name="inputb_select" type="select" label="Input source for the second interval set">
28 <option value="mm9ucsc" selected="true">mm9 ucsc knownGene annotations</option>
29 <option value="mm9refseq">mm9 refseq gene annotations</option>
30 <option value="mm9ensembl">mm9 ensembl gene annotations</option>
31 <option value="hg18ucsc" >hg18 ucsc knownGene annotations</option>
32 <option value="hg18refseq">hg18 refseq gene annotations</option>
33 <option value="hg18ensembl">hg18 ensembl gene annotations</option>
34 <option value="user">Dataset in Your History</option>
35 </param>
36 <when value="user">
37 <param name="inputb" type="data" format="interval" label="Input file for the second interval set (-b)" />
38 </when>
39 <when value="mm9ucsc">
40 <param name="selectedb" type="select" label="Input for the second interval set (-b)" >
41 <options from_file="aligndb-mm9-knownGene.loc">
42 <column name="name" index="0"/>
43 <column name="value" index="1"/>
44 </options>
45 </param>
46 </when>
47 <when value="mm9refseq">
48 <param name="selectedb" type="select" label="Input for the second interval set (-b)" >
49 <options from_file="aligndb-mm9-refGene.loc">
50 <column name="name" index="0"/>
51 <column name="value" index="1"/>
52 </options>
53 </param>
54 </when>
55 <when value="mm9ensembl">
56 <param name="selectedb" type="select" label="Input for the second interval set (-b)" >
57 <options from_file="aligndb-mm9-ensGene.loc">
58 <column name="name" index="0"/>
59 <column name="value" index="1"/>
60 </options>
61 </param>
62 </when>
63 <when value="hg18ucsc">
64 <param name="selectedb" type="select" label="Input for the second interval set (-b)" >
65 <options from_file="aligndb-hg18-knownGene.loc">
66 <column name="name" index="0"/>
67 <column name="value" index="1"/>
68 </options>
69 </param>
70 </when>
71 <when value="hg18refseq">
72 <param name="selectedb" type="select" label="Input for the second interval set (-b)" >
73 <options from_file="aligndb-hg18-refGene.loc">
74 <column name="name" index="0"/>
75 <column name="value" index="1"/>
76 </options>
77 </param>
78 </when>
79 <when value="hg18ensembl">
80 <param name="selectedb" type="select" label="Input for the second interval set (-b)" >
81 <options from_file="aligndb-hg18-ensGene.loc">
82 <column name="name" index="0"/>
83 <column name="value" index="1"/>
84 </options>
85 </param>
86 </when>
87
88 </conditional>
<!-- Scalar options forwarded to alignr.py: window size (-w), number of bins (-n),
     loess span, same-strand switch (emits "-s" when checked, nothing otherwise),
     the label used for both output datasets, and the y-axis limits of the plot. -->
89 <param name="windowsize" size="10" type="integer" value="0" label="Change input 2 interval size (-w)" help="Creates new intervals extending w bp on each side of the original center (2 x w bp in total). Set to 0 to keep the original interval sizes."/>
90 <param name="nbins" size="10" type="integer" value="100" label="Number of bins dividing intervals in input 2 (-n)"/>
91 <param name="span" size="10" type="float" value="0.1" label="loess span: smoothing parameter" help="value less than 0.1 disables smoothing"/>
92 <param name="stranded" label="Check if require overlap on the same strand (-s)" type="boolean" truevalue="-s" falsevalue="" checked="False"/>
93 <param name="outputlabel" size="80" type="text" label="Output label" value="test"/>
94 <param name="ylim" size="10" type="text" label="set ylim of the plot" value="min,max" help="e.g. 0,1 (default is the min and max of the signal)"/>
95
96 </inputs>
<!-- Two result datasets, both labelled from the user-supplied output label:
     output_data (tabular) is passed to alignr.py as -o and output_plot (pdf) as -v. -->
97 <outputs>
98 <data format="tabular" name="output_data" label="${outputlabel} (data)"/>
99 <data format="pdf" name="output_plot" label="${outputlabel} (plot)"/>
100 </outputs>
101 <help>
102
103 **What it does**
104
105 This tool aligns two sets of intervals, finds overlaps, calculates and plots the coverage of the first set across the second set. Applications include:
106
107 - check read distribution around TSS/poly A site/splice site/motif site/miRNA target site
108 - check relative position/overlap of two lists of ChIP-seq peaks
109
110 Two output files are generated. One is the coverage/profile for each interval in input 2. The first two columns are interval ID and the total number of overlapping intervals from input 1. Column 3 to column nbins+2 are coverage at each bin. The other file is a PDF file plotting the average coverage of each bin. To modify the visualization, please download the coverage file and make your own plots.
111
112 -----
113
114 **Annotated features**
115
116 Currently supports mouse genome build mm9 and human hg18. Each interval spans 1000bp upstream and 1000bp downstream of a feature such as TSS. Features with overlapping exons in the intronic/intergenic part of the 2000bp interval are removed.
117
118 -----
119
120 **Usage**
121
122 -h, --help show this help message and exit
123 -a INPUTA (required) input file A, BED-like (first 3 columns: chr, start, end) or BAM format. The
124 script computes the depth of coverage of features in file
125 A across the features in file B
126 -b INPUTB (required) input file B, BED format or MACS peak file.
127 Requires a unique name for each line in column 4
128 -m inputB is a MACS peak file.
129 -f AFORMAT Format of input file A. Can be BED (default) or BAM
130 -w WINDOW Generate new inputB by making a window of 2 x WINDOW bp
131 (in total) flanking the center of each input feature
132 -n NBINS number of bins. Features in B are binned, and the coverage
133 is computed for each bin. Default is 100
134 -s enforce strandness: require overlapping on the same
135 strand. Default is off
136 -p load an existing intersectBed output file
137 -q suppress output on screen
138 -o OUTPUTPROFILE (optional) output profile name.
139 -v PLOTFILE (optional) plot file name
140 </help>
141 </tool>
142