comparison iit_store.xml @ 7:561503a442f0

refactor
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 13:26:41 -0600
parents
children a89fec682254
comparison
equal deleted inserted replaced
6:3be0e0a858fe 7:561503a442f0
1 <tool id="gmap_iit_store" name="GMAP IIT" version="2.0.0">
2 <description>Create a map store for known genes or SNPs</description>
3 <requirements>
4 <requirement type="binary">iit_store</requirement>
5 <!-- Proposed (not yet standard) requirement type: lists the custom datatypes referenced by this tool's input and output formats -->
6 <requirement type="datatype">gmap_annotation</requirement>
7 <requirement type="datatype">gmap_snps</requirement>
8 <requirement type="datatype">iit</requirement>
9 <requirement type="datatype">splicesites.iit</requirement>
10 <requirement type="datatype">introns.iit</requirement>
11 <requirement type="datatype">snps.iit</requirement>
12 </requirements>
13 <version_string>iit_store --version</version_string>
14 <command interpreter="command"> /bin/bash $shscript 2> $log </command> <!-- runs the generated "shscript" config file with bash; stderr is redirected into the "log" output dataset -->
15 <inputs>
16 <!-- Input data -->
17 <conditional name="map">
18 <param name="type" type="select" label="Make map for" >
19 <option value="genes">Introns and Splice sites</option>
20 <option value="snps">SNPs</option>
21 <option value="gmap">GMAP Annotation</option>
22 </param>
23 <when value="genes"> <!-- Gene models: the source dataset is a UCSC refGene table, GTF or GFF3; "maps" selects which .iit outputs to build -->
24 <conditional name="src">
25 <param name="src_format" type="select" label="Add splice and intron info from" >
26 <option value="refGeneTable">refGenes table from UCSC table browser</option>
27 <option value="gtf">GTF</option>
28 <option value="gff3">GFF3</option>
29 </param>
30 <when value="refGeneTable">
31 <param name="genes" type="data" format="tabular" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
32 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
33 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
34 <validator type="in_range" message="The number of columns to skip must be >= 0." min="0."/>
35 </param>
36 </when>
37 <when value="gtf">
38 <param name="genes" type="data" format="gtf" label="Genes as GTF" help="" />
39 </when>
40 <when value="gff3">
41 <param name="genes" type="data" format="gff3" label="Genes in GFF3 format" help="" />
42 </when>
43 </conditional>
44 <param name="maps" type="select" display="checkboxes" multiple="true" force_select="true" label="Maps to create" > <!-- values are matched by the output filters and by the shscript template -->
45 <option value="splicesites" selected="true">splicesites.iit</option>
46 <option value="introns" selected="false">introns.iit</option>
47 </param>
48 </when>
49 <when value="snps"> <!-- SNP sources: either a UCSC SNP table (with optional exceptions table and confidence level) or a GMAP-format SNP file -->
50 <conditional name="src">
51 <param name="src_format" type="select" label="Add SNP info from" >
52 <option value="snpTable">UCSC SNP Table</option>
53 <option value="snpFile">GMAP SNP File</option>
54 </param>
55 <when value="snpTable">
56 <param name="snps" type="data" format="tabular" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
57 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
58 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
59 <option value="1" selected="true">1 (High)</option>
60 <option value="2">2 (Medium)</option>
61 <option value="3">3 (All)</option>
62 </param>
63 </when>
64 <when value="snpFile">
65 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
66 help="Format (3 columns):&lt;B&gt;
67 &lt;br&gt;>rs62211261 21:14379270 CG
68 &lt;br&gt;>rs62211262 21:14379281 CG
69 &lt;/B&gt;
70 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
71 identifier (which may have duplicates). Then there should be the
72 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
73 the first character of a chromosome is number 1.) Finally, there
74 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
75 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
76 If one of these two letters does not match the allele in the reference
77 sequence, that SNP will be ignored in subsequent processing as a probable error.
78 The N stands for any other allele." />
79 </when>
80 </conditional>
81 </when>
82 <when value="gmap"> <!-- Generic GMAP map file: a single "annotation" dataset in gmap_annotation format -->
83 <param name="annotation" type="data" format="gmap_annotation" label="GMAP mapfile"
84 help="Format (2 or 3 columns): &lt;B&gt;
85 &lt;br&gt;>label coords optional_tag
86 &lt;br&gt;optional_annotation (which may be zero, one, or multiple lines)
87 &lt;/B&gt;
88 &lt;br&gt;Each line must start with a &gt; character, then be followed by an identifier (which may have duplicates).
89 &lt;br&gt;Then there should be the chromosomal coordinate range. (Coordinates are all 1-based, so the first character of a chromosome is number 1.)
90 &lt;br&gt;The coords should be of the form
91 &lt;br&gt; chr:position
92 &lt;br&gt; chr:startposition..endposition
93 &lt;br&gt;The term chr:position is equivalent to chr:position..position.
94 &lt;br&gt;If you want to indicate that the interval is on the minus strand or reverse direction, then endposition may be less than startposition.
95 " />
96 </when>
97 </conditional>
98 </inputs>
99 <outputs> <!-- "log" has no filter and is always declared; each .iit dataset is declared only when its filter matches the chosen map type (and, for genes, the ticked maps) -->
100 <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
101 <data name="splicesites_iit" format="splicesites.iit" label="${tool.name} on ${on_string} splicesites.iit">
102 <filter>map['type'] == 'genes' and 'splicesites' in map['maps']</filter>
103 </data>
104 <data name="introns_iit" format="introns.iit" label="${tool.name} on ${on_string} introns.iit">
105 <filter>map['type'] == 'genes' and 'introns' in map['maps']</filter>
106 </data>
107 <data name="snps_iit" format="snps.iit" label="${tool.name} on ${on_string} snps.iit">
108 <filter>map['type'] == 'snps'</filter>
109 </data>
110 <data name="map_iit" format="iit" label="${tool.name} on ${on_string} map.iit">
111 <filter>map['type'] == 'gmap'</filter>
112 </data>
113 </outputs>
114 <configfiles> <!-- shscript: Cheetah template rendered to a bash script. It pipes the selected input through the matching helper (psl_*, gtf_*, gff3_* or dbsnp_iit) into iit_store; GMAP SNP files and GMAP map files go to iit_store directly. gt/ad/ep hold the shell characters greater-than, ampersand and exclamation mark via chr(), presumably to avoid escaping them in this file. The multi-select "maps" value is split on commas so that ticking both maps emits both commands. -->
115 <configfile name="shscript">
116 #!/bin/bash
117 #set $catcmd = 'cat'
118 #set $gt = chr(62)
119 #set $ad = chr(38)
120 #set $ep = chr(33)
121 #set $toerr = ''.join([$gt,$ad,'2'])
122 #if $map.type == 'genes':
123 if [ $ep -e $map.src.genes ]; then echo "$map.src.genes does not exist" $toerr; exit 1; fi
124 if [ $ep -s $map.src.genes ]; then echo "$map.src.genes is empty" $toerr; exit 2; fi
125 #if $map.src.src_format == 'refGeneTable':
126 #if 'splicesites' in str($map.maps).split(','):
127 $catcmd $map.src.genes | psl_splicesites -s $map.src.col_skip | iit_store -o $splicesites_iit
128 #end if
129 #if 'introns' in str($map.maps).split(','):
130 $catcmd $map.src.genes | psl_introns -s $map.src.col_skip | iit_store -o $introns_iit
131 #end if
132 #elif $map.src.src_format == 'gtf':
133 #if 'splicesites' in str($map.maps).split(','):
134 $catcmd $map.src.genes | gtf_splicesites | iit_store -o $splicesites_iit
135 #end if
136 #if 'introns' in str($map.maps).split(','):
137 $catcmd $map.src.genes | gtf_introns | iit_store -o $introns_iit
138 #end if
139 #elif $map.src.src_format == 'gff3':
140 #if 'splicesites' in str($map.maps).split(','):
141 $catcmd $map.src.genes | gff3_splicesites | iit_store -o $splicesites_iit
142 #end if
143 #if 'introns' in str($map.maps).split(','):
144 $catcmd $map.src.genes | gff3_introns | iit_store -o $introns_iit
145 #end if
146 #end if
147 #elif $map.type == 'snps':
148 if [ $ep -s $map.src.snps ]; then echo "$map.src.snps is empty" $toerr; exit 2; fi
149 #if $map.src.src_format == 'snpTable':
150 #if $map.src.snpsex.__str__ != 'None':
151 $catcmd $map.src.snps | dbsnp_iit -w $map.src.weight -e $map.src.snpsex | iit_store -o $snps_iit
152 #else:
153 $catcmd $map.src.snps | dbsnp_iit -w $map.src.weight | iit_store -o $snps_iit
154 #end if
155 #else:
156 $catcmd $map.src.snps | iit_store -o $snps_iit
157 #end if
158 #else:
159 $catcmd $map.annotation | iit_store -o $map_iit
160 #end if
161 </configfile>
162 </configfiles>
163
164 <tests>
165 </tests>
166
167 <help>
168
169
170 **iit_store**
171
172 GMAP IIT creates an Interval Index Tree map of known splice sites, introns, or SNPs (it uses iit_store described in the GMAP documentation). The maps can be used in GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). Maps are typically used for known splice sites, introns, or SNPs.
173
174 You will want to read the README_.
175
176 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
177
178 .. _GMAP: http://research-pub.gene.com/gmap/
179 .. _GSNAP: http://research-pub.gene.com/gmap/
180 .. _README: http://research-pub.gene.com/gmap/src/README
181 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
182
183
184 **inputs**
185
186 </help>
187 </tool>
188