comparison gmap_build.xml @ 7:561503a442f0

refactor
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 13:26:41 -0600
parents
children a89fec682254
comparison
equal deleted inserted replaced
6:3be0e0a858fe 7:561503a442f0
1 <tool id="gmap_build" name="GMAP Build" version="2.0.0">
2 <description>a database genome index for GMAP and GSNAP</description>
3 <requirements>
4 <requirement type="binary">gmap_build</requirement>
5 <!-- proposed tag for added datatype dependencies -->
6 <requirement type="datatype">gmapdb</requirement>
7 <requirement type="datatype">gmap_snps</requirement>
8 </requirements>
9 <version_string>gmap --version</version_string>
10 <command interpreter="command"> /bin/bash $shscript 2>&amp;1 1> $output </command> <!-- stderr is duplicated onto the job's stdout; script stdout goes to $output (the former "2>1" wrote stderr to a file named "1") -->
11 <inputs>
12 <!-- Name for this gmapdb -->
13 <param name="refname" type="text" label="Name you want to give this gmap database" help="">
14 <validator type="empty_field" message="A database name is required."/>
15 </param>
16 <!-- Input data -->
17 <repeat name="inputs" title="Reference Sequence" min="1">
18 <param name="input" type="data" format="fasta" label="reference sequence fasta" />
19 </repeat>
20
21 <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="">
22 <option value="12">12</option>
23 <option value="13">13</option>
24 <option value="14">14</option>
25 <option value="15" selected="true">15</option>
26 </param>
27 <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>
28 <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>
29 <conditional name="splicesite">
30 <param name="splice_source" type="select" label="Add splice and intron info from" >
31 <option value="none"></option>
32 <option value="refGeneTable">refGenes table from UCSC table browser</option>
33 <option value="gtf">GTF</option>
34 <option value="gff3">GFF3</option>
35 </param>
36 <when value="none"/>
37 <when value="refGeneTable">
38 <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
39 <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
40 help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
41 <validator type="in_range" message="The number of columns to skip must be &gt;= 0." min="0"/> <!-- lower bound only; no max is set -->
42 </param>
43
44 </when>
45 <when value="gtf">
46 <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
47 </when>
48 <when value="gff3">
49 <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
50 </when>
51 </conditional>
52 <conditional name="dbsnp">
53 <param name="snp_source" type="select" label="Add SNP info from" >
54 <option value="none"></option>
55 <option value="snpTable">UCSC SNP Table</option>
56 <option value="snpFile">GMAP SNP File</option>
57 </param>
58 <when value="none"/>
59 <when value="snpTable">
60 <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
61 <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
62 <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
63 <option value="1" selected="true">1 (High)</option>
64 <option value="2">2 (Medium)</option>
65 <option value="3">3 (All)</option>
66 </param>
67 </when>
68 <when value="snpFile">
69 <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
70 help="Format (3 columns):
71 &lt;br&gt;>rs62211261 21:14379270 CG
72 &lt;br&gt;>rs62211262 21:14379281 CG
73 &lt;br&gt;Each line must start with a &gt; character, then be followed by an
74 identifier (which may have duplicates). Then there should be the
75 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
76 the first character of a chromosome is number 1.) Finally, there
77 should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
78 &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
79 If one of these two letters does not match the allele in the reference
80 sequence, that SNP will be ignored in subsequent processing as a probable error.
81 The N stands for any other allele." />
82 </when>
83 </conditional>
84 </inputs>
85 <outputs>
86 <!--
87 <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
88 -->
89 <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" />
90 </outputs>
91 <configfiles>
92 <configfile name="shscript">
93 #!/bin/bash
94 #set $ds = chr(36)
95 #set $gt = chr(62)
96 #set $lt = chr(60)
97 #set $ad = chr(38)
98 ## Cheetah template rendered to a bash script that builds the GMAP database
99 ## under the output dataset's extra_files_path: one gmap_build pass per selected
100 ## kmer, then optional splicesite/intron and SNP maps, then optional cmetindex
101 ## and atoiindex. Shell arguments derived from user input (refname, dataset
102 ## paths) are double-quoted so names containing spaces stay a single argument.
103 #import os.path
104 #set $gmapdb = $output.extra_files_path
105 #set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
106 mkdir -p "$gmapdb"
107 ## export GMAPDB required for cmetindex and atoiindex
108 export GMAPDB="$gmapdb"
109 #for $k in $kmer.__str__.split(','):
110 gmap_build -D "$gmapdb" -d "$refname" -s numeric-alpha -k $k #for i in $inputs# "${i.input}"#end for#
111 #end for
112 get-genome -D "$gmapdb" -d '?' | sed 's/^Available .*/gmap db: /'
113 echo "kmers: " $kmer
114 #if $splicesite.splice_source == 'refGeneTable':
115 #if $splicesite.refGenes.__str__ != 'None':
116 cat "$splicesite.refGenes" | psl_splicesites -s $splicesite.col_skip | iit_store -o "$os.path.join($mapsdir,'splicesites')"
117 cat "$splicesite.refGenes" | psl_introns -s $splicesite.col_skip | iit_store -o "$os.path.join($mapsdir,'introns')"
118 #end if
119 #elif $splicesite.splice_source == 'gtf':
120 #if $splicesite.gtfGenes.__str__ != 'None':
121 cat "$splicesite.gtfGenes" | gtf_splicesites | iit_store -o "$os.path.join($mapsdir,'splicesites')"
122 cat "$splicesite.gtfGenes" | gtf_introns | iit_store -o "$os.path.join($mapsdir,'introns')"
123 #end if
124 #elif $splicesite.splice_source == 'gff3':
125 #if $splicesite.gff3Genes.__str__ != 'None':
126 cat "$splicesite.gff3Genes" | gff3_splicesites | iit_store -o "$os.path.join($mapsdir,'splicesites')"
127 cat "$splicesite.gff3Genes" | gff3_introns | iit_store -o "$os.path.join($mapsdir,'introns')"
128 #end if
129 #end if
130 #if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
131 #if $dbsnp.snp_source == 'snpTable':
132 #if $dbsnp.snpsex.__str__ != 'None':
133 cat "$dbsnp.snps" | dbsnp_iit -w $dbsnp.weight -e "$dbsnp.snpsex" | iit_store -o "$os.path.join($mapsdir,'snps')"
134 #else:
135 cat "$dbsnp.snps" | dbsnp_iit -w $dbsnp.weight | iit_store -o "$os.path.join($mapsdir,'snps')"
136 #end if
137 #else:
138 cat "$dbsnp.snps" | iit_store -o "$os.path.join($mapsdir,'snps')"
139 #end if
140 snpindex -d "$refname" -v snps
141 echo "snpindex" -d "$refname" -v snps
142 #end if
143 #if $cmetindex.__str__ == 'yes':
144 cmetindex -d "$refname"
145 echo "cmetindex" -d "$refname"
146 #end if
147 #if $atoiindex.__str__ == 'yes':
148 atoiindex -d "$refname"
149 echo "atoiindex" -d "$refname"
150 #end if
151 get-genome -D "$gmapdb" -d "$refname" -m '?' | sed 's/^Available maps .*/maps: /'
152 </configfile>
153 </configfiles>
154
155 <tests>
156 </tests>
157
158 <help>
159
160
161 **GMAP Build**
162
163 GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)
164
165 You will want to read the README_
166
167 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
168
169 .. _GMAP: http://research-pub.gene.com/gmap/
170 .. _GSNAP: http://research-pub.gene.com/gmap/
171 .. _README: http://research-pub.gene.com/gmap/src/README
172 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
173
174
175 </help>
176 </tool>
177