changeset 0:e11e303c39a4 default tip

Uploaded
author bjoern-gruening
date Wed, 11 Jan 2012 09:34:45 -0500
parents
children
files glimmer3/gbk2orf.xml glimmer3/gbk_to_orf.py glimmer3/glimmer2gff.py glimmer3/glimmer2gff.xml glimmer3/glimmer3-build-icm-wrapper.xml glimmer3/glimmer3-extract-wrapper.xml glimmer3/glimmer3-long-orfs-wrapper.xml glimmer3/glimmer3-main-wrapper.xml glimmer3/glimmer_acgt_content.xml glimmer3/glimmer_orf_to_seq.py glimmer3/glimmer_orf_to_seq.xml glimmer3/glimmer_predict.py glimmer3/glimmer_predict.xml readme.txt tool_conf.xml
diffstat 15 files changed, 1161 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/gbk2orf.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,208 @@
+<tool id="gbkToORF" name="Extract ORF" version="0.1">
+    <description>from a GenBank file</description>
+    <command interpreter="python">
+        gbk_to_orf.py -g $inputfile -a $aminoAcidOutput -n $orfOutput 
+    </command>
+    <inputs>
+        <param name="inputfile" type='data' format="genbank" label="gene bank file"/>
+    </inputs>
+    <outputs>
+        <data name="aminoAcidOutput" format="fasta" />
+        <data name="orfOutput" format="fasta" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+
+
+**What it does**
+Read a GenBank file and export fasta formatted amino acid and CDS files.
+
+
+-----
+
+**Example**
+	* input::
+	
+		GenBank file
+
+			LOCUS       BA000030             9025608 bp    DNA     linear   BCT 21-DEC-2007
+		DEFINITION  Streptomyces avermitilis MA-4680 DNA, complete genome.
+		ACCESSION   BA000030 AP005021-AP005050
+		VERSION     BA000030.3  GI:148878541
+		DBLINK      Project: 189
+		KEYWORDS    .
+		SOURCE      Streptomyces avermitilis MA-4680
+		  ORGANISM  Streptomyces avermitilis MA-4680
+			    Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales;
+			    Streptomycineae; Streptomycetaceae; Streptomyces.
+		REFERENCE   1
+		  AUTHORS   Omura,S., Ikeda,H., Ishikawa,J., Hanamoto,A., Takahashi,C.,
+			    Shinose,M., Takahashi,Y., Horikawa,H., Nakazawa,H., Osonoe,T.,
+			    Kikuchi,H., Shiba,T., Sakaki,Y. and Hattori,M.
+		  TITLE     Genome sequence of an industrial microorganism Streptomyces
+			    avermitilis: deducing the ability of producing secondary
+			    metabolites
+		  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 98 (21), 12215-12220 (2001)
+		   PUBMED   11572948
+		REFERENCE   2
+		  AUTHORS   Ikeda,H., Ishikawa,J., Hanamoto,A., Shinose,M., Kikuchi,H.,
+			    Shiba,T., Sakaki,Y., Hattori,M. and Omura,S.
+		  TITLE     Complete genome sequence and comparative analysis of the industrial
+			    microorganism Streptomyces avermitilis
+		  JOURNAL   Nat. Biotechnol. 21 (5), 526-531 (2003)
+		   PUBMED   12692562
+		REFERENCE   3  (bases 1 to 9025608)
+		  AUTHORS   Omura,S., Ikeda,H., Ishikawa,J., Hanamoto,A., Takahashi,C.,
+			    Shinose,M., Takahashi,Y., Horikawa,H., Nakazawa,H., Osonoe,T.,
+			    Kushida,N., Shiba,T., Sakaki,Y. and Hattori,M.
+		  TITLE     Direct Submission
+		  JOURNAL   Submitted (29-MAR-2002) Contact:S Omura Kitasato University,
+			    Kitasato Institute for Life Sciences; 1-15-1 Kitasato, Sagamihara,
+			    Kanagawa 228-8555, Japan URL
+			    :http://avermitilis.ls.kitasato-u.ac.jp/
+		COMMENT     On Jun 15, 2007 this sequence version replaced gi:57546753.
+			    This work was done in collaboration with Haruo Ikeda(*1), Jun
+			    Ishikawa(*2), Akiharu Hanamoto(*3), Chigusa Takahashi(*3), Mayumi
+			    Shinose(*3), Hiroshi Horikawa(*4), Hidekazu Nakazawa(*4), Tomomi
+			    Osonoe(*4), Norihiro Kushida(*4), Hisashi Kikuchi(*4), Tadayoshi
+			    Shiba(*5), Yoshiyuki Sakaki(*6,*7), Masahira Hattori(*1,*7)
+			    and Satoshi Omura(*1,*3).
+			    Final finishing process and all annotation were done by H. Ikeda
+			    and J. Ishikawa.
+			    *1 Kitasato Institute for Life Sciences, Kitasato University *2
+			    National Institute of Infectious Diseases
+			    *3 The Kitasato Institute
+			    *4 National Institute of Technology and Evaluation *5 School of
+			    Science, Kitasato University
+			    *6 Institute of Medical Science, University of Tokyo *7 RIKEN,
+			    Genomic Sciences Center
+			    All the annotated genes identified are available from following
+			    urls.
+			    http://avermitilis.ls.kitasato-u.ac.jp.
+		FEATURES             Location/Qualifiers
+		     source          1..9025608
+				     /organism="Streptomyces avermitilis MA-4680"
+				     /mol_type="genomic DNA"
+				     /strain="MA-4680"
+				     /db_xref="taxon:227882"
+				     /note="This strain is also named as strain: ATCC 31267,
+				     NCIMB 12804 or NRRL 8165."
+		     gene            complement(1380..1811)
+				     /locus_tag="SAV_1"
+		     CDS             complement(1380..1811)
+				     /locus_tag="SAV_1"
+				     /codon_start=1
+				     /transl_table=11
+				     /product="hypothetical protein"
+				     /protein_id="BAC67710.1"
+				     /db_xref="GI:29603637"
+				     /translation="MTAEWYVLVEEDTRETKRADGVELRLHRWKLAATQHIAGDQEQA
+				     AAAAEDAALNYMPGVLARHARPGDEPARHAFLTQDGAWLVLLRQRHRECHIRVTTARL
+				     MHTQEEKEAPPKSFKEKLRSALDGPQPPEPAGRPWKPGSET"
+
+
+* output::
+	
+	-  aminoAcidOutput
+	>SAV_1 
+	MTAEWYVLVEEDTRETKRADGVELRLHRWKLAATQHIAGDQEQAAAAAEDAALNYMPGVL
+	ARHARPGDEPARHAFLTQDGAWLVLLRQRHRECHIRVTTARLMHTQEEKEAPPKSFKEKL
+	RSALDGPQPPEPAGRPWKPGSET
+	>SAV_2 
+	VPPQGARGTIVSATGSGKTSMAAASTLNCFPEGRILVTVPTLDLLAQTAQAWRAVGHHSP
+	MIAVCSLENDPVLNERT
+	>SAV_3 
+	MDWNFPDDDIFFCGGCGDDDTPDPRVPRQDKALCVRCDRVERQVRRYRITVPRRNAIMRF
+	QRDVCALCQEGPPTDHCPDAVSFWHIDHDHRCCPPGGSCGRCVRGLLCLPCNATRLPAYE
+	RLPNVLRDSPRFNTYLNSPPARHPEARPTARDHAGPRDASSYLIDAFFTAADHPEGNALS
+	S
+	>SAV_4 
+	VALTPGGTRVTQWQDRQAIGDMHERRVAAALRARGWTVQPCGQGTYPPAVREALRRTRSA
+	LRHFPDLIAARGADLITIDAKDRMPSTDTDRYAVSADTVTAGLFFTAAHAPTPLYYVFGD
+	LKVLTPAEVVHYTAHALRHRSGAFHLVRTEQAHCFDDVFGSAGAAAAA
+	>SAV_5 
+	MMLLMAAYVDPRFRPTLWPGTPVPTPELMPLRGARADGEWIVWTPQVRSRSHTVPVPEDF
+	YLREFMEVDPEDLDAVAALMGAYGHLGGSINTGSWDVDVYERLKELTEREHPRAPFALHG
+	ELATLFMREAQAAITTWLALRREGGLDALIEPEVSEEELAQWQASNADLEEAWPRDLDHL
+	RELSLEIRISNLVSELNAALKPFSIGIGGLGDRYPTILAVAFLQLYNHLAEDATIRECAN
+	ETCRRHFVRQRGRAAYGQNRTSGIKYCTRECARAQAQREHRRRRKQQTTTLQQPPAPGPQ
+	SHDTSEPTAEGR
+	>SAV_6 
+	MISLREHQVEANARIRAWAGFPTRSPVPAQGLRGTVVSATGSGKTITAAWAARECFRGGR
+	ILVMVPTLDLLVQTAQAWRRVGHNGPMVAACSLEKDEVLEQLGVRTTTNPIQLALWAGHG
+	PVVVFATYASLVDREDPEDVTGRAKVRGPLEAALAGGQRLYGQTMDGFDLAVVDEAHSTT
+	GDLGRPWAAIHDNSRIPADFRLYLTATPRILASPRPQKGADGRELEIATMASDPDGPYGE
+	WLFELGLSEAVERGILAGFEIDVLEIRDPSPALGESEEAQRGRRLALLQTALLEHAAARN
+	LRTVMTFHQRVEEAAAFAQTMPQTAARLYEAEVSAEALVDAGALPESSIGAEFYELEAGR
+	HVPPDRVWAAWLCGDHLVAERREVLRQFADGLDAGNKRVHRAFLASVRVLGEGVDIVGER
+	GVEAICFADTRGSQVEIVQNIGRALRPNPDGTNKTARIIVPVFLQPGENPTDMVASASFA
+	PLVTVLQGLRSHSERLVEQLASRALTSGQRHVHVKRDEDGRIIGTTTEGEGGQHESEGAV
+	ESALLHFSTPRDATTIAAFLRTRVYRPESLVWLEGYQALLRWRKKNHITGLYAVPYDTET
+	EAGVTKAFPLGRWVHQQRRTYRAGELDPHRTTLLDEAGMVWEPGDEAWENKLAALRSFHR
+	AHGHLAPRRDAVWGDADSELVPVGEHMANLRRKDGLGKNPQRAATRATQLAAIDPDWNCP
+	WPLDWQRHYRVLADLATDEPHSRLPDIQPGVQFEGDDLGKWLQRQRRSWAELSEEQQQRL
+	TALGVTPAEPPTPTPSAKGGGKAAAFQRGLAALAQWIQREGAHKVVPRGHVEAVVIDGQE
+	HQHKLGVWISNTKTRRDKLTHDQRTALAALGVEWA
+	....
+
+	- orfs
+
+	>SAV_1 
+	ATGACCGCCGAGTGGTACGTCCTCGTCGAAGAGGACACACGAGAGACCAAGCGCGCCGAC
+	GGCGTTGAACTCAGATTGCACCGCTGGAAACTGGCGGCCACTCAGCACATCGCAGGAGAT
+	CAGGAACAGGCCGCCGCCGCGGCCGAGGATGCGGCCCTGAACTACATGCCGGGAGTGCTC
+	GCTCGGCATGCCCGACCGGGAGACGAACCGGCCCGGCATGCTTTCCTCACCCAGGACGGG
+	GCCTGGCTGGTGCTCCTCAGGCAGCGGCACCGCGAGTGTCACATACGGGTGACCACTGCC
+	CGGCTCATGCATACACAGGAAGAGAAGGAGGCCCCGCCGAAAAGCTTCAAGGAGAAACTC
+	CGCAGCGCCCTGGATGGTCCTCAGCCGCCCGAACCGGCTGGTAGGCCATGGAAGCCGGGC
+	AGCGAAACCTGA
+	>SAV_2 
+	GTGCCCCCTCAGGGAGCCCGTGGCACGATCGTGTCAGCTACCGGGTCCGGCAAAACGAGC
+	ATGGCCGCCGCGAGCACGCTGAACTGCTTCCCCGAAGGCCGGATCCTCGTGACCGTGCCG
+	ACCCTGGACCTGCTCGCACAGACCGCCCAGGCGTGGCGGGCAGTCGGCCACCACTCCCCC
+	ATGATCGCGGTGTGCTCGCTGGAGAACGACCCAGTGCTGAACGAGCGGACCTGA
+	>SAV_3 
+	ATGGACTGGAACTTCCCCGACGACGACATCTTCTTCTGCGGCGGGTGCGGCGACGACGAC
+	ACCCCCGACCCGCGGGTCCCGCGTCAGGACAAGGCCCTGTGCGTCCGCTGCGACAGAGTC
+	GAACGGCAGGTCCGCCGATACCGGATCACCGTGCCGCGGAGGAACGCGATCATGCGCTTC
+	CAGCGCGACGTCTGCGCCCTGTGCCAGGAAGGCCCGCCGACCGACCACTGCCCCGATGCC
+	GTCAGCTTCTGGCACATCGACCACGACCACCGCTGCTGCCCTCCCGGCGGCTCATGCGGG
+	CGGTGCGTCCGCGGCCTCCTGTGCCTGCCCTGCAACGCCACCCGCCTGCCCGCCTACGAA
+	CGCCTCCCCAACGTCCTCCGCGACAGCCCTCGCTTCAACACCTACCTCAACAGCCCACCC
+	GCCCGGCACCCCGAAGCCCGCCCCACCGCCAGGGACCATGCAGGCCCCCGCGACGCATCC
+	AGCTACCTCATCGACGCCTTTTTCACCGCCGCGGACCATCCCGAGGGGAACGCCCTCAGC
+	TCCTGA
+	>SAV_4 
+	GTGGCACTTACCCCAGGGGGAACCCGAGTGACGCAGTGGCAGGACCGCCAGGCGATAGGC
+	GACATGCACGAACGTCGGGTGGCGGCCGCGCTGCGCGCCCGCGGCTGGACCGTCCAGCCC
+	TGCGGACAGGGCACCTACCCGCCCGCCGTACGGGAAGCCCTGCGCCGGACCCGCTCCGCC
+	CTGCGGCACTTCCCCGACCTCATCGCCGCCCGCGGCGCCGACCTGATCACCATCGACGCC
+	AAGGACCGCATGCCCAGCACCGACACCGACCGCTACGCCGTCAGCGCCGACACCGTGACC
+	GCCGGCCTCTTTTTCACCGCGGCCCACGCTCCGACTCCGCTGTACTACGTCTTCGGCGAC
+	CTGAAGGTCCTCACGCCGGCGGAGGTGGTCCACTACACCGCTCACGCCTTGCGCCACCGC
+	AGCGGTGCCTTCCACCTCGTACGCACGGAGCAAGCACACTGCTTCGACGACGTCTTCGGA
+	TCGGCTGGCGCAGCAGCTGCGGCATGA
+	>SAV_5 
+	ATGATGCTCCTCATGGCGGCATACGTTGACCCACGCTTTCGTCCTACGCTATGGCCTGGA
+	ACGCCCGTGCCGACACCGGAGTTGATGCCTCTTCGCGGAGCGCGGGCCGACGGTGAATGG
+	ATCGTCTGGACCCCGCAGGTCCGCTCCCGCTCGCACACGGTCCCCGTGCCGGAGGACTTC
+	TACCTGCGCGAGTTCATGGAGGTCGACCCTGAGGACCTCGACGCCGTGGCCGCCCTGATG
+	GGCGCCTACGGACACCTCGGCGGGAGCATCAACACCGGAAGCTGGGACGTCGACGTCTAC
+	GAGCGCCTCAAGGAGCTCACGGAGCGCGAACACCCCCGCGCGCCGTTCGCCCTGCACGGC
+	GAACTGGCCACGCTGTTCATGAGGGAGGCGCAGGCGGCCATCACCACCTGGCTGGCCCTG
+	CGCCGCGAGGGCGGGCTCGACGCGCTCATCGAGCCCGAGGTGTCCGAGGAAGAACTGGCG
+	CAGTGGCAAGCGAGCAACGCTGATCTTGAGGAAGCGTGGCCGCGGGACCTGGACCACCTG
+	CGCGAACTCTCCCTGGAGATCAGGATCAGCAACCTCGTGAGCGAACTGAACGCCGCGCTG
+	AAGCCGTTCAGCATCGGCATCGGCGGCCTGGGCGACCGCTACCCCACCATCCTCGCTGTG
+	GCGTTCCTCCAGCTCTACAACCACCTCGCCGAGGACGCCACGATCCGCGAGTGCGCGAAC
+	GAGACCTGCCGCCGCCACTTCGTACGCCAGCGCGGCCGCGCCGCATACGGGCAGAACCGC
+	ACCAGCGGCATCAAGTACTGCACCCGCGAATGCGCCCGCGCCCAGGCCCAGCGCGAACAC
+	CGCCGGCGCCGCAAACAGCAGACCACGACCCTCCAGCAGCCGCCGGCGCCTGGTCCTCAG
+	TCTCACGACACCTCAGAGCCGACTGCCGAAGGGCGCTGA
+	.......
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/gbk_to_orf.py	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+###################################################################
+##
+## gbk2orf.py by Errol Strain (estrain@gmail.com)
+##
+## Read a GenBank file and export fasta formatted amino acid and 
+## CDS files
+##
+###################################################################
+
+import sys
+from optparse import OptionParser
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+## Command line usage
+usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta" 
+p = OptionParser(usage)
+p.add_option("-t","--translate", dest="transtabl",type="int",default=11,
+  help="Translation table used to translate coding regions (default=11)")
+p.add_option("-g","--genbank", dest="gb_file",help="GenBank input file")
+p.add_option("-a","--amino_acid", dest="aa_file",help="Fasta amino acid output")
+p.add_option("-n","--nucleotide", dest="orf_file",help="Fasta nucleotide output")
+(opts, args) = p.parse_args()
+## Do I need this next line?
+if not opts and not args : p.error("Use --help to see usage")
+if len(sys.argv)==1 : p.error("Use --help to see usage") 
+
+## Lists to hold SeqRecords
+aalist = []
+nuclist = []
+
+## If the CDS does not have a locus tag the name will be assigned using the
+## order in which it was found
+feat_count=0
+
+## Iterate through genbank records in input file
+for gb_record in SeqIO.parse(open(opts.gb_file,"r"), "genbank") :
+  for (index, feature) in enumerate(gb_record.features) :
+    if feature.type=="CDS" :
+      feat_count = feat_count + 1
+      gene = feature.extract(gb_record.seq)
+      if "locus_tag" in feature.qualifiers :
+        value = feature.qualifiers["locus_tag"][0]
+      else :
+        value =  "Index_" + str(feat_count)
+      nuclist.append(SeqRecord(Seq(str(gene)),id=value,name=value))
+      pro=Seq(str(gene.translate(table=opts.transtabl,to_stop=True)))
+      aalist.append(SeqRecord(pro,id=value,name=value))
+
+## Write out lists in fasta format
+aa_handle = open(opts.aa_file,"w")
+SeqIO.write(aalist,aa_handle,"fasta")
+aa_handle.close()
+orf_handle = open(opts.orf_file,"w")
+SeqIO.write(nuclist,orf_handle,"fasta")
+orf_handle.close()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer2gff.py	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+"""
+Input: Glimmer3 prediction
+Output: GFF3 file
+Return a GFF3 file with the genes predicted by Glimmer3
+Bjoern Gruening
+
+Note: It's not a full-fledged GFF3 file, it's a really simple one.
+
+"""
+
+import sys, re
+
+def __main__():
+    input_file = open(sys.argv[1], 'r')
+
+    print '##gff-version 3\n'
+    for line in input_file:
+        line = line.strip()
+        if line[0] == '>':
+            header = line[1:]
+        else:
+            (id, start, end, frame, score) = re.split('\s+', line)
+            if int(end) > int(start):
+                strand = '+'
+            else:
+                strand = '-'
+                (start, end) = (end, start)
+
+            rest = 'frame=%s;score=%s' % (frame, score)
+            print '\t'.join([header, 'glimmer_prediction', 'predicted_gene', start, end, '.', strand, '.', rest])
+
+
+if __name__ == "__main__" :
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer2gff.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,60 @@
+<tool id="glimmer2gff" name="Convert Glimmer to GFF" version="0.1">
+	<description>Converts Glimmer Files to GFF Files</description>
+	<command interpreter="python">glimmer2gff.py $input > $output</command>
+	<inputs>
+		<param name="input" type="data" format="tabular" label="Glimmer Output File"/>
+	</inputs>
+	<outputs>
+		<data name="output" type="data" format="gff"/>
+	</outputs>
+	<tests>
+		<test>
+
+		</test>
+	</tests>
+	<help>
+
+**What it does**
+
+Converts a Glimmer3 output file to a GFF annotation file.
+
+**Example**
+
+Input::
+
+    >contig00097 sbe.0.234 
+    orf00003     2869      497  -2     5.60
+    orf00005     3894     2875  -1     7.05
+    orf00007     4242     4826  +3     8.04
+    orf00010     4846     5403  +1     8.57
+    orf00012     6858     5413  -1    10.87
+    orf00013     6857     7594  +2     3.61
+    orf00014     7751     9232  +2    11.34
+    orf00015     9374    10357  +2    10.66
+    orf00017    10603    11196  +1    13.39
+    orf00021    11303    11911  +2     8.81
+    orf00025    14791    12050  -2    13.51
+    orf00026    15216    16199  +3     6.37
+    orf00028    16333    16935  +1     8.86
+
+
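+Output::
+
+    ##gff-version 3
+
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	497	2869	.	-	.	frame=-2;score=5.60
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	2875	3894	.	-	.	frame=-1;score=7.05
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	4242	4826	.	+	.	frame=+3;score=8.04
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	4846	5403	.	+	.	frame=+1;score=8.57
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	5413	6858	.	-	.	frame=-1;score=10.87
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	6857	7594	.	+	.	frame=+2;score=3.61
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	7751	9232	.	+	.	frame=+2;score=11.34
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	9374	10357	.	+	.	frame=+2;score=10.66
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	10603	11196	.	+	.	frame=+1;score=13.39
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	11303	11911	.	+	.	frame=+2;score=8.81
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	12050	14791	.	-	.	frame=-2;score=13.51
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	15216	16199	.	+	.	frame=+3;score=6.37
+    contig00097 sbe.0.234	glimmer_prediction	predicted_gene	16333	16935	.	+	.	frame=+1;score=8.86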
+
+
+-----	
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer3-build-icm-wrapper.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,74 @@
+<tool id="glimmer3-build-icm" name="glimmer3-build-icm" version="0.1">
+  <description>glimmer3-build-icm</description>
+  <command> tigr-glimmer build-icm -r  $output &lt; $inputfile  </command>
+  <inputs>
+	<param name="inputfile" type="data" format="txt" label="Dataset" help="Dataset missing? See TIP below"/>
+  </inputs>
+  <outputs>
+    <data format="icm" name="output" />
+  </outputs>
+  <tests>
+	<test>
+		<param name="inputfile" value='glimmer3/seqTest.fa'/>
+		<output name="output" file='glimmer3/buildICMTestOutput.dat'/>
+	</test>	
+ </tests>
+
+<help>
+
+**What it does**
+
+	This program constructs an interpolated context model (ICM) from an input set of sequences.
+
+-----
+
+**Glimmer Overview**
+
+::
+
+**************		**************		**************		**************		
+*            *		*	     *		*            *		*            *
+* long-orfs  *  ===>	*   Extract  *	===>	* build-icm  *  ===>	*  glimmer3  *	
+*            *		*	     *		*	     *  	*	     *	
+**************		**************		**************		**************
+
+-----
+
+**Example**
+
+* input::
+
+	-Genome Sequence
+
+	>CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
+	GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
+	GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
+	TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
+	TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
+	GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
+	ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
+	AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
+	CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
+	TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
+	AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
+	GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
+	AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
+	CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
+	AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
+	GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
+	.....
+
+* output:
+	interpolated context model (ICM) 
+
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+
+ </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer3-extract-wrapper.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,119 @@
+<tool id="glimmer3-extract" name="glimmer3-extract" version="0.1">
+  <description></description>
+  <command > tigr-glimmer extract -t $seqInput $cordInput > $output 2> /dev/null </command>
+  <inputs>
+	<param name="seqInput" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? See TIP below"/>
+	<param name="cordInput" type="data"  label="Coordinates" help="Dataset missing? See TIP below"/>
+  </inputs>
+  <outputs>
+    <data format="fasta" name="output" />
+  </outputs>
+  <tests>
+	<test>
+		<param name="seqInput" value='glimmer3/seqTest.fa'/>
+		<param name="cordInput" value='glimmer3/cordTest.txt'/>
+		<output name="output" file='glimmer3/extractTestOutput.dat'/>
+	</test>	
+ </tests>
+
+<help>
+
+**What it does**
+
+	This program reads a genome sequence and a list of coordinates for it and outputs a multi-
+	fasta file of the regions specified by the coordinates.
+
+-----
+
+**Glimmer Overview**
+
+::
+
+**************		**************		**************		**************		
+*            *		*	     *		*            *		*            *
+* long-orfs  *  ===>	*   Extract  *	===>	* build-icm  *  ===>	*  glimmer3  *	
+*            *		*	     *		*	     *  	*	     *	
+**************		**************		**************		**************
+
+-----
+
+**Example**
+
+
+* input ::
+	
+	-Genome Sequence
+
+	CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
+	GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
+	GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
+	TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
+	TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
+	GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
+	ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
+	AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
+	CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
+	TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
+	AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
+	GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
+	AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
+	CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
+	AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
+	GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
+	...
+
+	- Coordinates
+
+	00001   40137      52  +2   0.892
+	00002    1319    1095  -3   0.654
+	00003    1555    1391  -2   0.793
+	00004    1953    2066  +3   1.078
+	00005    2045    2146  +2   0.919
+	00006    4463    4759  +2   0.985
+	00007    6785    6582  -3   1.033
+	00008    6862    7020  +1   0.915
+	00009    7300    7488  +1   0.900
+	00010    7463    7570  +2   0.912
+	00011    8399    8527  +2   1.044
+	00012   10652   10545  -3   0.895
+	00013   12170   12066  -3   1.108
+	00014   13891   13748  -2   0.998
+	00015   14157   14044  -1   1.026
+	00016   15285   15410  +3   0.928
+	00017   15829   15704  -2   0.949
+	...	
+
+* output::
+ 
+		>00001  40137 52  len=135
+		ATGACACATTTGCTCGTTGCTTTGACCCACTACGAGGCCAGTATCATGATTTCTAGAAAA
+		ACCCTCTTTTTGACTTCTTCCTCCATGATCCTTGTAGATTTTGAATTTGAAGTTTTTTCT
+		CATTCCAAAACTCTG
+	
+		>00002  1319 1095  len=222
+		TTGGCTCGCCGTTTTGGAGTCCGTGCTGGAATGCCTGGCTTCATCTCAAATAAACTTTGT
+		CCGAGTCTAACGATTGTTCCAGGAAATTACCCTAAATACACTAAAGTCAGTCGCCAATTT
+		TCACAAATTTTCATGGAATACGATTCGGATGTTGGAATGATGTCATTGGATGAGGCATTT
+		ATAGATTTGACAGACTATGTGGCAAGTAATACAGAAAAAAGT
+	
+		>00003  1555 1391  len=162
+		ATGGAGAATCTTGAGATGAAACTGGAATCATCTAGAGATTTATCAAGAGACTGTGTTTGT
+		ATAGATATGGATGCTTATTTTGCCGCAGTTGAAATGAGAGATAATCCTGCACTGAGAACA
+		GTTCCTATGGCCGTAGGCTCATCGGCAATGCTGGTAAGCACC
+	
+		>00004  1953 2066  len=111
+		GTGCGCGAGAAAAAACTACGCGTTAACCGCCAATTTTCACTTCCCCACAGATCTGTCTCG
+		AGATTCTCGAGTCATTTTTCAAGTTTATTTGTTTGTCAGCGGTTGTTTTAT
+		.....
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+
+
+ </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer3-long-orfs-wrapper.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,117 @@
+<tool id="glimmer3-long-orfs" name="glimmer3-long-orfs" version="0.1">
+  <description>identify  long, non-overlapping orfs(open reading frames)</description>
+  <command > tigr-glimmer long-orfs -n -t $cutoff $inputfile $output  2> /dev/null </command>
+  <inputs>
+	<param name="inputfile" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? See TIP below"/>
+	<param name='cutoff' type='float' label='cutoff' value='1.5'/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" />
+  </outputs>
+  <tests>
+	<test>
+		<param name="inputfile" value='glimmer3/seqTest.fa'/>
+		<param name='cutoff' value='1.5'/>
+		<output name="output" file='glimmer3/longORFSTestOutput.dat'/>
+	</test>	
+ </tests>
+
+<help>
+
+**What it does**
+
+	This program identifies long, non-overlapping open reading frames (orfs) in a DNA sequence file. 
+	These orfs are very likely to contain genes, and can be used as a set of training sequences
+	for the build-icm program. More specifically, among all orfs longer than a minimum length,
+	those that do not overlap any others are output. The start codon used for
+	each orf is the first possible one. The program, by default, automatically determines the
+	minimum length that maximizes the number of orfs that are output. With the -t option, the initial
+	set of candidate orfs also can be filtered using entropy distance, which generally produces
+	a larger, more accurate training set, particularly for high-GC-content genomes. 
+
+
+
+-----
+
+**Glimmer Overview**
+
+::
+
+**************		**************		**************		**************		
+*            *		*	     *		*            *		*            *
+* long-orfs  *  ===>	*   Extract  *	===>	* build-icm  *  ===>	*  glimmer3  *	
+*            *		*	     *		*	     *  	*	     *	
+**************		**************		**************		**************
+
+-----
+
+**Example**
+
+
+* input::
+ 
+	-Genome Sequence
+
+	CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
+	GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
+	GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
+	TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
+	TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
+	GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
+	ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
+	AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
+	CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
+	TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
+	AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
+	GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
+	AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
+	CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
+	AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
+	GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
+	.....
+	
+	- Cutoff 1.5	
+
+* output::
+
+	Sequence file = /home/mohammed/galaxy-central/database/files/000/dataset_34.dat
+	Excluded regions file = none
+	Circular genome = true
+	Initial minimum gene length = 90 bp
+	Determine optimal min gene length to maximize number of genes
+	Maximum overlap bases = 30
+	Start codons = atg,gtg,ttg
+	Stop codons = taa,tag,tga
+	Sequence length = 40222
+	Final minimum gene length = 97
+
+	Putative Genes:
+	00001   40137      52  +2   0.892
+	00002    1319    1095  -3   0.654
+	00003    1555    1391  -2   0.793
+	00004    1953    2066  +3   1.078
+	00005    2045    2146  +2   0.919
+	00006    4463    4759  +2   0.985
+	00007    6785    6582  -3   1.033
+	00008    6862    7020  +1   0.915
+	00009    7300    7488  +1   0.900
+	00010    7463    7570  +2   0.912
+	00011    8399    8527  +2   1.044
+	00012   10652   10545  -3   0.895
+	00013   12170   12066  -3   1.108
+	00014   13891   13748  -2   0.998
+	00015   14157   14044  -1   1.026
+	00016   15285   15410  +3   0.928
+	00017   15829   15704  -2   0.949
+
+	....
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+
+ </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer3-main-wrapper.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,146 @@
+<tool id="glimmer3-main" name="Glimmer3" version="0.1">
+  <description>Predict ORFs in prokaryotic genomes (knowledge-based)</description>
+  <command> 
+	tigr-glimmer glimmer3 -o$overlaplen -g$genlen -t$thresh $linear $seqInput $icmInput $prediction 2> /dev/null;
+	cp $prediction".predict" $prediction;
+	cp $prediction".detail"  $detailed;
+	rm $prediction".predict";
+	rm $prediction".detail";
+  </command>
+  <inputs>
+	<param name="seqInput" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? See TIP below"/>
+	<param name="icmInput" type="data"  label="interpolated context model (ICM)" help="Dataset missing? See TIP below"/>
+	<param name="overlaplen" type="integer" value="50" label="Set maximum overlap length. Overlaps this short or shorter are ignored."/>
+	<param name="genlen" type="integer" value="90" label="Set minimum gene length."/>
+	<param name="thresh" type="integer" value="30" label="Set threshold score for calling as gene. If the in-frame score >= N, then the region is given a number and considered a potential gene."/>
+	<param name="linear" type="boolean" truevalue="-l" falsevalue="" checked="true" label="Assume linear rather than circular genome, i.e., no wraparound"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="prediction" />
+    <data format="tabular" name="detailed" />
+  </outputs>
+  <tests>
+	<!-- Output names must match the <data> names declared in <outputs> (prediction, detailed). -->
+	<test>
+		<param name="seqInput" value='glimmer3/seqTest.fa'/>
+		<param name="icmInput" value='glimmer3/icmTest.icm'/>
+		<param name="overlaplen" value="50"/>
+		<param name="genlen"  value="90" />
+		<param name="thresh"  value="30"  />
+		<param name="linear" value="-l" />
+		<output name="prediction" file='glimmer3/output1Test.dat' />
+		<output name="detailed" file='glimmer3/output2Test.dat'/>
+	</test>
+ </tests>
+
+<help>
+
+
+**What it does**
+
+	This is the main program that makes gene predictions based on an interpolated context model (ICM).
+	The ICM can be generated either with a de novo prediction (see glimmer Overview) or with extracted CDS from related organisms.
+
+-----
+
+**TIP** To extract CDS from a GenBank file use the tool *Extract ORF from a GenBank file*.
+
+-----
+
+**Glimmer Overview**
+
+::
+
+**************		**************		**************		**************		
+*            *		*	     *		*            *		*            *
+* long-orfs  *  ===>	*   Extract  *	===>	* build-icm  *  ===>	*  glimmer3  *	
+*            *		*	     *		*	     *  	*	     *	
+**************		**************		**************		**************
+
+**Example**
+
+* input::
+	
+	-Genome Sequence
+
+	CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
+	GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
+	GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
+	TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
+	TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
+	GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
+	ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
+	AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
+	CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
+	TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
+	AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
+	GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
+	AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
+	CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
+	AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
+	GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
+	.....
+
+
+	- interpolated context model (ICM) 	92: glimmer3-build-icm on data 89
+	- maximum overlap length		50
+	- minimum gene length. 			90
+	- threshold score			30
+	- linear	 			True
+
+* output:: 
+
+	.predict file
+	>CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7.
+	orf00001    40137       52  +2     8.68
+	orf00004      603       34  -1     2.91
+	orf00006     1289     1095  -3     3.16
+	orf00007     1555     1391  -2     2.33
+	orf00008     1809     1576  -1     1.02
+	orf00010     1953     2066  +3     3.09
+	orf00011     2182     2304  +1     0.89
+	orf00013     2390     2521  +2     0.60
+	orf00018     2570     3073  +2     2.54
+	orf00020     3196     3747  +1     2.91
+	orf00022     3758     4000  +2     0.83
+	orf00023     4399     4157  -2     1.31
+	orf00025     4463     4759  +2     2.92
+	orf00026     4878     5111  +3     0.78
+	orf00027     5468     5166  -3     1.64
+	orf00029     5590     5832  +1     0.29
+	orf00032     6023     6226  +2     6.02
+	orf00033     6217     6336  +1     3.09
+	........
+	
+
+	.details file
+	>CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7.
+	Sequence length = 40222
+
+		   ----- Start -----           --- Length ----  ------------- Scores -------------
+	 ID  Frame   of Orf  of Gene     Stop   of Orf of Gene      Raw InFrm F1 F2 F3 R1 R2 R3 NC
+	0001    +2    40137    40137       52      135     135     9.26    96  - 96  -  -  3  -  0
+	0002    +1       58       64      180      120     114     5.01    69 69  -  - 30  -  -  0
+		+3      300      309      422      120     111    -0.68    20  -  - 20 38  -  - 41
+		+3      423      432      545      120     111     1.29    21  - 51 21 13  -  8  5
+	0003    +2      401      416      595      192     177     2.51    93  - 93  -  5  -  -  1
+	0004    -1      645      552       34      609     516     2.33    99  -  -  - 99  -  -  0
+		+1      562      592      762      198     168    -2.54     1  1  -  -  -  -  - 98
+		+1      763      772      915      150     141    -1.34     1  1  -  -  -  - 86 11
+		+3      837      846     1007      168     159     1.35    28  - 50 28  -  - 17  3
+	0005    -3     1073      977      654      417     321     0.52    84  -  -  -  -  - 84 15
+	0006    -3     1373     1319     1095      276     222     3.80    99  -  -  -  -  - 99  0
+	0007    -2     1585     1555     1391      192     162     2.70    98  -  -  -  - 98  -  1
+	0008    -1     1812     1809     1576      234     231     1.26    94  -  -  - 94  -  -  5
+	0009    +2     1721     1730     1945      222     213     0.68    80  - 80  -  -  -  - 19
+	.....
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+
+ </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer_acgt_content.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,33 @@
+<tool id="glimmer_acgt_content" name="ACGT Content" version="0.1">
+	<description>ACGT-content of windows in each sequence.</description>
+	<!-- window-acgt is a compiled Glimmer program, so no interpreter is set; it reads FASTA on stdin and writes its table to stdout. -->
+	<command>tigr-glimmer window-acgt $p $input_win_len $input_win_skip &lt; $input_fasta &gt; $output</command>
+	<inputs>
+		<param name="input_fasta" type="data" format="fasta" label="Genome Sequence"/>
+		<param name="input_win_len" type="integer" value="10" label="The width of windows."/>
+		<param name="input_win_skip" type="integer" value="10" label="The number of positions between windows to report."/>
+		<param name="p" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Output percentages instead of counts."/>
+	</inputs>
+	<outputs>
+		<data name="output" format="tabular"/>
+	</outputs>
+	<tests>
+		<test>
+			<param name="input_fasta" value="streptomyces_coelicolor.dna" />
+			<output name="output" file="fasta_tool_convert_from_dna.out" />
+		</test>
+	</tests>
+	<help>
+
+**What it does**
+
+This tool calculates the ACGT content of a given sequence using a sliding window.
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer_orf_to_seq.py	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+"""
+Input: DNA Fasta File + Glimmer ORF File
+Output: ORF-sequence as FASTA file
+Author: Bjoern Gruening
+"""
+import sys, os
+import Bio.SeqIO
+from Bio.SeqRecord import SeqRecord as record
+
+def __main__():
+    if len(sys.argv) >= 4:
+        glimmerfile = open(sys.argv [1], "r")
+        sequence = open(sys.argv[2])
+        orf2seq = open(sys.argv [3], "w")
+    else:
+        print "Missing input values."
+        sys.exit()
+
+    fastafile = Bio.SeqIO.parse(sequence, "fasta")
+
+    sequences = {}
+    for entry in fastafile:
+        sequences[entry.description] = entry
+
+    for line in glimmerfile:
+        if line.startswith('>'):
+            print line[1:].strip()
+            entry = sequences[ line[1:].strip() ]
+        else:
+            orf_start = int(line[8:17])
+            orf_end = int(line[18:26])
+
+            orf_name = line[0:8]
+            if orf_start <= orf_end:
+                new_line = record(entry.seq[orf_start-1 : orf_end], id = orf_name, description = entry.description).format("fasta") + "\n"
+            else:         
+                new_line = record(entry.seq[orf_end-1 : orf_start].reverse_complement(), id = orf_name, description = entry.description).format("fasta") + "\n"
+            orf2seq.write(new_line)
+
+    orf2seq.close()
+    glimmerfile.close()
+
+if __name__ == "__main__" :
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer_orf_to_seq.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,26 @@
+<!-- NOTE(review): the tool id contains a space; kept unchanged so existing references still resolve. Confirm whether it should be renamed. -->
+<tool id="glimmer orf-to-sequence" name="ORF to Sequence" version="0.1">
+	<description>assigns ORF to its DNA sequence</description>
+	<!-- Argument order of glimmer_orf_to_seq.py: Glimmer ORF file, genome FASTA, output FASTA. -->
+	<command interpreter="python">glimmer_orf_to_seq.py $glimmer_orfs $input_fasta $output </command>
+	<inputs>
+		<param name="input_fasta" type="data" format="fasta" label="Genome Sequence"/>
+		<param name="glimmer_orfs" type="data" format="tabular" label="Define Glimmer-ORFs"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="fasta"/>
+	</outputs>
+	<tests>
+		<test>
+		</test>
+	</tests>
+	<help>
+
+**What it does**
+
+
+This tool extracts from a genome the DNA sequences of all genes predicted by Glimmer3.
+
+
+-----
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer_predict.py	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+"""
+Input: DNA Fasta File
+Output: Tabular
+Return Tabular File with predicted ORF's
+Bjoern Gruening
+"""
+import sys, os
+import tempfile
+from random import Random
+import string
+import subprocess
+import shutil
+
+def __main__():
+    
+    genome_seq_file = sys.argv[1]
+    outfile_path = sys.argv[2]
+    outfile_ext_path = sys.argv[3]
+
+
+    tag = ''.join(Random().sample(string.letters+string.digits, 12))
+    tempdir = tempfile.gettempdir()
+
+    #longorfs = os.path.join(tempdir, tag + ".longorf")
+    trainingset = os.path.join(tempdir, tag + ".train")
+    icm = os.path.join(tempdir, tag + ".icm")
+
+    longorfs = tempfile.NamedTemporaryFile()
+    trainingset = tempfile.NamedTemporaryFile()
+    icm = tempfile.NamedTemporaryFile()
+
+
+    #glimmeropts = "-o0 -g110 -t30 -l"
+    glimmeropts = "-o%s -g%s -t%s" % (sys.argv[4], sys.argv[5], sys.argv[6])
+    if sys.argv[7] == "true":
+        glimmeropts += " -l"
+
+
+    """
+        1. Find long, non-overlapping orfs to use as a training set
+    """
+    subprocess.Popen(["tigr-glimmer", "long-orfs", "-n", "-t", "1.15",
+        genome_seq_file, "-"], stdout = longorfs, 
+        stderr = subprocess.PIPE).communicate()
+
+    """
+        2. Extract the training sequences from the genome file
+    """
+    subprocess.Popen(["tigr-glimmer", "extract", "-t",
+        genome_seq_file, longorfs.name], stdout=trainingset, 
+        stderr=subprocess.PIPE).communicate()
+
+    """
+        3. Build the icm from the training sequences
+    """
+
+    # the "-" parameter is used to redirect the output to stdout
+    subprocess.Popen(["tigr-glimmer", "build-icm", "-r", "-"], 
+        stdin=open(trainingset.name), stdout = icm, 
+        stderr=subprocess.PIPE).communicate()
+
+    """
+        Run Glimmer3
+    """
+    b = subprocess.Popen(["tigr-glimmer", "glimmer3", glimmeropts, 
+        genome_seq_file, icm.name, os.path.join(tempdir, tag)], 
+        stdout = subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+
+    #shutil.copyfileobj
+    shutil.copyfile( os.path.join(tempdir, tag + ".predict"), outfile_path )
+    shutil.copyfile( os.path.join(tempdir, tag + ".detail"), outfile_ext_path )
+
+
+
+if __name__ == "__main__" :
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer3/glimmer_predict.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,69 @@
+<tool id="glimmer_predict_standalone" name="Glimmer3" version="0.1">
+	<description>Predict ORFs in prokaryotic genomes (not knowledge-based)</description>
+	<!-- Argument order of glimmer_predict.py: genome FASTA, predict output, detail output, then the -o, -g, -t values and the linear flag ("true"/"false"). -->
+	<command interpreter="python">glimmer_predict.py $input $output $output_ext $o $g $t $l</command>
+	<inputs>
+		<param name="input" type="data" format="fasta" label="Genome Sequence"/>
+		<param name="o" type="integer" value="0" label="Set maximum overlap length. Overlaps this short or shorter are ignored."/>
+		<param name="g" type="integer" value="110" label="Set minimum gene length."/>
+		<param name="t" type="integer" value="30" label="Set threshold score for calling as gene. If the in-frame score >= N, then the region is given a number and considered a potential gene."/>
+		<param name="l" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Assume linear rather than circular genome, i.e., no wraparound"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="tabular"/>
+		<data name="output_ext" format="tabular"/>
+	</outputs>
+	<tests>
+		<test>
+			<param name="input" value="streptomyces_coelicolor.dna" />
+			<output name="output" file="fasta_tool_convert_from_dna.out" />
+		</test>
+	</tests>
+	<help>
+
+**What it does**
+
+This tool predicts open reading frames (ORFs) in a given DNA sequence. It is not knowledge-based: the gene model is trained on long ORFs found in the input sequence itself.
+If you want to use a trained Glimmer3 model instead, use the knowledge-based version and supply or generate a training set.
+
+-----
+
+**Example**
+
+Suppose you have the following DNA formatted sequences::
+
+    >SQ   Sequence 8667507 BP; 1203558 A; 3121252 C; 3129638 G; 1213059 T; 0 other;
+    cccgcggagcgggtaccacatcgctgcgcgatgtgcgagcgaacacccgggctgcgcccg
+    ggtgttgcgctcccgctccgcgggagcgctggcgggacgctgcgcgtcccgctcaccaag
+    cccgcttcgcgggcttggtgacgctccgtccgctgcgcttccggagttgcggggcttcgc
+    cccgctaaccctgggcctcgcttcgctccgccttgggcctgcggcgggtccgctgcgctc
+    ccccgcctcaagggcccttccggctgcgcctccaggacccaaccgcttgcgcgggcctgg
+
+Running this tool will produce this::
+
+    >SQ   Sequence 8667507 BP; 1203558 A; 3121252 C; 3129638 G; 1213059 T; 0 other;
+    orf00001      577      699  +1     5.24
+    orf00003      800     1123  +2     5.18
+    orf00004     1144     3813  +1    10.62
+    orf00006     3857     6220  +2     6.07
+    orf00007     6226     7173  +1     1.69
+    orf00008     7187     9307  +2     8.95
+    orf00009     9424    10410  +1     8.29
+    orf00010    10515    11363  +3     7.00
+    orf00011    11812    11964  +1     2.80
+    orf00012    12360    13457  +3     4.80
+    orf00013    14379    14044  -1     7.41
+    orf00015    15029    14739  -3    12.43
+    orf00016    15066    15227  +3     1.91
+    orf00020    16061    15351  -3     2.83
+    orf00021    17513    17391  -3     2.20
+    orf00023    17529    17675  +3     0.11
+
+
+-------
+
+**References**
+
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007).
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.txt	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,73 @@
+Galaxy wrapper for Glimmer3
+===========================
+
+This wrapper is copyright 2012 by Björn Grüning.
+
+This is a wrapper for the command line tool of Glimmer3.
+http://www.cbcb.umd.edu/software/glimmer/
+
+Glimmer is a system for finding genes in microbial DNA, 
+especially the genomes of bacteria, archaea, and viruses. 
+Glimmer (Gene Locator and Interpolated Markov ModelER) uses interpolated 
+Markov models (IMMs) to identify the coding regions and distinguish them from noncoding DNA. 
+
+A.L. Delcher, D. Harmon, S. Kasif, O. White, and S.L. Salzberg. Improved microbial gene identification with GLIMMER, Nucleic Acids Research 27:23 (1999), 4636-4641.
+S. Salzberg, A. Delcher, S. Kasif, and O. White. Microbial gene identification using interpolated Markov models, Nucleic Acids Research 26:2 (1998), 544-548.
+A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics (Advance online version) (2007). 
+
+
+
+Installation
+============
+
+To install Glimmer3, please download Glimmer3 from 
+
+http://www.cbcb.umd.edu/software/glimmer/glimmer302.tar.gz
+
+and follow the installation instructions. You can also use packages from your distribution like
+
+http://packages.debian.org/stable/science/tigr-glimmer
+
+
+To install the wrapper, copy the glimmer3 folder into the Galaxy tools
+folder and modify the tool_conf.xml file to make the tools available to Galaxy.
+For example:
+
+<tool file="gene_prediction/tools/glimmer3/glimmer3-main-wrapper.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer_predict.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer_orf_to_seq.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer2gff.xml" />
+<tool file="gene_prediction/tools/glimmer3/gbk2orf.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer_acgt_content.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer3-build-icm-wrapper.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer3-extract-wrapper.xml" />
+<tool file="gene_prediction/tools/glimmer3/glimmer3-long-orfs-wrapper.xml" />
+
+
+History
+=======
+
+v0.1 - Initial public release
+
+
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_conf.xml	Wed Jan 11 09:34:45 2012 -0500
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<toolbox>
+  <section name="Gene Prediction" id="gene_prediction">
+    <!-- Registers the nine Glimmer3 wrapper XML files of this changeset under one "Gene Prediction" section. -->
+    <!-- NOTE(review): the file paths presumably resolve against Galaxy's tools directory; confirm for the target installation. -->
+
+    <label text="Glimmer3 (prokaryotes)" id="glimmer3_prokaryotes" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer3-main-wrapper.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer_predict.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer_orf_to_seq.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer2gff.xml" />
+        <tool file="gene_prediction/tools/glimmer3/gbk2orf.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer_acgt_content.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer3-build-icm-wrapper.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer3-extract-wrapper.xml" />
+        <tool file="gene_prediction/tools/glimmer3/glimmer3-long-orfs-wrapper.xml" />
+
+  </section>
+</toolbox>