view test-data/arabidopsis_augustus_utr-off_singlestrand-on_mea-on.gtf @ 2:a77a4e9921e0 draft

Uploaded
author bgruening
date Wed, 06 May 2015 14:31:02 -0400
parents af307d3285c5
children
line wrap: on
line source

# This output was generated with AUGUSTUS (version 2.7).
# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de)
# and Oliver Keller (keller@cs.uni-goettingen.de).
# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
# Using native and syntenically mapped cDNA alignments to improve de novo gene finding
# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
# No extrinsic information on sequences given.
# arabidopsis version. Using default transition matrix.
# We have hints for 0 sequences and for 0 of the sequences in the input set.
#
# ----- prediction on sequence number 1 (length = 2066, name = arabidopsis) -----
#
# Constraints/Hints:
# (none)
# Predicted genes for sequence number 1 on both strands
# start gene g1
arabidopsis	AUGUSTUS	gene	775	1851	0	+	.	g1
arabidopsis	AUGUSTUS	transcript	775	1851	.	+	.	g1.t1
arabidopsis	AUGUSTUS	start_codon	775	777	.	+	0	transcript_id "g1.t1"; gene_id "g1";
arabidopsis	AUGUSTUS	CDS	775	1851	0.99	+	0	transcript_id "g1.t1"; gene_id "g1";
arabidopsis	AUGUSTUS	stop_codon	1849	1851	.	+	0	transcript_id "g1.t1"; gene_id "g1";
# protein sequence = [MDLSLAPTTTTSSDQEQDRDQELTSNIGASSSSGPSGNNNNLPMMMIPPPEKEHMFDKVVTPSDVGKLNRLVIPKQHA
# ERYFPLDSSNNQNGTLLNFQDRNGKMWRFRYSYWNSSQSYVMTKGWSRFVKEKKLDAGDIVSFQRGIGDESERSKLYIDWRHRPDMSLVQAHQFGNFG
# FNFNFPTTSQYSNRFHPLPEYNSVPIHRGLNIGNHQRSYYNTQRQEFVGYGYGNLAGRCYYTGSPLDHRNIVGSEPLVIDSVPVVPGRLTPVMLPPLP
# PPPSTAGKRLRLFGVNMECGNDYNQQEESWLVPRGEIGASSSSSSALRLNLSTDHDDDNDDGDDGDDDQFAKKGKSSLSLNFNP]
# end gene g1
###
# start gene g2
arabidopsis	AUGUSTUS	gene	841	1661	.	-	.	g2
arabidopsis	AUGUSTUS	transcript	841	1661	.	-	.	g2.t1
arabidopsis	AUGUSTUS	stop_codon	841	843	.	-	0	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	intron	1024	1101	0.75	-	.	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	intron	1193	1325	0.03	-	.	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	intron	1416	1512	0.85	-	.	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	CDS	841	1023	0.87	-	0	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	CDS	1102	1192	0.78	-	1	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	CDS	1326	1415	0.05	-	1	transcript_id "g2.t1"; gene_id "g2";
arabidopsis	AUGUSTUS	CDS	1513	1661	0.35	-	0	transcript_id "g2.t1"; gene_id "g2";
# protein sequence = [SLPHSIFTPKSLSLFPAVEGGGGSGGNITGVNLPGTTGTESITNGSDPTMNVGDFRCLSPGESGRSYILAMDEICWNI
# EKDNVSCIELFLFDETAPSFGHNVALARVPIRREIPLSVLFRDHESVEFSDVAWCYHFVEHVFFLRRWNHHHRKVVVVSAGTGAAACSDVGG]
# end gene g2
###
#
# ----- prediction on sequence number 2 (length = 1802, name = arabidopsis2) -----
#
# Constraints/Hints:
# (none)
# Predicted genes for sequence number 2 on both strands
# start gene g3
arabidopsis2	AUGUSTUS	gene	97	1600	.	-	.	g3
arabidopsis2	AUGUSTUS	transcript	97	1600	.	-	.	g3.t1
arabidopsis2	AUGUSTUS	stop_codon	97	99	.	-	0	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	intron	349	521	0.73	-	.	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	intron	1210	1333	0.36	-	.	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	intron	1452	1560	0.54	-	.	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	CDS	97	348	0.73	-	0	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	CDS	522	1209	0.6	-	1	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	CDS	1334	1451	0.44	-	2	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	CDS	1561	1600	0.42	-	0	transcript_id "g3.t1"; gene_id "g3";
arabidopsis2	AUGUSTUS	start_codon	1598	1600	.	-	0	transcript_id "g3.t1"; gene_id "g3";
# protein sequence = [MVKLVFSDLSCAFASANSTASSTNSSIFFSIDFSSASFASLFSIKYFFNAAIGITELIGISQGVSSESISLTFNQGWT
# FSSSSSSDSFFSDFPDLQDIHTVNMNTRHPKCRTFLPNLGVRSGSQVRHPNGPQVILDNKDNRQFIESSHVETLEELSVVTSSISEKGDSDIITILFQ
# DFPPVLGAKGSTSGDRNAFTNKSKATKHVVFFGEHVHGSTLASAASSNLAEELAHNSTSRDTFAEGMDMVTVGTNDRVRLGKELDKASRNSLLAIVQV
# NKTKHLASAGIIGGLLLLDNFLDRDDGRSTGGVGVIESAKGEGTGGSEERRRRSQSDREQETILMGSMQRNRGSELSGSESRRHCYQWKPQ]
# end gene g3
###
# start gene g4
arabidopsis2	AUGUSTUS	gene	121	1582	0	+	.	g4
arabidopsis2	AUGUSTUS	transcript	121	1582	.	+	.	g4.t1
arabidopsis2	AUGUSTUS	start_codon	121	123	.	+	0	transcript_id "g4.t1"; gene_id "g4";
arabidopsis2	AUGUSTUS	intron	352	434	0.97	+	.	transcript_id "g4.t1"; gene_id "g4";
arabidopsis2	AUGUSTUS	intron	1213	1304	1	+	.	transcript_id "g4.t1"; gene_id "g4";
arabidopsis2	AUGUSTUS	CDS	121	351	0.72	+	0	transcript_id "g4.t1"; gene_id "g4";
arabidopsis2	AUGUSTUS	CDS	435	1212	0.97	+	0	transcript_id "g4.t1"; gene_id "g4";
arabidopsis2	AUGUSTUS	CDS	1305	1582	1	+	2	transcript_id "g4.t1"; gene_id "g4";
arabidopsis2	AUGUSTUS	stop_codon	1580	1582	.	+	0	transcript_id "g4.t1"; gene_id "g4";
# protein sequence = [MATAFAPTKLTATVPLHGSHENRLLLPIRLAPPSSFLGSTRSLSLRRLNHSNATRRSPVVSVQEVVKEKQSTNNTSLL
# ITKEEGLELYEDMILGRSFEDMCAQMYYRGKMFGFVHLYNGQEAVSTGFIKLLTKSDSVVSTYRDHVHALSKGVSARAVMSELFGKVTGCCRGQGGSM
# HMFSKEHNMLGGFAFIGEGIPVATGAAFSSKYRREVLKQDCDDVTVAFFGDGTCNNGQFFECLNMAALYKLPIIFVVENNLWAIGMSHLRATSDPEIW
# KKGPAFGMPGVHVDGMDVLKVREVAKEAVTRARRGEGPTLVECETYRFRGHSLADPDELRDAAEKAKYAARDPIAALKKYLIENKLAKEAELKSIEKK
# IDELVEEAVEFADASPQPGRSQLLENVFADPKGFGIGPDGRYRCEDPKFTEGTAQV]
# end gene g4
###
# command line:
# ./bin/augustus --species=arabidopsis --UTR=off --singlestrand=true --mea=1 /home/bag/projects/github/galaxytools/augustus/test-data/arabidopsis_augustus.fa