annotate phrap/jgi_assembly.py @ 0:f9e4e6fe0e73 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:41:56 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
1 """
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
2 Assembly classes
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
3 """
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
4
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
5 import data
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
6 import logging
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
7 import re
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
8 import string
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
9 from cgi import escape
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
10 from galaxy.datatypes.metadata import MetadataElement
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
11 from galaxy.datatypes import metadata
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
12 import galaxy.model
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
13 from galaxy import util
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
14 from sniff import *
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
15
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
16 log = logging.getLogger(__name__)
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
17
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
class Assembly(data.Text):
    """Base datatype for assembly files, derived from ``data.Text``.

    Declares two metadata elements, ``contigs`` and ``reads``; both default
    to 0 and are flagged read-only, not visible and optional.
    """

    # Metadata element holding the number of contigs in the dataset.
    MetadataElement(
        name="contigs",
        default=0,
        desc="Number of contigs",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0
    )
    # Metadata element holding the number of reads in the dataset.
    MetadataElement(
        name="reads",
        default=0,
        desc="Number of reads",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0
    )
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
24
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
25
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
class Ace(Assembly):
    """Class describing an assembly Ace file"""

    file_ext = "ace"

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Count the contigs and reads in the dataset and store them as metadata.

        The file at ``dataset.file_name`` is read line by line.  Blank lines
        and lines starting with ``#`` are ignored.  A line whose first
        whitespace-delimited field is ``CO`` counts as one contig and a line
        whose first field is ``RD`` counts as one read.  The totals are
        written to ``dataset.metadata.contigs`` and ``dataset.metadata.reads``.

        ``overwrite`` and any extra keyword arguments are accepted but are
        not used by this method.
        """
        contigs = 0
        reads = 0
        # open() replaces the Python-2-only file() builtin; try/finally makes
        # sure the handle is released even if reading fails part-way.
        fh = open(dataset.file_name)
        try:
            for line in fh:
                line = line.strip()
                if not line or line.startswith('#'):
                    # Don't count blank or comment lines
                    continue
                # Compare the whole first field rather than a two-character
                # prefix, so free text such as "COMMENT ..." or a sequence
                # line starting with the IUPAC codes "RD" is not miscounted.
                tag = line.split(None, 1)[0]
                if tag == 'CO':
                    contigs += 1
                elif tag == 'RD':
                    reads += 1
        finally:
            fh.close()
        dataset.metadata.contigs = contigs
        dataset.metadata.reads = reads

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek and blurb text shown for the dataset.

        For a dataset that has not been purged, the peek is taken from the
        file and the blurb is the contig count (or the file size when the
        contig count is zero or unset).  For a purged dataset both fields
        are set to fixed placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.contigs:
                dataset.blurb = "%s contigs" % util.commaify(str(dataset.metadata.contigs))
            else:
                dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        r"""
        Determines whether the file is in ace format

        An ace file contains these sections
        AS \d+ \d+

        CO \S+ \d+ \d+ \d+ \w
        [atcgATCGN\*]+

        BQ
        [\d\s]+

        AF \S+ [CU] \-?\d+

        BS \d+ \d+ \S+

        RD \S+ \d+ \d+ \d+
        [ATCGN\*]+

        QA \d+ \d+ \d+ \d+
        DS .*

        Currently we only check if file begins with AS

        Returns True when the first line, stripped of surrounding
        whitespace, starts with 'AS '; returns False otherwise, including
        when the file cannot be opened or read.

        >>> fname = get_test_fname( 'genome.ace' )
        >>> Ace().sniff( fname )
        True
        >>> fname = get_test_fname( 'genome.fasta' )
        >>> Ace().sniff( fname )
        False
        """
        try:
            fh = open(filename)
            try:
                first_line = fh.readline()
            finally:
                # Always release the handle, even if readline() raised.
                fh.close()
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply "not ace".
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return False
        return first_line.strip().startswith('AS ')
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
109
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
110 class Velveth(Assembly):
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
111 composite_type='basic'
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
112 file_ext = "txt"
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
113
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
114 def __init__(self,**kwd):
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
115 Assembly.__init__(self,**kwd)
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
116 self.add_composite_file('Roadmap')
f9e4e6fe0e73 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
117 self.add_composite_file('Sequences')