comparison velvet/jgi_assembly.py @ 0:4afe13ac23b6 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:52:16 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4afe13ac23b6
1 """
2 Assembly classes
3 """
4
5 import data
6 import logging
7 import re
8 import string
9 from cgi import escape
10 from galaxy.datatypes.metadata import MetadataElement
11 from galaxy.datatypes import metadata
12 import galaxy.model
13 from galaxy import util
14 from sniff import *
15
16 log = logging.getLogger(__name__)
17
class Assembly(data.Text):
    """Base text datatype for sequence-assembly files.

    Declares two metadata elements, ``contigs`` and ``reads``.  Both are
    read-only, hidden, optional, and default to 0; subclasses inherit them.
    """

    # Metadata elements
    MetadataElement(
        name="contigs",
        default=0,
        desc="Number of contigs",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0,
    )
    MetadataElement(
        name="reads",
        default=0,
        desc="Number of reads",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0,
    )
24
25
class Ace(Assembly):
    """Datatype for an assembly stored in ACE format."""

    file_ext = "ace"

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Count the contigs and reads in the ACE file and store them as metadata.

        The file at ``dataset.file_name`` is scanned line by line.  Lines whose
        first whitespace-delimited token is ``CO`` are counted as contigs and
        lines whose first token is ``RD`` are counted as reads; blank lines and
        lines starting with ``#`` are ignored.  The totals are written to
        ``dataset.metadata.contigs`` and ``dataset.metadata.reads``.

        ``overwrite`` and ``**kwd`` are accepted for interface compatibility
        and are not used here.
        """
        contigs = 0
        reads = 0
        # open() inside a with-block: the handle is always closed, and unlike
        # the old file() builtin it also exists on Python 3.
        with open(dataset.file_name) as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith('#'):
                    # Nothing to count on blank or comment lines
                    continue
                # Compare the whole first token rather than a bare prefix so
                # that a sequence line which merely begins with the letters
                # "RD" (both are IUPAC ambiguity codes) is not counted.
                tag = line.split(None, 1)[0]
                if tag == 'CO':
                    contigs += 1
                elif tag == 'RD':
                    reads += 1
        dataset.metadata.contigs = contigs
        dataset.metadata.reads = reads

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        For a purged dataset both are set to fixed placeholder messages.
        Otherwise the peek comes from ``data.get_file_peek`` and the blurb is
        the contig count when that metadata value is non-zero, falling back to
        the human-readable file size.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
        if dataset.metadata.contigs:
            dataset.blurb = "%s contigs" % util.commaify(str(dataset.metadata.contigs))
        else:
            dataset.blurb = data.nice_size(dataset.get_size())

    def sniff(self, filename):
        r"""
        Determines whether the file is in ace format

        An ace file contains these sections
        AS \d+ \d+

        CO \S+ \d+ \d+ \d+ \w
        [atcgATCGN\*]+

        BQ
        [\d\s]+

        AF \S+ [CU] \-?\d+

        BS \d+ \d+ \S+

        RD \S+ \d+ \d+ \d+
        [ATCGN\*]+

        QA \d+ \d+ \d+ \d+
        DS .*

        Currently we only check if file begins with AS

        Returns True when the first line, stripped of surrounding whitespace,
        starts with 'AS '; returns False otherwise, including when the file
        cannot be opened or read.

        >>> fname = get_test_fname( 'genome.ace' )
        >>> Ace().sniff( fname )
        True
        >>> fname = get_test_fname( 'genome.fasta' )
        >>> Ace().sniff( fname )
        False
        """
        try:
            # The with-block closes the handle even if readline() raises.
            with open(filename) as handle:
                first_line = handle.readline().strip()
        except Exception:
            # Sniffing is best-effort: an unreadable or undecodable file is
            # simply "not ace".  Unlike a bare except, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            return False
        return first_line.startswith('AS ')
109
110 class Velveth(Assembly):
111 composite_type='basic'
112 file_ext = "txt"
113
114 def __init__(self,**kwd):
115 Assembly.__init__(self,**kwd)
116 self.add_composite_file('Roadmap')
117 self.add_composite_file('Sequences')