Mercurial > repos > edward-kirton > velvet_toolsuite
comparison velvet/jgi_assembly.py @ 0:4afe13ac23b6 default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:52:16 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4afe13ac23b6 |
---|---|
1 """ | |
2 Assembly classes | |
3 """ | |
4 | |
5 import data | |
6 import logging | |
7 import re | |
8 import string | |
9 from cgi import escape | |
10 from galaxy.datatypes.metadata import MetadataElement | |
11 from galaxy.datatypes import metadata | |
12 import galaxy.model | |
13 from galaxy import util | |
14 from sniff import * | |
15 | |
16 log = logging.getLogger(__name__) | |
17 | |
class Assembly( data.Text ):
    """Class describing an assembly"""

    # Metadata elements declared for every assembly datatype; both are
    # read-only, hidden, optional counters that default to 0.
    MetadataElement(
        name="contigs",
        default=0,
        desc="Number of contigs",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0,
    )
    MetadataElement(
        name="reads",
        default=0,
        desc="Number of reads",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0,
    )
24 | |
25 | |
class Ace(Assembly):
    """Class describing an assembly Ace file"""

    file_ext = "ace"

    def set_meta( self, dataset, overwrite=True, **kwd ):
        """
        Set the number of assembled contigs and read sequences in dataset.

        Scans ``dataset.file_name`` line by line, counting ``CO`` (contig)
        and ``RD`` (read) records, and stores the totals in
        ``dataset.metadata.contigs`` and ``dataset.metadata.reads``.

        ``overwrite`` and ``**kwd`` are accepted for interface compatibility
        and are not used by this method.
        """
        contigs = 0
        reads = 0
        # open() instead of the Python-2-only file() builtin; the context
        # manager guarantees the handle is closed even if reading fails.
        with open( dataset.file_name ) as fh:
            for line in fh:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    # Don't count blank or comment lines
                    continue
                # Compare the whole leading token rather than a bare prefix,
                # so lines such as "COMMENT{" inside tag blocks are not
                # miscounted as contig records.
                tag = line.split( None, 1 )[0]
                if tag == 'CO':
                    contigs += 1
                elif tag == 'RD':
                    reads += 1
        dataset.metadata.contigs = contigs
        dataset.metadata.reads = reads

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set ``dataset.peek`` and ``dataset.blurb``.

        For a purged dataset both are set to fixed placeholder messages.
        Otherwise the peek comes from ``data.get_file_peek`` and the blurb is
        the contig count when the metadata has one, else the file size.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
        if dataset.metadata.contigs:
            dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
        else:
            dataset.blurb = data.nice_size( dataset.get_size() )

    def sniff( self, filename ):
        r"""
        Determines whether the file is in ace format

        An ace file contains these sections
        AS \d+ \d+

        CO \S+ \d+ \d+ \d+ \w
        [atcgATCGN\*]+

        BQ
        [\d\s]+

        AF \S+ [CU] \-?\d+

        BS \d+ \d+ \S+

        RD \S+ \d+ \d+ \d+
        [ATCGN\*]+

        QA \d+ \d+ \d+ \d+
        DS .*

        Currently we only check if file begins with AS

        >>> fname = get_test_fname( 'genome.ace' )
        >>> Ace().sniff( fname )
        True
        >>> fname = get_test_fname( 'genome.fasta' )
        >>> Ace().sniff( fname )
        False
        """
        try:
            with open( filename ) as fh:
                first_line = fh.readline().strip()
        except Exception:
            # Sniffing is best-effort: an unreadable or undecodable file is
            # simply "not ace". Unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit propagate.
            return False
        return first_line.startswith( 'AS ' )
109 | |
110 class Velveth(Assembly): | |
111 composite_type='basic' | |
112 file_ext = "txt" | |
113 | |
114 def __init__(self,**kwd): | |
115 Assembly.__init__(self,**kwd) | |
116 self.add_composite_file('Roadmap') | |
117 self.add_composite_file('Sequences') |