Mercurial > repos > edward-kirton > velvet_toolsuite
annotate velvet/jgi_assembly.py @ 0:4afe13ac23b6 default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
| author | edward-kirton |
|---|---|
| date | Tue, 07 Jun 2011 17:52:16 -0400 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
1 """ |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
2 Assembly classes |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
3 """ |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
4 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
5 import data |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
6 import logging |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
7 import re |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
8 import string |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
9 from cgi import escape |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
10 from galaxy.datatypes.metadata import MetadataElement |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
11 from galaxy.datatypes import metadata |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
12 import galaxy.model |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
13 from galaxy import util |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
14 from sniff import * |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
15 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
16 log = logging.getLogger(__name__) |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
17 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
class Assembly( data.Text ):
    """
    Base datatype for assembly files; a text datatype that declares the
    two metadata elements shared by the assembly formats in this module.
    """

    # Add metadata elements.
    # Both counters default to 0, are read-only, hidden and optional.
    MetadataElement(
        name="contigs",
        default=0,
        desc="Number of contigs",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0,
    )
    MetadataElement(
        name="reads",
        default=0,
        desc="Number of reads",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0,
    )
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
24 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
25 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
class Ace(Assembly):
    """Class describing an assembly Ace file"""

    file_ext = "ace"

    def set_meta( self, dataset, overwrite=True, **kwd ):
        """
        Count the contig (CO) and read (RD) records in the dataset's file and
        store the totals in dataset.metadata.contigs and dataset.metadata.reads.

        dataset   -- object exposing ``file_name`` (path of the file to scan)
                     and ``metadata`` (receives the two counters).
        overwrite -- accepted for interface compatibility; not used here.
        kwd       -- ignored.

        A record header is recognised by its first whitespace-delimited
        token being exactly ``CO`` or ``RD`` and being followed by at least
        one more field.  A plain prefix test would also count ``COMMENT{``
        lines found inside tag blocks, or sequence lines that happen to start
        with the ambiguity codes ``RD``.
        """
        contigs = 0
        reads = 0
        # open() instead of the Python-2-only file() builtin; the with-block
        # guarantees the handle is closed even if reading fails part-way.
        with open( dataset.file_name ) as handle:
            for line in handle:
                fields = line.split( None, 1 )
                if len( fields ) < 2:
                    # Blank lines and single-token lines (e.g. sequence data)
                    # cannot be record headers.
                    continue
                tag = fields[0]
                # Comment lines start with '#', so their first token never
                # equals a record tag and they are skipped implicitly.
                if tag == 'CO':
                    contigs += 1
                elif tag == 'RD':
                    reads += 1
        dataset.metadata.contigs = contigs
        dataset.metadata.reads = reads

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set dataset.peek and dataset.blurb.

        For a purged dataset both are set to fixed placeholder messages.
        Otherwise the peek comes from data.get_file_peek() and the blurb is
        the contig count when that metadata value is non-zero, falling back
        to the human-readable file size.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
        if dataset.metadata.contigs:
            dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
        else:
            dataset.blurb = data.nice_size( dataset.get_size() )

    def sniff( self, filename ):
        r"""
        Determines whether the file is in ace format

        An ace file contains these sections
        AS \d+ \d+

        CO \S+ \d+ \d+ \d+ \w
        [atcgATCGN\*]+

        BQ
        [\d\s]+

        AF \S+ [CU] \-?\d+

        BS \d+ \d+ \S+

        RD \S+ \d+ \d+ \d+
        [ATCGN\*]+

        QA \d+ \d+ \d+ \d+
        DS .*

        Currently we only check if file begins with AS

        >>> fname = get_test_fname( 'genome.ace' )
        >>> Ace().sniff( fname )
        True
        >>> fname = get_test_fname( 'genome.fasta' )
        >>> Ace().sniff( fname )
        False
        """
        try:
            with open( filename ) as handle:
                first_line = handle.readline()
        except Exception:
            # Sniffing is best-effort: a file that cannot be opened or read
            # is simply "not ACE".  Catching Exception rather than using a
            # bare except lets KeyboardInterrupt/SystemExit propagate.
            return False
        return first_line.strip().startswith( 'AS ' )
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
109 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
110 class Velveth(Assembly): |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
111 composite_type='basic' |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
112 file_ext = "txt" |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
113 |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
114 def __init__(self,**kwd): |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
115 Assembly.__init__(self,**kwd) |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
116 self.add_composite_file('Roadmap') |
|
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
117 self.add_composite_file('Sequences') |
