Mercurial > repos > edward-kirton > velvet_toolsuite
annotate velvet/jgi_assembly.py @ 0:4afe13ac23b6 default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:52:16 -0400 |
parents | |
children |
rev | line source |
---|---|
0
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
1 """ |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
2 Assembly classes |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
3 """ |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
4 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
5 import data |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
6 import logging |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
7 import re |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
8 import string |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
9 from cgi import escape |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
10 from galaxy.datatypes.metadata import MetadataElement |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
11 from galaxy.datatypes import metadata |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
12 import galaxy.model |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
13 from galaxy import util |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
14 from sniff import * |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
15 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
16 log = logging.getLogger(__name__) |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
17 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
class Assembly( data.Text ):
    """Text-based datatype representing a sequence assembly."""

    # Metadata elements declared for assembly datasets: the number of
    # contigs and the number of reads, both defaulting to 0.
    MetadataElement(
        name="contigs",
        default=0,
        desc="Number of contigs",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0
    )
    MetadataElement(
        name="reads",
        default=0,
        desc="Number of reads",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0
    )
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
24 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
25 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
class Ace(Assembly):
    """Datatype for an assembly stored as an ACE file."""

    file_ext = "ace"

    def set_meta( self, dataset, overwrite=True, **kwd ):
        """
        Count the contigs and reads in the dataset and store them as metadata.

        A line is counted as a contig when it begins with the 'CO ' record
        tag and as a read when it begins with the 'RD ' record tag.  Lines
        beginning with '#' are skipped.  The totals are written to
        dataset.metadata.contigs and dataset.metadata.reads.

        ``overwrite`` and ``kwd`` are accepted for interface compatibility
        and are not used by this implementation.
        """
        contigs = 0
        reads = 0
        # open() rather than the Python-2-only file() builtin; try/finally
        # guarantees the handle is released even if reading fails.
        fh = open( dataset.file_name )
        try:
            for line in fh:
                line = line.strip()
                if line.startswith( '#' ):
                    # Don't count comment lines
                    continue
                # Match the tag *with* its trailing space so that other
                # lines that merely begin with the same letters (e.g. a
                # 'COMMENT{' tag line, or a sequence line starting with the
                # ambiguity codes 'RD') are not miscounted.
                if line.startswith( 'CO ' ):
                    contigs += 1
                elif line.startswith( 'RD ' ):
                    reads += 1
        finally:
            fh.close()
        dataset.metadata.contigs = contigs
        dataset.metadata.reads = reads

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set the dataset's peek text and blurb.

        For a purged dataset both fields are set to fixed placeholder
        messages.  Otherwise the peek is taken from the head of the file and
        the blurb is the contig count when that metadata is non-zero, or the
        file size when it is not.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
        if dataset.metadata.contigs:
            dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
        else:
            dataset.blurb = data.nice_size( dataset.get_size() )

    def sniff( self, filename ):
        r"""
        Determines whether the file is in ace format

        An ace file contains these sections
        AS \d+ \d+

        CO \S+ \d+ \d+ \d+ \w
        [atcgATCGN\*]+

        BQ
        [\d\s]+

        AF \S+ [CU] \-?\d+

        BS \d+ \d+ \S+

        RD \S+ \d+ \d+ \d+
        [ATCGN\*]+

        QA \d+ \d+ \d+ \d+
        DS .*

        Currently we only check if file begins with AS

        Returns True when the first line (stripped) starts with 'AS ',
        and False otherwise, including when the file cannot be read.

        >>> fname = get_test_fname( 'genome.ace' )
        >>> Ace().sniff( fname )
        True
        >>> fname = get_test_fname( 'genome.fasta' )
        >>> Ace().sniff( fname )
        False
        """
        try:
            fh = open( filename )
            try:
                first_line = fh.readline().strip()
            finally:
                # Always release the handle, even if readline() raised.
                fh.close()
        except Exception:
            # Sniffing is best-effort: a file that cannot be opened or
            # decoded is simply reported as "not ACE".  Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            return False
        return first_line.startswith( 'AS ' )
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
109 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
110 class Velveth(Assembly): |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
111 composite_type='basic' |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
112 file_ext = "txt" |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
113 |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
114 def __init__(self,**kwd): |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
115 Assembly.__init__(self,**kwd) |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
116 self.add_composite_file('Roadmap') |
4afe13ac23b6
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff
changeset
|
117 self.add_composite_file('Sequences') |