annotate velvet/jgi_assembly.py @ 0:4afe13ac23b6 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:52:16 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
1 """
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
2 Assembly classes
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
3 """
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
4
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
5 import data
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
6 import logging
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
7 import re
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
8 import string
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
9 from cgi import escape
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
10 from galaxy.datatypes.metadata import MetadataElement
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
11 from galaxy.datatypes import metadata
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
12 import galaxy.model
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
13 from galaxy import util
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
14 from sniff import *
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
15
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
16 log = logging.getLogger(__name__)
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
17
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
class Assembly( data.Text ):
    """Text-based datatype describing an assembly."""

    # Metadata elements declared for assembly datasets: a contig count and a
    # read count.  Both default to 0, are read-only and are hidden.
    MetadataElement(
        name="contigs",
        default=0,
        desc="Number of contigs",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0
    )
    MetadataElement(
        name="reads",
        default=0,
        desc="Number of reads",
        readonly=True,
        visible=False,
        optional=True,
        no_value=0
    )
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
24
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
25
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
class Ace(Assembly):
    """Class describing an assembly Ace file"""

    file_ext = "ace"

    def set_meta( self, dataset, overwrite=True, **kwd ):
        """
        Count the contigs and reads in the Ace file and store them as metadata.

        A line counts as a contig when its first whitespace-delimited field is
        exactly ``CO`` and as a read when that field is exactly ``RD``.  The
        totals are written to ``dataset.metadata.contigs`` and
        ``dataset.metadata.reads``.

        ``overwrite`` and ``**kwd`` are accepted but not used by this method.
        """
        contigs = 0
        reads = 0
        # open() replaces the Python-2-only file() builtin, and the with block
        # guarantees the handle is closed even if reading fails part-way.
        with open( dataset.file_name ) as fh:
            for line in fh:
                fields = line.split( None, 1 )
                if not fields:
                    # Blank line.
                    continue
                # Compare the whole first field rather than a bare prefix so
                # that lines such as "COMMENT{" (inside consed tag blocks) or
                # sequence lines starting with the IUPAC codes "RD" are not
                # miscounted.  '#' comment lines never match either tag, so
                # they need no separate check.
                tag = fields[0]
                if tag == 'CO':
                    contigs += 1
                elif tag == 'RD':
                    reads += 1
        dataset.metadata.contigs = contigs
        dataset.metadata.reads = reads

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set the peek and blurb text shown for the dataset.

        For a purged dataset both are set to fixed placeholder messages.
        Otherwise the peek comes from ``data.get_file_peek`` (``is_multi_byte``
        is passed straight through) and the blurb is the contig count when
        that metadata is non-zero, or the file size otherwise.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
        if dataset.metadata.contigs:
            dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
        else:
            dataset.blurb = data.nice_size( dataset.get_size() )

    def sniff( self, filename ):
        r"""
        Determines whether the file is in ace format

        An ace file contains these sections
        AS \d+ \d+

        CO \S+ \d+ \d+ \d+ \w
        [atcgATCGN\*]+

        BQ
        [\d\s]+

        AF \S+ [CU] \-?\d+

        BS \d+ \d+ \S+

        RD \S+ \d+ \d+ \d+
        [ATCGN\*]+

        QA \d+ \d+ \d+ \d+
        DS .*

        Currently we only check if file begins with AS

        Returns True when the first line, with surrounding whitespace
        stripped, starts with 'AS '; returns False otherwise, including when
        the file cannot be opened or read.

        >>> fname = get_test_fname( 'genome.ace' )
        >>> Ace().sniff( fname )
        True
        >>> fname = get_test_fname( 'genome.fasta' )
        >>> Ace().sniff( fname )
        False
        """
        try:
            # The with block closes the handle on every path, including when
            # readline() itself raises.
            with open( filename ) as fh:
                first_line = fh.readline()
        except ( IOError, OSError, UnicodeError ):
            # Unreadable or undecodable files are simply "not ace".  Unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return first_line.strip().startswith( 'AS ' )
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
109
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
110 class Velveth(Assembly):
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
111 composite_type='basic'
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
112 file_ext = "txt"
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
113
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
114 def __init__(self,**kwd):
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
115 Assembly.__init__(self,**kwd)
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
116 self.add_composite_file('Roadmap')
4afe13ac23b6 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
edward-kirton
parents:
diff changeset
117 self.add_composite_file('Sequences')