annotate blast.py @ 9:2bda64d39931 draft

Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
author peterjc
date Wed, 26 Nov 2014 06:55:48 -0500
parents de11e1a921c4
children 5482a8cd0f36
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
1 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
2 BlastXml class
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
3 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
4
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
5 from galaxy.datatypes.data import get_file_peek
9
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
6 from galaxy.datatypes.data import Text, Data, GenericAsn1
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
7 from galaxy.datatypes.xml import GenericXml
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
8 from galaxy.datatypes.metadata import MetadataElement
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
9
8
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
10 from time import sleep
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
11 import os
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
12 import logging
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
13
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
14 log = logging.getLogger(__name__)
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
15
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
class BlastXml( GenericXml ):
    """NCBI Blast XML Output data"""
    file_ext = "blastxml"

    # Exact first line required of a BLAST XML file.
    _XML_DECLARATION = '<?xml version="1.0"?>'
    # Accepted second lines: the DOCTYPE with an absolute or a relative DTD.
    _DOCTYPES = (
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">',
    )

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text on ``dataset``.

        A purged dataset gets fixed placeholder messages instead of a
        peek read from its file.
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'NCBI Blast XML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is blastxml

        The first three lines (ignoring surrounding whitespace) must be the
        XML declaration, one of the accepted BlastOutput DOCTYPE lines, and
        the ``<BlastOutput>`` opening tag.

        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'interval.interval' )
        >>> BlastXml().sniff( fname )
        False
        """
        # The context manager closes the handle on every exit path,
        # including when a read raises.
        with open(filename) as handle:
            if handle.readline().strip() != self._XML_DECLARATION:
                return False
            if handle.readline().strip() not in self._DOCTYPES:
                return False
            return handle.readline().strip() == '<BlastOutput>'

    def merge(split_files, output_file):
        """Merge several BLAST XML files into ``output_file``.

        A single input file is delegated to ``Text.merge``. Otherwise the
        header of the first file (everything up to and including its first
        ``<Iteration>`` line) is written once, followed by the iteration
        content of every file up to its ``</BlastOutput_iterations>`` line,
        and finally the closing tags.

        Raises ValueError if no files are given, if a file is missing or
        empty (each checked again after a one second pause), if a file does
        not start like a BLAST XML file, if its header exceeds 10000
        characters, or if the first 300 characters of its header differ
        from those of the first file. For several of these failures the
        header read so far is written to ``output_file`` for diagnosis.
        """
        if len(split_files) == 1:
            #For one file only, use base class method (move/copy)
            return Text.merge(split_files, output_file)
        if not split_files:
            raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
                             % (split_files, output_file))
        out = open(output_file, "w")
        try:
            old_header = None
            for index, f in enumerate(split_files):
                if not os.path.isfile(f):
                    log.warning("BLAST XML file %s missing, retry in 1s..." % f)
                    sleep(1)
                    if not os.path.isfile(f):
                        log.error("BLAST XML file %s missing" % f)
                        raise ValueError("BLAST XML file %s missing" % f)
                h = open(f)
                try:
                    header = h.readline()
                    if not header:
                        #Retry, could be transient error with networked file system...
                        #The output stays open so a successful retry can
                        #still be merged.
                        h.close()
                        log.warning("BLAST XML file %s empty, retry in 1s..." % f)
                        sleep(1)
                        h = open(f)
                        header = h.readline()
                        if not header:
                            log.error("BLAST XML file %s was empty" % f)
                            raise ValueError("BLAST XML file %s was empty" % f)
                    if header.strip() != BlastXml._XML_DECLARATION:
                        out.write(header) #for diagnosis
                        raise ValueError("%s is not an XML file!" % f)
                    line = h.readline()
                    header += line
                    if line.strip() not in BlastXml._DOCTYPES:
                        out.write(header) #for diagnosis
                        raise ValueError("%s is not a BLAST XML file!" % f)
                    #Accumulate the header up to the first <Iteration> line
                    while True:
                        line = h.readline()
                        if not line:
                            out.write(header) #for diagnosis
                            raise ValueError("BLAST XML file %s ended prematurely" % f)
                        header += line
                        if "<Iteration>" in line:
                            break
                        if len(header) > 10000:
                            #Something has gone wrong, don't load too much into memory!
                            #Write what we have to the merged file for diagnostics
                            out.write(header)
                            raise ValueError("BLAST XML file %s has too long a header!" % f)
                    if "<BlastOutput>" not in header:
                        raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
                    if index == 0:
                        #Test by position rather than by path, so a path
                        #listed twice does not emit the header again.
                        out.write(header)
                        old_header = header
                    elif old_header[:300] != header[:300]:
                        #Enough to check <BlastOutput_program> and <BlastOutput_version> match
                        raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n" \
                                         % (split_files[0], f, old_header[:300], header[:300]))
                    else:
                        out.write(" <Iteration>\n")
                    for line in h:
                        if "</BlastOutput_iterations>" in line:
                            break
                        #TODO - Increment <Iteration_iter-num> and if required automatic query names
                        #like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
                        out.write(line)
                finally:
                    #Closing an already closed handle is harmless.
                    h.close()
            out.write(" </BlastOutput_iterations>\n")
            out.write("</BlastOutput>\n")
        finally:
            out.close()
    merge = staticmethod(merge)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
148
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
149
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class _BlastDb(object):
    """Base class for BLAST database datatype."""

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        ``is_multi_byte`` is accepted but not used here. A purged dataset
        gets fixed placeholder messages instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = "BLAST database (multiple files)"
            dataset.blurb = "BLAST database (multiple files)"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Create HTML content, used for displaying peek.

        Returns ``dataset.peek``, or a generic description if reading that
        attribute fails.
        """
        try:
            return dataset.peek
        except Exception:
            return "BLAST database (multiple files)"

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Documented as an old display method, but still gets called via tests etc

        This allows us to format the data shown in the central pane via the "eye" icon.

        Requests for a named file other than "index" are passed unchanged
        to ``Data.display_data``. Otherwise a small HTML page is returned
        whose body is the text of ``data.file_name``, falling back to a
        title chosen from ``self.file_ext`` if that text is empty or the
        file cannot be read.
        """
        if filename is not None and filename != "index":
            #Change nothing - important for the unit tests to access child files:
            return Data.display_data(self, trans, data, preview, filename,
                                     to_ext, size, offset, **kwd)
        if self.file_ext == "blastdbn":
            title = "This is a nucleotide BLAST database"
        elif self.file_ext == "blastdbp":
            title = "This is a protein BLAST database"
        elif self.file_ext == "blastdbd":
            title = "This is a domain BLAST database"
        else:
            #Error?
            title = "This is a BLAST database."
        msg = ""
        try:
            #Try to use any text recorded in the dummy index file:
            with open(data.file_name, "rU") as handle:
                msg = handle.read().strip()
        except Exception:
            #Deliberately best-effort: any failure just means the title
            #is shown in place of the index text.
            pass
        if not msg:
            msg = title
        #Galaxy assumes HTML for the display of composite datatypes,
        return "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>" % (title, msg)

    def merge(split_files, output_file):
        """Merge BLAST databases (not implemented for now)."""
        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
    #Static, like BlastXml.merge, so it can be called on the class or an instance.
    merge = staticmethod(merge)

    def split( cls, input_datasets, subdir_generator_function, split_params):
        """Split a BLAST database (not implemented for now).

        Returns None when ``split_params`` is None (nothing requested);
        otherwise raises NotImplementedError.
        """
        if split_params is None:
            return None
        raise NotImplementedError("Can't split BLAST databases")
    split = classmethod(split)
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
211
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
212
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class BlastNucDb( _BlastDb, Data ):
    """Composite datatype describing the files of a nucleotide BLAST database."""
    file_ext = 'blastdbn'
    allow_datatype_change = False
    composite_type = 'basic'

    def __init__(self, **kwd):
        """Initialise via ``Data`` and register each component file in order."""
        Data.__init__(self, **kwd)

        # Mandatory binary components:
        #   nhr - sequence headers
        #   nin - index file
        #   nsq - nucleotide sequences
        for suffix in ('nhr', 'nin', 'nsq'):
            self.add_composite_file('blastdb.' + suffix, is_binary=True)

        # Optional text component: alias ( -gi_mask option of makeblastdb)
        self.add_composite_file('blastdb.nal', is_binary=False, optional=True)

        # Optional binary components:
        #   nhd - sorted sequence hash values ( -hash_index option of makeblastdb)
        #   nhi - index of sequence hash values ( -hash_index option of makeblastdb)
        #   nnd - sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
        #   nni - index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
        #   nog - OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
        #   nsd - sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
        #   nsi - index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
        #   shd - MegaBLAST index superheader (-old_style_index false option of makembindex)
        optional_binary = ('nhd', 'nhi', 'nnd', 'nni', 'nog', 'nsd', 'nsi', 'shd')
        for suffix in optional_binary:
            self.add_composite_file('blastdb.' + suffix, is_binary=True, optional=True)

        # Not registered (left disabled, as before):
        #  - 'blastdb.00.idx', the first volume of the MegaBLAST index generated
        #    by makembindex; one entry would be needed per index volume, with
        #    filename extensions like '.01.idx', '.02.idx', etc.
        #  - 'blastdb.naa' (index of a WriteDB column for e.g. mask data),
        #    'blastdb.nab' (data of a WriteDB column) and 'blastdb.nac'
        #    (multiple byte order for a WriteDB column); these three would be
        #    repeated for each WriteDB column, with filename extensions like
        #    ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
239
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
240
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class BlastProtDb(_BlastDb, Data):
    """Protein BLAST database datatype, registered as a composite of binary files."""
    file_ext = 'blastdbp'
    allow_datatype_change = False
    composite_type = 'basic'

    def __init__(self, **kwd):
        """Initialise the datatype and register its composite component files.

        All keyword arguments are forwarded unchanged to ``Data.__init__``.
        """
        Data.__init__(self, **kwd)

        # Component file comments are as in BlastNucDb except where noted.
        # Components registered without the ``optional`` flag.
        required_components = (
            'blastdb.phr',
            'blastdb.pin',
            'blastdb.psq',  # protein sequences
        )
        # Components registered with ``optional=True``.
        optional_components = (
            'blastdb.phd',
            'blastdb.phi',
            'blastdb.pnd',
            'blastdb.pni',
            'blastdb.pog',
            'blastdb.psd',
            'blastdb.psi',
        )

        # Registration order is kept: required components first, then optional.
        for component in required_components:
            self.add_composite_file(component, is_binary=True)
        for component in optional_components:
            self.add_composite_file(component, is_binary=True, optional=True)

        # WriteDB column files are deliberately not registered:
        # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
        # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
        # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
        # The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
9
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
264
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
265
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
266 class BlastDomainDb( _BlastDb, Data ):
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
267 """Class for domain BLAST database files."""
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
268 file_ext = 'blastdbd'
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
269 allow_datatype_change = False
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
270 composite_type = 'basic'
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
271
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
272 def __init__(self, **kwd):
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
273 Data.__init__(self, **kwd)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
274 self.add_composite_file('blastdb.phr', is_binary=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
275 self.add_composite_file('blastdb.pin', is_binary=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
276 self.add_composite_file('blastdb.psq', is_binary=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
277 self.add_composite_file('blastdb.freq', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
278 self.add_composite_file('blastdb.loo', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
279 self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
280 self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
281 self.add_composite_file('blastdb.rps', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
282 self.add_composite_file('blastdb.aux', is_binary=True, optional=True)