annotate blast.py @ 11:01b38f20197e draft default tip

v0.0.21 - Updated citation information with GigaScience paper
author peterjc
date Fri, 04 Sep 2015 07:10:04 -0400
parents 5482a8cd0f36
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
1 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
2 NCBI BLAST datatypes: the BlastXml class and the BLAST database classes
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
3 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
4
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
5 from galaxy.datatypes.data import get_file_peek
9
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
6 from galaxy.datatypes.data import Text, Data, GenericAsn1
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
7 from galaxy.datatypes.xml import GenericXml
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
8 from galaxy.datatypes.metadata import MetadataElement
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
9
8
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
10 from time import sleep
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
11 import os
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
12 import logging
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
13
de11e1a921c4 Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents: 6
diff changeset
14 log = logging.getLogger(__name__)
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
15
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
class BlastXml( GenericXml ):
    """NCBI Blast XML Output data"""
    file_ext = "blastxml"

    # First line expected in every BLAST XML file.
    _XML_DECLARATION = '<?xml version="1.0"?>'
    # Accepted second lines: DOCTYPE with the absolute or the relative DTD location.
    _DOCTYPE_LINES = (
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">',
    )

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'NCBI Blast XML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is blastxml

        Only the first three lines are inspected: the XML declaration, the
        BlastOutput DOCTYPE and the opening <BlastOutput> tag.

        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'interval.interval' )
        >>> BlastXml().sniff( fname )
        False
        """
        # The context manager closes the handle on every path, including
        # when a read raises.
        with open(filename) as handle:
            if handle.readline().strip() != self._XML_DECLARATION:
                return False
            if handle.readline().strip() not in self._DOCTYPE_LINES:
                return False
            return handle.readline().strip() == '<BlastOutput>'

    @staticmethod
    def _open_non_empty(path):
        """Open ``path`` and read its first line, retrying once after one second.

        Returns a ``(handle, first_line)`` tuple; the caller must close the handle.
        Raises ValueError if the file is still missing or still empty after the retry.
        """
        if not os.path.isfile(path):
            log.warning("BLAST XML file %s missing, retry in 1s..." % path)
            sleep(1)
        if not os.path.isfile(path):
            log.error("BLAST XML file %s missing" % path)
            raise ValueError("BLAST XML file %s missing" % path)
        handle = open(path)
        first_line = handle.readline()
        if not first_line:
            handle.close()
            #Retry, could be transient error with networked file system...
            log.warning("BLAST XML file %s empty, retry in 1s..." % path)
            sleep(1)
            handle = open(path)
            first_line = handle.readline()
            if not first_line:
                handle.close()
                log.error("BLAST XML file %s was empty" % path)
                raise ValueError("BLAST XML file %s was empty" % path)
        return handle, first_line

    @staticmethod
    def merge(split_files, output_file):
        """Merge several BLAST XML files into a single BLAST XML file.

        The header (everything up to and including the first <Iteration> line)
        of the first file is written once; for each later file only its
        iterations are appended, after checking that the first 300 characters
        of its header match the first file's. A single input file is delegated
        to Text.merge.

        Raises ValueError if no files are given, or if an input file is
        missing, empty, truncated, not BLAST XML, or has a mismatching header.
        On several of these failures the offending header is written to
        output_file first, to help diagnosis.
        """
        if len(split_files) == 1:
            #For one file only, use base class method (move/copy)
            return Text.merge(split_files, output_file)
        if not split_files:
            raise ValueError("Given no BLAST XML files, %r, to merge into %s"
                             % (split_files, output_file))
        old_header = None
        # The output stays open for the whole merge (it must not be closed
        # before the empty-file retry) and is closed on every exit path.
        with open(output_file, "w") as out:
            # Use the position, not the path, to spot the first file so a
            # path listed twice is still handled as a later file.
            for index, f in enumerate(split_files):
                h, header = BlastXml._open_non_empty(f)
                try:
                    if header.strip() != BlastXml._XML_DECLARATION:
                        out.write(header) #for diagnosis
                        raise ValueError("%s is not an XML file!" % f)
                    line = h.readline()
                    header += line
                    if line.strip() not in BlastXml._DOCTYPE_LINES:
                        out.write(header) #for diagnosis
                        raise ValueError("%s is not a BLAST XML file!" % f)
                    while True:
                        line = h.readline()
                        if not line:
                            out.write(header) #for diagnosis
                            raise ValueError("BLAST XML file %s ended prematurely" % f)
                        header += line
                        if "<Iteration>" in line:
                            break
                        if len(header) > 10000:
                            #Something has gone wrong, don't load too much into memory!
                            #Write what we have to the merged file for diagnostics
                            out.write(header)
                            raise ValueError("BLAST XML file %s has too long a header!" % f)
                    if "<BlastOutput>" not in header:
                        raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
                    if index == 0:
                        out.write(header)
                        old_header = header
                    elif old_header[:300] != header[:300]:
                        #Enough to check <BlastOutput_program> and <BlastOutput_version> match
                        raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n"
                                         % (split_files[0], f, old_header[:300], header[:300]))
                    else:
                        # NOTE(review): leading whitespace inside the XML literals
                        # written here and below is copied as displayed - confirm
                        # against the repository copy.
                        out.write(" <Iteration>\n")
                    for line in h:
                        if "</BlastOutput_iterations>" in line:
                            break
                        #TODO - Increment <Iteration_iter-num> and if required automatic query names
                        #like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
                        out.write(line)
                finally:
                    h.close()
            out.write(" </BlastOutput_iterations>\n")
            out.write("</BlastOutput>\n")
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
147
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
148
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class _BlastDb(object):
    """Base class for BLAST database datatype."""

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text."""
        if not dataset.dataset.purged:
            dataset.peek = "BLAST database (multiple files)"
            dataset.blurb = "BLAST database (multiple files)"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Create HTML content, used for displaying peek."""
        try:
            return dataset.peek
        except Exception:
            # Best effort: fall back to the generic description if the peek
            # cannot be read from the dataset.
            return "BLAST database (multiple files)"

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Documented as an old display method, but still gets called via tests etc

        This allows us to format the data shown in the central pane via the "eye" icon.

        Requests for any file other than the index are passed unchanged to
        Data.display_data. Otherwise a small HTML page is returned whose body is
        the text of the index file, or a title derived from self.file_ext when
        that file cannot be read or is empty.
        """
        if filename is not None and filename != "index":
            #Change nothing - important for the unit tests to access child files:
            return Data.display_data(self, trans, data, preview, filename,
                                     to_ext, size, offset, **kwd)
        titles = {
            "blastdbn": "This is a nucleotide BLAST database",
            "blastdbp": "This is a protein BLAST database",
            "blastdbd": "This is a domain BLAST database",
        }
        # Unknown extension (error?): use the generic title.
        title = titles.get(self.file_ext, "This is a BLAST database.")
        msg = ""
        try:
            #Try to use any text recorded in the dummy index file:
            with open(data.file_name, "rU") as handle:
                msg = handle.read().strip()
        except Exception:
            # Deliberately best effort: an unreadable index file just means
            # the title is shown instead.
            pass
        if not msg:
            msg = title
        #Galaxy assumes HTML for the display of composite datatypes,
        return "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>" % (title, msg)

    @staticmethod
    def merge(split_files, output_file):
        """Merge BLAST databases (not implemented for now).

        Always raises NotImplementedError. Declared as a static method so it
        can be called on the class or on an instance with just these two arguments.
        """
        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")

    @classmethod
    def split( cls, input_datasets, subdir_generator_function, split_params):
        """Split a BLAST database (not implemented for now).

        Returns None when split_params is None; otherwise raises NotImplementedError.
        """
        if split_params is None:
            return None
        raise NotImplementedError("Can't split BLAST databases")
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
210
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
211
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class BlastNucDb( _BlastDb, Data ):
    """Class for nucleotide BLAST database files."""
    composite_type = 'basic'
    allow_datatype_change = False
    file_ext = 'blastdbn'

    def __init__(self, **kwd):
        """Register the component files of a nucleotide BLAST database.

        Keyword arguments are passed straight through to Data.__init__.
        """
        Data.__init__(self, **kwd)

        # Files registered without the optional flag.
        mandatory_binary = (
            'blastdb.nhr',  # sequence headers
            'blastdb.nin',  # index file
            'blastdb.nsq',  # nucleotide sequences
        )
        for component in mandatory_binary:
            self.add_composite_file(component, is_binary=True)

        # The alias file is the only non-binary component.
        # alias ( -gi_mask option of makeblastdb)
        self.add_composite_file('blastdb.nal', is_binary=False, optional=True)

        optional_binary = (
            'blastdb.nhd',  # sorted sequence hash values ( -hash_index option of makeblastdb)
            'blastdb.nhi',  # index of sequence hash values ( -hash_index option of makeblastdb)
            'blastdb.nnd',  # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
            'blastdb.nni',  # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
            'blastdb.nog',  # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
            'blastdb.nsd',  # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
            'blastdb.nsi',  # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
            'blastdb.shd',  # MegaBLAST index superheader (-old_style_index false option of makembindex)
        )
        for component in optional_binary:
            self.add_composite_file(component, is_binary=True, optional=True)

        # Not registered (these were commented out in the original):
        #  - 'blastdb.00.idx', the first volume of the MegaBLAST index generated
        #    by makembindex; it would need repeating for each index volume, with
        #    filename extensions like '.01.idx', '.02.idx', etc.
        #  - 'blastdb.naa' (index of a WriteDB column for e.g. mask data),
        #    'blastdb.nab' (data of a WriteDB column) and 'blastdb.nac'
        #    (multiple byte order for a WriteDB column); these three would need
        #    repeating for each WriteDB column, with filename extensions like
        #    ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
238
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
239
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class BlastProtDb(_BlastDb, Data):
    """Composite datatype describing a protein BLAST database.

    Every component file is registered under the shared ``blastdb`` base
    name when the datatype is instantiated.
    """
    file_ext = 'blastdbp'
    allow_datatype_change = False
    composite_type = 'basic'

    def __init__(self, **kwd):
        """Set up the datatype and declare its component files.

        All keyword arguments are handed straight to ``Data.__init__``.
        """
        Data.__init__(self, **kwd)

        # Unless noted here, each component has the same meaning as the
        # corresponding entry declared by BlastNucDb.

        # Components registered without the ``optional`` flag.
        core_files = (
            'blastdb.phr',
            'blastdb.pin',
            'blastdb.psq',  # protein sequences
        )
        # Components registered with ``optional=True``.
        extra_files = (
            'blastdb.phd',
            'blastdb.phi',
            'blastdb.pnd',
            'blastdb.pni',
            'blastdb.pog',
            'blastdb.psd',
            'blastdb.psi',
        )

        # Registration order is kept exactly as listed above.
        for component in core_files:
            self.add_composite_file(component, is_binary=True)
        for component in extra_files:
            self.add_composite_file(component, is_binary=True, optional=True)

        # WriteDB column files are intentionally not registered:
        # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
        # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
        # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
        # The last 3 lines should be repeated for each WriteDB column, with
        # filename extensions like ('.pba', '.pbb', '.pbc'),
        # ('.pca', '.pcb', '.pcc'), etc.
9
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
263
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
264
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
265 class BlastDomainDb( _BlastDb, Data ):
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
266 """Class for domain BLAST database files."""
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
267 file_ext = 'blastdbd'
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
268 allow_datatype_change = False
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
269 composite_type = 'basic'
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
270
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
271 def __init__(self, **kwd):
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
272 Data.__init__(self, **kwd)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
273 self.add_composite_file('blastdb.phr', is_binary=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
274 self.add_composite_file('blastdb.pin', is_binary=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
275 self.add_composite_file('blastdb.psq', is_binary=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
276 self.add_composite_file('blastdb.freq', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
277 self.add_composite_file('blastdb.loo', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
278 self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
279 self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
280 self.add_composite_file('blastdb.rps', is_binary=True, optional=True)
2bda64d39931 Uploaded v0.0.19, adds blastdbp and pssm-asn1 datatypes.
peterjc
parents: 8
diff changeset
281 self.add_composite_file('blastdb.aux', is_binary=True, optional=True)