annotate blast.py @ 6:a04cf51612f1 draft

Uploaded v0.0.16, MIT License, development moved to GitHub, nucleotide database definition aware of MegaBLAST index superheader
author peterjc
date Mon, 23 Sep 2013 09:56:10 -0400
parents b3a3ba0c1d47
children de11e1a921c4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
1 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
2 BlastXml class
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
3 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
4
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
5 from galaxy.datatypes.data import get_file_peek
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
6 from galaxy.datatypes.data import Text, Data
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
7 from galaxy.datatypes.xml import GenericXml
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
8 from galaxy.datatypes.metadata import MetadataElement
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
9
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
10
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
class BlastXml( GenericXml ):
    """NCBI Blast XML Output data"""
    file_ext = "blastxml"

    # First line expected in every BLAST XML file.
    _XML_DECLARATION = '<?xml version="1.0"?>'
    # Accepted forms of the second line (DTD given by URL or by bare filename).
    _DOCTYPE_LINES = (
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">',
    )

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'NCBI Blast XML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is blastxml

        Only the first three lines are inspected: the XML declaration, the
        BlastOutput DOCTYPE and the opening <BlastOutput> tag.

        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'interval.interval' )
        >>> BlastXml().sniff( fname )
        False
        """
        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path,
        # including when a read raises.
        try:
            if handle.readline().strip() != self._XML_DECLARATION:
                return False
            if handle.readline().strip() not in self._DOCTYPE_LINES:
                return False
            return handle.readline().strip() == '<BlastOutput>'
        finally:
            handle.close()

    def merge(split_files, output_file):
        """Merging multiple XML files is non-trivial and must be done in subclasses.

        A single input is delegated to Text.merge.  For several inputs the
        header of the first file (everything up to and including its first
        <Iteration> line) is written once, followed by the iterations of every
        file, followed by the closing tags.

        Raises ValueError when no files are given, or when an input is empty,
        is not BLAST XML, ends before its first <Iteration>, has a header
        longer than 10000 characters, or has a header whose first 300
        characters differ from those of the first file.  For several of these
        failures the header read so far is written to output_file first, to
        help diagnose the problem.
        """
        if len(split_files) == 1:
            #For one file only, use base class method (move/copy)
            return Text.merge(split_files, output_file)
        if not split_files:
            raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
                             % (split_files, output_file))
        out = open(output_file, "w")
        try:
            old_header = None
            # Use the position, not the path, to identify the first file so a
            # path repeated later in the list is not treated as the first.
            for index, f in enumerate(split_files):
                h = open(f)
                try:
                    header = h.readline()
                    if not header:
                        raise ValueError("BLAST XML file %s was empty" % f)
                    if header.strip() != BlastXml._XML_DECLARATION:
                        out.write(header) #for diagnosis
                        raise ValueError("%s is not an XML file!" % f)
                    line = h.readline()
                    header += line
                    if line.strip() not in BlastXml._DOCTYPE_LINES:
                        out.write(header) #for diagnosis
                        raise ValueError("%s is not a BLAST XML file!" % f)
                    # Accumulate the header up to and including the first
                    # <Iteration> line.
                    while True:
                        line = h.readline()
                        if not line:
                            out.write(header) #for diagnosis
                            raise ValueError("BLAST XML file %s ended prematurely" % f)
                        header += line
                        if "<Iteration>" in line:
                            break
                        if len(header) > 10000:
                            #Something has gone wrong, don't load too much into memory!
                            #Write what we have to the merged file for diagnostics
                            out.write(header)
                            raise ValueError("BLAST XML file %s has too long a header!" % f)
                    if "<BlastOutput>" not in header:
                        raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
                    if index == 0:
                        out.write(header)
                        old_header = header
                    elif old_header[:300] != header[:300]:
                        #Enough to check <BlastOutput_program> and <BlastOutput_version> match
                        raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n" \
                                         % (split_files[0], f, old_header[:300], header[:300]))
                    else:
                        # The header (which swallowed this file's <Iteration>
                        # line) is not written for later files, so re-emit
                        # the tag.
                        # NOTE(review): leading whitespace inside this literal
                        # and the closing-tag literal below is reproduced as
                        # seen; confirm against the upstream file.
                        out.write(" <Iteration>\n")
                    for line in h:
                        if "</BlastOutput_iterations>" in line:
                            break
                        #TODO - Increment <Iteration_iter-num> and if required automatic query names
                        #like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
                        out.write(line)
                finally:
                    h.close()
            out.write(" </BlastOutput_iterations>\n")
            out.write("</BlastOutput>\n")
        finally:
            out.close()
    merge = staticmethod(merge)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
130
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
131
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class _BlastDb(object):
    """Base class for BLAST database datatype.

    Provides the peek/display text shared by the BLAST database datatypes
    and rejects merging and splitting, which are not implemented.
    """

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        is_multi_byte is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "BLAST database (multiple files)"
            dataset.blurb = "BLAST database (multiple files)"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Create HTML content, used for displaying peek."""
        try:
            return dataset.peek
        except Exception:
            # Best effort: fall back to the generic description when the
            # peek cannot be read, without swallowing KeyboardInterrupt or
            # SystemExit as a bare except would.
            return "BLAST database (multiple files)"

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the "eye" icon.
        All arguments are ignored; a fixed description is returned.
        """
        return "This is a BLAST database."

    def get_mime(self):
        """Returns the mime type of the datatype (pretend it is text for peek)"""
        return 'text/plain'

    def merge(split_files, output_file):
        """Merge BLAST databases (not implemented for now).

        Always raises NotImplementedError.
        """
        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
    # The signature has no self, so wrap it so that calls through either the
    # class or an instance reach it with the intended arguments.
    merge = staticmethod(merge)

    def split( cls, input_datasets, subdir_generator_function, split_params):
        """Split a BLAST database (not implemented for now).

        Returns None when split_params is None; otherwise raises
        NotImplementedError.
        """
        if split_params is None:
            return None
        raise NotImplementedError("Can't split BLAST databases")
    # The first parameter is cls, so bind it as a class method.
    split = classmethod(split)
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
172
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
173
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
class BlastNucDb( _BlastDb, Data ):
    """Class for nucleotide BLAST database files."""
    file_ext = 'blastdbn'
    composite_type ='basic'

    def __init__(self, **kwd):
        """Initialise via Data, then register the component files of the database."""
        Data.__init__(self, **kwd)

        # Binary components registered without the optional flag.
        core_components = (
            'blastdb.nhr',  # sequence headers
            'blastdb.nin',  # index file
            'blastdb.nsq',  # nucleotide sequences
        )
        for component in core_components:
            self.add_composite_file(component, is_binary=True)

        # The alias file is the only non-binary component.
        self.add_composite_file('blastdb.nal', is_binary=False, optional=True)  # alias ( -gi_mask option of makeblastdb)

        # Optional binary components, registered in this order.
        extra_components = (
            'blastdb.nhd',  # sorted sequence hash values ( -hash_index option of makeblastdb)
            'blastdb.nhi',  # index of sequence hash values ( -hash_index option of makeblastdb)
            'blastdb.nnd',  # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
            'blastdb.nni',  # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
            'blastdb.nog',  # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
            'blastdb.nsd',  # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
            'blastdb.nsi',  # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
            # 'blastdb.00.idx' - first volume of the MegaBLAST index generated by makembindex
            # (not registered; it would need repeating for each index volume,
            # with filename extensions like '.01.idx', '.02.idx', etc.)
            'blastdb.shd',  # MegaBLAST index superheader (-old_style_index false option of makembindex)
        )
        for component in extra_components:
            self.add_composite_file(component, is_binary=True, optional=True)

        # Not registered: the WriteDB column files
        #   'blastdb.naa' - index of a WriteDB column for e.g. mask data
        #   'blastdb.nab' - data of a WriteDB column
        #   'blastdb.nac' - multiple byte order for a WriteDB column
        # These three would need repeating for each WriteDB column, with filename
        # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the "eye" icon.
        Every argument is ignored and a fixed description is returned.
        """
        return "This is a BLAST nucleotide database."
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
207
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
208
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
209 class BlastProtDb( _BlastDb, Data ):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
210 """Class for protein BLAST database files."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
211 file_ext = 'blastdbp'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
212 composite_type ='basic'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
213
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
214 def __init__(self, **kwd):
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
215 Data.__init__(self, **kwd)
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
216 # Component file comments are as in BlastNucDb except where noted
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
217 self.add_composite_file('blastdb.phr', is_binary=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
218 self.add_composite_file('blastdb.pin', is_binary=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
219 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
220 self.add_composite_file('blastdb.phd', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
221 self.add_composite_file('blastdb.phi', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
222 self.add_composite_file('blastdb.pnd', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
223 self.add_composite_file('blastdb.pni', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
224 self.add_composite_file('blastdb.pog', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
225 self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
226 self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
227 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
228 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
229 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
230 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
231
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
232 def display_data(self, trans, data, preview=False, filename=None,
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
233 to_ext=None, size=None, offset=None, **kwd):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
234 """Apparently an old display method, but still gets called.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
235
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
236 This allows us to format the data shown in the central pane via the "eye" icon.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
237 """
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
238 return "This is a BLAST protein database."