annotate blast.py @ 5:b3a3ba0c1d47 draft

Uploaded v0.0.15 which updates the BLAST database definitions. Fixes a MetadataElement bug and includes more of the optional BLAST database files (contribution from Nicola Soranzo).
author peterjc
date Wed, 20 Mar 2013 10:39:27 -0400
parents f9a7783ed7b6
children a04cf51612f1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
1 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
2 BlastXml class
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
3 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
4
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
5 from galaxy.datatypes.data import get_file_peek
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
6 from galaxy.datatypes.data import Text, Data
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
7 from galaxy.datatypes.xml import GenericXml
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
8 from galaxy.datatypes.metadata import MetadataElement
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
9
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
10
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
11 class BlastXml( GenericXml ):
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
12 """NCBI Blast XML Output data"""
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
13 file_ext = "blastxml"
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
14
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
15 def set_peek( self, dataset, is_multi_byte=False ):
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
16 """Set the peek and blurb text"""
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
17 if not dataset.dataset.purged:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
18 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
19 dataset.blurb = 'NCBI Blast XML data'
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
20 else:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
21 dataset.peek = 'file does not exist'
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
22 dataset.blurb = 'file purged from disk'
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
23
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
24 def sniff( self, filename ):
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
25 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
26 Determines whether the file is blastxml
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
27
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
28 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
29 >>> BlastXml().sniff( fname )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
30 True
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
31 >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
32 >>> BlastXml().sniff( fname )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
33 True
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
34 >>> fname = get_test_fname( 'interval.interval' )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
35 >>> BlastXml().sniff( fname )
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
36 False
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
37 """
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
38 #TODO - Use a context manager on Python 2.5+ to close handle
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
39 handle = open(filename)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
40 line = handle.readline()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
41 if line.strip() != '<?xml version="1.0"?>':
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
42 handle.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
43 return False
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
44 line = handle.readline()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
45 if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
46 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
47 handle.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
48 return False
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
49 line = handle.readline()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
50 if line.strip() != '<BlastOutput>':
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
51 handle.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
52 return False
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
53 handle.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
54 return True
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
55
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
56 def merge(split_files, output_file):
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
57 """Merging multiple XML files is non-trivial and must be done in subclasses."""
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
58 if len(split_files) == 1:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
59 #For one file only, use base class method (move/copy)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
60 return Text.merge(split_files, output_file)
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
61 if not split_files:
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
62 raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
63 % (split_files, output_file))
3
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
64 out = open(output_file, "w")
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
65 h = None
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
66 for f in split_files:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
67 h = open(f)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
68 body = False
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
69 header = h.readline()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
70 if not header:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
71 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
72 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
73 raise ValueError("BLAST XML file %s was empty" % f)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
74 if header.strip() != '<?xml version="1.0"?>':
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
75 out.write(header) #for diagnosis
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
76 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
77 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
78 raise ValueError("%s is not an XML file!" % f)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
79 line = h.readline()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
80 header += line
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
81 if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
82 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
83 out.write(header) #for diagnosis
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
84 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
85 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
86 raise ValueError("%s is not a BLAST XML file!" % f)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
87 while True:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
88 line = h.readline()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
89 if not line:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
90 out.write(header) #for diagnosis
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
91 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
92 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
93 raise ValueError("BLAST XML file %s ended prematurely" % f)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
94 header += line
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
95 if "<Iteration>" in line:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
96 break
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
97 if len(header) > 10000:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
98 #Something has gone wrong, don't load too much into memory!
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
99 #Write what we have to the merged file for diagnostics
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
100 out.write(header)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
101 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
102 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
103 raise ValueError("BLAST XML file %s has too long a header!" % f)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
104 if "<BlastOutput>" not in header:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
105 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
106 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
107 raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
108 if f == split_files[0]:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
109 out.write(header)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
110 old_header = header
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
111 elif old_header[:300] != header[:300]:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
112 #Enough to check <BlastOutput_program> and <BlastOutput_version> match
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
113 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
114 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
115 raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n" \
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
116 % (split_files[0], f, old_header[:300], header[:300]))
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
117 else:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
118 out.write(" <Iteration>\n")
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
119 for line in h:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
120 if "</BlastOutput_iterations>" in line:
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
121 break
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
122 #TODO - Increment <Iteration_iter-num> and if required automatic query names
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
123 #like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
124 out.write(line)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
125 h.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
126 out.write(" </BlastOutput_iterations>\n")
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
127 out.write("</BlastOutput>\n")
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
128 out.close()
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
129 merge = staticmethod(merge)
6ef523b390e0 Uploaded correct file.
peterjc
parents:
diff changeset
130
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
131
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
132 class _BlastDb(object):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
133 """Base class for BLAST database datatype."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
134
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
135 def set_peek( self, dataset, is_multi_byte=False ):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
136 """Set the peek and blurb text."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
137 if not dataset.dataset.purged:
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
138 dataset.peek = "BLAST database (multiple files)"
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
139 dataset.blurb = "BLAST database (multiple files)"
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
140 else:
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
141 dataset.peek = 'file does not exist'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
142 dataset.blurb = 'file purged from disk'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
143
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
144 def display_peek( self, dataset ):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
145 """Create HTML content, used for displaying peek."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
146 try:
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
147 return dataset.peek
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
148 except:
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
149 return "BLAST database (multiple files)"
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
150
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
151 def display_data(self, trans, data, preview=False, filename=None,
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
152 to_ext=None, size=None, offset=None, **kwd):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
153 """Apparently an old display method, but still gets called.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
154
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
155 This allows us to format the data shown in the central pane via the "eye" icon.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
156 """
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
157 return "This is a BLAST database."
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
158
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
159 def get_mime(self):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
160 """Returns the mime type of the datatype (pretend it is text for peek)"""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
161 return 'text/plain'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
162
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
163 def merge(split_files, output_file):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
164 """Merge BLAST databases (not implemented for now)."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
165 raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
166
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
167 def split( cls, input_datasets, subdir_generator_function, split_params):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
168 """Split a BLAST database (not implemented for now)."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
169 if split_params is None:
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
170 return None
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
171 raise NotImplementedError("Can't split BLAST databases")
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
172
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
173
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
174 class BlastNucDb( _BlastDb, Data ):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
175 """Class for nucleotide BLAST database files."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
176 file_ext = 'blastdbn'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
177 composite_type ='basic'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
178
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
179 def __init__(self, **kwd):
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
180 Data.__init__(self, **kwd)
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
181 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
182 self.add_composite_file('blastdb.nin', is_binary=True) # index file
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
183 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
184 self.add_composite_file('blastdb.nal', is_binary=False, optional=True) # alias ( -gi_mask option of makeblastdb)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
185 self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) # sorted sequence hash values ( -hash_index option of makeblastdb)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
186 self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) # index of sequence hash values ( -hash_index option of makeblastdb)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
187 self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
188 self.add_composite_file('blastdb.nni', is_binary=True, optional=True) # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
189 self.add_composite_file('blastdb.nog', is_binary=True, optional=True) # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
190 self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
191 self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
192 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
193 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
194 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
195 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
196
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
197 def display_data(self, trans, data, preview=False, filename=None,
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
198 to_ext=None, size=None, offset=None, **kwd):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
199 """Apparently an old display method, but still gets called.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
200
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
201 This allows us to format the data shown in the central pane via the "eye" icon.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
202 """
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
203 return "This is a BLAST nucleotide database."
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
204
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
205
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
206 class BlastProtDb( _BlastDb, Data ):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
207 """Class for protein BLAST database files."""
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
208 file_ext = 'blastdbp'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
209 composite_type ='basic'
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
210
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
211 def __init__(self, **kwd):
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
212 Data.__init__(self, **kwd)
5
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
213 # Component file comments are as in BlastNucDb except where noted
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
214 self.add_composite_file('blastdb.phr', is_binary=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
215 self.add_composite_file('blastdb.pin', is_binary=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
216 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
217 self.add_composite_file('blastdb.phd', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
218 self.add_composite_file('blastdb.phi', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
219 self.add_composite_file('blastdb.pnd', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
220 self.add_composite_file('blastdb.pni', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
221 self.add_composite_file('blastdb.pog', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
222 self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
223 self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
224 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
225 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
226 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
b3a3ba0c1d47 Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents: 4
diff changeset
227 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
4
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
228
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
229 def display_data(self, trans, data, preview=False, filename=None,
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
230 to_ext=None, size=None, offset=None, **kwd):
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
231 """Apparently an old display method, but still gets called.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
232
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
233 This allows us to format the data shown in the central pane via the "eye" icon.
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
234 """
f9a7783ed7b6 Uploaded v0.0.14 adding BLAST database support.
peterjc
parents: 3
diff changeset
235 return "This is a BLAST protein database."