Mercurial > repos > yating-l > jbrowsearchivecreator
annotate blastxmlToGff3.py @ 3:eda851e52060 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 479fc6490e24ca0e5034ae6c3579882e97e095e6-dirty
| author | yating-l | 
|---|---|
| date | Wed, 31 May 2017 15:45:47 -0400 | 
| parents | 804a93e87cc8 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
2 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
3 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
4 from Bio.Blast import NCBIXML | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
5 from collections import OrderedDict | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
6 import utils | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
7 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
8 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
def align2cigar(hsp_query, hsp_reference):
    """
    Build the GFF3 ``Gap`` (CIGAR-like) string for one aligned HSP.

    input:
        hsp_query      aligned query string, '-' marks a gap
        hsp_reference  aligned subject (reference) string, '-' marks a gap
    output:
        space-separated runs such as "M8 D3 M6"; an empty string when
        hsp_query is empty

    Each alignment column is classified as:
        'M' match (no gap in either sequence),
        'I' insert a gap into the reference sequence,
        'D' insert a gap into the target (delete from reference).

    The scan runs over the length of hsp_query; an hsp_reference that is
    shorter than hsp_query can raise IndexError.

    Some ideas of this function come from
    https://gist.github.com/ozagordi/099bdb796507da8d9426
    """
    cigar = []
    # No run is open until the first column is seen; this avoids emitting a
    # zero-length "M0" run when the alignment starts with a gap or is empty.
    run_type = None
    run_length = 0
    for i, query_char in enumerate(hsp_query):
        if query_char == '-':
            cur_type = 'D'
        elif hsp_reference[i] == '-':
            cur_type = 'I'
        else:
            cur_type = 'M'
        if cur_type == run_type:
            run_length += 1
            continue
        # The column type changed: flush the finished run and start a new one.
        if run_type is not None:
            cigar.append('%s%d' % (run_type, run_length))
        run_type = cur_type
        run_length = 1
    if run_type is not None:
        cigar.append('%s%d' % (run_type, run_length))
    return ' '.join(cigar)
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
46 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
def gff3_writer(blast_records, gff3_file):
    """
    Write BLAST records to a GFF3 file as match / match_part features.

    input:
        blast_records  iterable of parsed BLAST records; each record is read
                       for .query, .application, .matrix and .alignments
        gff3_file      path of the output file (opened in append mode)
    output:
        None; the features are written to gff3_file

    For every alignment one parent 'match' feature is written, followed by
    one 'match_part' feature per HSP, sorted by (start, end).
    """
    # NOTE(review): the file is opened in append mode while the version
    # header is always written, so re-using an existing file yields a second
    # "##gff-version 3" line - confirm that callers always pass a fresh path.
    with open(gff3_file, 'a') as gff3:
        gff3.write("##gff-version 3\n")
        # Contigs whose ##sequence-region pragma has already been written.
        seen_contigs = set()
        for blast_record in blast_records:
            query_name = blast_record.query.split(" ")[0]
            source = blast_record.application
            method = blast_record.matrix
            program = source.lower()
            for alignment in blast_record.alignments:
                # The contig name is taken from the last space-separated
                # token of the alignment title.
                contig_name = alignment.title.split(" ")[-1]
                length = alignment.length
                parent_id = contig_name + '_' + query_name

                # Insertion order matters: these dicts are ordered and are
                # handed to utils.write_features as-is.
                parent_field = OrderedDict()
                parent_field['seqid'] = contig_name
                parent_field['source'] = source
                parent_field['type'] = 'match'
                parent_attribute = OrderedDict()
                parent_attribute['ID'] = parent_id
                parent_attribute['method'] = method
                parent_attribute['length'] = length

                if contig_name not in seen_contigs:
                    gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n')
                    seen_contigs.add(contig_name)

                match_num = 0
                # Running [min start, max end] over all HSPs of the alignment.
                coords = [length, 0]
                alignments = []
                for hsp in alignment.hsps:
                    ref = hsp.sbjct
                    query = hsp.query
                    frame = hsp.frame

                    field = OrderedDict()
                    field['seqid'] = contig_name
                    field['source'] = source
                    field['type'] = 'match_part'

                    field['start'] = hsp.sbjct_start
                    if field['start'] < coords[0]:
                        coords[0] = field['start']
                    ref_length = len(ref.replace('-', ''))
                    # if run tblastn, the actual length of reference should be multiplied by 3
                    if program == "tblastn":
                        ref_length *= 3
                    field['end'] = field['start'] + ref_length - 1
                    if field['end'] > coords[1]:
                        coords[1] = field['end']
                    field['score'] = hsp.score

                    # Decide the strand from the pair of reading frames:
                    #   (+, +), (0, 0), (-, -) => +
                    #   (+, -), (-, +)         => -
                    # When one frame is 0 the sign of the other one decides.
                    frame_product = frame[1] * frame[0]
                    if frame_product > 0:
                        field['strand'] = '+'
                    elif frame_product < 0:
                        field['strand'] = '-'
                    elif frame[0] + frame[1] >= 0:
                        field['strand'] = '+'
                    else:
                        field['strand'] = '-'
                    field['phase'] = '.'

                    target_start = hsp.query_start
                    target_len = len(query.replace('-', ''))
                    # if run blastx, the actual length of query should be multiplied by 3
                    if program == "blastx":
                        target_len *= 3
                    target_end = target_start + target_len - 1

                    attribute = OrderedDict()
                    attribute['ID'] = parent_id + '_match_' + str(match_num)
                    attribute['Parent'] = parent_id
                    attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
                    attribute['Gap'] = align2cigar(query, ref)
                    # store the query sequence and match string in the file in order to
                    # display alignment with BlastAlignment plugin
                    attribute['subject'] = hsp.sbjct
                    attribute['query'] = hsp.query
                    attribute['match'] = hsp.match
                    # 'gaps' counts every blank in the match line and 'similar'
                    # every '+'; all remaining columns are taken as identities.
                    attribute['gaps'] = attribute['match'].count(' ')
                    similar = attribute['match'].count('+')
                    attribute['identities'] = len(attribute['match']) - similar - attribute['gaps']
                    attribute['positives'] = attribute['identities'] + similar
                    attribute['expect'] = hsp.expect
                    # The second element of the frame pair is always recorded,
                    # including when the frame is (0, 0).
                    attribute['frame'] = frame[1]
                    match_num += 1
                    alignments.append({'field': field, 'attribute': attribute})

                parent_field['start'] = coords[0]
                parent_field['end'] = coords[1]
                parent_field['score'] = '.'
                parent_field['strand'] = '.'
                parent_field['phase'] = '.'
                parent_attribute['match_num'] = match_num
                alignments.sort(key=lambda x: (x['field']['start'], x['field']['end']))
                utils.write_features(parent_field, parent_attribute, gff3)

                # Flag each HSP that starts at or before the end of the
                # previous (sorted) HSP with the overlapping coordinate pair.
                prev_end = -1
                for align in alignments:
                    overlap = ''
                    if align['field']['start'] <= prev_end:
                        overlap += str(align['field']['start']) + ',' + str(prev_end)
                    prev_end = align['field']['end']
                    align['attribute']['overlap'] = overlap
                    utils.write_features(align['field'], align['attribute'], gff3)
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
151 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
def blastxml2gff3(xml_file, gff3_file):
    """
    Convert a BLAST XML result file into a GFF3 file.

    input:
        xml_file   path of the BLAST XML file to read
        gff3_file  path of the GFF3 file handed to gff3_writer
    output:
        None; the GFF3 content is written by gff3_writer
    """
    # Keep the handle open while gff3_writer iterates over the parsed
    # records, and close it afterwards even if the conversion fails.
    with open(xml_file) as result_handle:
        blast_records = NCBIXML.parse(result_handle)
        gff3_writer(blast_records, gff3_file)
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
156 | 
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    import sys

    # Optional command-line usage: blastxmlToGff3.py [blast_xml [gff3_out]].
    # Without arguments the original hard-coded paths are used.
    default_xml = "../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml"
    default_gff3 = "gff3.txt"
    xml_path = sys.argv[1] if len(sys.argv) > 1 else default_xml
    gff3_path = sys.argv[2] if len(sys.argv) > 2 else default_gff3
    blastxml2gff3(xml_path, gff3_path)
| 
 
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
 
yating-l 
parents:  
diff
changeset
 | 
159 | 
