annotate blastxmlToGff3.py @ 3:eda851e52060 draft

planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 479fc6490e24ca0e5034ae6c3579882e97e095e6-dirty
author yating-l
date Wed, 31 May 2017 15:45:47 -0400
parents 804a93e87cc8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
1 #!/usr/bin/env python
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
2
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
3
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
4 from Bio.Blast import NCBIXML
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
5 from collections import OrderedDict
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
6 import utils
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
7
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
8
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
9 def align2cigar(hsp_query, hsp_reference):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
10 """
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
11 Build CIGAR representation from an hsp_query
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
12 input:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
13 hsp_query
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
14 hsp_sbjct
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
15 output:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
16 CIGAR string
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
17 """
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
18 query = hsp_query
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
19 ref = hsp_reference
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
20 # preType, curType:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
21 # 'M' represents match,
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
22 # 'I' represents insert a gap into the reference sequence,
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
23 # 'D' represents insert a gap into the target (delete from reference)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
24 # some ideas of this algin2cigar function are coming from
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
25 # https://gist.github.com/ozagordi/099bdb796507da8d9426
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
26 prevType = 'M'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
27 curType = 'M'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
28 count = 0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
29 cigar = []
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
30 num = len(query)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
31 for i in range(num):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
32 if query[i] == '-':
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
33 curType = 'D'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
34 elif ref[i] == '-':
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
35 curType = 'I'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
36 else:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
37 curType = 'M'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
38 if curType == prevType:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
39 count += 1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
40 else:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
41 cigar.append('%s%d' % (prevType, count))
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
42 prevType = curType
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
43 count = 1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
44 cigar.append('%s%d' % (curType, count))
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
45 return ' '.join(cigar)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
46
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
47 def gff3_writer(blast_records, gff3_file):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
48 gff3 = open(gff3_file, 'a')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
49 gff3.write("##gff-version 3\n")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
50 seq_regions = dict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
51 for blast_record in blast_records:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
52 query_name = blast_record.query.split(" ")[0]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
53 source = blast_record.application
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
54 method = blast_record.matrix
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
55 for alignment in blast_record.alignments:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
56 group = {
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
57 "parent_field" : OrderedDict(),
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
58 "parent_attribute" : OrderedDict(),
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
59 "alignments" : []
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
60 }
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
61 title = alignment.title.split(" ")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
62 contig_name = title[len(title) - 1]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
63 length = alignment.length
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
64 group['parent_field']['seqid'] = contig_name
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
65 group['parent_field']['source'] = source
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
66 group['parent_field']['type'] = 'match'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
67 group['parent_attribute']['ID'] = contig_name + '_' + query_name
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
68 group['parent_attribute']['method'] = method
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
69 group['parent_attribute']['length'] = length
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
70 if contig_name not in seq_regions:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
71 gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
72 seq_regions[contig_name] = length
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
73 match_num = 0
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
74 coords = [length, 0]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
75 for hsp in alignment.hsps:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
76 hsp_align = {}
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
77 field = OrderedDict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
78 attribute = OrderedDict()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
79 ref = hsp.sbjct
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
80 query = hsp.query
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
81 field['seqid'] = contig_name
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
82 field['source'] = source
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
83 field['type'] = 'match_part'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
84
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
85 field['start'] = hsp.sbjct_start
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
86 if field['start'] < coords[0]:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
87 coords[0] = field['start']
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
88 ref_length = len(ref.replace('-', ''))
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
89 # if run tblastn, the actual length of reference should be multiplied by 3
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
90 if source.lower() == "tblastn":
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
91 ref_length *= 3
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
92 field['end'] = field['start'] + ref_length - 1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
93 if field['end'] > coords[1]:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
94 coords[1] = field['end']
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
95 field['score'] = hsp.score
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
96 #decide if the alignment in the same strand or reverse strand
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
97 #reading frame
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
98 # (+, +), (0, 0), (-, -) => +
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
99 # (+, -), (-, +) => -
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
100 if hsp.frame[1] * hsp.frame[0] > 0:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
101 field['strand'] = '+'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
102 elif hsp.frame[1] * hsp.frame[0] < 0:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
103 field['strand'] = '-'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
104 else:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
105 if hsp.frame[0] + hsp.frame[1] >= 0:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
106 field['strand'] = '+'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
107 else:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
108 field['strand'] = '-'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
109 field['phase'] = '.'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
110
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
111 target_start = hsp.query_start
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
112 target_len = len(query.replace('-', ''))
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
113 # if run blastx, the actual length of query should be multiplied by 3
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
114 if source.lower() == "blastx":
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
115 target_len *= 3
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
116 target_end = target_start + target_len -1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
117 attribute['ID'] = group['parent_attribute']['ID'] + '_match_' + str(match_num)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
118 attribute['Parent'] = group['parent_attribute']['ID']
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
119 attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
120 attribute['Gap'] = align2cigar(query, ref)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
121 #store the query sequence and match string in the file in order to display alignment with BlastAlignment plugin
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
122 attribute['subject'] = hsp.sbjct
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
123 attribute['query'] = hsp.query
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
124 attribute['match'] = hsp.match
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
125 attribute['gaps'] = attribute['match'].count(' ')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
126 similar = attribute['match'].count('+')
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
127 attribute['identities'] = len(attribute['match']) - similar - attribute['gaps']
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
128 attribute['positives'] = attribute['identities'] + similar
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
129 attribute['expect'] = hsp.expect
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
130 # show reading frame attribute only if the frame is not (0, 0)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
131 attribute['frame'] = hsp.frame[1]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
132 match_num += 1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
133 hsp_align['field'] = field
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
134 hsp_align['attribute'] = attribute
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
135 group['alignments'].append(hsp_align)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
136 group['parent_field']['start'] = coords[0]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
137 group['parent_field']['end'] = coords[1]
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
138 group['parent_field']['score'] = group['parent_field']['strand'] = group['parent_field']['phase'] = '.'
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
139 group['parent_attribute']['match_num'] = match_num
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
140 group['alignments'].sort(key=lambda x: (x['field']['start'], x['field']['end']))
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
141 utils.write_features(group['parent_field'], group['parent_attribute'], gff3)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
142 prev_end = -1
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
143 for align in group['alignments']:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
144 overlap = ''
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
145 if align['field']['start'] <= prev_end:
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
146 overlap += str(align['field']['start']) + ',' + str(prev_end)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
147 prev_end = align['field']['end']
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
148 align['attribute']['overlap'] = overlap
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
149 utils.write_features(align['field'], align['attribute'], gff3)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
150 gff3.close()
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
151
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
152 def blastxml2gff3(xml_file, gff3_file):
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
153 result_handle = open(xml_file)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
154 blast_records = NCBIXML.parse(result_handle)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
155 gff3_writer(blast_records, gff3_file)
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
156
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
157 if __name__ == "__main__":
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
158 blastxml2gff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt")
804a93e87cc8 planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff changeset
159