Mercurial > repos > yating-l > jbrowsearchivecreator
annotate blastxmlToGff3.py @ 0:804a93e87cc8 draft
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
author | yating-l |
---|---|
date | Wed, 12 Apr 2017 17:41:55 -0400 |
parents | |
children |
rev | line source |
---|---|
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
1 #!/usr/bin/env python |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
2 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
3 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
4 from Bio.Blast import NCBIXML |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
5 from collections import OrderedDict |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
6 import utils |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
7 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
8 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
9 def align2cigar(hsp_query, hsp_reference): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
10 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
11 Build CIGAR representation from an hsp_query |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
12 input: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
13 hsp_query |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
14 hsp_sbjct |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
15 output: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
16 CIGAR string |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
17 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
18 query = hsp_query |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
19 ref = hsp_reference |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
20 # preType, curType: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
21 # 'M' represents match, |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
22 # 'I' represents insert a gap into the reference sequence, |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
23 # 'D' represents insert a gap into the target (delete from reference) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
24 # some ideas of this algin2cigar function are coming from |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
25 # https://gist.github.com/ozagordi/099bdb796507da8d9426 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
26 prevType = 'M' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
27 curType = 'M' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
28 count = 0 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
29 cigar = [] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
30 num = len(query) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
31 for i in range(num): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
32 if query[i] == '-': |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
33 curType = 'D' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
34 elif ref[i] == '-': |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
35 curType = 'I' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
36 else: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
37 curType = 'M' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
38 if curType == prevType: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
39 count += 1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
40 else: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
41 cigar.append('%s%d' % (prevType, count)) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
42 prevType = curType |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
43 count = 1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
44 cigar.append('%s%d' % (curType, count)) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
45 return ' '.join(cigar) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
46 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
47 def gff3_writer(blast_records, gff3_file): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
48 gff3 = open(gff3_file, 'a') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
49 gff3.write("##gff-version 3\n") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
50 seq_regions = dict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
51 for blast_record in blast_records: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
52 query_name = blast_record.query.split(" ")[0] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
53 source = blast_record.application |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
54 method = blast_record.matrix |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
55 for alignment in blast_record.alignments: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
56 group = { |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
57 "parent_field" : OrderedDict(), |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
58 "parent_attribute" : OrderedDict(), |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
59 "alignments" : [] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
60 } |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
61 title = alignment.title.split(" ") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
62 contig_name = title[len(title) - 1] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
63 length = alignment.length |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
64 group['parent_field']['seqid'] = contig_name |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
65 group['parent_field']['source'] = source |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
66 group['parent_field']['type'] = 'match' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
67 group['parent_attribute']['ID'] = contig_name + '_' + query_name |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
68 group['parent_attribute']['method'] = method |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
69 group['parent_attribute']['length'] = length |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
70 if contig_name not in seq_regions: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
71 gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
72 seq_regions[contig_name] = length |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
73 match_num = 0 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
74 coords = [length, 0] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
75 for hsp in alignment.hsps: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
76 hsp_align = {} |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
77 field = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
78 attribute = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
79 ref = hsp.sbjct |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
80 query = hsp.query |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
81 field['seqid'] = contig_name |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
82 field['source'] = source |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
83 field['type'] = 'match_part' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
84 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
85 field['start'] = hsp.sbjct_start |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
86 if field['start'] < coords[0]: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
87 coords[0] = field['start'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
88 ref_length = len(ref.replace('-', '')) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
89 # if run tblastn, the actual length of reference should be multiplied by 3 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
90 if source.lower() == "tblastn": |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
91 ref_length *= 3 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
92 field['end'] = field['start'] + ref_length - 1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
93 if field['end'] > coords[1]: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
94 coords[1] = field['end'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
95 field['score'] = hsp.score |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
96 #decide if the alignment in the same strand or reverse strand |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
97 #reading frame |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
98 # (+, +), (0, 0), (-, -) => + |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
99 # (+, -), (-, +) => - |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
100 if hsp.frame[1] * hsp.frame[0] > 0: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
101 field['strand'] = '+' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
102 elif hsp.frame[1] * hsp.frame[0] < 0: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
103 field['strand'] = '-' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
104 else: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
105 if hsp.frame[0] + hsp.frame[1] >= 0: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
106 field['strand'] = '+' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
107 else: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
108 field['strand'] = '-' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
109 field['phase'] = '.' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
110 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
111 target_start = hsp.query_start |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
112 target_len = len(query.replace('-', '')) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
113 # if run blastx, the actual length of query should be multiplied by 3 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
114 if source.lower() == "blastx": |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
115 target_len *= 3 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
116 target_end = target_start + target_len -1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
117 attribute['ID'] = group['parent_attribute']['ID'] + '_match_' + str(match_num) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
118 attribute['Parent'] = group['parent_attribute']['ID'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
119 attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
120 attribute['Gap'] = align2cigar(query, ref) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
121 #store the query sequence and match string in the file in order to display alignment with BlastAlignment plugin |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
122 attribute['subject'] = hsp.sbjct |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
123 attribute['query'] = hsp.query |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
124 attribute['match'] = hsp.match |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
125 attribute['gaps'] = attribute['match'].count(' ') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
126 similar = attribute['match'].count('+') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
127 attribute['identities'] = len(attribute['match']) - similar - attribute['gaps'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
128 attribute['positives'] = attribute['identities'] + similar |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
129 attribute['expect'] = hsp.expect |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
130 # show reading frame attribute only if the frame is not (0, 0) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
131 attribute['frame'] = hsp.frame[1] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
132 match_num += 1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
133 hsp_align['field'] = field |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
134 hsp_align['attribute'] = attribute |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
135 group['alignments'].append(hsp_align) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
136 group['parent_field']['start'] = coords[0] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
137 group['parent_field']['end'] = coords[1] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
138 group['parent_field']['score'] = group['parent_field']['strand'] = group['parent_field']['phase'] = '.' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
139 group['parent_attribute']['match_num'] = match_num |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
140 group['alignments'].sort(key=lambda x: (x['field']['start'], x['field']['end'])) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
141 utils.write_features(group['parent_field'], group['parent_attribute'], gff3) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
142 prev_end = -1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
143 for align in group['alignments']: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
144 overlap = '' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
145 if align['field']['start'] <= prev_end: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
146 overlap += str(align['field']['start']) + ',' + str(prev_end) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
147 prev_end = align['field']['end'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
148 align['attribute']['overlap'] = overlap |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
149 utils.write_features(align['field'], align['attribute'], gff3) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
150 gff3.close() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
151 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
152 def blastxml2gff3(xml_file, gff3_file): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
153 result_handle = open(xml_file) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
154 blast_records = NCBIXML.parse(result_handle) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
155 gff3_writer(blast_records, gff3_file) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
156 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
157 if __name__ == "__main__": |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
158 blastxml2gff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
159 |