Mercurial > repos > iuc > jbrowse
comparison blastxml_to_gapped_gff3.py @ 3:7342f467507b draft
Uploaded v0.4 of JBrowse
author | iuc |
---|---|
date | Thu, 31 Dec 2015 13:58:43 -0500 |
parents | 497c6bb3b717 |
children | ad4b9d7eae6a |
comparison
equal
deleted
inserted
replaced
2:b6a0e126dbee | 3:7342f467507b |
---|---|
26 from Bio.SeqFeature import SeqFeature, FeatureLocation | 26 from Bio.SeqFeature import SeqFeature, FeatureLocation |
27 | 27 |
28 blast_records = NCBIXML.parse(blastxml) | 28 blast_records = NCBIXML.parse(blastxml) |
29 records = [] | 29 records = [] |
30 for record in blast_records: | 30 for record in blast_records: |
31 # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343 | |
32 match_type = { # Currently we can only handle BLASTN, BLASTP | |
33 'BLASTN': 'nucleotide_match', | |
34 'BLASTP': 'protein_match', | |
35 }.get(record.application, 'match') | |
36 | |
31 rec = SeqRecord(Seq("ACTG"), id=record.query) | 37 rec = SeqRecord(Seq("ACTG"), id=record.query) |
32 for hit in record.alignments: | 38 for hit in record.alignments: |
33 for hsp in hit.hsps: | 39 for hsp in hit.hsps: |
34 qualifiers = { | 40 qualifiers = { |
35 "source": "blast", | 41 "source": "blast", |
65 | 71 |
66 if trim or trim_end: | 72 if trim or trim_end: |
67 if parent_match_end > hsp.query_end: | 73 if parent_match_end > hsp.query_end: |
68 parent_match_end = hsp.query_end + 1 | 74 parent_match_end = hsp.query_end + 1 |
69 | 75 |
70 # The ``protein_match`` feature will hold one or more ``match_part``s | 76 # The ``match`` feature will hold one or more ``match_part``s |
71 top_feature = SeqFeature( | 77 top_feature = SeqFeature( |
72 FeatureLocation(parent_match_start, parent_match_end), | 78 FeatureLocation(parent_match_start, parent_match_end), |
73 type="protein_match", strand=0, | 79 type=match_type, strand=0, |
74 qualifiers=qualifiers | 80 qualifiers=qualifiers |
75 ) | 81 ) |
76 | 82 |
77 # Unlike the parent feature, ``match_part``s have sources. | 83 # Unlike the parent feature, ``match_part``s have sources. |
78 part_qualifiers = { | 84 part_qualifiers = { |
85 part_qualifiers['Gap'] = cigar | 91 part_qualifiers['Gap'] = cigar |
86 part_qualifiers['ID'] = hit.hit_id | 92 part_qualifiers['ID'] = hit.hit_id |
87 | 93 |
88 if trim: | 94 if trim: |
89 # If trimming, then we start relative to the | 95 # If trimming, then we start relative to the |
90 # protein_match's start | 96 # match's start |
91 match_part_start = parent_match_start + start | 97 match_part_start = parent_match_start + start |
92 else: | 98 else: |
93 # Otherwise, we have to account for the subject start's location | 99 # Otherwise, we have to account for the subject start's location |
94 match_part_start = parent_match_start + hsp.sbjct_start + start - 1 | 100 match_part_start = parent_match_start + hsp.sbjct_start + start - 1 |
95 | 101 |
106 type="match_part", strand=0, | 112 type="match_part", strand=0, |
107 qualifiers=copy.deepcopy(part_qualifiers)) | 113 qualifiers=copy.deepcopy(part_qualifiers)) |
108 ) | 114 ) |
109 | 115 |
110 rec.features.append(top_feature) | 116 rec.features.append(top_feature) |
117 rec.annotations = {} | |
111 records.append(rec) | 118 records.append(rec) |
112 return records | 119 return records |
113 | 120 |
114 | 121 |
115 def __remove_query_gaps(query, match, subject): | 122 def __remove_query_gaps(query, match, subject): |
250 parser.add_argument('--trim', action='store_true', help='Trim blast hits to be only as long as the parent feature') | 257 parser.add_argument('--trim', action='store_true', help='Trim blast hits to be only as long as the parent feature') |
251 parser.add_argument('--trim_end', action='store_true', help='Cut blast results off at end of gene') | 258 parser.add_argument('--trim_end', action='store_true', help='Cut blast results off at end of gene') |
252 args = parser.parse_args() | 259 args = parser.parse_args() |
253 | 260 |
254 result = blastxml2gff3(**vars(args)) | 261 result = blastxml2gff3(**vars(args)) |
255 | |
256 GFF.write(result, sys.stdout) | 262 GFF.write(result, sys.stdout) |