Mercurial > repos > iuc > jbrowse
comparison blastxml_to_gapped_gff3.py @ 3:7342f467507b draft
Uploaded v0.4 of JBrowse
| author | iuc |
|---|---|
| date | Thu, 31 Dec 2015 13:58:43 -0500 |
| parents | 497c6bb3b717 |
| children | ad4b9d7eae6a |
comparison
equal
deleted
inserted
replaced
| 2:b6a0e126dbee | 3:7342f467507b |
|---|---|
| 26 from Bio.SeqFeature import SeqFeature, FeatureLocation | 26 from Bio.SeqFeature import SeqFeature, FeatureLocation |
| 27 | 27 |
| 28 blast_records = NCBIXML.parse(blastxml) | 28 blast_records = NCBIXML.parse(blastxml) |
| 29 records = [] | 29 records = [] |
| 30 for record in blast_records: | 30 for record in blast_records: |
| 31 # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343 | |
| 32 match_type = { # Currently we can only handle BLASTN, BLASTP | |
| 33 'BLASTN': 'nucleotide_match', | |
| 34 'BLASTP': 'protein_match', | |
| 35 }.get(record.application, 'match') | |
| 36 | |
| 31 rec = SeqRecord(Seq("ACTG"), id=record.query) | 37 rec = SeqRecord(Seq("ACTG"), id=record.query) |
| 32 for hit in record.alignments: | 38 for hit in record.alignments: |
| 33 for hsp in hit.hsps: | 39 for hsp in hit.hsps: |
| 34 qualifiers = { | 40 qualifiers = { |
| 35 "source": "blast", | 41 "source": "blast", |
| 65 | 71 |
| 66 if trim or trim_end: | 72 if trim or trim_end: |
| 67 if parent_match_end > hsp.query_end: | 73 if parent_match_end > hsp.query_end: |
| 68 parent_match_end = hsp.query_end + 1 | 74 parent_match_end = hsp.query_end + 1 |
| 69 | 75 |
| 70 # The ``protein_match`` feature will hold one or more ``match_part``s | 76 # The ``match`` feature will hold one or more ``match_part``s |
| 71 top_feature = SeqFeature( | 77 top_feature = SeqFeature( |
| 72 FeatureLocation(parent_match_start, parent_match_end), | 78 FeatureLocation(parent_match_start, parent_match_end), |
| 73 type="protein_match", strand=0, | 79 type=match_type, strand=0, |
| 74 qualifiers=qualifiers | 80 qualifiers=qualifiers |
| 75 ) | 81 ) |
| 76 | 82 |
| 77 # Unlike the parent feature, ``match_part``s have sources. | 83 # Unlike the parent feature, ``match_part``s have sources. |
| 78 part_qualifiers = { | 84 part_qualifiers = { |
| 85 part_qualifiers['Gap'] = cigar | 91 part_qualifiers['Gap'] = cigar |
| 86 part_qualifiers['ID'] = hit.hit_id | 92 part_qualifiers['ID'] = hit.hit_id |
| 87 | 93 |
| 88 if trim: | 94 if trim: |
| 89 # If trimming, then we start relative to the | 95 # If trimming, then we start relative to the |
| 90 # protein_match's start | 96 # match's start |
| 91 match_part_start = parent_match_start + start | 97 match_part_start = parent_match_start + start |
| 92 else: | 98 else: |
| 93 # Otherwise, we have to account for the subject start's location | 99 # Otherwise, we have to account for the subject start's location |
| 94 match_part_start = parent_match_start + hsp.sbjct_start + start - 1 | 100 match_part_start = parent_match_start + hsp.sbjct_start + start - 1 |
| 95 | 101 |
| 106 type="match_part", strand=0, | 112 type="match_part", strand=0, |
| 107 qualifiers=copy.deepcopy(part_qualifiers)) | 113 qualifiers=copy.deepcopy(part_qualifiers)) |
| 108 ) | 114 ) |
| 109 | 115 |
| 110 rec.features.append(top_feature) | 116 rec.features.append(top_feature) |
| 117 rec.annotations = {} | |
| 111 records.append(rec) | 118 records.append(rec) |
| 112 return records | 119 return records |
| 113 | 120 |
| 114 | 121 |
| 115 def __remove_query_gaps(query, match, subject): | 122 def __remove_query_gaps(query, match, subject): |
| 250 parser.add_argument('--trim', action='store_true', help='Trim blast hits to be only as long as the parent feature') | 257 parser.add_argument('--trim', action='store_true', help='Trim blast hits to be only as long as the parent feature') |
| 251 parser.add_argument('--trim_end', action='store_true', help='Cut blast results off at end of gene') | 258 parser.add_argument('--trim_end', action='store_true', help='Cut blast results off at end of gene') |
| 252 args = parser.parse_args() | 259 args = parser.parse_args() |
| 253 | 260 |
| 254 result = blastxml2gff3(**vars(args)) | 261 result = blastxml2gff3(**vars(args)) |
| 255 | |
| 256 GFF.write(result, sys.stdout) | 262 GFF.write(result, sys.stdout) |
