Mercurial > repos > iuc > jbrowse
diff blastxml_to_gapped_gff3.py @ 3:7342f467507b draft
Uploaded v0.4 of JBrowse
author | iuc |
---|---|
date | Thu, 31 Dec 2015 13:58:43 -0500 |
parents | 497c6bb3b717 |
children | ad4b9d7eae6a |
line wrap: on
line diff
--- a/blastxml_to_gapped_gff3.py Tue Jun 23 12:10:15 2015 -0400 +++ b/blastxml_to_gapped_gff3.py Thu Dec 31 13:58:43 2015 -0500 @@ -28,6 +28,12 @@ blast_records = NCBIXML.parse(blastxml) records = [] for record in blast_records: + # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343 + match_type = { # Currently we can only handle BLASTN, BLASTP + 'BLASTN': 'nucleotide_match', + 'BLASTP': 'protein_match', + }.get(record.application, 'match') + rec = SeqRecord(Seq("ACTG"), id=record.query) for hit in record.alignments: for hsp in hit.hsps: @@ -67,10 +73,10 @@ if parent_match_end > hsp.query_end: parent_match_end = hsp.query_end + 1 - # The ``protein_match`` feature will hold one or more ``match_part``s + # The ``match`` feature will hold one or more ``match_part``s top_feature = SeqFeature( FeatureLocation(parent_match_start, parent_match_end), - type="protein_match", strand=0, + type=match_type, strand=0, qualifiers=qualifiers ) @@ -87,7 +93,7 @@ if trim: # If trimming, then we start relative to the - # protein_match's start + # match's start match_part_start = parent_match_start + start else: # Otherwise, we have to account for the subject start's location @@ -108,6 +114,7 @@ ) rec.features.append(top_feature) + rec.annotations = {} records.append(rec) return records @@ -252,5 +259,4 @@ args = parser.parse_args() result = blastxml2gff3(**vars(args)) - GFF.write(result, sys.stdout)