diff blastxml_to_gapped_gff3.py @ 3:7342f467507b draft

Uploaded v0.4 of JBrowse
author iuc
date Thu, 31 Dec 2015 13:58:43 -0500
parents 497c6bb3b717
children ad4b9d7eae6a
line wrap: on
line diff
--- a/blastxml_to_gapped_gff3.py	Tue Jun 23 12:10:15 2015 -0400
+++ b/blastxml_to_gapped_gff3.py	Thu Dec 31 13:58:43 2015 -0500
@@ -28,6 +28,12 @@
     blast_records = NCBIXML.parse(blastxml)
     records = []
     for record in blast_records:
+        # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343
+        match_type = {  # Only BLASTN and BLASTP have specific SO terms here; any other program falls back to the generic 'match'
+            'BLASTN': 'nucleotide_match',
+            'BLASTP': 'protein_match',
+        }.get(record.application, 'match')
+
         rec = SeqRecord(Seq("ACTG"), id=record.query)
         for hit in record.alignments:
             for hsp in hit.hsps:
@@ -67,10 +73,10 @@
                     if parent_match_end > hsp.query_end:
                         parent_match_end = hsp.query_end + 1
 
-                # The ``protein_match`` feature will hold one or more ``match_part``s
+                # The ``match`` feature will hold one or more ``match_part``s
                 top_feature = SeqFeature(
                     FeatureLocation(parent_match_start, parent_match_end),
-                    type="protein_match", strand=0,
+                    type=match_type, strand=0,
                     qualifiers=qualifiers
                 )
 
@@ -87,7 +93,7 @@
 
                     if trim:
                         # If trimming, then we start relative to the
-                        # protein_match's start
+                        # match's start
                         match_part_start = parent_match_start + start
                     else:
                         # Otherwise, we have to account for the subject start's location
@@ -108,6 +114,7 @@
                     )
 
                 rec.features.append(top_feature)
+        rec.annotations = {}
         records.append(rec)
     return records
 
@@ -252,5 +259,4 @@
     args = parser.parse_args()
 
     result = blastxml2gff3(**vars(args))
-
     GFF.write(result, sys.stdout)