diff select_product.py @ 10:82186605ba75 draft default tip

Uploaded
author brigidar
date Fri, 09 Oct 2015 17:30:50 -0400
parents 53f1f83e80a1
children
line wrap: on
line diff
--- a/select_product.py	Fri Oct 09 11:52:37 2015 -0400
+++ b/select_product.py	Fri Oct 09 17:30:50 2015 -0400
@@ -38,33 +38,6 @@
     return FH
 
 
-def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, product, keys_1, n):
-    """
-    Write the feature information
-    """
-    
-    lines = []
-    
-    for gname, ginfo in genes.items():
-        line = [str(chr_id),
-                ginfo[3],
-                str(ginfo[0]-n),
-                str(ginfo[1]+n),
-                ginfo[2],
-                str(product)]
-                
-# to key on specfic products only
-        for k in keys_1.split(","):
-            if k in str(product):
-        
-                lines.append('\t'.join(line)+"\n")
-                break
-
-    return lines
-
-
-
-
 def gbk_parse(fname,keys_1,n,outfile=""):
     """
     Extract genome annotation recods from genbank format 
@@ -77,9 +50,7 @@
 
     for record in SeqIO.parse(fhand, "genbank"):
         gene_tags = dict()
-        tx_tags = collections.defaultdict(list) 
-        exon = collections.defaultdict(list) 
-        cds = collections.defaultdict(list) 
+ 
         mol_type, chr_id = None, None 
 
         for rec in record.features:
@@ -90,7 +61,7 @@
             if rec.qualifiers.has_key('product'):
             
                 product = rec.qualifiers['product'][0]
-            
+
 
             if rec.type == 'source':
                 try:
@@ -103,49 +74,25 @@
                 except:
                     chr_id = record.name 
                 continue 
+            
 
-            strand='-'
-            strand='+' if rec.strand>0 else strand
-            
-            fid = None 
-            try:
-                fid = rec.qualifiers['gene'][0]
-            except:
-                pass
-
-            transcript_id = None
-            try:
-                transcript_id = rec.qualifiers['transcript_id'][0]
-            except:
-                pass 
+            start = int(rec.location.start)+1-n
+            stop = int(rec.location.end)+n
 
-            if re.search(r'gene', rec.type):
-                gene_tags[fid] = (rec.location._start.position+1, 
-                                    rec.location._end.position, 
-                                    strand,
-                                    rec.type
-                                    )
-            elif rec.type == 'exon':
-                exon[fid].append((rec.location._start.position+1, 
-                                    rec.location._end.position))
-            elif rec.type=='CDS':
-                cds[fid].append((rec.location._start.position+1, 
-                                    rec.location._end.position))
-            else: 
-                # get all transcripts 
-                if transcript_id: 
-                    tx_tags[fid].append((rec.location._start.position+1,
-                                    rec.location._end.position, 
-                                    transcript_id,
-                                    rec.type))
-        # record extracted, generate feature table
-        lines += feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, product,keys_1,n)
-        
+            if start < 0:
+                start = 1
+                    
+            for k in keys_1.split(","):
+    
+                if str(k) in str(product):
+                    lines.append('\t'.join([str(chr_id),str(start), str(stop), product])+"\n")
+
+    
     fhand.close()
 
     return lines
 
-if __name__=='__main__': 
+if __name__=='__main__':
 
     try:
         parser = argparse.ArgumentParser()