changeset 1:f8dee15a72a4 draft

Uploaded
author pedro_araujo
date Wed, 27 Jan 2021 14:52:31 +0000
parents e4b3fc88efe0
children 8674f554d76b
files run_galaxy.py
diffstat 1 files changed, 23 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/run_galaxy.py	Wed Jan 27 13:50:11 2021 +0000
+++ b/run_galaxy.py	Wed Jan 27 14:52:31 2021 +0000
@@ -6,7 +6,7 @@
 		import pickle
 		import os
 		import re
-		with open('files/FeatureDataset', 'rb') as f:
+		with open('files/feature_dataset', 'rb') as f:
 			dataset = pickle.load(f)
 		self.all_phages = []
 		self.all_bacteria = []
@@ -96,12 +96,15 @@
 				from Bio import SeqIO
 				phage = {}
 				Entrez.email = 'insert@email.com'
-				with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=ID) as handle:
-					genome = SeqIO.read(handle, "gb")
-				for feat in genome.features:
-					if feat.type == 'CDS':
-						try: temp_phage[ID][feat.qualifiers['protein_id'][0]] = [feat.qualifiers['product'][0], feat.qualifiers['translation'][0]]
-						except: pass
+				try:
+					with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=ID) as handle:
+						genome = SeqIO.read(handle, "gb")
+					for feat in genome.features:
+						if feat.type == 'CDS':
+							try: temp_phage[ID][feat.qualifiers['protein_id'][0]] = [feat.qualifiers['product'][0], feat.qualifiers['translation'][0]]
+							except: pass
+				except:
+					print(ID, 'not found in GenBank')
 		return temp_phage
 
 	def _retrieve_from_bact_id(self, bacteria):
@@ -110,15 +113,16 @@
 			temp_bacteria[ID] = {}
 			if '.' in ID:
 				ID = ID[:ID.find('.')]
-			if ID in self.all_bacteria:
-				import json
-				with open('files/bacteria/' + ID + '.json', encoding='utf-8') as f:
-					temp_bacteria[ID] = json.loads(f.read())
-			else:
-				from Bio import Entrez
-				from Bio import SeqIO
-				bacteria = {}
-				Entrez.email = 'insert@email.com'
+			#if ID in self.all_bacteria:
+			#	import json
+			#	with open('files/bacteria/' + ID + '.json', encoding='utf-8') as f:
+			#		temp_bacteria[ID] = json.loads(f.read())
+			#else:
+			from Bio import Entrez
+			from Bio import SeqIO
+			bacteria = {}
+			Entrez.email = 'insert@email.com'
+			try:
 				with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=ID+'.1') as handle:
 					genome = SeqIO.read(handle, "gb")
 				for feat in genome.features:
@@ -154,6 +158,8 @@
 								else:
 									j += 1
 							temp_bacteria[ID][protKey] = [product, protSeq[:protSeq.find('"')]]
+			except:
+				print(ID, 'not found in GenBank')
 		return temp_bacteria
 
 	def _find_phage_functions(self, phage_dict, run_interpro):
@@ -363,5 +369,5 @@
 		run_interpro = False
 	model = sys.argv[6]
 	GalaxyPrediction(phage_input_type=phage_input_type, bact_input_type=bact_input_type, phage=Phages, bacteria=Bacts, ml_model=model, run_interpro=run_interpro)
-	# rg = GalaxyPrediction(phage_input_type='ID', bact_input_type='ID', phage='NC_050154', bacteria='NC_007414,NZ_MK033499,NZ_CP031214')
+	#rg = GalaxyPrediction(phage_input_type='ID', bact_input_type='ID', phage='NC_050154', bacteria='NC_007414,NZ_MK033499,NZ_CP031214')
 	# GalaxyPrediction(phage_input_type='ID', bact_input_type='ID', phage='NC_031087,NC_049833,NC_049838,NC_049444', bacteria='LR133964', ml_model='SVM')