Mercurial > repos > pedro_araujo > phage_host_prediction
changeset 1:f8dee15a72a4 draft
Uploaded
author | pedro_araujo |
---|---|
date | Wed, 27 Jan 2021 14:52:31 +0000 |
parents | e4b3fc88efe0 |
children | 8674f554d76b |
files | run_galaxy.py |
diffstat | 1 files changed, 23 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/run_galaxy.py Wed Jan 27 13:50:11 2021 +0000
+++ b/run_galaxy.py Wed Jan 27 14:52:31 2021 +0000
@@ -6,7 +6,7 @@
         import pickle
         import os
         import re
-        with open('files/FeatureDataset', 'rb') as f:
+        with open('files/feature_dataset', 'rb') as f:
             dataset = pickle.load(f)
         self.all_phages = []
         self.all_bacteria = []
@@ -96,12 +96,15 @@
                 from Bio import SeqIO
                 phage = {}
                 Entrez.email = 'insert@email.com'
-                with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=ID) as handle:
-                    genome = SeqIO.read(handle, "gb")
-                    for feat in genome.features:
-                        if feat.type == 'CDS':
-                            try: temp_phage[ID][feat.qualifiers['protein_id'][0]] = [feat.qualifiers['product'][0], feat.qualifiers['translation'][0]]
-                            except: pass
+                try:
+                    with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=ID) as handle:
+                        genome = SeqIO.read(handle, "gb")
+                        for feat in genome.features:
+                            if feat.type == 'CDS':
+                                try: temp_phage[ID][feat.qualifiers['protein_id'][0]] = [feat.qualifiers['product'][0], feat.qualifiers['translation'][0]]
+                                except: pass
+                except:
+                    print(ID, 'not found in GenBank')
         return temp_phage
 
     def _retrieve_from_bact_id(self, bacteria):
@@ -110,15 +113,16 @@
             temp_bacteria[ID] = {}
             if '.' in ID:
                 ID = ID[:ID.find('.')]
-            if ID in self.all_bacteria:
-                import json
-                with open('files/bacteria/' + ID + '.json', encoding='utf-8') as f:
-                    temp_bacteria[ID] = json.loads(f.read())
-            else:
-                from Bio import Entrez
-                from Bio import SeqIO
-                bacteria = {}
-                Entrez.email = 'insert@email.com'
+            #if ID in self.all_bacteria:
+            #    import json
+            #    with open('files/bacteria/' + ID + '.json', encoding='utf-8') as f:
+            #        temp_bacteria[ID] = json.loads(f.read())
+            #else:
+            from Bio import Entrez
+            from Bio import SeqIO
+            bacteria = {}
+            Entrez.email = 'insert@email.com'
+            try:
                 with Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id=ID+'.1') as handle:
                     genome = SeqIO.read(handle, "gb")
                     for feat in genome.features:
@@ -154,6 +158,8 @@
                                             else:
                                                 j += 1
                                     temp_bacteria[ID][protKey] = [product, protSeq[:protSeq.find('"')]]
+            except:
+                print(ID, 'not found in GenBank')
         return temp_bacteria
 
     def _find_phage_functions(self, phage_dict, run_interpro):
@@ -363,5 +369,5 @@
         run_interpro = False
     model = sys.argv[6]
     GalaxyPrediction(phage_input_type=phage_input_type, bact_input_type=bact_input_type, phage=Phages, bacteria=Bacts, ml_model=model, run_interpro=run_interpro)
-    # rg = GalaxyPrediction(phage_input_type='ID', bact_input_type='ID', phage='NC_050154', bacteria='NC_007414,NZ_MK033499,NZ_CP031214')
+    #rg = GalaxyPrediction(phage_input_type='ID', bact_input_type='ID', phage='NC_050154', bacteria='NC_007414,NZ_MK033499,NZ_CP031214')
     # GalaxyPrediction(phage_input_type='ID', bact_input_type='ID', phage='NC_031087,NC_049833,NC_049838,NC_049444', bacteria='LR133964', ml_model='SVM')