| 2 | 1 import ast | 
|  | 2 import json | 
|  | 3 import os | 
|  | 4 | 
|  | 5 import pandas as pd | 
|  | 6 | 
|  | 7 from FeatureConstruction import * | 
|  | 8 | 
|  | 9 | 
def phages_bact(table=None, column='Host_ID'):
	"""Count the phages whose host list is non-empty.

	Each cell of *column* is parsed with ``ast.literal_eval`` (the cells are
	expected to hold the string form of a Python list); a phage is counted
	when the parsed value is truthy.

	Args:
		table: DataFrame to inspect. Defaults to the module-level ``data``
			table, so the original zero-argument call keeps working.
		column: Name of the column to parse. Defaults to ``'Host_ID'``.

	Returns:
		int: Number of rows whose parsed cell is non-empty.

	Raises:
		ValueError, SyntaxError: If a cell is not a valid Python literal.
	"""
	if table is None:
		# Resolved at call time: ``data`` is created further down the script.
		table = data
	# Walking the column directly avoids one ``.loc`` lookup per row and also
	# works when index labels are duplicated.
	return sum(1 for cell in table[column] if ast.literal_eval(cell))
|  | 16 | 
|  | 17 | 
# Phage table: the first CSV column becomes the index and is used as the phage ID.
data = pd.read_csv('files/NCBI_Phage_Bacteria_Data.csv', header=0, index_col=0)

# Prophage predictions; keys are treated as bacteria identifiers and each value
# as an iterable of phage IDs.
# NOTE(review): this is an absolute, user-specific path - confirm it exists on
# the machine running the script.
with open('C:/Users/Pedro/Downloads/pha_in_bac_2_test.json', encoding='utf-8') as prophage_file:
	prophage = json.load(prophage_file)

# Register every bacterium (with a '.1' suffix appended) as a host of the
# phages listed for it, skipping phages that are absent from the table.
for host, found_phages in prophage.items():
	for phage_id in found_phages:
		if phage_id not in data.index:
			continue
		versioned_host = host + '.1'
		known_hosts = ast.literal_eval(data.loc[phage_id, 'Host_ID'])
		if versioned_host in known_hosts:
			continue
		known_hosts.append(versioned_host)
		# Cells store the list as its string representation.
		data.loc[phage_id, 'Host_ID'] = str(known_hosts)

# Persist the updated table over the input file.
data.to_csv('files/NCBI_Phage_Bacteria_Data.csv')
|  | 32 | 
fc = FeatureConstruction()
phageTails = fc.phageTails

# Cluster the tail proteins with cd-hit; the cluster listing is read back from
# files/cdhit.clstr below.
os.system('cd-hit -i files/tails.fasta -o files/cdhit')

# Reverse index: protein ID -> first phage (in phageTails iteration order) that
# carries it. Built once instead of rescanning phageTails for every protein.
protein_to_phage = {}
for phage in phageTails:
	for prot in phageTails[phage].keys():
		protein_to_phage.setdefault(prot, phage)


def _clstr_protein_id(line):
	"""Return the text between the first '>' and the first '...' of a .clstr member line."""
	return line[line.find('>') + 1:line.find('...')]


def _share_reference_hosts(ref_phage, cluster_proteins):
	"""Add the reference phage's 'Bacteria ID' entries to each phage owning a clustered protein.

	Nothing is done when the cluster has no usable reference phage (unknown
	protein, or phage missing from ``data``); member phages missing from
	``data`` are skipped.
	"""
	if ref_phage is None or ref_phage not in data.index:
		return
	ref_hosts = ast.literal_eval(data.loc[ref_phage, 'Bacteria ID'])
	for prot in cluster_proteins:
		phage = protein_to_phage.get(prot)
		if phage is None or phage not in data.index:
			continue
		hosts = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
		changed = False
		for host in ref_hosts:
			if host not in hosts:
				hosts.append(host)
				changed = True
		if changed:
			# Cells store the list as its string representation.
			data.loc[phage, 'Bacteria ID'] = str(hosts)


temp_cluster = []
ref_phage = None
with open('files/cdhit.clstr', 'r') as f:
	for line in f:
		if not line.strip():
			continue
		if line.startswith('>Cluster'):
			# A new header closes the previous cluster: propagate, then reset
			# so a reference phage never leaks into the next cluster.
			_share_reference_hosts(ref_phage, temp_cluster)
			temp_cluster = []
			ref_phage = None
		elif line.startswith('0'):
			# Member 0 of each cluster is used as the reference sequence.
			# NOTE(review): cd-hit marks the representative with a trailing
			# '*'; confirm member 0 is always the intended reference.
			ref_phage = protein_to_phage.get(_clstr_protein_id(line))
		else:
			temp_cluster.append(_clstr_protein_id(line))

# The file does not end with a header, so the last cluster must be flushed here;
# otherwise its members would never receive the reference hosts.
_share_reference_hosts(ref_phage, temp_cluster)
|  | 66 | 
|  | 67 | 
# Bacterial sequences keyed by bacterium identifier; each value is written out
# as a one-record FASTA file for Phigaro.
with open('files/bactDNA.json', encoding='utf-8') as F:
	bacProt = json.load(F)

# NOTE(review): 'Bacteria ID' is updated in memory only; this section never
# writes ``data`` back to disk - confirm it is saved elsewhere.
listDone = []
for bact in bacProt:
	if bact in listDone:
		continue
	listDone.append(bact)

	with open('files/temp_genome.fasta', 'w') as F:
		F.write('>' + bact + '\n' + bacProt[bact] + '\n')

	# Remove the report left by the previous genome: if Phigaro fails, the old
	# file would otherwise be parsed and its hits credited to this bacterium.
	if os.path.exists('files/temp_phigaro.html'):
		os.remove('files/temp_phigaro.html')
	os.system('phigaro -f files/temp_genome.fasta --not-open -d -o files/temp_phigaro')  # Phigaro
	if not os.path.exists('files/temp_phigaro.html'):
		print('Phigaro produced no report for', bact, '- skipping')
		continue

	with open('files/temp_phigaro.html', 'r') as Ph:
		tempPhigaro = Ph.readlines()

	for line in tempPhigaro:
		if '<div class="accordion-body collapse"' not in line:
			continue
		# Everything after the first '>' is read as a ', '-separated list of VOG names.
		VOGs = line[line.find('>')+1:].strip('\n').split(', ')
		for vog in VOGs:
			if not vog:
				# An empty list yields [''], which would open 'files/VOG_tables/.txt'.
				continue
			with open('files/VOG_tables/' + vog + '.txt', 'r') as f:
				temp_phages = f.readlines()
			# The first line of each VOG table is skipped (treated as a header).
			for row in temp_phages[1:]:
				fields = row.rstrip('\n').split('\t')
				if len(fields) < 3:
					# Blank or truncated row: no third column to read.
					continue
				phage = fields[2]
				if phage in data.index:
					temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
					if bact not in temp:
						temp.append(bact)
						data.loc[phage, 'Bacteria ID'] = str(temp)

	# NOTE(review): phages_bact() reads the 'Host_ID' column whereas this loop
	# updates 'Bacteria ID' - confirm the progress count uses the intended column.
	print('Number of phages with associated bacteria strains:', phages_bact(), end="\r")
|  | 97 | 
# Disabled alternatives to Phigaro, kept for reference only: the bare string
# below is an expression statement and is never executed.
# It holds a PHASTER API submission/retrieval via wget and a PhiSpy call.
# Inline notes translated: "servidor cheio" = server full;
# "não possível com fastas" = not possible with FASTA files.
'''os.system('wget --post-file="files/temp_genome.fasta" "http://phaster.ca/phaster_api?contigs=1" -O files/temp_phaster') # Phaster
with open('files/temp_phaster', encoding='utf-8') as F:
	temp = json.loads(F.read())
os.system('wget "http://phaster.ca/phaster_api?acc=' + temp['job_id'] + 'Z" -O files/temp_phaster') # servidor cheio
os.system('PhiSpy.py files/temp_genome.fasta -o files/temp_phipsy') # Phipsy - não possível com fastas'''