| 
2
 | 
     1 import ast
 | 
| 
 | 
     2 import json
 | 
| 
 | 
     3 import os
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 import pandas as pd
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 from FeatureConstruction import *
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 def phages_bact():
 | 
| 
 | 
    11 	count_bacteria = 0
 | 
| 
 | 
    12 	for phage in data.index:
 | 
| 
 | 
    13 		if ast.literal_eval(data.loc[phage, 'Host_ID']):
 | 
| 
 | 
    14 			count_bacteria += 1
 | 
| 
 | 
    15 	return count_bacteria
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 data = pd.read_csv('files/NCBI_Phage_Bacteria_Data.csv', header=0, index_col=0)
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 with open('C:/Users/Pedro/Downloads/pha_in_bac_2_test.json', encoding='utf-8') as F:
 | 
| 
 | 
    21 	prophage = json.loads(F.read())
 | 
| 
 | 
    22 
 | 
| 
 | 
    23 for bact in prophage.keys():
 | 
| 
 | 
    24 	for phage in prophage[bact]:
 | 
| 
 | 
    25 		if phage in data.index:
 | 
| 
 | 
    26 			temp = ast.literal_eval(data.loc[phage, 'Host_ID'])
 | 
| 
 | 
    27 			if bact + '.1' not in temp:
 | 
| 
 | 
    28 				temp.append(bact+'.1')
 | 
| 
 | 
    29 				data.loc[phage, 'Host_ID'] = str(temp)
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 data.to_csv('files/NCBI_Phage_Bacteria_Data.csv')
 | 
| 
 | 
    32 
 | 
| 
 | 
    33 fc = FeatureConstruction()
 | 
| 
 | 
    34 phageTails = fc.phageTails
 | 
| 
 | 
    35 
 | 
| 
 | 
    36 os.system('cd-hit -i files/tails.fasta -o files/cdhit')
 | 
| 
 | 
    37 
 | 
| 
 | 
    38 temp_cluster = []
 | 
| 
 | 
    39 with open('files/cdhit.clstr', 'r') as f:
 | 
| 
 | 
    40 	for line in f.readlines():
 | 
| 
 | 
    41 		if '>Cluster' in line:
 | 
| 
 | 
    42 			for prot in temp_cluster:
 | 
| 
 | 
    43 				for phage in phageTails:
 | 
| 
 | 
    44 					if prot in phageTails[phage].keys():
 | 
| 
 | 
    45 						if phage in data.index:
 | 
| 
 | 
    46 							temp_ref = ast.literal_eval(data.loc[ref_phage, 'Bacteria ID'])
 | 
| 
 | 
    47 							temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
 | 
| 
 | 
    48 							for i in temp_ref:
 | 
| 
 | 
    49 								if i not in temp:
 | 
| 
 | 
    50 									temp.append(i)
 | 
| 
 | 
    51 									data.loc[phage, 'Bacteria ID'] = str(temp)
 | 
| 
 | 
    52 						break
 | 
| 
 | 
    53 			temp_cluster = []
 | 
| 
 | 
    54 		elif line[0] == '0':
 | 
| 
 | 
    55 			pos_i = line.find('>') + 1
 | 
| 
 | 
    56 			pos_f = line.find('...')
 | 
| 
 | 
    57 			ref_prot = line[pos_i:pos_f]
 | 
| 
 | 
    58 			for phage in phageTails:
 | 
| 
 | 
    59 				if ref_prot in phageTails[phage].keys():
 | 
| 
 | 
    60 					ref_phage = phage
 | 
| 
 | 
    61 					break
 | 
| 
 | 
    62 		else:
 | 
| 
 | 
    63 			pos_i = line.find('>') + 1
 | 
| 
 | 
    64 			pos_f = line.find('...')
 | 
| 
 | 
    65 			temp_cluster.append(line[pos_i:pos_f])
 | 
| 
 | 
    66 
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 with open('files/bactDNA.json', encoding='utf-8') as F:
 | 
| 
 | 
    69 	bacProt = json.loads(F.read())
 | 
| 
 | 
    70 
 | 
| 
 | 
    71 listDone = []
 | 
| 
 | 
    72 for bact in bacProt:
 | 
| 
 | 
    73 	if bact in listDone:
 | 
| 
 | 
    74 		pass
 | 
| 
 | 
    75 	else:
 | 
| 
 | 
    76 		listDone.append(bact)
 | 
| 
 | 
    77 		with open('files/temp_genome.fasta', 'w') as F:
 | 
| 
 | 
    78 			F.write('>' + bact + '\n' + bacProt[bact] + '\n')
 | 
| 
 | 
    79 		os.system('phigaro -f files/temp_genome.fasta --not-open -d -o files/temp_phigaro')  # Phigaro
 | 
| 
 | 
    80 		with open('files/temp_phigaro.html', 'r') as Ph:
 | 
| 
 | 
    81 			tempPhigaro = Ph.readlines()
 | 
| 
 | 
    82 		for line in tempPhigaro:
 | 
| 
 | 
    83 			if '<div class="accordion-body collapse"' in line:
 | 
| 
 | 
    84 				VOGs = line[line.find('>')+1:].strip('\n').split(', ')
 | 
| 
 | 
    85 				for vog in VOGs:
 | 
| 
 | 
    86 					with open('files/VOG_tables/' + vog + '.txt', 'r') as f:
 | 
| 
 | 
    87 						temp_phages = f.readlines()
 | 
| 
 | 
    88 					for i in range(len(temp_phages)):
 | 
| 
 | 
    89 						if i != 0:
 | 
| 
 | 
    90 							phage = temp_phages[i].split('\t')[2]
 | 
| 
 | 
    91 							if phage in data.index:
 | 
| 
 | 
    92 								temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
 | 
| 
 | 
    93 								if bact not in temp:
 | 
| 
 | 
    94 									temp.append(bact)
 | 
| 
 | 
    95 									data.loc[phage, 'Bacteria ID'] = str(temp)
 | 
| 
 | 
    96 		print('Number of phages with associated bacteria strains:', phages_bact(), end="\r")
 | 
| 
 | 
    97 
 | 
| 
 | 
# Disabled alternatives to Phigaro, kept for reference inside a bare string
# literal so that nothing in it is executed:
#   * PHASTER: posts the temporary genome to the web API, then fetches the job
#     by its id; the original note on that line reads "server full".
#   * PhiSpy: run on the temporary genome; the original note on that line
#     reads "not possible with fastas".
'''os.system('wget --post-file="files/temp_genome.fasta" "http://phaster.ca/phaster_api?contigs=1" -O files/temp_phaster') # Phaster
with open('files/temp_phaster', encoding='utf-8') as F:
	temp = json.loads(F.read())
os.system('wget "http://phaster.ca/phaster_api?acc=' + temp['job_id'] + 'Z" -O files/temp_phaster') # servidor cheio
os.system('PhiSpy.py files/temp_genome.fasta -o files/temp_phipsy') # Phipsy - não possível com fastas'''
 |