2
|
1 import ast
|
|
2 import json
|
|
3 import os
|
|
4
|
|
5 import pandas as pd
|
|
6
|
|
7 from FeatureConstruction import *
|
|
8
|
|
9
|
|
def phages_bact(frame=None, column='Host_ID'):
    """Count the phages that have at least one associated bacterium.

    Every cell of ``column`` is expected to hold the string form of a Python
    literal (the script stores lists via ``str(list)``); a row is counted when
    the parsed value is truthy, i.e. the list is not empty.

    Args:
        frame: DataFrame to inspect. When omitted, the module-level ``data``
            table is used, which is what the zero-argument call always did.
        column: Name of the column holding the stringified list. Defaults to
            ``'Host_ID'``, the column the original implementation read.

    Returns:
        int: number of rows whose parsed cell is truthy.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` when a
            cell is not a valid Python literal.
    """
    if frame is None:
        # Resolved at call time, so the table only has to exist when called.
        frame = data
    return sum(1 for cell in frame[column] if ast.literal_eval(cell))
|
|
16
|
|
17
|
|
# Load the phage table; the first CSV column becomes the index (phage IDs are
# looked up through ``data.index`` below).
data = pd.read_csv('files/NCBI_Phage_Bacteria_Data.csv', header=0, index_col=0)

# Mapping of bacterium -> phages found inside it.
# NOTE(review): absolute, user-specific path; the script only runs on a
# machine that has this exact file. Consider moving it next to 'files/'.
with open('C:/Users/Pedro/Downloads/pha_in_bac_2_test.json', encoding='utf-8') as F:
    prophage = json.load(F)

for bact, phages in prophage.items():
    # Hosts are stored with a '.1' suffix appended to the JSON key
    # (presumably the accession version -- confirm against the CSV).
    host_id = bact + '.1'
    for phage in phages:
        if phage not in data.index:
            continue
        # 'Host_ID' cells hold the string form of a list; parse, extend and
        # write the string form back only when something was added.
        hosts = ast.literal_eval(data.loc[phage, 'Host_ID'])
        if host_id not in hosts:
            hosts.append(host_id)
            data.loc[phage, 'Host_ID'] = str(hosts)

# Persist the enriched table over the input file.
data.to_csv('files/NCBI_Phage_Bacteria_Data.csv')
|
|
32
|
|
def _clstr_protein(member_line):
    """Return the text between the first '>' and the first '...' of a .clstr member line."""
    return member_line[member_line.find('>') + 1:member_line.find('...')]


def _phage_of(protein):
    """Return the first phage in ``phageTails`` that contains ``protein``, or None."""
    for candidate in phageTails:
        if protein in phageTails[candidate]:
            return candidate
    return None


def _spread_hosts(ref_phage, members):
    """Add the reference phage's 'Bacteria ID' entries to every member's phage.

    Does nothing when the reference phage is unknown or missing from ``data``;
    members whose phage is unknown or missing from ``data`` are skipped.
    """
    if ref_phage is None or ref_phage not in data.index:
        return
    # The reference row is not modified by the loop below (a member owned by
    # the reference phage gains nothing), so it is parsed once per cluster.
    ref_hosts = ast.literal_eval(data.loc[ref_phage, 'Bacteria ID'])
    for prot in members:
        phage = _phage_of(prot)
        if phage is None or phage not in data.index:
            continue
        hosts = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
        changed = False
        for host in ref_hosts:
            if host not in hosts:
                hosts.append(host)
                changed = True
        if changed:
            data.loc[phage, 'Bacteria ID'] = str(hosts)


# Tail proteins per phage, as exposed by FeatureConstruction.
fc = FeatureConstruction()
phageTails = fc.phageTails

# Cluster the tail proteins; the cluster listing is read from files/cdhit.clstr.
os.system('cd-hit -i files/tails.fasta -o files/cdhit')

# Walk the cluster file: a '>Cluster' header closes the previous cluster, the
# member numbered 0 is taken as the reference, every other member is queued.
# NOTE(review): member 0 is not necessarily the representative that CD-HIT
# marks with '*' -- confirm that "first member" is the intended reference.
ref_phage = None
temp_cluster = []
with open('files/cdhit.clstr', 'r') as f:
    for line in f:
        if '>Cluster' in line:
            _spread_hosts(ref_phage, temp_cluster)
            # Reset so a reference that cannot be resolved in the next cluster
            # does not silently reuse this cluster's reference phage.
            ref_phage = None
            temp_cluster = []
        elif line.startswith('0'):
            ref_phage = _phage_of(_clstr_protein(line))
        elif line.strip():
            temp_cluster.append(_clstr_protein(line))

# The file does not end with a '>Cluster' header, so the final cluster has to
# be flushed explicitly; previously its members were dropped.
_spread_hosts(ref_phage, temp_cluster)
|
|
66
|
|
67
|
|
# Bacterium -> sequence text; each entry is written out as a one-record FASTA.
with open('files/bactDNA.json', encoding='utf-8') as F:
    bacProt = json.load(F)

genome_fasta = 'files/temp_genome.fasta'
phigaro_report = 'files/temp_phigaro.html'

# Kept as a record of the bacteria handled so far. JSON object keys are
# unique, so no membership test is needed before processing a key.
listDone = []
for bact, genome in bacProt.items():
    listDone.append(bact)
    with open(genome_fasta, 'w') as fasta:
        fasta.write('>' + bact + '\n' + genome + '\n')

    # Drop the previous genome's report first: if phigaro writes nothing for
    # this genome, the old report must not be attributed to this bacterium.
    if os.path.exists(phigaro_report):
        os.remove(phigaro_report)
    os.system('phigaro -f files/temp_genome.fasta --not-open -d -o files/temp_phigaro')  # Phigaro
    if not os.path.exists(phigaro_report):
        print('No phigaro report produced for ' + bact + '; skipping')
        continue

    with open(phigaro_report, 'r') as Ph:
        for line in Ph:
            if '<div class="accordion-body collapse"' not in line:
                continue
            # Everything after the first '>' on the line is read as a
            # ', '-separated list of VOG names.
            VOGs = line[line.find('>') + 1:].strip('\n').split(', ')
            for vog in VOGs:
                with open('files/VOG_tables/' + vog + '.txt', 'r') as f:
                    rows = f.readlines()[1:]  # first line is skipped (header)
                for row in rows:
                    fields = row.split('\t')
                    if len(fields) < 3:
                        # Blank or short line: there is no third column to read.
                        continue
                    phage = fields[2]
                    if phage in data.index:
                        hosts = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
                        if bact not in hosts:
                            hosts.append(bact)
                            data.loc[phage, 'Bacteria ID'] = str(hosts)

    # Progress line, overwritten in place on each bacterium.
    # NOTE(review): phages_bact() counts the 'Host_ID' column while this loop
    # updates 'Bacteria ID' -- confirm which column the counter should report.
    print('Number of phages with associated bacteria strains:', phages_bact(), end="\r")

# Alternative prophage finders that were tried and left disabled:
#   os.system('wget --post-file="files/temp_genome.fasta" "http://phaster.ca/phaster_api?contigs=1" -O files/temp_phaster')  # Phaster
#   with open('files/temp_phaster', encoding='utf-8') as F:
#       temp = json.loads(F.read())
#   os.system('wget "http://phaster.ca/phaster_api?acc=' + temp['job_id'] + 'Z" -O files/temp_phaster')  # server full
#   os.system('PhiSpy.py files/temp_genome.fasta -o files/temp_phipsy')  # Phipsy - not possible with FASTA files
|