comparison phage_host_prediction/prophage_finder.py @ 2:3e1e8be4e65c draft default tip

Uploaded
author pedro_araujo
date Fri, 02 Apr 2021 10:11:13 +0000
parents
children
comparison
equal deleted inserted replaced
1:d9cda08472ea 2:3e1e8be4e65c
1 import ast
2 import json
3 import os
4
5 import pandas as pd
6
7 from FeatureConstruction import *
8
9
def phages_bact():
    """Count the phages whose 'Host_ID' cell holds a non-empty value.

    Reads the module-level ``data`` table. Each 'Host_ID' cell is parsed with
    ``ast.literal_eval`` and the phage is counted when the parsed value is
    truthy.

    Returns:
        The number of index entries of ``data`` with a truthy 'Host_ID'.
    """
    return sum(
        1
        for phage_id in data.index
        if ast.literal_eval(data.loc[phage_id, 'Host_ID'])
    )
16
17
# Phage table: the first CSV column is used as the index, and the 'Host_ID'
# cells hold Python list literals stored as text.
data = pd.read_csv('files/NCBI_Phage_Bacteria_Data.csv', header=0, index_col=0)

# NOTE(review): absolute, machine-specific path - confirm it before running
# this script on another computer.
with open('C:/Users/Pedro/Downloads/pha_in_bac_2_test.json', encoding='utf-8') as F:
    prophage = json.load(F)

# For every bacterium in the JSON, register it (with a '.1' suffix appended to
# its key) as a host of each listed phage that exists in the table.
for bact, bact_phages in prophage.items():
    host_id = bact + '.1'
    for phage in bact_phages:
        if phage not in data.index:
            continue
        temp = ast.literal_eval(data.loc[phage, 'Host_ID'])
        if host_id not in temp:
            temp.append(host_id)
            data.loc[phage, 'Host_ID'] = str(temp)

# Write the updated table back over the file it was read from.
data.to_csv('files/NCBI_Phage_Bacteria_Data.csv')
32
fc = FeatureConstruction()
phageTails = fc.phageTails

# Cluster the tail proteins with CD-HIT; the cluster listing is read back from
# files/cdhit.clstr below.
os.system('cd-hit -i files/tails.fasta -o files/cdhit')

# NOTE(review): the first part of this script reads and writes the 'Host_ID'
# column, while this section uses 'Bacteria ID' - confirm that both columns
# exist in the CSV.
# NOTE(review): the member numbered 0 is taken as the cluster reference.
# CD-HIT marks the representative sequence with a trailing '*' - confirm that
# member 0 is the intended reference.


def _clstr_protein_id(clstr_line):
    """Return the text between the first '>' and the first '...' of a .clstr member line."""
    return clstr_line[clstr_line.find('>') + 1:clstr_line.find('...')]


def _phage_with_protein(prot):
    """Return the first phage in phageTails whose proteins include prot, or None."""
    for candidate in phageTails:
        if prot in phageTails[candidate]:
            return candidate
    return None


def _merge_reference_hosts(reference, member_prots):
    """Add the reference phage's 'Bacteria ID' entries to each clustered phage.

    Does nothing when the cluster has no usable reference phage (none was
    found in phageTails, or it is not a row of ``data``). Members whose phage
    is unknown or missing from ``data`` are skipped.
    """
    if reference is None or reference not in data.index:
        return
    ref_hosts = ast.literal_eval(data.loc[reference, 'Bacteria ID'])
    for prot in member_prots:
        # The search stops at the first phage that lists the protein.
        owner = _phage_with_protein(prot)
        if owner is None or owner not in data.index:
            continue
        hosts = ast.literal_eval(data.loc[owner, 'Bacteria ID'])
        for host in ref_hosts:
            if host not in hosts:
                hosts.append(host)
        data.loc[owner, 'Bacteria ID'] = str(hosts)


ref_phage = None
temp_cluster = []
with open('files/cdhit.clstr', 'r') as f:
    for line in f:
        if '>Cluster' in line:
            # A new header closes the previous cluster: process it, then start
            # over so a stale reference is never carried into the next one.
            _merge_reference_hosts(ref_phage, temp_cluster)
            ref_phage = None
            temp_cluster = []
        elif line[0] == '0':
            ref_phage = _phage_with_protein(_clstr_protein_id(line))
        else:
            temp_cluster.append(_clstr_protein_id(line))

# The file does not end with a '>Cluster' header, so the final cluster has to
# be processed explicitly once the loop is over.
_merge_reference_hosts(ref_phage, temp_cluster)
66
67
with open('files/bactDNA.json', encoding='utf-8') as F:
    bacProt = json.load(F)

# NOTE(review): the 'Bacteria ID' updates made here are not written back to
# the CSV anywhere in this script, and phages_bact() counts 'Host_ID' rather
# than 'Bacteria ID' - confirm both are intended.

# Keys of a parsed JSON object are already unique, so every genome is visited
# exactly once without any extra bookkeeping.
for bact in bacProt:
    # Write the genome as a single-record FASTA for Phigaro to scan.
    with open('files/temp_genome.fasta', 'w') as F:
        F.write('>' + bact + '\n' + bacProt[bact] + '\n')

    # The report file name is reused for every genome. Remove the previous
    # report first so that a run producing no output cannot be mistaken for
    # results belonging to the current bacterium.
    if os.path.exists('files/temp_phigaro.html'):
        os.remove('files/temp_phigaro.html')
    os.system('phigaro -f files/temp_genome.fasta --not-open -d -o files/temp_phigaro')  # Phigaro
    if not os.path.exists('files/temp_phigaro.html'):
        continue

    with open('files/temp_phigaro.html', 'r') as Ph:
        tempPhigaro = Ph.readlines()

    for line in tempPhigaro:
        if '<div class="accordion-body collapse"' not in line:
            continue
        # Everything after the first '>' on the line is read as a
        # comma-separated list of VOG names.
        VOGs = line[line.find('>') + 1:].strip('\n').split(', ')
        for vog in VOGs:
            with open('files/VOG_tables/' + vog + '.txt', 'r') as f:
                temp_phages = f.readlines()
            # The first line of each table is skipped; the phage identifier
            # is read from the third tab-separated field of the others.
            for row in temp_phages[1:]:
                phage = row.split('\t')[2]
                if phage in data.index:
                    temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
                    if bact not in temp:
                        temp.append(bact)
                        data.loc[phage, 'Bacteria ID'] = str(temp)

    # Progress line, overwritten in place on each genome.
    print('Number of phages with associated bacteria strains:', phages_bact(), end="\r")
97
# Disabled alternatives to Phigaro. The commands below sit inside a bare
# triple-quoted string, so none of them is executed:
#   - PHASTER: posts the temporary genome to the phaster.ca API with wget,
#     parses the JSON reply and requests the job by its 'job_id'
#     (author's note "servidor cheio" = server full).
#   - PhiSpy: run on the temporary genome (author's note "não possível com
#     fastas" = not possible with FASTA files).
98 '''os.system('wget --post-file="files/temp_genome.fasta" "http://phaster.ca/phaster_api?contigs=1" -O files/temp_phaster') # Phaster
99 with open('files/temp_phaster', encoding='utf-8') as F:
100 temp = json.loads(F.read())
101 os.system('wget "http://phaster.ca/phaster_api?acc=' + temp['job_id'] + 'Z" -O files/temp_phaster') # servidor cheio
102 os.system('PhiSpy.py files/temp_genome.fasta -o files/temp_phipsy') # Phipsy - não possível com fastas'''