Mercurial > repos > pedro_araujo > phage_host_prediction
comparison prophage_finder.py @ 0:e4b3fc88efe0 draft
Uploaded
author | pedro_araujo |
---|---|
date | Wed, 27 Jan 2021 13:50:11 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e4b3fc88efe0 |
---|---|
1 import ast | |
2 import json | |
3 import os | |
4 | |
5 import pandas as pd | |
6 | |
7 from FeatureConstruction import * | |
8 | |
9 | |
def phages_bact(frame=None):
    """Count the phages that have at least one associated host.

    Every cell of the ``Host_ID`` column is parsed with
    ``ast.literal_eval``; a row is counted when the parsed value is truthy
    (the script stores these cells as ``str(list)``, so an empty list is
    not counted).

    Args:
        frame: Table to inspect. When omitted, the module-level ``data``
            DataFrame is used, so the original zero-argument call keeps
            working.

    Returns:
        The number of rows whose parsed ``Host_ID`` value is non-empty.
    """
    if frame is None:
        frame = data
    # Iterate the column directly instead of doing one .loc lookup per label.
    return sum(1 for hosts in frame['Host_ID'] if ast.literal_eval(hosts))
16 | |
17 | |
# Phage/host table; the first CSV column is used as the index.
data = pd.read_csv('files/NCBI_Phage_Bacteria_Data.csv', header=0, index_col=0)

# NOTE(review): absolute path on a single machine - confirm before running elsewhere.
with open('C:/Users/Pedro/Downloads/pha_in_bac_2_test.json', encoding='utf-8') as F:
    prophage = json.load(F)

# Each key of `prophage` is a bacterium identifier whose value lists phage
# identifiers. The bacterium (with a '.1' suffix) is added to the Host_ID
# list of every listed phage that is present in the table.
# NOTE(review): nesting reconstructed from a listing without indentation;
# the cell is rewritten only when a new host is appended - confirm.
for bact, phage_ids in prophage.items():
    host_id = bact + '.1'
    for phage in phage_ids:
        if phage not in data.index:
            continue
        temp = ast.literal_eval(data.loc[phage, 'Host_ID'])
        if host_id in temp:
            continue
        temp.append(host_id)
        data.loc[phage, 'Host_ID'] = str(temp)

# Persist the extended host lists back to the same CSV.
data.to_csv('files/NCBI_Phage_Bacteria_Data.csv')
32 | |
# Cluster the phage tail proteins with cd-hit, then give every phage that owns
# a protein in a cluster the 'Bacteria ID' entries of the cluster's reference
# phage (the phage owning the protein on the cluster's line 0).
#
# NOTE(review): other parts of this script use the 'Host_ID' column while this
# section uses 'Bacteria ID' - confirm which column name is current.
# NOTE(review): the only data.to_csv() call visible in this script runs before
# this section, so these in-memory updates are not written back - confirm.
fc = FeatureConstruction()
phageTails = fc.phageTails

os.system('cd-hit -i files/tails.fasta -o files/cdhit')


def _cluster_protein_id(line):
    """Return the text between the first '>' and the first '...' of a line."""
    return line[line.find('>') + 1:line.find('...')]


def _owner_phage(protein):
    """Return the first phage in phageTails that lists *protein*, else None."""
    for candidate in phageTails:
        if protein in phageTails[candidate].keys():
            return candidate
    return None


def _share_reference_hosts(reference, members):
    """Append the reference phage's 'Bacteria ID' entries to each member's phage.

    Does nothing when the cluster has no usable reference phage (not found in
    phageTails, or absent from the table), instead of failing or silently
    reusing the reference of an earlier cluster.
    """
    if reference is None or reference not in data.index:
        return
    temp_ref = ast.literal_eval(data.loc[reference, 'Bacteria ID'])
    for prot in members:
        # NOTE(review): indentation was lost in the listing this was rebuilt
        # from; the search is assumed to stop at the first phage owning the
        # protein, whether or not that phage is in the table - confirm.
        phage = _owner_phage(prot)
        if phage is None or phage not in data.index:
            continue
        temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
        for host in temp_ref:
            if host not in temp:
                temp.append(host)
        data.loc[phage, 'Bacteria ID'] = str(temp)


ref_phage = None
temp_cluster = []
with open('files/cdhit.clstr', 'r') as f:
    for line in f:
        if '>Cluster' in line:
            # A new header closes the previous cluster: process it, then reset
            # the per-cluster state so nothing leaks into the next cluster.
            _share_reference_hosts(ref_phage, temp_cluster)
            ref_phage = None
            temp_cluster = []
        elif line.startswith('0'):
            # NOTE(review): cd-hit marks the representative sequence with '*',
            # which is presumably not always entry 0 - confirm this is intended.
            ref_phage = _owner_phage(_cluster_protein_id(line))
        else:
            temp_cluster.append(_cluster_protein_id(line))

# The file does not end with a header, so the final cluster must be flushed
# explicitly; otherwise its members are never processed.
_share_reference_hosts(ref_phage, temp_cluster)
temp_cluster = []
66 | |
67 | |
# Run Phigaro on every bacterial sequence in files/bactDNA.json. For each VOG
# listed in the report, every phage named in the third tab-separated column of
# that VOG's table gets the bacterium appended to its 'Bacteria ID' list.
#
# NOTE(review): other parts of this script use the 'Host_ID' column (including
# phages_bact(), whose count is printed below) while this section updates
# 'Bacteria ID' - confirm which column name is current.
# NOTE(review): the only data.to_csv() call visible in this script runs before
# this section, so these in-memory updates are not written back - confirm.
with open('files/bactDNA.json', encoding='utf-8') as F:
    bacProt = json.load(F)

listDone = []
for bact in bacProt:
    if bact in listDone:
        continue
    listDone.append(bact)

    with open('files/temp_genome.fasta', 'w') as F:
        F.write('>' + bact + '\n' + bacProt[bact] + '\n')

    # Drop the report left by the previous genome: if Phigaro writes nothing
    # for this one, the old report must not be attributed to this bacterium.
    if os.path.exists('files/temp_phigaro.html'):
        os.remove('files/temp_phigaro.html')
    os.system('phigaro -f files/temp_genome.fasta --not-open -d -o files/temp_phigaro')  # Phigaro

    if os.path.exists('files/temp_phigaro.html'):
        with open('files/temp_phigaro.html', 'r') as Ph:
            tempPhigaro = Ph.readlines()
    else:
        print('No Phigaro report produced for', bact)
        tempPhigaro = []

    for line in tempPhigaro:
        if '<div class="accordion-body collapse"' not in line:
            continue
        # Everything after the first '>' on the line is a ', '-separated VOG list.
        VOGs = line[line.find('>') + 1:].strip('\n').split(', ')
        for vog in VOGs:
            if not vog:
                # An empty list would otherwise try to open 'files/VOG_tables/.txt'.
                continue
            with open('files/VOG_tables/' + vog + '.txt', 'r') as f:
                temp_phages = f.readlines()
            # Skip the first (header) row of the VOG table.
            for row in temp_phages[1:]:
                fields = row.rstrip('\n').split('\t')
                if len(fields) < 3:
                    # Blank or truncated row: there is no third column to read.
                    continue
                phage = fields[2]
                if phage in data.index:
                    temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
                    if bact not in temp:
                        temp.append(bact)
                        data.loc[phage, 'Bacteria ID'] = str(temp)

    # NOTE(review): placement rebuilt from a listing without indentation;
    # assumed to be a once-per-bacterium progress line (hence end="\r").
    print('Number of phages with associated bacteria strains:', phages_bact(), end="\r")
97 | |
98 '''os.system('wget --post-file="files/temp_genome.fasta" "http://phaster.ca/phaster_api?contigs=1" -O files/temp_phaster') # Phaster | |
99 with open('files/temp_phaster', encoding='utf-8') as F: | |
100 temp = json.loads(F.read()) | |
101 os.system('wget "http://phaster.ca/phaster_api?acc=' + temp['job_id'] + 'Z" -O files/temp_phaster') # servidor cheio | |
102 os.system('PhiSpy.py files/temp_genome.fasta -o files/temp_phipsy') # Phipsy - não possível com fastas''' |