Mercurial > repos > pedro_araujo > phage_host
comparison phage_host_prediction/prophage_finder.py @ 2:3e1e8be4e65c draft default tip
Uploaded
| author | pedro_araujo |
|---|---|
| date | Fri, 02 Apr 2021 10:11:13 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:d9cda08472ea | 2:3e1e8be4e65c |
|---|---|
| 1 import ast | |
| 2 import json | |
| 3 import os | |
| 4 | |
| 5 import pandas as pd | |
| 6 | |
| 7 from FeatureConstruction import * | |
| 8 | |
| 9 | |
def phages_bact(table=None, column='Host_ID'):
    """Count the phages that have at least one associated bacterium.

    Each cell of *column* is parsed with ``ast.literal_eval``; a phage is
    counted when the parsed value is truthy (for the stringified lists this
    script stores, that means a non-empty list).

    Args:
        table: DataFrame to inspect. Defaults to the module-level ``data``
            frame, so the original zero-argument call keeps working.
        column: Name of the column holding the stringified list.

    Returns:
        The number of rows whose parsed cell is truthy.
    """
    # NOTE(review): the default column is 'Host_ID', but the clustering and
    # Phigaro steps of this script update 'Bacteria ID' - confirm which
    # column the progress count is supposed to follow.
    if table is None:
        table = data
    # Iterating the column values (instead of .loc per index label) also
    # works when index labels are duplicated. Cells that are not text, such
    # as NaN for a missing value, cannot be parsed and count as "no bacteria".
    return sum(
        1
        for cell in table[column]
        if isinstance(cell, str) and ast.literal_eval(cell)
    )
| 16 | |
| 17 | |
# Load the phage table; the first CSV column becomes the index that the rest
# of the script looks phage IDs up in.
data = pd.read_csv('files/NCBI_Phage_Bacteria_Data.csv', header=0, index_col=0)

# JSON object mapping a bacterium key to the phages found inside it.
# NOTE(review): this is an absolute, machine-specific path - the script only
# runs where this exact file exists.
with open('C:/Users/Pedro/Downloads/pha_in_bac_2_test.json', encoding='utf-8') as F:
    prophage = json.load(F)

for bact, prophage_ids in prophage.items():
    # The hosts are stored with a '.1' suffix appended to the JSON key
    # (presumably the accession version - confirm).
    host_accession = bact + '.1'
    for phage in prophage_ids:
        if phage not in data.index:
            continue
        # 'Host_ID' cells hold the text form of a Python list.
        hosts = ast.literal_eval(data.loc[phage, 'Host_ID'])
        if host_accession in hosts:
            continue
        hosts.append(host_accession)
        data.loc[phage, 'Host_ID'] = str(hosts)

# Overwrite the input CSV with the enriched host lists.
data.to_csv('files/NCBI_Phage_Bacteria_Data.csv')
| 32 | |
# phageTails maps each phage ID to a mapping keyed by its tail-protein IDs.
fc = FeatureConstruction()
phageTails = fc.phageTails

# Cluster the tail proteins with CD-HIT; the clusters are read back from
# files/cdhit.clstr below.
cdhit_status = os.system('cd-hit -i files/tails.fasta -o files/cdhit')
if cdhit_status != 0:
    # Do not abort: a previously generated cluster file may still be usable,
    # but make the failure visible instead of ignoring the exit status.
    print('Warning: cd-hit exited with status', cdhit_status,
          '- files/cdhit.clstr may be missing or out of date')

# Reverse index: protein ID -> first phage (in phageTails order) carrying it.
# Built once so each cluster member is resolved without rescanning all phages.
protein_owner = {}
for phage in phageTails:
    for prot in phageTails[phage].keys():
        protein_owner.setdefault(prot, phage)


def _clstr_protein_id(clstr_line):
    """Return the text between '>' and '...' on a .clstr member line."""
    return clstr_line[clstr_line.find('>') + 1:clstr_line.find('...')]


def _share_reference_hosts(reference_phage, member_proteins):
    """Add the reference phage's 'Bacteria ID' entries to every member phage.

    Does nothing when the cluster has no usable reference, i.e. its reference
    protein was not found in phageTails or its phage is absent from ``data``.
    """
    if reference_phage is None or reference_phage not in data.index:
        return
    reference_hosts = ast.literal_eval(data.loc[reference_phage, 'Bacteria ID'])
    for protein in member_proteins:
        member_phage = protein_owner.get(protein)
        if member_phage is None or member_phage not in data.index:
            continue
        hosts = ast.literal_eval(data.loc[member_phage, 'Bacteria ID'])
        changed = False
        for host in reference_hosts:
            if host not in hosts:
                hosts.append(host)
                changed = True
        if changed:
            data.loc[member_phage, 'Bacteria ID'] = str(hosts)


# NOTE(review): this step reads and writes 'Bacteria ID', whereas the prophage
# merge earlier in the script uses 'Host_ID' - confirm the column name.
# NOTE(review): member number 0 of each cluster is used as the reference, as
# in the original code. CD-HIT marks the representative sequence with a
# trailing '*', which is not necessarily member 0 - confirm the intent.
# NOTE(review): CD-HIT may shorten sequence names in the .clstr file (see its
# -d option); shortened names will not be found in phageTails.
ref_phage = None
temp_cluster = []
with open('files/cdhit.clstr', 'r') as f:
    for line in f:
        if not line.strip():
            continue
        if line.startswith('>Cluster'):
            # A new header closes the previous cluster: merge it, then reset
            # so a reference from an earlier cluster is never reused.
            _share_reference_hosts(ref_phage, temp_cluster)
            ref_phage = None
            temp_cluster = []
        elif line[0] == '0':
            ref_phage = protein_owner.get(_clstr_protein_id(line))
        else:
            temp_cluster.append(_clstr_protein_id(line))

# The file does not end with a header line, so the last cluster has to be
# merged explicitly; previously it was dropped.
_share_reference_hosts(ref_phage, temp_cluster)
# NOTE(review): nothing in the visible part of the script writes `data` back
# to disk after this step - confirm whether the result should be saved.
| 66 | |
| 67 | |
# JSON object mapping a bacterium ID to its genome sequence (a string).
with open('files/bactDNA.json', encoding='utf-8') as F:
    bacProt = json.load(F)

phigaro_report = 'files/temp_phigaro.html'

# NOTE(review): this step updates 'Bacteria ID', while phages_bact() (used for
# the progress line) counts 'Host_ID' by default - confirm the column name.
listDone = []
for bact in bacProt:
    # Keys of a parsed JSON object are unique, so no "already processed" test
    # is needed; listDone is kept only as a record of the genomes handled.
    listDone.append(bact)

    # Phigaro takes a FASTA file, so write the current genome to a scratch file.
    with open('files/temp_genome.fasta', 'w') as F:
        F.write('>' + bact + '\n' + bacProt[bact] + '\n')

    # Remove the report of the previous genome first: if Phigaro writes no
    # report this time, the old one must not be credited to this bacterium.
    if os.path.exists(phigaro_report):
        os.remove(phigaro_report)
    os.system('phigaro -f files/temp_genome.fasta --not-open -d -o files/temp_phigaro')  # Phigaro
    if not os.path.exists(phigaro_report):
        # No report for this genome (Phigaro failed or produced nothing).
        continue

    with open(phigaro_report, 'r') as Ph:
        tempPhigaro = Ph.readlines()
    for line in tempPhigaro:
        if '<div class="accordion-body collapse"' not in line:
            continue
        # The text after the first '>' of such a line is a ', '-separated
        # list of VOG names.
        VOGs = line[line.find('>')+1:].strip('\n').split(', ')
        for vog in VOGs:
            vog = vog.strip()
            if not vog:
                # An empty list would otherwise try to open 'files/VOG_tables/.txt'.
                continue
            with open('files/VOG_tables/' + vog + '.txt', 'r') as f:
                next(f, None)  # the first row of a VOG table is skipped
                for row in f:
                    fields = row.rstrip('\n').split('\t')
                    if len(fields) < 3:
                        # Blank or truncated row: no phage column to read.
                        continue
                    phage = fields[2]
                    if phage in data.index:
                        temp = ast.literal_eval(data.loc[phage, 'Bacteria ID'])
                        if bact not in temp:
                            temp.append(bact)
                            data.loc[phage, 'Bacteria ID'] = str(temp)
    # Progress line, rewritten in place thanks to the carriage return.
    print('Number of phages with associated bacteria strains:', phages_bact(), end="\r")

# Disabled experiments, kept as a bare string literal (it has no effect at
# run time): a PHASTER web-API submission and a PhiSpy run. The Portuguese
# remarks inside read "server full" (PHASTER) and "not possible with FASTA
# files" (PhiSpy).
'''os.system('wget --post-file="files/temp_genome.fasta" "http://phaster.ca/phaster_api?contigs=1" -O files/temp_phaster') # Phaster
with open('files/temp_phaster', encoding='utf-8') as F:
    temp = json.loads(F.read())
os.system('wget "http://phaster.ca/phaster_api?acc=' + temp['job_id'] + 'Z" -O files/temp_phaster') # servidor cheio
os.system('PhiSpy.py files/temp_genome.fasta -o files/temp_phipsy') # Phipsy - não possível com fastas'''
