annotate data_manager/resource_building.py @ 2:9ec42cb35abd draft

planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
author proteore
date Wed, 19 Jun 2019 04:42:03 -0400
parents f3507260b30f
children af0250fd023c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
1 # -*- coding: utf-8 -*-
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
2 """
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
3 The purpose of this script is to create source files from different databases to be used in other proteore tools
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
4 """
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
5
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
6 import os, sys, argparse, requests, time, csv, re, json, shutil, zipfile
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
7 from io import BytesIO
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
8 from zipfile import ZipFile
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
9 from galaxy.util.json import from_json_string, to_json_string
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
10
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
11 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
12 # General functions
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
13 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def unzip(url, output_file):
    """
    Download a zip archive from a link and extract its first member.

    url: address of the zip archive to download.
    output_file: path of the file that receives the extracted content.

    Only the first entry listed in the archive is extracted.
    Raises requests.HTTPError when the server answers with an error status.
    """
    response = requests.get(url)
    # Fail on an HTTP error instead of trying to unzip an error page.
    response.raise_for_status()
    # The archive is deliberately not bound to the name "zipfile": that would
    # shadow the zipfile module imported at the top of the file.
    with ZipFile(BytesIO(response.content)) as archive:
        first_member = archive.namelist()[0]
        with archive.open(first_member) as member:
            extracted = member.read()
    # Archive members are read as bytes, so the output is opened in binary
    # mode; concatenating them to a text string fails on Python 3.
    with open(output_file, "wb") as output:
        output.write(extracted)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
25
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
26 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
27 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
28 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
29 data_manager_dict['data_tables'][data_table].append(data_table_entry)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
30 return data_manager_dict
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
31
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
32 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
33 # 1. Human Protein Atlas
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
34 # - Normal tissue
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
35 # - Pathology
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
36 # - Full Atlas
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
37 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def HPA_sources(data_manager_dict, tissue, target_directory):
    """
    Download one Human Protein Atlas file and register it as a data table entry.

    data_manager_dict: dictionary that receives the new data table entry.
    tissue: one of "HPA_normal_tissue", "HPA_pathology" or "HPA_full_atlas".
    target_directory: directory where the extracted .tsv file is written.

    Raises ValueError when tissue is not one of the supported keys.
    """
    # tissue key -> (display name, download url, data table name)
    sources = {
        "HPA_normal_tissue": (
            "HPA normal tissue",
            "https://www.proteinatlas.org/download/normal_tissue.tsv.zip",
            "proteore_protein_atlas_normal_tissue",
        ),
        "HPA_pathology": (
            "HPA pathology",
            "https://www.proteinatlas.org/download/pathology.tsv.zip",
            "proteore_protein_atlas_tumor_tissue",
        ),
        "HPA_full_atlas": (
            "HPA full atlas",
            "https://www.proteinatlas.org/download/proteinatlas.tsv.zip",
            "proteore_protein_full_atlas",
        ),
    }
    if tissue not in sources:
        # Without this guard an unknown key only fails later, with an
        # unbound local variable error.
        raise ValueError("Unknown Human Protein Atlas source: %r" % (tissue,))
    tissue_name, url, table = sources[tissue]

    # Read the clock once so the file name, the release and the id all
    # carry the same date.
    now = time.localtime()

    output_file = tissue + "_" + time.strftime("%d-%m-%Y", now) + ".tsv"
    path = os.path.join(target_directory, output_file)
    unzip(url, path)  # download and save file

    tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y", now)
    release = tissue_name.replace(" ", "_").replace("/", "-")
    # 10000000000 minus YYYYMMDD: a later date gives a smaller id.
    # NOTE(review): presumably so the most recent release sorts first - confirm.
    entry_id = str(10000000000 - int(time.strftime("%Y%m%d", now)))

    data_table_entry = dict(id=entry_id, release=release, name=tissue_name, tissue=tissue, value=path)
    _add_data_table_entry(data_manager_dict, data_table_entry, table)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
62
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
63
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
64 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
65 # 2. Peptide Atlas
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
66 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def peptide_atlas_sources(data_manager_dict, tissue, date, target_directory):
    """
    Download a Peptide Atlas protein list and register it as a data table entry.

    data_manager_dict: dictionary that receives the new data table entry.
    tissue: string of the form "<sample_category_id>.<tissue_name>"; the part
        before the first dot is sent as atlas_build_id in the query.
    date: string appended to the tissue name to build the file name and id.
    target_directory: directory where the resulting .tsv file is written.

    The written file has two columns, "Uniprot_AC" and "nb_obs", filled from
    the dictionary returned by build_dictionary.
    Raises requests.HTTPError when the server answers with an error status.
    """
    # Define organism_id (here Human) - to be upgraded when other organisms are added to the project
    organism_id = "2"
    # Extract sample_category_id and output filename
    tissue_parts = tissue.split(".")
    sample_category_id = tissue_parts[0]
    tissue_name = tissue_parts[1]
    output_file = tissue_name + "_" + date + ".tsv"

    # The url is assembled from complete string pieces. A backslash
    # continuation inside a string literal would embed the leading whitespace
    # of the continuation line in the query.
    query = (
        "https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id="
        + sample_category_id
        + "&display_options=ShowAbundances&organism_id=" + organism_id
        + "&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf"
        + "&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY"
    )

    with requests.Session() as s:
        download = s.get(query)
        # Do not build a data table from an HTTP error page.
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
        cr = csv.reader(decoded_content.splitlines(), delimiter='\t')

        uni_dict = build_dictionary(cr)

    # columns of data table peptide_atlas
    tissue_id = tissue_name + "_" + date
    name = tissue_id.replace("-", "/").replace("_", " ")
    path = os.path.join(target_directory, output_file)

    with open(path, "w") as out:
        w = csv.writer(out, delimiter='\t')
        w.writerow(["Uniprot_AC", "nb_obs"])
        w.writerows(uni_dict.items())

    data_table_entry = dict(id=tissue_id, name=name, value=path, tissue=tissue_name)
    _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_peptide_atlas")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
100
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
101 #function to count the number of observations by uniprot id
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def build_dictionary (csv) :
    """
    Count the number of observations by uniprot id.

    csv: iterable of rows (lists of strings); column 0 is read as the
        identifier and column 5 is converted to int and summed.

    Rows whose identifier contains "-" or is rejected by
    check_uniprot_access are ignored, and so are empty rows.
    Returns a dictionary mapping each kept identifier to its summed count.
    """
    uni_dict = {}
    for line in csv :
        # A blank input line gives an empty row; indexing it would raise
        # IndexError.
        if not line :
            continue
        accession = line[0]
        if "-" in accession or not check_uniprot_access(accession) :
            continue
        uni_dict[accession] = uni_dict.get(accession, 0) + int(line[5])

    return uni_dict
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
112
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
113 #function to check whether an id is a UniProt accession number: returns True or False
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def check_uniprot_access (id) :
    """
    Tell whether an id begins with a UniProt accession number.

    The pattern is applied with re.match, which anchors at the start of the
    string only: characters following a valid accession are accepted.
    Returns True or False.
    """
    accession_regex = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
    return re.match(accession_regex, id) is not None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
120
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def check_entrez_geneid (id) :
    """
    Tell whether an id begins with one of the accepted gene id forms.

    Accepted forms are a run of digits, or one or two capital letters and an
    underscore followed by digits or by one to four capital letters and
    digits. The pattern is applied with re.match, which anchors at the start
    of the string only. Returns True or False.
    """
    geneid_regex = "[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+"
    return re.match(geneid_regex, id) is not None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
127
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
128 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
129 # 3. ID mapping file
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
130 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
131 import ftplib, gzip
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
# Raise the csv field size limit to handle big files. sys.maxsize does not fit
# in a C long on every platform (csv.field_size_limit then raises
# OverflowError), so the value is reduced until it is accepted.
_csv_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_csv_field_limit)
        break
    except OverflowError:
        _csv_field_limit = _csv_field_limit // 10
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
133
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
134 def id_mapping_sources (data_manager_dict, species, target_directory) :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
135
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
136 human = species == "Human"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
137 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" }
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
138 files=["idmapping_selected.tab.gz","idmapping.dat.gz"]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
139
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
140 #header
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
141 if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
142 else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
143
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
144 #get selected.tab and keep only ids of interest
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
145 selected_tab_file=species_dict[species]+"_"+files[0]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
146 tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
147 with gzip.open(tab_path,"rt") as select :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
148 tab_reader = csv.reader(select,delimiter="\t")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
149 for line in tab_reader :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
150 tab.append([line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
151 os.remove(tab_path)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
152
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
153 #print("selected_tab ok")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
154
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
155 #get uniprot-AC reviewed
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
156 organism = species_dict[species].split("_")[1]
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
157 query = "https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:"+organism+"&format=list"
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
158
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
159 with requests.Session() as s:
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
160 download = s.get(query)
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
161 decoded_content = download.content.decode('utf-8')
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
162 uniprot_reviewed_list = decoded_content.splitlines()
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
163
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
164 for line in tab[1:]:
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
165 UniProtAC = line[0]
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
166 if UniProtAC not in uniprot_reviewed_list :
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
167 line[0]=""
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
168 line[1]=""
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
169
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
170 """
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
171 Supplementary ID to get from HUMAN_9606_idmapping.dat :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
172 -NextProt,BioGrid,STRING,KEGG
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
173 """
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
174
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
175 #there's more id type for human
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
176 if human : ids = ['neXtProt','BioGrid','STRING','KEGG' ] #ids to get from dat_file
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
177 else : ids = ['BioGrid','STRING','KEGG' ]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
178 unidict = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
179
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
180 #keep only ids of interest in dictionaries
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
181 dat_file=species_dict[species]+"_"+files[1]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
182 dat_path = download_from_uniprot_ftp(dat_file,target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
183 with gzip.open(dat_path,"rt") as dat :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
184 dat_reader = csv.reader(dat,delimiter="\t")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
185 for line in dat_reader :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
186 uniprotID=line[0] #UniProtID as key
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
187 id_type=line[1] #ID type of corresponding id, key of sub-dictionnary
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
188 cor_id=line[2] #corresponding id
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
189 if "-" not in id_type : #we don't keep isoform
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
190 if id_type in ids and uniprotID in unidict :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
191 if id_type in unidict[uniprotID] :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
192 unidict[uniprotID][id_type]= ";".join([unidict[uniprotID][id_type],cor_id]) #if there is already a value in the dictionnary
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
193 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
194 unidict[uniprotID].update({ id_type : cor_id })
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
195 elif id_type in ids :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
196 unidict[uniprotID]={id_type : cor_id}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
197 os.remove(dat_path)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
198
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
199 #print("dat_file ok")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
200
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
201 #add ids from idmapping.dat to the final tab
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
202 for line in tab[1:] :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
203 uniprotID=line[0]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
204 if human :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
205 if uniprotID in unidict :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
206 nextprot = access_dictionary(unidict,uniprotID,'neXtProt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
207 if nextprot != '' : nextprot = clean_nextprot_id(nextprot,line[0])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
208 line.extend([nextprot,access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
209 access_dictionary(unidict,uniprotID,'KEGG')])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
210 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
211 line.extend(["","","",""])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
212 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
213 if uniprotID in unidict :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
214 line.extend([access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
215 access_dictionary(unidict,uniprotID,'KEGG')])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
216 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
217 line.extend(["","",""])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
218
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
219 #print ("tab ok")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
220
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
221 #add missing nextprot ID for human or replace old ones
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
222 if human :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
223 #build next_dict
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
224 nextprot_ids = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
225 next_dict = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
226 for nextid in nextprot_ids :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
227 next_dict[nextid.replace("NX_","")] = nextid
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
228 os.remove(os.path.join(target_directory,"nextprot_ac_list_all.txt"))
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
229
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
230 #add missing nextprot ID
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
231 for line in tab[1:] :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
232 uniprotID=line[0]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
233 nextprotID=line[13]
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
234 if uniprotID in next_dict and (nextprotID == '' or (nextprotID != "NX_"+uniprotID and next_dict[uniprotID] == "NX_"+uniprotID)) :
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
235 line[13]=next_dict[uniprotID]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
236
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
237 output_file = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + ".tsv"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
238 path = os.path.join(target_directory,output_file)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
239
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
240 with open(path,"w") as out :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
241 w = csv.writer(out,delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
242 w.writerows(tab)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
243
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
244 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
245 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")"
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
246 release = species+"_id_mapping_"+ time.strftime("%d-%m-%Y")
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
247 id = str(10000000000 - int(time.strftime("%Y%m%d"))) #new ids must be inferior to previous id -> sort by <filter> in xml only in descending order
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
248
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
249 data_table_entry = dict(id=id, release=release , name = name, species = species, value = path)
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
250 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping_"+species)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
251
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def download_from_uniprot_ftp(file,target_directory) :
    """Download one file from the UniProt FTP "idmapping/by_organism" directory.

    Args:
        file: name of the remote file; the same name is used for the local copy.
        target_directory: local directory in which the file is written.

    Returns:
        The path of the downloaded local file.

    Raises:
        Any ftplib/socket/IO error raised while connecting or transferring;
        the FTP connection and the local file are closed before it propagates.
    """
    ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/"
    path = os.path.join(target_directory, file)
    ftp = ftplib.FTP("ftp.uniprot.org")
    try :
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        #the with-block guarantees the local file is flushed and closed,
        #instead of relying on garbage collection of an anonymous handle
        with open(path, 'wb') as local_file :
            ftp.retrbinary("RETR " + file, local_file.write)
    except Exception :
        #drop the connection without masking the original error
        ftp.close()
        raise
    ftp.quit()
    return (path)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
261
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def id_list_from_nextprot_ftp(file,target_directory) :
    """Download a neXtProt accession list over FTP and return its lines.

    The file is fetched from the "pub/current_release/ac_lists/" directory of
    ftp.nextprot.org and stored in target_directory under the same name. The
    local copy is left on disk; removing it is up to the caller.

    Args:
        file: name of the remote file; also used for the local copy.
        target_directory: local directory in which the file is written.

    Returns:
        A list with one string per line of the downloaded file.

    Raises:
        Any ftplib/socket/IO error raised while connecting or transferring;
        the FTP connection and the local file are closed before it propagates.
    """
    ftp_dir = "pub/current_release/ac_lists/"
    path = os.path.join(target_directory, file)
    ftp = ftplib.FTP("ftp.nextprot.org")
    try :
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        #close (and therefore flush) the local file before it is read back below
        with open(path, 'wb') as local_file :
            ftp.retrbinary("RETR " + file, local_file.write)
    except Exception :
        #drop the connection without masking the original error
        ftp.close()
        raise
    ftp.quit()
    with open(path,'r') as handle :
        nextprot_ids = handle.read().splitlines()
    return (nextprot_ids)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
273
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
274 #return '' if there's no value in a dictionary, avoid error
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def access_dictionary (dico,key1,key2) :
    """Return dico[key1][key2], or an empty string when either key is missing."""
    #guard clause: unknown first-level key
    if key1 not in dico :
        return ('')
    inner = dico[key1]
    return (inner[key2] if key2 in inner else "")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
284
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
285 #if there are several nextprot ID for one uniprotID, return the uniprot like ID
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def clean_nextprot_id (next_id,uniprotAc) :
    """Reduce a ';'-separated list of neXtProt IDs to a single ID.

    A value holding a single ID (or an empty string) is returned unchanged.
    When several IDs are present, "NX_"+uniprotAc is returned if it is one of
    them; otherwise the second ID of the list is returned.
    """
    candidates = next_id.split(";")
    if len(candidates) <= 1 :
        return (next_id)
    preferred = "NX_"+uniprotAc
    if preferred in candidates :
        return (preferred)
    return (candidates[1])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
295
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
296
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
297 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
298 # 4. Build protein interaction maps files
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
299 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
300
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def get_interactant_name(line,dico):
    """Look up the names of the two interactants of an interaction row.

    line[0] and line[1] are used as keys into dico; a key absent from dico
    yields "NA". Returns the pair (interactant_A, interactant_B).
    """
    first_key, second_key = line[0], line[1]
    names = [dico[key] if key in dico else "NA" for key in (first_key, second_key)]
    return names[0], names[1]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
314
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
315 def PPI_ref_files(data_manager_dict, species, interactome, target_directory):
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
316
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
317 species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
318
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
319 ##BioGRID
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
320 if interactome=="biogrid":
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
321
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
322 tab2_link="https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
323
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
324 #download zip file
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
325 r = requests.get(tab2_link)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
326 with open("BioGRID.zip", "wb") as code:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
327 code.write(r.content)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
328
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
329 #unzip files
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
330 with zipfile.ZipFile("BioGRID.zip", 'r') as zip_ref:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
331 if not os.path.exists("tmp_BioGRID"): os.makedirs("tmp_BioGRID")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
332 zip_ref.extractall("tmp_BioGRID")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
333
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
334 #import file of interest and build dictionary
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
335 file_path="tmp_BioGRID/BIOGRID-ORGANISM-"+species_dict[species].replace(" ","_")+"-3.5.167.tab2.txt"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
336 with open(file_path,"r") as handle :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
337 tab_file = csv.reader(handle,delimiter="\t")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
338 dico_network = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
339 GeneID_index=1
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
340 network_cols=[1,2,7,8,11,12,14,18,20]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
341 for line in tab_file :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
342 if line[GeneID_index] not in dico_network:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
343 dico_network[line[GeneID_index]]=[[line[i] for i in network_cols]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
344 else:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
345 dico_network[line[GeneID_index]].append([line[i] for i in network_cols])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
346
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
347 #delete tmp_BioGRID directory
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
348 os.remove("BioGRID.zip")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
349 shutil.rmtree("tmp_BioGRID", ignore_errors=True)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
350
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
351 #download NCBI2Reactome.txt file and build dictionary
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
352 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
353 r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
354 r.encoding ="utf-8"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
355 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
356
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
357 dico_nodes = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
358 geneid_index=0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
359 pathway_description_index=3
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
360 species_index=5
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
361 for line in tab_file :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
362 if line[species_index]==species_dict[species]:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
363 if line[geneid_index] in dico_nodes :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
364 dico_nodes[line[geneid_index]].append(line[pathway_description_index])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
365 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
366 dico_nodes[line[geneid_index]] = [line[pathway_description_index]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
367
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
368 dico={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
369 dico['network']=dico_network
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
370 dico['nodes']=dico_nodes
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
371
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
372 ##Bioplex
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
373 elif interactome=="bioplex":
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
374
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
375 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
376 r = s.get('http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
377 r = r.content.decode('utf-8')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
378 bioplex = csv.reader(r.splitlines(), delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
379
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
380 dico_network = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
381 dico_network["GeneID"]={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
382 network_geneid_cols=[0,1,4,5,8]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
383 dico_network["UniProt-AC"]={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
384 network_uniprot_cols=[2,3,4,5,8]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
385 dico_GeneID_to_UniProt = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
386 for line in bioplex :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
387 if line[0] not in dico_network["GeneID"]:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
388 dico_network["GeneID"][line[0]]=[[line[i] for i in network_geneid_cols]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
389 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
390 dico_network["GeneID"][line[0]].append([line[i] for i in network_geneid_cols])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
391 if line[1] not in dico_network["UniProt-AC"]:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
392 dico_network["UniProt-AC"][line[2]]=[[line[i] for i in network_uniprot_cols]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
393 else:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
394 dico_network["UniProt-AC"][line[2]].append([line[i] for i in network_uniprot_cols])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
395 dico_GeneID_to_UniProt[line[0]]=line[2]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
396
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
397 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
398 r = s.get('https://reactome.org/download/current/UniProt2Reactome.txt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
399 r.encoding ="utf-8"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
400 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
401
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
402 dico_nodes_uniprot = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
403 uniProt_index=0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
404 pathway_description_index=3
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
405 species_index=5
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
406 for line in tab_file :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
407 if line[species_index]==species_dict[species]:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
408 if line[uniProt_index] in dico_nodes_uniprot :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
409 dico_nodes_uniprot[line[uniProt_index]].append(line[pathway_description_index])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
410 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
411 dico_nodes_uniprot[line[uniProt_index]] = [line[pathway_description_index]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
412
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
413 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
414 r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
415 r.encoding ="utf-8"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
416 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
417
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
418 dico_nodes_geneid = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
419 geneid_index=0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
420 pathway_description_index=3
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
421 species_index=5
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
422 for line in tab_file :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
423 if line[species_index]==species_dict[species]:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
424 if line[geneid_index] in dico_nodes_geneid :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
425 dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
426 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
427 dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
428
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
429 dico={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
430 dico_nodes={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
431 dico_nodes['GeneID']=dico_nodes_geneid
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
432 dico_nodes['UniProt-AC']=dico_nodes_uniprot
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
433 dico['network']=dico_network
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
434 dico['nodes']=dico_nodes
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
435 dico['convert']=dico_GeneID_to_UniProt
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
436
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
437 ##Humap
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
438 elif interactome=="humap":
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
439
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
440 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
441 r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
442 r = r.content.decode('utf-8')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
443 humap_nodes = csv.reader(r.splitlines(), delimiter=',')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
444
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
445 dico_geneid_to_gene_name={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
446 dico_protein_name={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
447 for line in humap_nodes :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
448 if check_entrez_geneid(line[4]):
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
449 if line[4] not in dico_geneid_to_gene_name:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
450 dico_geneid_to_gene_name[line[4]]=line[3]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
451 if line[4] not in dico_protein_name:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
452 dico_protein_name[line[4]]=line[5]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
453
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
454 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
455 r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
456 r = r.content.decode('utf-8')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
457 humap = csv.reader(r.splitlines(), delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
458
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
459 dico_network = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
460 for line in humap :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
461 if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]):
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
462
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
463 interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
464
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
465 #first interactant (first column)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
466 if line[0] not in dico_network:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
467 dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
468 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
469 dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
470
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
471 #second interactant (second column)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
472 if line[1] not in dico_network:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
473 dico_network[line[1]]=[[line[1],line[0],interactant_B,interactant_A,line[2]]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
474 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
475 dico_network[line[1]].append([line[1],line[0],interactant_B,interactant_A,line[2]])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
476
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
477 with requests.Session() as s:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
478 r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
479 r.encoding ="utf-8"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
480 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
481
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
482 dico_nodes = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
483 geneid_index=0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
484 pathway_description_index=3
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
485 species_index=5
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
486 for line in tab_file :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
487 if line[species_index]==species_dict[species]:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
488 #Fill dictionary with pathways
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
489 if line[geneid_index] in dico_nodes :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
490 dico_nodes[line[geneid_index]].append(line[pathway_description_index])
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
491 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
492 dico_nodes[line[geneid_index]] = [line[pathway_description_index]]
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
493
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
494 dico={}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
495 dico['network']=dico_network
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
496 dico['nodes']=dico_nodes
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
497 dico['gene_name']=dico_geneid_to_gene_name
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
498 dico['protein_name']=dico_protein_name
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
499
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
500 #writing output
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
501 output_file = species+'_'+interactome+'_'+ time.strftime("%Y-%m-%d") + ".json"
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
502 path = os.path.join(target_directory,output_file)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
503 name = species+" ("+species_dict[species]+") "+time.strftime("%d/%m/%Y")
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
504 release = species+"_"+interactome+"_"+ time.strftime("%Y-%m-%d")
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
505 id = str(10000000000 - int(time.strftime("%Y%m%d")))
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
506
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
507 with open(path, 'w') as handle:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
508 json.dump(dico, handle, sort_keys=True)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
509
2
9ec42cb35abd planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
proteore
parents: 1
diff changeset
510 data_table_entry = dict(id=id, release=release, name = name, species = species, value = path)
0
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
511 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_"+interactome+"_dictionaries")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
512
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
513 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
514 # 5. nextprot (add protein features)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
515 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
516
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
def _join_cv_term_names(annotations):
    """Join the distinct, non-empty 'cvTermName' values of annotations.

    annotations is an iterable of dicts that each carry a 'cvTermName' key.
    None and empty term names are ignored. The remaining names are
    de-duplicated, sorted (so that the output is reproducible) and joined
    with ';'. "NA" is returned when no usable name is left.
    """
    names = set()
    for annotation in annotations:
        term = annotation['cvTermName']
        if term:  # skips both None and ""
            names.add(term)
    if not names:
        return "NA"
    return ";".join(sorted(names))


def Build_nextprot_ref_file(data_manager_dict,target_directory):
    """Build the neXtProt reference TSV file and register it as a data table entry.

    The list of neXtProt accessions is obtained through
    id_list_from_nextprot_ftp(); each accession is then queried on
    https://api.nextprot.org and one row per entry is written to
    <target_directory>/nextprot_ref_<dd-mm-YYYY>.tsv with the columns
    NextprotID, MW, SeqLength, IsoPoint, Chr, SubcellLocations, Diseases,
    TMDomains and ProteinExistence.

    data_manager_dict -- dictionary handed to _add_data_table_entry()
    target_directory  -- directory in which the TSV file is created
    """
    nextprot_ids_file = "nextprot_ac_list_all.txt"
    ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory)

    # Take one timestamp for everything: the download loop below can run for
    # a long time, and the file name, release label and table id must agree.
    now = time.localtime()
    release_date = time.strftime("%d-%m-%Y", now)
    output_file = 'nextprot_ref_' + release_date + ".tsv"
    path = os.path.join(target_directory, output_file)
    name = "neXtProt release " + release_date
    release_id = "nextprot_ref_" + release_date
    # Decreasing id: the most recent release gets the smallest value.
    entry_id = str(10000000000 - int(time.strftime("%Y%m%d", now)))

    header = ["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]

    # The context manager guarantees the file is flushed and closed before
    # the data table entry pointing at it is registered.
    with open(path, 'w') as output:
        writer = csv.writer(output, delimiter="\t")
        writer.writerow(header)

        # One session for all the requests so the connection to the API is reused.
        with requests.Session() as session:
            for nextprot_id in ids:
                query = "https://api.nextprot.org/entry/" + nextprot_id + ".json"
                data = session.get(url=query).json()

                # get info from json dictionary
                entry = data["entry"]
                first_isoform = entry["isoforms"][0]
                annotations = entry["annotationsByCategory"]

                mass_mol = first_isoform["massAsString"]
                seq_length = first_isoform["sequenceLength"]
                iso_elec_point = first_isoform["isoelectricPointAsString"]
                chr_loc = entry["chromosomalLocations"][0]["chromosome"]
                protein_existence = "PE" + str(entry["overview"]['proteinExistence']['level'])

                # all distinct subcellular locations, ';' separated
                if "subcellular-location" in annotations:
                    all_subcell_locs = _join_cv_term_names(annotations["subcellular-location"])
                else:
                    all_subcell_locs = "NA"

                # all distinct diseases, ';' separated
                if "disease" in annotations:
                    all_diseases = _join_cv_term_names(annotations["disease"])
                else:
                    all_diseases = "NA"

                # number of transmembrane-region annotations (0 when absent)
                nb_domains = len(annotations.get("transmembrane-region", []))

                writer.writerow([nextprot_id, mass_mol, str(seq_length), iso_elec_point, chr_loc,
                                 all_subcell_locs, all_diseases, str(nb_domains), protein_existence])

    data_table_entry = dict(id=entry_id, release=release_id, name = name, value = path)
    _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
586
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
587 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
588 # Main function
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
589 #######################################################################################################
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
590 def main():
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
591 parser = argparse.ArgumentParser()
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
592 parser.add_argument("--hpa", metavar = ("HPA_OPTION"))
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
593 parser.add_argument("--peptideatlas", metavar=("SAMPLE_CATEGORY_ID"))
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
594 parser.add_argument("--id_mapping", metavar = ("ID_MAPPING_SPECIES"))
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
595 parser.add_argument("--interactome", metavar = ("PPI"))
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
596 parser.add_argument("--species")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
597 parser.add_argument("--date")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
598 parser.add_argument("-o", "--output")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
599 parser.add_argument("--database")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
600 args = parser.parse_args()
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
601
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
602 data_manager_dict = {}
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
603 # Extract json file params
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
604 filename = args.output
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
605 params = from_json_string(open(filename).read())
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
606 target_directory = params[ 'output_data' ][0]['extra_files_path']
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
607 os.mkdir(target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
608
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
609 ## Download source files from HPA
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
610 try:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
611 hpa = args.hpa
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
612 except NameError:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
613 hpa = None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
614 if hpa is not None:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
615 #target_directory = "/projet/galaxydev/galaxy/tools/proteore/ProteoRE/tools/resources_building/test-data/"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
616 hpa = hpa.split(",")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
617 for hpa_tissue in hpa:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
618 HPA_sources(data_manager_dict, hpa_tissue, target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
619
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
620 ## Download source file from Peptide Atlas query
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
621 try:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
622 peptide_atlas = args.peptideatlas
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
623 date = args.date
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
624 except NameError:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
625 peptide_atlas = None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
626 if peptide_atlas is not None:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
627 #target_directory = "/projet/galaxydev/galaxy/tools/proteore/ProteoRE/tools/resources_building/test-data/"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
628 peptide_atlas = peptide_atlas.split(",")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
629 for pa_tissue in peptide_atlas:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
630 peptide_atlas_sources(data_manager_dict, pa_tissue, date, target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
631
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
632 ## Download ID_mapping source file from Uniprot
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
633 try:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
634 id_mapping=args.id_mapping
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
635 except NameError:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
636 id_mapping = None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
637 if id_mapping is not None:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
638 id_mapping = id_mapping .split(",")
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
639 for species in id_mapping :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
640 id_mapping_sources(data_manager_dict, species, target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
641
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
642 ## Download PPI ref files from biogrid/bioplex/humap
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
643 try:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
644 interactome=args.interactome
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
645 if interactome == "biogrid" :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
646 species=args.species
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
647 else :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
648 species="Human"
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
649 except NameError:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
650 interactome=None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
651 species=None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
652 if interactome is not None and species is not None:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
653 PPI_ref_files(data_manager_dict, species, interactome, target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
654
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
655 ## Build nextprot ref file for add protein features
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
656 try:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
657 database=args.database
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
658 except NameError:
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
659 database=None
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
660 if database is not None :
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
661 Build_nextprot_ref_file(data_manager_dict,target_directory)
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
662
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
663 #save info to json file
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
664 filename = args.output
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
665 open(filename, 'wb').write(to_json_string(data_manager_dict))
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
666
9e31ea9fc7ea planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
proteore
parents:
diff changeset
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()