proteore / proteore_data_manager
changeset 0:9e31ea9fc7ea draft
planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
field | value
---|---
author | proteore
date | Wed, 13 Mar 2019 06:30:42 -0400
parents |
children | f3507260b30f
files | data_manager/resource_building.py data_manager/resource_building.xml data_manager_conf.xml tool-data/proteore_biogrid_dictionaries.loc.sample tool-data/proteore_bioplex_dictionaries.loc.sample tool-data/proteore_humap_dictionaries.loc.sample tool-data/proteore_id_mapping_Human.loc.sample tool-data/proteore_id_mapping_Mouse.loc.sample tool-data/proteore_id_mapping_Rat.loc.sample tool-data/proteore_nextprot_ref.loc.sample tool-data/proteore_protein_atlas_normal_tissue.loc.sample tool-data/proteore_protein_atlas_tumor_tissue.loc.sample tool_data_table_conf.xml.sample
diffstat | 13 files changed, 1094 insertions(+), 0 deletions(-)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/resource_building.py Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,649 @@ +# -*- coding: utf-8 -*- +""" +The purpose of this script is to create source files from different databases to be used in other proteore tools +""" + +import os, sys, argparse, requests, time, csv, re, json, shutil, zipfile +from io import BytesIO +from zipfile import ZipFile +from galaxy.util.json import from_json_string, to_json_string + +####################################################################################################### +# General functions +####################################################################################################### +def unzip(url, output_file): + """ + Get a zip file content from a link and unzip + """ + content = requests.get(url) + zipfile = ZipFile(BytesIO(content.content)) + output_content = "" + output_content += zipfile.open(zipfile.namelist()[0]).read() + output = open(output_file, "w") + output.write(output_content) + output.close() + +def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) + data_manager_dict['data_tables'][data_table].append(data_table_entry) + return data_manager_dict + +####################################################################################################### +# 1. Human Protein Atlas +# - Normal tissue +# - Pathology +# - Full Atlas +####################################################################################################### +def HPA_sources(data_manager_dict, tissue, target_directory): + if tissue == "HPA_normal_tissue": + tissue_name = "HPA normal tissue" + url = "https://www.proteinatlas.org/download/normal_tissue.tsv.zip" + table = "proteore_protein_atlas_normal_tissue" + elif tissue == "HPA_pathology": + tissue_name = "HPA pathology" + url = "https://www.proteinatlas.org/download/pathology.tsv.zip" + table = "proteore_protein_atlas_tumor_tissue" + elif tissue == "HPA_full_atlas": + tissue_name = "HPA full atlas" + url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip" + table = "proteore_protein_full_atlas" + + output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv" + path = os.path.join(target_directory, output_file) + unzip(url, path) #download and save file + tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y") + tissue_id = tissue_name.replace(" ","_").replace("/","-") + + + data_table_entry = dict(id=tissue_id, name = tissue_name, tissue = tissue, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, table) + + +####################################################################################################### +# 2. 
Peptide Atlas +####################################################################################################### +def peptide_atlas_sources(data_manager_dict, tissue, date, target_directory): + # Define organism_id (here Human) - to be upraded when other organism added to the project + organism_id = "2" + # Extract sample_category_id and output filename + tissue=tissue.split(".") + sample_category_id = tissue[0] + tissue_name = tissue[1] + output_file = tissue_name+"_"+date + ".tsv" + + query="https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id="+ \ + sample_category_id+"&display_options=ShowAbundances&organism_id="+organism_id+ \ + "&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf\ + &QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY" + + with requests.Session() as s: + download = s.get(query) + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter='\t') + + uni_dict = build_dictionary(cr) + + #columns of data table peptide_atlas + tissue_id = tissue_name+"_"+date + name = tissue_id.replace("-","/").replace("_"," ") + path = os.path.join(target_directory,output_file) + + with open(path,"w") as out : + w = csv.writer(out,delimiter='\t') + w.writerow(["Uniprot_AC","nb_obs"]) + w.writerows(uni_dict.items()) + + data_table_entry = dict(id=tissue_id, name=name, value = path, tissue = tissue_name) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_peptide_atlas") + +#function to count the number of observations by uniprot id +def build_dictionary (csv) : + uni_dict = {} + for line in csv : + if "-" not in line[0] and check_uniprot_access(line[0]) : + if line[0] in uni_dict : + uni_dict[line[0]] += int(line[5]) + else : + uni_dict[line[0]] = int(line[5]) + + return uni_dict + +#function to check if an id is an uniprot accession number : return True or False- +def check_uniprot_access (id) : + uniprot_pattern = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") + if uniprot_pattern.match(id) : + return True + else : + return False + +def check_entrez_geneid (id) : + entrez_pattern = re.compile("[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+") + if entrez_pattern.match(id) : + return True + else : + return False + +####################################################################################################### +# 3. 
ID mapping file +####################################################################################################### +import ftplib, gzip +csv.field_size_limit(sys.maxsize) # to handle big files + +def id_mapping_sources (data_manager_dict, species, target_directory) : + + human = species == "Human" + species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" } + files=["idmapping_selected.tab.gz","idmapping.dat.gz"] + + #header + if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] + else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] + + #print("header ok") + + #get selected.tab and keep only ids of interest + selected_tab_file=species_dict[species]+"_"+files[0] + tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory) + with gzip.open(tab_path,"rt") as select : + tab_reader = csv.reader(select,delimiter="\t") + for line in tab_reader : + tab.append([line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]]) + os.remove(tab_path) + + #print("selected_tab ok") + + """ + Supplementary ID to get from HUMAN_9606_idmapping.dat : + -NextProt,BioGrid,STRING,KEGG + """ + + #there's more id type for human + if human : ids = ['neXtProt','BioGrid','STRING','KEGG' ] #ids to get from dat_file + else : ids = ['BioGrid','STRING','KEGG' ] + unidict = {} + + #keep only ids of interest in dictionaries + dat_file=species_dict[species]+"_"+files[1] + dat_path = download_from_uniprot_ftp(dat_file,target_directory) + with gzip.open(dat_path,"rt") as dat : + dat_reader = csv.reader(dat,delimiter="\t") + for line in dat_reader : + uniprotID=line[0] #UniProtID as key + id_type=line[1] #ID type of corresponding id, key of sub-dictionnary + cor_id=line[2] #corresponding id + if "-" not in id_type : #we don't keep isoform + if id_type in ids and uniprotID in unidict : + if id_type in unidict[uniprotID] : + unidict[uniprotID][id_type]= ";".join([unidict[uniprotID][id_type],cor_id]) #if there is already a value in the dictionnary + else : + unidict[uniprotID].update({ id_type : cor_id }) + elif id_type in ids : + unidict[uniprotID]={id_type : cor_id} + os.remove(dat_path) + + #print("dat_file ok") + + #add ids from idmapping.dat to the final tab + for line in tab[1:] : + uniprotID=line[0] + if human : + if uniprotID in unidict : + nextprot = access_dictionary(unidict,uniprotID,'neXtProt') + if nextprot != '' : nextprot = clean_nextprot_id(nextprot,line[0]) + line.extend([nextprot,access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'), + access_dictionary(unidict,uniprotID,'KEGG')]) + else : + line.extend(["","","",""]) + else : + if uniprotID in unidict : + line.extend([access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'), + access_dictionary(unidict,uniprotID,'KEGG')]) + else : + line.extend(["","",""]) + + #print ("tab ok") + + #add missing nextprot ID for human + if human : + #build next_dict + nextprot_ids = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) + next_dict = {} + for nextid in nextprot_ids : + next_dict[nextid.replace("NX_","")] = nextid + os.remove(os.path.join(target_directory,"nextprot_ac_list_all.txt")) + + #add missing nextprot ID + for line in tab[1:] : + uniprotID=line[0] + 
nextprotID=line[13] + if nextprotID == '' and uniprotID in next_dict : + line[13]=next_dict[uniprotID] + + output_file = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + ".tsv" + path = os.path.join(target_directory,output_file) + + with open(path,"w") as out : + w = csv.writer(out,delimiter='\t') + w.writerows(tab) + + name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"} + name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")" + id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + + data_table_entry = dict(id=id, name = name, species = species, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping_"+species) + +def download_from_uniprot_ftp(file,target_directory) : + ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" + path = os.path.join(target_directory, file) + ftp = ftplib.FTP("ftp.uniprot.org") + ftp.login("anonymous", "anonymous") + ftp.cwd(ftp_dir) + ftp.retrbinary("RETR " + file, open(path, 'wb').write) + ftp.quit() + return (path) + +def id_list_from_nextprot_ftp(file,target_directory) : + ftp_dir = "pub/current_release/ac_lists/" + path = os.path.join(target_directory, file) + ftp = ftplib.FTP("ftp.nextprot.org") + ftp.login("anonymous", "anonymous") + ftp.cwd(ftp_dir) + ftp.retrbinary("RETR " + file, open(path, 'wb').write) + ftp.quit() + with open(path,'r') as nextprot_ids : + nextprot_ids = nextprot_ids.read().splitlines() + return (nextprot_ids) + +#return '' if there's no value in a dictionary, avoid error +def access_dictionary (dico,key1,key2) : + if key1 in dico : + if key2 in dico[key1] : + return (dico[key1][key2]) + else : + return ("") + #print (key2,"not in ",dico,"[",key1,"]") + else : + return ('') + +#if there are several nextprot ID for one uniprotID, return the uniprot like ID +def clean_nextprot_id (next_id,uniprotAc) : + if len(next_id.split(";")) > 1 : + tmp = next_id.split(";") + if "NX_"+uniprotAc in tmp : + return ("NX_"+uniprotAc) + else : + return (tmp[1]) + else : + return (next_id) + + +####################################################################################################### +# 4. 
Build protein interaction maps files +####################################################################################################### + +def get_interactant_name(line,dico): + + if line[0] in dico : + interactant_A = dico[line[0]] + else : + interactant_A = "NA" + + if line[1] in dico : + interactant_B = dico[line[1]] + else : + interactant_B = "NA" + + return interactant_A, interactant_B + +def PPI_ref_files(data_manager_dict, species, interactome, target_directory): + + species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"} + + ##BioGRID + if interactome=="biogrid": + + tab2_link="https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip" + + #download zip file + r = requests.get(tab2_link) + with open("BioGRID.zip", "wb") as code: + code.write(r.content) + + #unzip files + with zipfile.ZipFile("BioGRID.zip", 'r') as zip_ref: + if not os.path.exists("tmp_BioGRID"): os.makedirs("tmp_BioGRID") + zip_ref.extractall("tmp_BioGRID") + + #import file of interest and build dictionary + file_path="tmp_BioGRID/BIOGRID-ORGANISM-"+species_dict[species].replace(" ","_")+"-3.5.167.tab2.txt" + with open(file_path,"r") as handle : + tab_file = csv.reader(handle,delimiter="\t") + dico_network = {} + GeneID_index=1 + network_cols=[1,2,7,8,11,12,14,18,20] + for line in tab_file : + if line[GeneID_index] not in dico_network: + dico_network[line[GeneID_index]]=[[line[i] for i in network_cols]] + else: + dico_network[line[GeneID_index]].append([line[i] for i in network_cols]) + + #delete tmp_BioGRID directory + os.remove("BioGRID.zip") + shutil.rmtree("tmp_BioGRID", ignore_errors=True) + + #download NCBI2Reactome.txt file and build dictionary + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico['network']=dico_network + dico['nodes']=dico_nodes + + ##Bioplex + elif interactome=="bioplex": + + with requests.Session() as s: + r = s.get('http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv') + r = r.content.decode('utf-8') + bioplex = csv.reader(r.splitlines(), delimiter='\t') + + dico_network = {} + dico_network["GeneID"]={} + network_geneid_cols=[0,1,4,5,8] + dico_network["UniProt-AC"]={} + network_uniprot_cols=[2,3,4,5,8] + dico_GeneID_to_UniProt = {} + for line in bioplex : + if line[0] not in dico_network["GeneID"]: + dico_network["GeneID"][line[0]]=[[line[i] for i in network_geneid_cols]] + else : + dico_network["GeneID"][line[0]].append([line[i] for i in network_geneid_cols]) + if line[1] not in dico_network["UniProt-AC"]: + dico_network["UniProt-AC"][line[2]]=[[line[i] for i in network_uniprot_cols]] + else: + dico_network["UniProt-AC"][line[2]].append([line[i] for i in network_uniprot_cols]) + dico_GeneID_to_UniProt[line[0]]=line[2] + + with requests.Session() as s: + r = s.get('https://reactome.org/download/current/UniProt2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes_uniprot = {} + uniProt_index=0 + 
pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + if line[uniProt_index] in dico_nodes_uniprot : + dico_nodes_uniprot[line[uniProt_index]].append(line[pathway_description_index]) + else : + dico_nodes_uniprot[line[uniProt_index]] = [line[pathway_description_index]] + + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes_geneid = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + if line[geneid_index] in dico_nodes_geneid : + dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico_nodes={} + dico_nodes['GeneID']=dico_nodes_geneid + dico_nodes['UniProt-AC']=dico_nodes_uniprot + dico['network']=dico_network + dico['nodes']=dico_nodes + dico['convert']=dico_GeneID_to_UniProt + + ##Humap + elif interactome=="humap": + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt') + r = r.content.decode('utf-8') + humap_nodes = csv.reader(r.splitlines(), delimiter=',') + + dico_geneid_to_gene_name={} + dico_protein_name={} + for line in humap_nodes : + if check_entrez_geneid(line[4]): + if line[4] not in dico_geneid_to_gene_name: + dico_geneid_to_gene_name[line[4]]=line[3] + if line[4] not in dico_protein_name: + dico_protein_name[line[4]]=line[5] + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt') + r = r.content.decode('utf-8') + humap = csv.reader(r.splitlines(), delimiter='\t') + + dico_network = {} + for line in humap : + if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]): + + interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name) + + #first interactant (first column) + if line[0] not in dico_network: + dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]] + else : + dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]]) + + #second interactant (second column) + if line[1] not in dico_network: + dico_network[line[1]]=[[line[1],line[0],interactant_B,interactant_A,line[2]]] + else : + dico_network[line[1]].append([line[1],line[0],interactant_B,interactant_A,line[2]]) + + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + #Fill dictionary with pathways + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico['network']=dico_network + dico['nodes']=dico_nodes + dico['gene_name']=dico_geneid_to_gene_name + dico['protein_name']=dico_protein_name + + #writing output + output_file = species+'_'+interactome+'_'+ time.strftime("%d-%m-%Y") + ".json" + path = os.path.join(target_directory,output_file) + name = species+" ("+species_dict[species]+") "+time.strftime("%d/%m/%Y") + id = species+"_"+interactome+"_"+ time.strftime("%d-%m-%Y") + + 
with open(path, 'w') as handle: + json.dump(dico, handle, sort_keys=True) + + data_table_entry = dict(id=id, name = name, species = species, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_"+interactome+"_dictionaries") + +####################################################################################################### +# 5. nextprot (add protein features) +####################################################################################################### + +def Build_nextprot_ref_file(data_manager_dict,target_directory): + nextprot_ids_file = "nextprot_ac_list_all.txt" + ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory) + + nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] + for id in ids : + #print (id) + query="https://api.nextprot.org/entry/"+id+".json" + resp = requests.get(url=query) + data = resp.json() + + #get info from json dictionary + mass_mol = data["entry"]["isoforms"][0]["massAsString"] + seq_length = data['entry']["isoforms"][0]["sequenceLength"] + iso_elec_point = data['entry']["isoforms"][0]["isoelectricPointAsString"] + chr_loc = data['entry']["chromosomalLocations"][0]["chromosome"] + protein_existence = "PE"+str(data['entry']["overview"]['proteinExistence']['level']) + + #put all subcell loc in a set + if "subcellular-location" in data['entry']["annotationsByCategory"].keys() : + subcell_locs = data['entry']["annotationsByCategory"]["subcellular-location"] + all_subcell_locs = set() + for loc in subcell_locs : + all_subcell_locs.add(loc['cvTermName']) + all_subcell_locs.discard("") + all_subcell_locs = ";".join(all_subcell_locs) + else : + all_subcell_locs = "NA" + + #put all subcell loc in a set + if ('disease') in data['entry']['annotationsByCategory'].keys() : + diseases = data['entry']['annotationsByCategory']['disease'] + all_diseases = set() + for disease in diseases : + if (disease['cvTermName'] is not None and disease['cvTermName'] != ""): + all_diseases.add(disease['cvTermName']) + if len(all_diseases) > 0 : all_diseases = ";".join(all_diseases) + else : all_diseases="NA" + else : + all_diseases="NA" + + #get all tm domain + nb_domains = 0 + if "domain" in data['entry']['annotationsByCategory'].keys(): + tm_domains = data['entry']['annotationsByCategory']["domain"] + for tm_domain in tm_domains : + if "properties" in tm_domain.keys() and tm_domain['properties']!=[]: + domains = tm_domains["properties"] + for domain in domains : + if domain["name"]=="region structure" and domain["value"]=="Helical" : + nb_domains+=1 + + + nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence]) + + output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv" + path = os.path.join(target_directory,output_file) + name = "neXtProt release "+time.strftime("%d-%m-%Y") + id = "nextprot_ref_"+time.strftime("%d-%m-%Y") + + with open(path, 'w') as output: + writer = csv.writer(output,delimiter="\t") + writer.writerows(nextprot_file) + + data_table_entry = dict(id=id, name = name, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") + +####################################################################################################### +# Main function +####################################################################################################### +def main(): + parser = argparse.ArgumentParser() + 
parser.add_argument("--hpa", metavar = ("HPA_OPTION")) + parser.add_argument("--peptideatlas", metavar=("SAMPLE_CATEGORY_ID")) + parser.add_argument("--id_mapping", metavar = ("ID_MAPPING_SPECIES")) + parser.add_argument("--interactome", metavar = ("PPI")) + parser.add_argument("--species") + parser.add_argument("--date") + parser.add_argument("-o", "--output") + parser.add_argument("--database") + args = parser.parse_args() + + data_manager_dict = {} + # Extract json file params + filename = args.output + params = from_json_string(open(filename).read()) + target_directory = params[ 'output_data' ][0]['extra_files_path'] + os.mkdir(target_directory) + + ## Download source files from HPA + try: + hpa = args.hpa + except NameError: + hpa = None + if hpa is not None: + #target_directory = "/projet/galaxydev/galaxy/tools/proteore/ProteoRE/tools/resources_building/test-data/" + hpa = hpa.split(",") + for hpa_tissue in hpa: + HPA_sources(data_manager_dict, hpa_tissue, target_directory) + + ## Download source file from Peptide Atlas query + try: + peptide_atlas = args.peptideatlas + date = args.date + except NameError: + peptide_atlas = None + if peptide_atlas is not None: + #target_directory = "/projet/galaxydev/galaxy/tools/proteore/ProteoRE/tools/resources_building/test-data/" + peptide_atlas = peptide_atlas.split(",") + for pa_tissue in peptide_atlas: + peptide_atlas_sources(data_manager_dict, pa_tissue, date, target_directory) + + ## Download ID_mapping source file from Uniprot + try: + id_mapping=args.id_mapping + except NameError: + id_mapping = None + if id_mapping is not None: + id_mapping = id_mapping .split(",") + for species in id_mapping : + id_mapping_sources(data_manager_dict, species, target_directory) + + ## Download PPI ref files from biogrid/bioplex/humap + try: + interactome=args.interactome + if interactome == "biogrid" : + species=args.species + else : + species="Human" + except NameError: + interactome=None + species=None + if interactome is not None and species is not None: + PPI_ref_files(data_manager_dict, species, interactome, target_directory) + + ## Build nextprot ref file for add protein features + try: + database=args.database + except NameError: + database=None + if database is not None : + Build_nextprot_ref_file(data_manager_dict,target_directory) + + #save info to json file + filename = args.output + open(filename, 'wb').write(to_json_string(data_manager_dict)) + +if __name__ == "__main__": + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/resource_building.xml Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,223 @@ +<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.03.13" tool_type="manage_data"> +<description> +to create or update reference files for proteore tools +</description> +<requirements> + <!--requirement type="package" version="1.8.2">sparqlwrapper</requirement--> +</requirements> +<stdio> + <exit_code range="1:" /> +</stdio> +<command><![CDATA[ + + python $__tool_directory__/resource_building.py + #if $database.database == "human_protein_atlas" + --hpa "$database.tissues" + #else if $database.database == "peptide_atlas" + --peptideatlas="$database.tissues" + --date="$database.date" + #else if $database.database == "id_mapping" + --id_mapping="$database.species" + #else if $database.database == "PPI" + --interactome="$database.base.interactome" + #if $database.base.interactome == "biogrid" + --species="$database.base.species" + #end if + #else if $database.database == "nextprot" + --database=$database.database + #end if + --output "$output" + +]]></command> + +<inputs> + <conditional name="database"> + <param name="database" type="select"> + <option value="human_protein_atlas">Human Protein Atlas</option> + <option value="peptide_atlas">Peptide Atlas</option> + <option value="id_mapping">ID mapping</option> + <option value="PPI">Build protein interaction maps</option> + <option value="nextprot">neXtProt</option> + </param> + <when value="human_protein_atlas"> + <param name="tissues" type="select" multiple="false" label="Please select tissue"> + <option value="HPA_normal_tissue">Normal tissue</option> + <option value="HPA_pathology">Pathology</option> + <!--option value="HPA_full_atlas">Full Atlas</option--> + </param> + </when> + <when value="peptide_atlas"> + <param name="tissues" type="select" multiple="false" label="Please select the tissue"> + <option value="432.Human_Adrenal_gland">Human Adrenal gland proteome</option> + <option value="441.Human_Brain">Human Brain proteome</option> + <option value="427.Human_Breast">Human Breast proteome</option> + <option value="434.Human_CSF">Human CSF (Cerebro Spinal Fluid) proteome</option> + <option value="374.Human_Colon_cancer">Human Colon cancer proteome</option> + <option value="429.Human_Digestive_system">Human Digestive system proteome</option> + <option value="430.Human_Female_reproductive_system">Human Female reproductive system proteome</option> + <option value="418.Human_Heart">Human Heart proteome</option> + <option value="424.Human_Kidney">Human Kidney proteome</option> + <option value="425.Human_Liver">Human Liver proteome</option> + <option value="419.Human_Lung">Human Lung proteome</option> + <option value="431.Human_Male_reproductive_system">Human Male reproductive system proteome</option> + <option value="420.Human_Pancreas">Human Pancreas proteome</option> + <option value="465.Human_Plasma_non_glyco">Human Plasma non glyco proteome</option> + <option value="421.Human_Spleen">Human Spleen proteome</option> + <option value="463.Human_Testis">Human Testis proteome</option> + <option value="422.Human_Urinary_bladder">Human Bladder proteome</option> + <option value="423.Human_Urine">Human Urine proteome</option> + </param> + <param name="date" type="text" value="" label="enter the build date" help="for example: '2018-04'"/> + </when> + <when value="id_mapping"> + <param name="species" type="select" multiple="false" label="Please select the species"> + <option 
value="Human">Human (Homo sapiens)</option> + <option value="Mouse">Mouse (Mus musculus)</option> + <option value="Rat">Rat (Rattus norvegicus)</option> + </param> + </when> + <when value="PPI"> + <conditional name="base"> + <param name="interactome" type="select" multiple="false" label="Please select interactome"> + <option value="biogrid">BioGRID</option> + <option value="bioplex">Human Bioplex 2.0</option> + <option value="humap">Human protein complex Map (Hu.map)</option> + </param> + <when value="biogrid"> + <param name="species" type="select" multiple="false" label="Please select the species"> + <option value="Human">Human (Homo sapiens)</option> + <option value="Mouse">Mouse (Mus musculus)</option> + <option value="Rat">Rat (Rattus norvegicus)</option> + </param> + </when> + <when value="bioplex"/> + <when value="humap"/> + </conditional> + </when> + </conditional> +</inputs> + +<outputs> + <!--data format="tabular" name="output"> + <discover_datasets pattern="(?P<designation>.+).tsv" ext="tabular" visible="true" assign_primary_output="true" /> + </data--> + <data name="output" format="data_manager_json"/> +</outputs> + +<tests> +</tests> + +<help><![CDATA[ + +**Description** + +This tool is a data manager designed to update resources files of ProteoRe tools. For now, only resources files for tools listed below are handled: + +* "Get MS/MS observations in tissue/fluid [Peptide Atlas]" +* "Get expression profiles by (normal or tumor) tissue/cell type [Human Protein Atlas]" +* "ID converter" + +----- + +**Input** + +There's no input needed, once you selected the tool and file you want to update, it will be generated automatically. + +----- + +**Parameters** + +* database: the database to update (for now one per tool) + +Once a database is selected, there's a second dropdown menu to select the specific file you want to update. + +* for 'Human Protein Atlas': 'Normal tissue', 'Pathology' and 'Full Atlas' + +* for 'Peptide Atlas': 'Human liver', 'Human brain', 'Human heart', 'Human kidney', 'Human blood plasma', 'Human urine' and 'Human cerebrospinal fluid' + +* for 'ID mapping': 'Human (Homo sapiens)', 'Mouse (Mus musculus)' and 'Rat (Rattus norvegicus)' + +* for 'Build protein interaction maps': "BioGRID", "Bioplex" and "Human (Homo sapiens)", "Mouse (Mus musculus)", "Rat (Rattus norvegicus)" + +----- + +**Output** + +The output is the reference file selected for update in input. + +For example, if you select database="Human Protein Atlas" and Please select tissue="Normal tissue": + +the output is a new reference file for "Get expression profiles by (normal or tumor) tissue/cell type [Human Protein Atlas]" +dated from the day and listed in the dropdown menu "Normal tissue HPA version". + +.. class:: warningmark + +A reference file created with this data manager will appears in the concerned ProteoRE tool. It can not be removed with the data manager. + +----- + +**Data sources** + +For 'Human Protein Atlas': + +* `Normal Tissue <https://www.proteinatlas.org/download/normal_tissue.tsv.zip>`_. +* `Pathology <https://www.proteinatlas.org/download/pathology.tsv.zip>`_. +* `Full Atlas <https://www.proteinatlas.org/download/proteinatlas.tsv.zip>`_. 
+ +For 'Peptide Atlas': + + +* `Human Adrenal gland proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=432&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Brain proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=441&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Breast Proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=427&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human CSF proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=434&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Digestive System proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=429&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human female reproductive system proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=430&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Heart proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=418&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Kidney man Kidney Proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=424&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Liver proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=425&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Lung proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=419&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. 
+* `Human Male Reproductive System proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=431&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Pancreas proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=420&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Plasma Non-Glyco proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=465&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Spleen proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=421&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Testis proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=463&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Urinary Bladder proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=422&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Urine proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=423&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. + +For ID mapping: + +* `HUMAN_9606_idmapping_selected.tab <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz>`_. +* `HUMAN_9606_idmapping.dat <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz>`_. +* `nextprot_ac_list_all.txt <ftp://ftp.nextprot.org/pub/current_release/ac_lists/nextprot_ac_list_all.txt>`_. +* `MOUSE_10090_idmapping_selected.tab <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/MOUSE_10090_idmapping_selected.tab.gz>`_. +* `MOUSE_10090_idmapping.dat <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/MOUSE_10090_idmapping.dat.gz>`_. +* `RAT_10116_idmapping_selected.tab <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping_selected.tab.gz>`_. +* `RAT_10116_idmapping.dat <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping.dat.gz>`_. 
+ +For 'Build protein interaction maps': + +* `BIOGRID_organism <https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip>`_. +* `NCBI2Reactome.txt <https://www.reactome.org/download/current/NCBI2Reactome.txt>`_. +* `BioPlex_interactionList_v4a.tsv <http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv>`_. +* `UniProt2Reactome.txt <https://reactome.org/download/current/UniProt2Reactome.txt>`_. + +----- + +.. class:: infomark + +**Authors** + +David Christiany, Lisa Peru, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR + +Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform + +This work has been partially funded through the French National Agency for Research (ANR) IFB project. + +Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. + + ]]></help> + <citations> + </citations> + +</tool>
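For orientation, the Cheetah `<command>` template at the top of this tool wrapper expands to ordinary command lines. Below is a sketch of two such expansions; the flags are those defined in `resource_building.py`, while `galaxy_params.json` is an illustrative stand-in for the JSON parameter file that Galaxy passes as `$output`.

```python
# Two example expansions of the <command> template; flags match resource_building.py.
# "galaxy_params.json" stands in for the $output JSON file that Galaxy provides, and it
# must already contain the parameter blob (see the sketch after resource_building.py)
# before either command could actually be executed, e.g. via subprocess.run(cmd, check=True).
id_mapping_cmd = ["python", "resource_building.py",
                  "--id_mapping=Human",
                  "--output", "galaxy_params.json"]

biogrid_cmd = ["python", "resource_building.py",
               "--interactome=biogrid", "--species=Mouse",
               "--output", "galaxy_params.json"]

print(" ".join(id_mapping_cmd))
print(" ".join(biogrid_cmd))
```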
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,152 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/resource_building.xml" id="resource_building"> + <data_table name="proteore_peptide_atlas"> + <output> + <column name="id"/> + <column name="name" /> + <column name="tissue" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}/${value}.tsv</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">peptide_atlas/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/peptide_atlas/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_protein_atlas_normal_tissue"> + <output> + <column name="id"/> + <column name="name" /> + <column name="tissue" /> + <column name="value" output_ref="output" > + <move type="file"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_protein_atlas_tumor_tissue"> + <output> + <column name="id"/> + <column name="name" /> + <column name="tissue" /> + <column name="value" output_ref="output" > + <move type="file"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_id_mapping_Human"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_id_mapping_Mouse"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_id_mapping_Rat"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_biogrid_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + 
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_bioplex_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_humap_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_nextprot_ref"> + <output> + <column name="id" /> + <column name="name" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">proteore_nextprot_ref/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/proteore_nextprot_ref/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_biogrid_dictionaries.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,4 @@ +#id name species value +#biogrid_human_08-01-2019 Human (Homo sapiens) 08/01/2019 Human PPI_dictionaries/Human_biogrid.json +#biogrid_mouse_08-01-2019 Mouse (Mus musculus) 08/01/2019 Mouse PPI_dictionaries/Mouse_biogrid.json +#biogrid_rat_08-01-2019 Rat (Rattus norvegicus) 08/01/2019 Rat PPI_dictionaries/Rat_biogrid.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_bioplex_dictionaries.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,2 @@ +#id name species value +#bioplex_human_08-01-2019 Human (Homo sapiens) 08/01/2019 Human PPI_dictionaries/human_bioplex.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_humap_dictionaries.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,2 @@ +#id name species value +#humap_human_01-02-2019 Human (Homo sapiens) 01/02/19 Human PPI_dictionaries/Human_humap_01-02-2019.json
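The JSON files referenced by these PPI `.loc` samples are the dictionaries written by `PPI_ref_files()`; for BioGRID they hold a `network` dict (selected tab2 columns keyed by Entrez GeneID) and a `nodes` dict (Reactome pathway descriptions). A minimal reading sketch, with an illustrative file name and GeneID:

```python
import json

def load_ppi_dictionary(path):
    """Load a PPI dictionary written by PPI_ref_files()."""
    with open(path) as handle:
        return json.load(handle)

# Hypothetical usage for a BioGRID build (the date in the file name depends on the run):
# dico = load_ppi_dictionary("PPI_dictionaries/Human_biogrid_13-03-2019.json")
# dico["network"].get("672", [])   # interactions: selected BioGRID tab2 columns, keyed by GeneID
# dico["nodes"].get("672", [])     # Reactome pathway descriptions for that GeneID
```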
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_id_mapping_Human.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,3 @@ +#This file lists the locations of reference file for id_converter tool +#<id> <name> <value> <path> +#human_id_mapping_01-01-2018 Human (homo sapiens) human_id_mapping tool-data/human_id_mapping.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_id_mapping_Mouse.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,3 @@ +#This file lists the locations of reference file for id_converter tool +#<id> <name> <value> <path> +#mouse_id_mapping_01-01-2018 Mouse (Mus musculus) mouse_id_mapping tool-data/mouse_id_mapping.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_id_mapping_Rat.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,3 @@ +#This file lists the locations of reference file for id_converter tool +#<id> <name> <value> <path> +#rat_id_mapping_01-01-2018 Rat (Rattus norvegicus) rat_id_mapping tool-data/rat_id_mapping.tsv
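The ID-mapping reference files listed above are plain TSVs whose header is written by `id_mapping_sources()`. A small sketch of building a UniProt-AC to GeneID lookup from one of them; the file path and accession are illustrative only.

```python
import csv

def uniprot_to_geneid(path):
    """Map UniProt-AC to GeneID using the header written by id_mapping_sources()."""
    mapping = {}
    with open(path) as handle:
        for row in csv.DictReader(handle, delimiter="\t"):
            if row["GeneID"]:
                mapping[row["UniProt-AC"]] = row["GeneID"]
    return mapping

# Hypothetical usage once the data manager has run:
# mapping = uniprot_to_geneid("id_mapping/Human_id_mapping_13-03-2019.tsv")
# mapping.get("P31946")
```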
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_nextprot_ref.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,2 @@ +#<id> <name> <value> +#nextprot_ref_09-03-2019 neXtProt release 09-03-2019 tool-data/nextprot_ref_09-03-2019.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_protein_atlas_normal_tissue.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,4 @@ +#This file lists the locations name and values of reference files for Get expression data tool +#This is a tab separated file (TAB, not 4 spaces !) +#<id> <name> <tissue> <value> +#HPA_normal_tissue_19-07-2018 HPA normal tissue 19/07/2018 HPA_normal_tissue /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19159/dataset_39307_files/HPA_normal_tissue_19-07-2018.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_protein_atlas_tumor_tissue.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,4 @@ +#This file lists the locations name and values of reference files for Get expression data tool +#This is a tab separated file (TAB, not 4 spaces !) +#<id> <name> <tissue> <value> +#HPA_pathology_19-07-2018 HPA pathology 19/07/2018 HPA_pathology /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19160/dataset_39308_files/HPA_pathology_19-07-2018.tsv
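All of these `.loc` files are tab-separated tables in which `#` lines are comments. Galaxy tools normally consume them through `<options from_data_table="...">`, but a direct parsing sketch like the one below (column order taken from `tool_data_table_conf.xml.sample`) can help when checking a freshly built entry by hand.

```python
import csv

def read_loc(path, columns=("id", "name", "tissue", "value")):
    """Return the non-comment rows of a .loc file as dicts."""
    entries = []
    with open(path) as handle:
        for row in csv.reader(handle, delimiter="\t"):
            if not row or row[0].startswith("#"):
                continue  # skip comment/header lines
            entries.append(dict(zip(columns, row)))
    return entries

# e.g. read_loc("tool-data/proteore_protein_atlas_normal_tissue.loc")
# or   read_loc("tool-data/proteore_id_mapping_Human.loc", columns=("id", "name", "species", "value"))
```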
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,43 @@ +<?xml version="1.0"?> +<tables> + <table name='proteore_peptide_atlas' comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_peptide_atlas.loc"/> + </table> + <table name="proteore_protein_atlas_normal_tissue" comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_protein_atlas_normal_tissue.loc" /> + </table> + <table name="proteore_protein_atlas_tumor_tissue" comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_protein_atlas_tumor_tissue.loc" /> + </table> + <table name="proteore_id_mapping_Human" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_id_mapping_Human.loc" /> + </table> + <table name="proteore_id_mapping_Mouse" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_id_mapping_Mouse.loc" /> + </table> + <table name="proteore_id_mapping_Rat" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_id_mapping_Rat.loc" /> + </table> + <table name="proteore_biogrid_dictionaries" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_biogrid_dictionaries.loc" /> + </table> + <table name="proteore_bioplex_dictionaries" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_bioplex_dictionaries.loc" /> + </table> + <table name="proteore_humap_dictionaries" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_humap_dictionaries.loc" /> + </table> + <table name='proteore_nextprot_ref' comment_char="#"> + <columns>id, name, value</columns> + <file path="tool-data/proteore_nextprot_ref.loc"/> + </table> +</tables>
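Finally, a small sanity-check sketch, assuming it is run from the repository root: it lists every table declared in `tool_data_table_conf.xml.sample` with its expected columns and reports whether the corresponding `.loc.sample` file ships with the repo.

```python
import os
import xml.etree.ElementTree as ET

def check_tables(conf_path="tool_data_table_conf.xml.sample", repo_root="."):
    """Print each declared table, its columns, and whether a .loc.sample ships with the repo."""
    for table in ET.parse(conf_path).getroot().findall("table"):
        name = table.get("name")
        columns = table.find("columns").text.strip()
        loc_path = table.find("file").get("path")          # e.g. tool-data/proteore_nextprot_ref.loc
        sample = os.path.join(repo_root, loc_path + ".sample")
        status = "OK" if os.path.exists(sample) else "MISSING"
        print(f"{name}: [{columns}] -> {sample} ({status})")

if __name__ == "__main__":
    check_tables()
```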