proteore / proteore_data_manager
changeset 0:9e31ea9fc7ea draft
planemo upload commit 567ba7934c0ca55529dfeb5e7ca0935ace260ad7-dirty
field | value
---|---
author | proteore
date | Wed, 13 Mar 2019 06:30:42 -0400
parents |
children | f3507260b30f
files | data_manager/resource_building.py data_manager/resource_building.xml data_manager_conf.xml tool-data/proteore_biogrid_dictionaries.loc.sample tool-data/proteore_bioplex_dictionaries.loc.sample tool-data/proteore_humap_dictionaries.loc.sample tool-data/proteore_id_mapping_Human.loc.sample tool-data/proteore_id_mapping_Mouse.loc.sample tool-data/proteore_id_mapping_Rat.loc.sample tool-data/proteore_nextprot_ref.loc.sample tool-data/proteore_protein_atlas_normal_tissue.loc.sample tool-data/proteore_protein_atlas_tumor_tissue.loc.sample tool_data_table_conf.xml.sample
diffstat | 13 files changed, 1094 insertions(+), 0 deletions(-)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/resource_building.py Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,649 @@ +# -*- coding: utf-8 -*- +""" +The purpose of this script is to create source files from different databases to be used in other proteore tools +""" + +import os, sys, argparse, requests, time, csv, re, json, shutil, zipfile +from io import BytesIO +from zipfile import ZipFile +from galaxy.util.json import from_json_string, to_json_string + +####################################################################################################### +# General functions +####################################################################################################### +def unzip(url, output_file): + """ + Get a zip file content from a link and unzip + """ + content = requests.get(url) + zipfile = ZipFile(BytesIO(content.content)) + output_content = "" + output_content += zipfile.open(zipfile.namelist()[0]).read() + output = open(output_file, "w") + output.write(output_content) + output.close() + +def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) + data_manager_dict['data_tables'][data_table].append(data_table_entry) + return data_manager_dict + +####################################################################################################### +# 1. Human Protein Atlas +# - Normal tissue +# - Pathology +# - Full Atlas +####################################################################################################### +def HPA_sources(data_manager_dict, tissue, target_directory): + if tissue == "HPA_normal_tissue": + tissue_name = "HPA normal tissue" + url = "https://www.proteinatlas.org/download/normal_tissue.tsv.zip" + table = "proteore_protein_atlas_normal_tissue" + elif tissue == "HPA_pathology": + tissue_name = "HPA pathology" + url = "https://www.proteinatlas.org/download/pathology.tsv.zip" + table = "proteore_protein_atlas_tumor_tissue" + elif tissue == "HPA_full_atlas": + tissue_name = "HPA full atlas" + url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip" + table = "proteore_protein_full_atlas" + + output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv" + path = os.path.join(target_directory, output_file) + unzip(url, path) #download and save file + tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y") + tissue_id = tissue_name.replace(" ","_").replace("/","-") + + + data_table_entry = dict(id=tissue_id, name = tissue_name, tissue = tissue, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, table) + + +####################################################################################################### +# 2. 
Peptide Atlas +####################################################################################################### +def peptide_atlas_sources(data_manager_dict, tissue, date, target_directory): + # Define organism_id (here Human) - to be upraded when other organism added to the project + organism_id = "2" + # Extract sample_category_id and output filename + tissue=tissue.split(".") + sample_category_id = tissue[0] + tissue_name = tissue[1] + output_file = tissue_name+"_"+date + ".tsv" + + query="https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id="+ \ + sample_category_id+"&display_options=ShowAbundances&organism_id="+organism_id+ \ + "&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf\ + &QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY" + + with requests.Session() as s: + download = s.get(query) + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter='\t') + + uni_dict = build_dictionary(cr) + + #columns of data table peptide_atlas + tissue_id = tissue_name+"_"+date + name = tissue_id.replace("-","/").replace("_"," ") + path = os.path.join(target_directory,output_file) + + with open(path,"w") as out : + w = csv.writer(out,delimiter='\t') + w.writerow(["Uniprot_AC","nb_obs"]) + w.writerows(uni_dict.items()) + + data_table_entry = dict(id=tissue_id, name=name, value = path, tissue = tissue_name) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_peptide_atlas") + +#function to count the number of observations by uniprot id +def build_dictionary (csv) : + uni_dict = {} + for line in csv : + if "-" not in line[0] and check_uniprot_access(line[0]) : + if line[0] in uni_dict : + uni_dict[line[0]] += int(line[5]) + else : + uni_dict[line[0]] = int(line[5]) + + return uni_dict + +#function to check if an id is an uniprot accession number : return True or False- +def check_uniprot_access (id) : + uniprot_pattern = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") + if uniprot_pattern.match(id) : + return True + else : + return False + +def check_entrez_geneid (id) : + entrez_pattern = re.compile("[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+") + if entrez_pattern.match(id) : + return True + else : + return False + +####################################################################################################### +# 3. 
ID mapping file +####################################################################################################### +import ftplib, gzip +csv.field_size_limit(sys.maxsize) # to handle big files + +def id_mapping_sources (data_manager_dict, species, target_directory) : + + human = species == "Human" + species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" } + files=["idmapping_selected.tab.gz","idmapping.dat.gz"] + + #header + if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] + else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] + + #print("header ok") + + #get selected.tab and keep only ids of interest + selected_tab_file=species_dict[species]+"_"+files[0] + tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory) + with gzip.open(tab_path,"rt") as select : + tab_reader = csv.reader(select,delimiter="\t") + for line in tab_reader : + tab.append([line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]]) + os.remove(tab_path) + + #print("selected_tab ok") + + """ + Supplementary ID to get from HUMAN_9606_idmapping.dat : + -NextProt,BioGrid,STRING,KEGG + """ + + #there's more id type for human + if human : ids = ['neXtProt','BioGrid','STRING','KEGG' ] #ids to get from dat_file + else : ids = ['BioGrid','STRING','KEGG' ] + unidict = {} + + #keep only ids of interest in dictionaries + dat_file=species_dict[species]+"_"+files[1] + dat_path = download_from_uniprot_ftp(dat_file,target_directory) + with gzip.open(dat_path,"rt") as dat : + dat_reader = csv.reader(dat,delimiter="\t") + for line in dat_reader : + uniprotID=line[0] #UniProtID as key + id_type=line[1] #ID type of corresponding id, key of sub-dictionnary + cor_id=line[2] #corresponding id + if "-" not in id_type : #we don't keep isoform + if id_type in ids and uniprotID in unidict : + if id_type in unidict[uniprotID] : + unidict[uniprotID][id_type]= ";".join([unidict[uniprotID][id_type],cor_id]) #if there is already a value in the dictionnary + else : + unidict[uniprotID].update({ id_type : cor_id }) + elif id_type in ids : + unidict[uniprotID]={id_type : cor_id} + os.remove(dat_path) + + #print("dat_file ok") + + #add ids from idmapping.dat to the final tab + for line in tab[1:] : + uniprotID=line[0] + if human : + if uniprotID in unidict : + nextprot = access_dictionary(unidict,uniprotID,'neXtProt') + if nextprot != '' : nextprot = clean_nextprot_id(nextprot,line[0]) + line.extend([nextprot,access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'), + access_dictionary(unidict,uniprotID,'KEGG')]) + else : + line.extend(["","","",""]) + else : + if uniprotID in unidict : + line.extend([access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'), + access_dictionary(unidict,uniprotID,'KEGG')]) + else : + line.extend(["","",""]) + + #print ("tab ok") + + #add missing nextprot ID for human + if human : + #build next_dict + nextprot_ids = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) + next_dict = {} + for nextid in nextprot_ids : + next_dict[nextid.replace("NX_","")] = nextid + os.remove(os.path.join(target_directory,"nextprot_ac_list_all.txt")) + + #add missing nextprot ID + for line in tab[1:] : + uniprotID=line[0] + 
nextprotID=line[13] + if nextprotID == '' and uniprotID in next_dict : + line[13]=next_dict[uniprotID] + + output_file = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + ".tsv" + path = os.path.join(target_directory,output_file) + + with open(path,"w") as out : + w = csv.writer(out,delimiter='\t') + w.writerows(tab) + + name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"} + name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")" + id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + + data_table_entry = dict(id=id, name = name, species = species, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping_"+species) + +def download_from_uniprot_ftp(file,target_directory) : + ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/" + path = os.path.join(target_directory, file) + ftp = ftplib.FTP("ftp.uniprot.org") + ftp.login("anonymous", "anonymous") + ftp.cwd(ftp_dir) + ftp.retrbinary("RETR " + file, open(path, 'wb').write) + ftp.quit() + return (path) + +def id_list_from_nextprot_ftp(file,target_directory) : + ftp_dir = "pub/current_release/ac_lists/" + path = os.path.join(target_directory, file) + ftp = ftplib.FTP("ftp.nextprot.org") + ftp.login("anonymous", "anonymous") + ftp.cwd(ftp_dir) + ftp.retrbinary("RETR " + file, open(path, 'wb').write) + ftp.quit() + with open(path,'r') as nextprot_ids : + nextprot_ids = nextprot_ids.read().splitlines() + return (nextprot_ids) + +#return '' if there's no value in a dictionary, avoid error +def access_dictionary (dico,key1,key2) : + if key1 in dico : + if key2 in dico[key1] : + return (dico[key1][key2]) + else : + return ("") + #print (key2,"not in ",dico,"[",key1,"]") + else : + return ('') + +#if there are several nextprot ID for one uniprotID, return the uniprot like ID +def clean_nextprot_id (next_id,uniprotAc) : + if len(next_id.split(";")) > 1 : + tmp = next_id.split(";") + if "NX_"+uniprotAc in tmp : + return ("NX_"+uniprotAc) + else : + return (tmp[1]) + else : + return (next_id) + + +####################################################################################################### +# 4. 
Build protein interaction maps files +####################################################################################################### + +def get_interactant_name(line,dico): + + if line[0] in dico : + interactant_A = dico[line[0]] + else : + interactant_A = "NA" + + if line[1] in dico : + interactant_B = dico[line[1]] + else : + interactant_B = "NA" + + return interactant_A, interactant_B + +def PPI_ref_files(data_manager_dict, species, interactome, target_directory): + + species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"} + + ##BioGRID + if interactome=="biogrid": + + tab2_link="https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip" + + #download zip file + r = requests.get(tab2_link) + with open("BioGRID.zip", "wb") as code: + code.write(r.content) + + #unzip files + with zipfile.ZipFile("BioGRID.zip", 'r') as zip_ref: + if not os.path.exists("tmp_BioGRID"): os.makedirs("tmp_BioGRID") + zip_ref.extractall("tmp_BioGRID") + + #import file of interest and build dictionary + file_path="tmp_BioGRID/BIOGRID-ORGANISM-"+species_dict[species].replace(" ","_")+"-3.5.167.tab2.txt" + with open(file_path,"r") as handle : + tab_file = csv.reader(handle,delimiter="\t") + dico_network = {} + GeneID_index=1 + network_cols=[1,2,7,8,11,12,14,18,20] + for line in tab_file : + if line[GeneID_index] not in dico_network: + dico_network[line[GeneID_index]]=[[line[i] for i in network_cols]] + else: + dico_network[line[GeneID_index]].append([line[i] for i in network_cols]) + + #delete tmp_BioGRID directory + os.remove("BioGRID.zip") + shutil.rmtree("tmp_BioGRID", ignore_errors=True) + + #download NCBI2Reactome.txt file and build dictionary + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico['network']=dico_network + dico['nodes']=dico_nodes + + ##Bioplex + elif interactome=="bioplex": + + with requests.Session() as s: + r = s.get('http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv') + r = r.content.decode('utf-8') + bioplex = csv.reader(r.splitlines(), delimiter='\t') + + dico_network = {} + dico_network["GeneID"]={} + network_geneid_cols=[0,1,4,5,8] + dico_network["UniProt-AC"]={} + network_uniprot_cols=[2,3,4,5,8] + dico_GeneID_to_UniProt = {} + for line in bioplex : + if line[0] not in dico_network["GeneID"]: + dico_network["GeneID"][line[0]]=[[line[i] for i in network_geneid_cols]] + else : + dico_network["GeneID"][line[0]].append([line[i] for i in network_geneid_cols]) + if line[1] not in dico_network["UniProt-AC"]: + dico_network["UniProt-AC"][line[2]]=[[line[i] for i in network_uniprot_cols]] + else: + dico_network["UniProt-AC"][line[2]].append([line[i] for i in network_uniprot_cols]) + dico_GeneID_to_UniProt[line[0]]=line[2] + + with requests.Session() as s: + r = s.get('https://reactome.org/download/current/UniProt2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes_uniprot = {} + uniProt_index=0 + 
pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + if line[uniProt_index] in dico_nodes_uniprot : + dico_nodes_uniprot[line[uniProt_index]].append(line[pathway_description_index]) + else : + dico_nodes_uniprot[line[uniProt_index]] = [line[pathway_description_index]] + + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes_geneid = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + if line[geneid_index] in dico_nodes_geneid : + dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico_nodes={} + dico_nodes['GeneID']=dico_nodes_geneid + dico_nodes['UniProt-AC']=dico_nodes_uniprot + dico['network']=dico_network + dico['nodes']=dico_nodes + dico['convert']=dico_GeneID_to_UniProt + + ##Humap + elif interactome=="humap": + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt') + r = r.content.decode('utf-8') + humap_nodes = csv.reader(r.splitlines(), delimiter=',') + + dico_geneid_to_gene_name={} + dico_protein_name={} + for line in humap_nodes : + if check_entrez_geneid(line[4]): + if line[4] not in dico_geneid_to_gene_name: + dico_geneid_to_gene_name[line[4]]=line[3] + if line[4] not in dico_protein_name: + dico_protein_name[line[4]]=line[5] + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt') + r = r.content.decode('utf-8') + humap = csv.reader(r.splitlines(), delimiter='\t') + + dico_network = {} + for line in humap : + if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]): + + interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name) + + #first interactant (first column) + if line[0] not in dico_network: + dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]] + else : + dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]]) + + #second interactant (second column) + if line[1] not in dico_network: + dico_network[line[1]]=[[line[1],line[0],interactant_B,interactant_A,line[2]]] + else : + dico_network[line[1]].append([line[1],line[0],interactant_B,interactant_A,line[2]]) + + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + #Fill dictionary with pathways + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico['network']=dico_network + dico['nodes']=dico_nodes + dico['gene_name']=dico_geneid_to_gene_name + dico['protein_name']=dico_protein_name + + #writing output + output_file = species+'_'+interactome+'_'+ time.strftime("%d-%m-%Y") + ".json" + path = os.path.join(target_directory,output_file) + name = species+" ("+species_dict[species]+") "+time.strftime("%d/%m/%Y") + id = species+"_"+interactome+"_"+ time.strftime("%d-%m-%Y") + + 
with open(path, 'w') as handle: + json.dump(dico, handle, sort_keys=True) + + data_table_entry = dict(id=id, name = name, species = species, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_"+interactome+"_dictionaries") + +####################################################################################################### +# 5. nextprot (add protein features) +####################################################################################################### + +def Build_nextprot_ref_file(data_manager_dict,target_directory): + nextprot_ids_file = "nextprot_ac_list_all.txt" + ids = id_list_from_nextprot_ftp(nextprot_ids_file,target_directory) + + nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] + for id in ids : + #print (id) + query="https://api.nextprot.org/entry/"+id+".json" + resp = requests.get(url=query) + data = resp.json() + + #get info from json dictionary + mass_mol = data["entry"]["isoforms"][0]["massAsString"] + seq_length = data['entry']["isoforms"][0]["sequenceLength"] + iso_elec_point = data['entry']["isoforms"][0]["isoelectricPointAsString"] + chr_loc = data['entry']["chromosomalLocations"][0]["chromosome"] + protein_existence = "PE"+str(data['entry']["overview"]['proteinExistence']['level']) + + #put all subcell loc in a set + if "subcellular-location" in data['entry']["annotationsByCategory"].keys() : + subcell_locs = data['entry']["annotationsByCategory"]["subcellular-location"] + all_subcell_locs = set() + for loc in subcell_locs : + all_subcell_locs.add(loc['cvTermName']) + all_subcell_locs.discard("") + all_subcell_locs = ";".join(all_subcell_locs) + else : + all_subcell_locs = "NA" + + #put all subcell loc in a set + if ('disease') in data['entry']['annotationsByCategory'].keys() : + diseases = data['entry']['annotationsByCategory']['disease'] + all_diseases = set() + for disease in diseases : + if (disease['cvTermName'] is not None and disease['cvTermName'] != ""): + all_diseases.add(disease['cvTermName']) + if len(all_diseases) > 0 : all_diseases = ";".join(all_diseases) + else : all_diseases="NA" + else : + all_diseases="NA" + + #get all tm domain + nb_domains = 0 + if "domain" in data['entry']['annotationsByCategory'].keys(): + tm_domains = data['entry']['annotationsByCategory']["domain"] + for tm_domain in tm_domains : + if "properties" in tm_domain.keys() and tm_domain['properties']!=[]: + domains = tm_domains["properties"] + for domain in domains : + if domain["name"]=="region structure" and domain["value"]=="Helical" : + nb_domains+=1 + + + nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence]) + + output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv" + path = os.path.join(target_directory,output_file) + name = "neXtProt release "+time.strftime("%d-%m-%Y") + id = "nextprot_ref_"+time.strftime("%d-%m-%Y") + + with open(path, 'w') as output: + writer = csv.writer(output,delimiter="\t") + writer.writerows(nextprot_file) + + data_table_entry = dict(id=id, name = name, value = path) + _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") + +####################################################################################################### +# Main function +####################################################################################################### +def main(): + parser = argparse.ArgumentParser() + 
parser.add_argument("--hpa", metavar = ("HPA_OPTION")) + parser.add_argument("--peptideatlas", metavar=("SAMPLE_CATEGORY_ID")) + parser.add_argument("--id_mapping", metavar = ("ID_MAPPING_SPECIES")) + parser.add_argument("--interactome", metavar = ("PPI")) + parser.add_argument("--species") + parser.add_argument("--date") + parser.add_argument("-o", "--output") + parser.add_argument("--database") + args = parser.parse_args() + + data_manager_dict = {} + # Extract json file params + filename = args.output + params = from_json_string(open(filename).read()) + target_directory = params[ 'output_data' ][0]['extra_files_path'] + os.mkdir(target_directory) + + ## Download source files from HPA + try: + hpa = args.hpa + except NameError: + hpa = None + if hpa is not None: + #target_directory = "/projet/galaxydev/galaxy/tools/proteore/ProteoRE/tools/resources_building/test-data/" + hpa = hpa.split(",") + for hpa_tissue in hpa: + HPA_sources(data_manager_dict, hpa_tissue, target_directory) + + ## Download source file from Peptide Atlas query + try: + peptide_atlas = args.peptideatlas + date = args.date + except NameError: + peptide_atlas = None + if peptide_atlas is not None: + #target_directory = "/projet/galaxydev/galaxy/tools/proteore/ProteoRE/tools/resources_building/test-data/" + peptide_atlas = peptide_atlas.split(",") + for pa_tissue in peptide_atlas: + peptide_atlas_sources(data_manager_dict, pa_tissue, date, target_directory) + + ## Download ID_mapping source file from Uniprot + try: + id_mapping=args.id_mapping + except NameError: + id_mapping = None + if id_mapping is not None: + id_mapping = id_mapping .split(",") + for species in id_mapping : + id_mapping_sources(data_manager_dict, species, target_directory) + + ## Download PPI ref files from biogrid/bioplex/humap + try: + interactome=args.interactome + if interactome == "biogrid" : + species=args.species + else : + species="Human" + except NameError: + interactome=None + species=None + if interactome is not None and species is not None: + PPI_ref_files(data_manager_dict, species, interactome, target_directory) + + ## Build nextprot ref file for add protein features + try: + database=args.database + except NameError: + database=None + if database is not None : + Build_nextprot_ref_file(data_manager_dict,target_directory) + + #save info to json file + filename = args.output + open(filename, 'wb').write(to_json_string(data_manager_dict)) + +if __name__ == "__main__": + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/resource_building.xml Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,223 @@ +<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.03.13" tool_type="manage_data"> +<description> +to create or update reference files for proteore tools +</description> +<requirements> + <!--requirement type="package" version="1.8.2">sparqlwrapper</requirement--> +</requirements> +<stdio> + <exit_code range="1:" /> +</stdio> +<command><![CDATA[ + + python $__tool_directory__/resource_building.py + #if $database.database == "human_protein_atlas" + --hpa "$database.tissues" + #else if $database.database == "peptide_atlas" + --peptideatlas="$database.tissues" + --date="$database.date" + #else if $database.database == "id_mapping" + --id_mapping="$database.species" + #else if $database.database == "PPI" + --interactome="$database.base.interactome" + #if $database.base.interactome == "biogrid" + --species="$database.base.species" + #end if + #else if $database.database == "nextprot" + --database=$database.database + #end if + --output "$output" + +]]></command> + +<inputs> + <conditional name="database"> + <param name="database" type="select"> + <option value="human_protein_atlas">Human Protein Atlas</option> + <option value="peptide_atlas">Peptide Atlas</option> + <option value="id_mapping">ID mapping</option> + <option value="PPI">Build protein interaction maps</option> + <option value="nextprot">neXtProt</option> + </param> + <when value="human_protein_atlas"> + <param name="tissues" type="select" multiple="false" label="Please select tissue"> + <option value="HPA_normal_tissue">Normal tissue</option> + <option value="HPA_pathology">Pathology</option> + <!--option value="HPA_full_atlas">Full Atlas</option--> + </param> + </when> + <when value="peptide_atlas"> + <param name="tissues" type="select" multiple="false" label="Please select the tissue"> + <option value="432.Human_Adrenal_gland">Human Adrenal gland proteome</option> + <option value="441.Human_Brain">Human Brain proteome</option> + <option value="427.Human_Breast">Human Breast proteome</option> + <option value="434.Human_CSF">Human CSF (Cerebro Spinal Fluid) proteome</option> + <option value="374.Human_Colon_cancer">Human Colon cancer proteome</option> + <option value="429.Human_Digestive_system">Human Digestive system proteome</option> + <option value="430.Human_Female_reproductive_system">Human Female reproductive system proteome</option> + <option value="418.Human_Heart">Human Heart proteome</option> + <option value="424.Human_Kidney">Human Kidney proteome</option> + <option value="425.Human_Liver">Human Liver proteome</option> + <option value="419.Human_Lung">Human Lung proteome</option> + <option value="431.Human_Male_reproductive_system">Human Male reproductive system proteome</option> + <option value="420.Human_Pancreas">Human Pancreas proteome</option> + <option value="465.Human_Plasma_non_glyco">Human Plasma non glyco proteome</option> + <option value="421.Human_Spleen">Human Spleen proteome</option> + <option value="463.Human_Testis">Human Testis proteome</option> + <option value="422.Human_Urinary_bladder">Human Bladder proteome</option> + <option value="423.Human_Urine">Human Urine proteome</option> + </param> + <param name="date" type="text" value="" label="enter the build date" help="for example: '2018-04'"/> + </when> + <when value="id_mapping"> + <param name="species" type="select" multiple="false" label="Please select the species"> + <option 
value="Human">Human (Homo sapiens)</option> + <option value="Mouse">Mouse (Mus musculus)</option> + <option value="Rat">Rat (Rattus norvegicus)</option> + </param> + </when> + <when value="PPI"> + <conditional name="base"> + <param name="interactome" type="select" multiple="false" label="Please select interactome"> + <option value="biogrid">BioGRID</option> + <option value="bioplex">Human Bioplex 2.0</option> + <option value="humap">Human protein complex Map (Hu.map)</option> + </param> + <when value="biogrid"> + <param name="species" type="select" multiple="false" label="Please select the species"> + <option value="Human">Human (Homo sapiens)</option> + <option value="Mouse">Mouse (Mus musculus)</option> + <option value="Rat">Rat (Rattus norvegicus)</option> + </param> + </when> + <when value="bioplex"/> + <when value="humap"/> + </conditional> + </when> + </conditional> +</inputs> + +<outputs> + <!--data format="tabular" name="output"> + <discover_datasets pattern="(?P<designation>.+).tsv" ext="tabular" visible="true" assign_primary_output="true" /> + </data--> + <data name="output" format="data_manager_json"/> +</outputs> + +<tests> +</tests> + +<help><![CDATA[ + +**Description** + +This tool is a data manager designed to update resources files of ProteoRe tools. For now, only resources files for tools listed below are handled: + +* "Get MS/MS observations in tissue/fluid [Peptide Atlas]" +* "Get expression profiles by (normal or tumor) tissue/cell type [Human Protein Atlas]" +* "ID converter" + +----- + +**Input** + +There's no input needed, once you selected the tool and file you want to update, it will be generated automatically. + +----- + +**Parameters** + +* database: the database to update (for now one per tool) + +Once a database is selected, there's a second dropdown menu to select the specific file you want to update. + +* for 'Human Protein Atlas': 'Normal tissue', 'Pathology' and 'Full Atlas' + +* for 'Peptide Atlas': 'Human liver', 'Human brain', 'Human heart', 'Human kidney', 'Human blood plasma', 'Human urine' and 'Human cerebrospinal fluid' + +* for 'ID mapping': 'Human (Homo sapiens)', 'Mouse (Mus musculus)' and 'Rat (Rattus norvegicus)' + +* for 'Build protein interaction maps': "BioGRID", "Bioplex" and "Human (Homo sapiens)", "Mouse (Mus musculus)", "Rat (Rattus norvegicus)" + +----- + +**Output** + +The output is the reference file selected for update in input. + +For example, if you select database="Human Protein Atlas" and Please select tissue="Normal tissue": + +the output is a new reference file for "Get expression profiles by (normal or tumor) tissue/cell type [Human Protein Atlas]" +dated from the day and listed in the dropdown menu "Normal tissue HPA version". + +.. class:: warningmark + +A reference file created with this data manager will appears in the concerned ProteoRE tool. It can not be removed with the data manager. + +----- + +**Data sources** + +For 'Human Protein Atlas': + +* `Normal Tissue <https://www.proteinatlas.org/download/normal_tissue.tsv.zip>`_. +* `Pathology <https://www.proteinatlas.org/download/pathology.tsv.zip>`_. +* `Full Atlas <https://www.proteinatlas.org/download/proteinatlas.tsv.zip>`_. 
+ +For 'Peptide Atlas': + + +* `Human Adrenal gland proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=432&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Brain proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=441&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Breast Proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=427&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human CSF proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=434&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Digestive System proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=429&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human female reproductive system proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=430&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Heart proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=418&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Kidney man Kidney Proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=424&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Liver proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=425&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Lung proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=419&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. 
+* `Human Male Reproductive System proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=431&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Pancreas proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=420&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Plasma Non-Glyco proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=465&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Spleen proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=421&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Testis proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=463&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Urinary Bladder proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=422&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. +* `Human Urine proteome <https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id=423&display_options=ShowAbundances&organism_id=2&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY>`_. + +For ID mapping: + +* `HUMAN_9606_idmapping_selected.tab <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz>`_. +* `HUMAN_9606_idmapping.dat <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz>`_. +* `nextprot_ac_list_all.txt <ftp://ftp.nextprot.org/pub/current_release/ac_lists/nextprot_ac_list_all.txt>`_. +* `MOUSE_10090_idmapping_selected.tab <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/MOUSE_10090_idmapping_selected.tab.gz>`_. +* `MOUSE_10090_idmapping.dat <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/MOUSE_10090_idmapping.dat.gz>`_. +* `RAT_10116_idmapping_selected.tab <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping_selected.tab.gz>`_. +* `RAT_10116_idmapping.dat <ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping.dat.gz>`_. 
+ +For 'Build protein interaction maps': + +* `BIOGRID_organism <https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip>`_. +* `NCBI2Reactome.txt <https://www.reactome.org/download/current/NCBI2Reactome.txt>`_. +* `BioPlex_interactionList_v4a.tsv <http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv>`_. +* `UniProt2Reactome.txt <https://reactome.org/download/current/UniProt2Reactome.txt>`_. + +----- + +.. class:: infomark + +**Authors** + +David Christiany, Lisa Peru, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR + +Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform + +This work has been partially funded through the French National Agency for Research (ANR) IFB project. + +Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. + + ]]></help> + <citations> + </citations> + +</tool>
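For orientation, the Cheetah `<command>` template at the top of this tool wrapper expands to ordinary command lines. Below is a sketch of two such expansions; the flags are those defined in `resource_building.py`, while `galaxy_params.json` is an illustrative stand-in for the JSON parameter file that Galaxy passes as `$output`.

```python
# Two example expansions of the <command> template; flags match resource_building.py.
# "galaxy_params.json" stands in for the $output JSON file that Galaxy provides, and it
# must already contain the parameter blob (see the sketch after resource_building.py)
# before either command could actually be executed, e.g. via subprocess.run(cmd, check=True).
id_mapping_cmd = ["python", "resource_building.py",
                  "--id_mapping=Human",
                  "--output", "galaxy_params.json"]

biogrid_cmd = ["python", "resource_building.py",
               "--interactome=biogrid", "--species=Mouse",
               "--output", "galaxy_params.json"]

print(" ".join(id_mapping_cmd))
print(" ".join(biogrid_cmd))
```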
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,152 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/resource_building.xml" id="resource_building"> + <data_table name="proteore_peptide_atlas"> + <output> + <column name="id"/> + <column name="name" /> + <column name="tissue" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}/${value}.tsv</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">peptide_atlas/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/peptide_atlas/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_protein_atlas_normal_tissue"> + <output> + <column name="id"/> + <column name="name" /> + <column name="tissue" /> + <column name="value" output_ref="output" > + <move type="file"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_protein_atlas_tumor_tissue"> + <output> + <column name="id"/> + <column name="name" /> + <column name="tissue" /> + <column name="value" output_ref="output" > + <move type="file"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_id_mapping_Human"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_id_mapping_Mouse"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_id_mapping_Rat"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_biogrid_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + 
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_bioplex_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_humap_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="species" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="proteore_nextprot_ref"> + <output> + <column name="id" /> + <column name="name" /> + <column name="value" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">proteore_nextprot_ref/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/proteore_nextprot_ref/${id}.tsv</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_biogrid_dictionaries.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,4 @@ +#id name species value +#biogrid_human_08-01-2019 Human (Homo sapiens) 08/01/2019 Human PPI_dictionaries/Human_biogrid.json +#biogrid_mouse_08-01-2019 Mouse (Mus musculus) 08/01/2019 Mouse PPI_dictionaries/Mouse_biogrid.json +#biogrid_rat_08-01-2019 Rat (Rattus norvegicus) 08/01/2019 Rat PPI_dictionaries/Rat_biogrid.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_bioplex_dictionaries.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,2 @@ +#id name species value +#bioplex_human_08-01-2019 Human (Homo sapiens) 08/01/2019 Human PPI_dictionaries/human_bioplex.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_humap_dictionaries.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,2 @@ +#id name species value +#humap_human_01-02-2019 Human (Homo sapiens) 01/02/19 Human PPI_dictionaries/Human_humap_01-02-2019.json
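The JSON files referenced by these PPI `.loc` samples are the dictionaries written by `PPI_ref_files()`; for BioGRID they hold a `network` dict (selected tab2 columns keyed by Entrez GeneID) and a `nodes` dict (Reactome pathway descriptions). A minimal reading sketch, with an illustrative file name and GeneID:

```python
import json

def load_ppi_dictionary(path):
    """Load a PPI dictionary written by PPI_ref_files()."""
    with open(path) as handle:
        return json.load(handle)

# Hypothetical usage for a BioGRID build (the date in the file name depends on the run):
# dico = load_ppi_dictionary("PPI_dictionaries/Human_biogrid_13-03-2019.json")
# dico["network"].get("672", [])   # interactions: selected BioGRID tab2 columns, keyed by GeneID
# dico["nodes"].get("672", [])     # Reactome pathway descriptions for that GeneID
```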
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_id_mapping_Human.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,3 @@ +#This file lists the locations of reference file for id_converter tool +#<id> <name> <value> <path> +#human_id_mapping_01-01-2018 Human (homo sapiens) human_id_mapping tool-data/human_id_mapping.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_id_mapping_Mouse.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,3 @@ +#This file lists the locations of reference file for id_converter tool +#<id> <name> <value> <path> +#mouse_id_mapping_01-01-2018 Mouse (Mus musculus) mouse_id_mapping tool-data/mouse_id_mapping.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_id_mapping_Rat.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,3 @@ +#This file lists the locations of reference file for id_converter tool +#<id> <name> <value> <path> +#rat_id_mapping_01-01-2018 Rat (Rattus norvegicus) rat_id_mapping tool-data/rat_id_mapping.tsv
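The ID-mapping reference files listed above are plain TSVs whose header is written by `id_mapping_sources()`. A small sketch of building a UniProt-AC to GeneID lookup from one of them; the file path and accession are illustrative only.

```python
import csv

def uniprot_to_geneid(path):
    """Map UniProt-AC to GeneID using the header written by id_mapping_sources()."""
    mapping = {}
    with open(path) as handle:
        for row in csv.DictReader(handle, delimiter="\t"):
            if row["GeneID"]:
                mapping[row["UniProt-AC"]] = row["GeneID"]
    return mapping

# Hypothetical usage once the data manager has run:
# mapping = uniprot_to_geneid("id_mapping/Human_id_mapping_13-03-2019.tsv")
# mapping.get("P31946")
```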
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_nextprot_ref.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,2 @@ +#<id> <name> <value> +#nextprot_ref_09-03-2019 neXtProt release 09-03-2019 tool-data/nextprot_ref_09-03-2019.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_protein_atlas_normal_tissue.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,4 @@ +#This file lists the locations name and values of reference files for Get expression data tool +#This is a tab separated file (TAB, not 4 spaces !) +#<id> <name> <tissue> <value> +#HPA_normal_tissue_19-07-2018 HPA normal tissue 19/07/2018 HPA_normal_tissue /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19159/dataset_39307_files/HPA_normal_tissue_19-07-2018.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/proteore_protein_atlas_tumor_tissue.loc.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,4 @@ +#This file lists the locations name and values of reference files for Get expression data tool +#This is a tab separated file (TAB, not 4 spaces !) +#<id> <name> <tissue> <value> +#HPA_pathology_19-07-2018 HPA pathology 19/07/2018 HPA_pathology /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19160/dataset_39308_files/HPA_pathology_19-07-2018.tsv
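All of these `.loc` files are tab-separated tables in which `#` lines are comments. Galaxy tools normally consume them through `<options from_data_table="...">`, but a direct parsing sketch like the one below (column order taken from `tool_data_table_conf.xml.sample`) can help when checking a freshly built entry by hand.

```python
import csv

def read_loc(path, columns=("id", "name", "tissue", "value")):
    """Return the non-comment rows of a .loc file as dicts."""
    entries = []
    with open(path) as handle:
        for row in csv.reader(handle, delimiter="\t"):
            if not row or row[0].startswith("#"):
                continue  # skip comment/header lines
            entries.append(dict(zip(columns, row)))
    return entries

# e.g. read_loc("tool-data/proteore_protein_atlas_normal_tissue.loc")
# or   read_loc("tool-data/proteore_id_mapping_Human.loc", columns=("id", "name", "species", "value"))
```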
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Mar 13 06:30:42 2019 -0400 @@ -0,0 +1,43 @@ +<?xml version="1.0"?> +<tables> + <table name='proteore_peptide_atlas' comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_peptide_atlas.loc"/> + </table> + <table name="proteore_protein_atlas_normal_tissue" comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_protein_atlas_normal_tissue.loc" /> + </table> + <table name="proteore_protein_atlas_tumor_tissue" comment_char="#"> + <columns>id, name, tissue, value</columns> + <file path="tool-data/proteore_protein_atlas_tumor_tissue.loc" /> + </table> + <table name="proteore_id_mapping_Human" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_id_mapping_Human.loc" /> + </table> + <table name="proteore_id_mapping_Mouse" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_id_mapping_Mouse.loc" /> + </table> + <table name="proteore_id_mapping_Rat" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_id_mapping_Rat.loc" /> + </table> + <table name="proteore_biogrid_dictionaries" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_biogrid_dictionaries.loc" /> + </table> + <table name="proteore_bioplex_dictionaries" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_bioplex_dictionaries.loc" /> + </table> + <table name="proteore_humap_dictionaries" comment_char="#"> + <columns>id, name, species, value</columns> + <file path="tool-data/proteore_humap_dictionaries.loc" /> + </table> + <table name='proteore_nextprot_ref' comment_char="#"> + <columns>id, name, value</columns> + <file path="tool-data/proteore_nextprot_ref.loc"/> + </table> +</tables>
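Finally, a small sanity-check sketch, assuming it is run from the repository root: it lists every table declared in `tool_data_table_conf.xml.sample` with its expected columns and reports whether the corresponding `.loc.sample` file ships with the repo.

```python
import os
import xml.etree.ElementTree as ET

def check_tables(conf_path="tool_data_table_conf.xml.sample", repo_root="."):
    """Print each declared table, its columns, and whether a .loc.sample ships with the repo."""
    for table in ET.parse(conf_path).getroot().findall("table"):
        name = table.get("name")
        columns = table.find("columns").text.strip()
        loc_path = table.find("file").get("path")          # e.g. tool-data/proteore_nextprot_ref.loc
        sample = os.path.join(repo_root, loc_path + ".sample")
        status = "OK" if os.path.exists(sample) else "MISSING"
        print(f"{name}: [{columns}] -> {sample} ({status})")

if __name__ == "__main__":
    check_tables()
```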