Mercurial > repos > cpt > cpt_search_file
view editDB.py @ 1:6e3a843b6304 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:53:18 +0000 |
parents | |
children |
line wrap: on
line source
# Remove duplicate terms, as well as add any that are needed.
import explodeJSON as ej
from explodeJSON import save_dict_to_json


### create new key
def add_new_key(db, add_key=None):
    """Create an empty-list entry in ``db`` for every key in ``add_key``.

    Args:
        db: Mapping to modify in place.
        add_key: Iterable of keys to create. ``None`` (the default) adds
            nothing.

    Returns:
        The same ``db`` object that was passed in.

    Note:
        A key that already exists is reset to an empty list.
    """
    # ``None`` stands in for "no keys" so the signature carries no mutable
    # default argument.
    for new_key in add_key or ():
        db[new_key] = []
    return db


### Add values to dbase:
def add_value_to_term(index_val, db, add_value=None):
    """Append every item of ``add_value`` to the collection at ``db[index_val]``.

    Args:
        index_val: Key in ``db`` whose collection receives the values.
        db: Mapping to modify in place.
        add_value: Iterable of values to append. ``None`` (the default)
            appends nothing.

    Returns:
        The same ``db`` object that was passed in.

    Raises:
        KeyError: If there is at least one value to add and ``index_val``
            is not a key of ``db``.
    """
    # Values are appended as given; no de-duplication is performed here.
    for val in add_value or ():
        db[index_val].append(val)
    return db


### Remove values from dbase:
def remove_value_from_term(index_val, db, remove_value=None):
    """Remove every item of ``remove_value`` from ``db[index_val]``.

    Args:
        index_val: Key in ``db`` whose collection is trimmed.
        db: Mapping to modify in place.
        remove_value: Iterable of values to remove. ``None`` (the default)
            removes nothing.

    Returns:
        The same ``db`` object that was passed in.

    Note:
        Each value is handed to the collection's ``remove`` method. If the
        collection is a ``list`` (presumably the case; confirm against the
        JSON schema), only the first occurrence is dropped and a missing
        value raises ``ValueError``.
    """
    for val in remove_value or ():
        db[index_val].remove(val)
    return db


### Terms to add from a file
def add_from_file(input_file, index_val, db, sep="\n"):
    """Read terms from ``input_file`` and append them to ``db[index_val]``.

    The file is read in full and split on line boundaries with
    ``str.splitlines``; each resulting line becomes one term.

    Args:
        input_file: Path of the text file holding one term per line.
        index_val: Key in ``db`` whose collection receives the terms.
        db: Mapping to modify in place.
        sep: Accepted for interface compatibility but currently unused;
            the file is always split on line boundaries.

    Returns:
        The same ``db`` object, altered.
    """
    # The context manager guarantees the handle is closed even if reading
    # raises; the previous bare ``open(...).read()`` left it to the GC.
    with open(input_file) as handle:
        terms = handle.read().splitlines()
    db = add_value_to_term(index_val, db, terms)
    return db


if __name__ == "__main__":
    lysis_json = "data/lysis-family-v1.0.2.json"  # insert json of choice
    db = ej.explodeJSON(lysis_json)
    db = db.readJSON()
    # revise_db = add_new_key(db=db,add_key=["spanins"])
    # files = ["data/term_additions/200505_holin_domains.txt","data/term_additions/200505_Spanin_Domains.txt"]
    terms = [
        "DUF2570",
        "PF10828",
        "IPR022538",
        "DUF2514",
        "PF10721",
        "IPR019659",
        "DUF2681",
        "PF10883",
        "IPR020274",
    ]
    # revise_db = add_from_file(files[0],"holin_domains",revise_db)
    # revise_db = add_from_file(files[1],"spanin_domains",revise_db)
    revise_db = add_value_to_term("spanin_domains", db, add_value=terms)
    save_dict_to_json(obj=revise_db, filename="data/lysis-family-v1.0.3.json")