changeset 10:c6fbc5421697 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpeff commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"
author | iuc |
---|---|
date | Sun, 22 Nov 2020 12:53:42 +0000 |
parents | 08d7998c3afb |
children | def511e8e005 |
files | data_manager/data_manager_snpEff_databases.py data_manager/data_manager_snpEff_download.py |
diffstat | 2 files changed, 34 insertions(+), 41 deletions(-) |
--- a/data_manager/data_manager_snpEff_databases.py	Wed Feb 12 18:33:53 2020 -0500
+++ b/data_manager/data_manager_snpEff_databases.py	Sun Nov 22 12:53:42 2020 +0000
@@ -10,17 +10,16 @@
     if not os.path.exists(target_directory):
         os.makedirs(target_directory)
     databases_path = os.path.join(target_directory, 'databases.out')
-    databases_output = open(databases_path, 'w')
     args = ['snpEff', 'databases']
-    return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
+    with open(databases_path, 'w') as databases_output:
+        return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
     if return_code:
         sys.exit(return_code)
-    databases_output.close()
     data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
     data_manager_dict['data_tables']['snpeffv_databases'] = data_manager_dict['data_tables'].get('snpeffv_databases', [])
     data_table_entries = []
     with open(databases_path, 'r') as fh:
-        for i, line in enumerate(fh):
+        for line in fh:
             fields = line.split('\t')
             if len(fields) >= 2:
                 genome_version = fields[0].strip()
@@ -41,7 +40,8 @@
     filename = args[0]
 
-    params = json.loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     data_manager_dict = {}
 
@@ -50,7 +50,8 @@
     data_manager_dict = fetch_databases(data_manager_dict, target_directory)
 
     # save info to json file
-    open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":
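The hunks above replace manual open()/close() pairs with context managers and swap json.loads(open(...).read()) for json.load/json.dump on an open file handle. Below is a minimal standalone sketch of that pattern, assuming snpEff is on PATH; the function name list_databases and the RuntimeError are illustrative and not part of the data manager.

# A sketch of the context-manager pattern adopted above; `snpEff` must be on
# PATH, and the function/exception names are illustrative only.
import subprocess


def list_databases(databases_path='databases.out'):
    # Redirect the stdout of `snpEff databases` into a file that is closed
    # automatically, even if subprocess.call raises.
    with open(databases_path, 'w') as databases_output:
        return_code = subprocess.call(['snpEff', 'databases'], shell=False,
                                      stdout=databases_output.fileno())
    if return_code:
        raise RuntimeError('snpEff databases exited with code %d' % return_code)
    entries = []
    with open(databases_path) as fh:
        for line in fh:
            fields = line.split('\t')
            if len(fields) >= 2:
                # First two tab-separated columns: genome version, description.
                # (Header/separator lines are not filtered in this sketch.)
                entries.append((fields[0].strip(), fields[1].strip()))
    return entries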
--- a/data_manager/data_manager_snpEff_download.py	Wed Feb 12 18:33:53 2020 -0500
+++ b/data_manager/data_manager_snpEff_download.py	Sun Nov 22 12:53:42 2020 +0000
@@ -7,36 +7,28 @@
 import sys
 
 
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-
 def fetch_databases(genome_list=None):
     snpDBs = dict()
     databases_path = 'databases.out'
-    databases_output = open(databases_path, 'w')
     args = ['snpEff', 'databases']
-    return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
+    with open(databases_path, 'w') as databases_output:
+        return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
     if return_code:
         sys.exit(return_code)
-    databases_output.close()
     try:
-        fh = open(databases_path, 'r')
-        for i, line in enumerate(fh):
-            fields = line.split('\t')
-            if len(fields) >= 2:
-                genome_version = fields[0].strip()
-                if genome_list and genome_version not in genome_list:
-                    continue
-                if genome_version.startswith("Genome") or genome_version.startswith("-"):
-                    continue
-                description = fields[1].strip()
-                snpDBs[genome_version] = description
+        with open(databases_path, 'r') as fh:
+            for line in fh:
+                fields = line.split('\t')
+                if len(fields) >= 2:
+                    genome_version = fields[0].strip()
+                    if genome_list and genome_version not in genome_list:
+                        continue
+                    if genome_version.startswith("Genome") or genome_version.startswith("-"):
+                        continue
+                    description = fields[1].strip()
+                    snpDBs[genome_version] = description
     except Exception as e:
-        stop_err('Error parsing %s %s\n' % (databases_path, str(e)))
-    else:
-        fh.close()
+        sys.exit('Error parsing %s %s\n' % (databases_path, str(e)))
     return snpDBs
 
 
@@ -55,19 +47,17 @@
 def getSnpeffVersion():
     snpeff_version = 'SnpEff ?.?'
     stderr_path = 'snpeff.err'
-    stderr_fh = open(stderr_path, 'w')
     args = ['snpEff', '-h']
-    return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
+    with open(stderr_path, 'w') as stderr_fh:
+        return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
     if return_code != 255:
         sys.exit(return_code)
-    stderr_fh.close()
-    fh = open(stderr_path, 'r')
-    for line in fh:
-        m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
-        if m:
-            snpeff_version = m.groups()[0] + m.groups()[1]
-            break
-    fh.close()
+    with open(stderr_path) as fh:
+        for line in fh:
+            m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
+            if m:
+                snpeff_version = m.groups()[0] + m.groups()[1]
+                break
     return snpeff_version
 
 
@@ -97,7 +87,7 @@
     snpeff_version = getSnpeffVersion()
     key = snpeff_version + '_' + genome_version
     if os.path.isdir(genome_path):
-        for root, dirs, files in os.walk(genome_path):
+        for _, _, files in os.walk(genome_path):
             for fname in files:
                 if fname.startswith('snpEffectPredictor'):
                     # if snpEffectPredictor.bin download succeeded
@@ -128,7 +118,8 @@
     filename = args[0]
 
-    params = json.loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     data_manager_dict = {}
 
@@ -138,7 +129,8 @@
     download_database(data_manager_dict, target_directory, genome_version, organism)
 
     # save info to json file
-    open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":
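The second hunk rewrites the version probe: `snpEff -h` exits with status 255 and prints its banner to stderr, which the script captures in a scratch file and parses with a regular expression. A self-contained sketch of that pattern under the same assumptions follows; the scratch-file default and the RuntimeError are illustrative, not part of the script.

# A sketch of the version-probe pattern from getSnpeffVersion(); assumes
# `snpEff` is on PATH and that `snpEff -h` exits 255 with the banner on stderr.
import re
import subprocess


def get_snpeff_version(stderr_path='snpeff.err'):
    snpeff_version = 'SnpEff ?.?'
    # Capture the help/usage banner, which snpEff writes to stderr.
    with open(stderr_path, 'w') as stderr_fh:
        return_code = subprocess.call(['snpEff', '-h'], shell=False,
                                      stderr=stderr_fh.fileno())
    if return_code != 255:
        raise RuntimeError('unexpected snpEff exit status %d' % return_code)
    with open(stderr_path) as fh:
        for line in fh:
            # Same pattern the script uses, e.g. "SnpEff version SnpEff 4.3 ...".
            m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
            if m:
                snpeff_version = m.groups()[0] + m.groups()[1]
                break
    return snpeff_version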