Mercurial > repos > yating-l > jbrowsearchivecreator
diff util/subtools.py @ 6:237707a6b74d draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit a500f7ab2119cc5faaf80393bd87428389d06880-dirty
author | yating-l |
---|---|
date | Thu, 15 Feb 2018 17:05:05 -0500 |
parents | |
children | 5d5fdcb798da |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/subtools.py Thu Feb 15 17:05:05 2018 -0500 @@ -0,0 +1,332 @@ +#!/usr/bin/env python + +""" +This file include common used functions for converting file format to gff3 +""" +from collections import OrderedDict +import json +import subprocess +import os +import sys +import tempfile +import string +import logging + +class PopenError(Exception): + def __init__(self, cmd, error, return_code): + self.cmd = cmd + self.error = error + self.return_code = return_code + + def __str__(self): + message = "The subprocess {0} has returned the error: {1}.".format( + self.cmd, self.return_code) + message = ','.join( + (message, "Its error message is: {0}".format(self.error))) + return repr(message) + + +def _handleExceptionAndCheckCall(array_call, **kwargs): + """ + This class handle exceptions and call the tool. + It maps the signature of subprocess.check_call: + See https://docs.python.org/2/library/subprocess.html#subprocess.check_call + """ + stdout = kwargs.get('stdout', subprocess.PIPE) + stderr = kwargs.get('stderr', subprocess.PIPE) + shell = kwargs.get('shell', False) + stdin = kwargs.get('stdin', None) + + cmd = array_call[0] + + output = None + error = None + + # TODO: Check the value of array_call and <=[0] + logging.debug("Calling {0}:".format(cmd)) + logging.debug("%s", array_call) + logging.debug("---------") + + # TODO: Use universal_newlines option from Popen? + try: + p = subprocess.Popen(array_call, stdout=stdout, + stderr=stderr, shell=shell, stdin=stdin) + + # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate + + output, error = p.communicate() + + if stdout == subprocess.PIPE: + logging.debug("\t{0}".format(output)) + else: + logging.debug("\tOutput in file {0}".format(stdout.name)) + # If we detect an error from the subprocess, then we raise an exception + # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process + # TODO: The responsability of returning a sys.exit() should not be there, but up in the app. + if p.returncode: + if stderr == subprocess.PIPE: + raise PopenError(cmd, error, p.returncode) + else: + # TODO: To Handle properly with a design behind, if we received a option as a file for the error + raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}" + .format(cmd, stderr.name, p.returncode)) + + except OSError as e: + message = "The subprocess {0} has encountered an OSError: {1}".format( + cmd, e.strerror) + if e.filename: + message = '\n'.join( + (message, ", against this file: {0}".format(e.filename))) + logging.error(message) + sys.exit(-1) + except PopenError as p: + message = "The subprocess {0} has returned the error: {1}.".format( + p.cmd, p.return_code) + message = '\n'.join( + (message, "Its error message is: {0}".format(p.error))) + + logging.exception(message) + + sys.exit(p.return_code) + except Exception as e: + message = "The subprocess {0} has encountered an unknown error: {1}".format( + cmd, e) + logging.exception(message) + + sys.exit(-1) + return p + + +def write_features(field, attribute, gff3): + """ + The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) + field, attribute are ordered dictionary + gff3 is the file handler + """ + attr = [] + for v in field.values(): + gff3.write(str(v) + '\t') + for k, v in attribute.items(): + s = str(k) + '=' + str(v) + attr.append(s) + gff3.write(';'.join(attr)) + gff3.write('\n') + +def twoBitInfo(two_bit_file_name, two_bit_info_file): + """ + Call twoBitInfo and write the result into twoBit_info_file + :param two_bit_file_name: + :param two_bit_info_file: + :return the subprocess.check_call return object: + """ + array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file] + p = _handleExceptionAndCheckCall(array_call) + return p + + +def faToTwoBit(fasta_file_name, twoBitFile): + """ + This function call faToTwoBit UCSC tool, and return the twoBitFile + :param fasta_file_name: + :param mySpecieFolder: + :return: + """ + + array_call = ['faToTwoBit', fasta_file_name, twoBitFile] + _handleExceptionAndCheckCall(array_call) + + return twoBitFile + +def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name): + """ + Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name + :param two_bit_info_file_name: + :param chrom_sizes_file_name: + :return: + """ + array_call = ['sort', '-k2rn', two_bit_info_file_name, + '-o', chrom_sizes_file_name] + p = _handleExceptionAndCheckCall(array_call) + return p + +def getChromSizes(reference, tool_dir): + #TODO: find a better way instead of shipping the two exec files with the tool + faToTwoBit = os.path.join(tool_dir, 'faToTwoBit') + twoBitInfo = os.path.join(tool_dir, 'twoBitInfo') + try: + twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) + chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False) + except IOError as err: + print "Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror) + try: + subprocess.call(['faToTwoBit', reference, twoBitFile.name]) + except OSError as err: + print "Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror) + try: + subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name]) + except OSError as err: + print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror) + return chrom_sizes + +def sequence_region(chrom_sizes): + """ + This function read from a chromatin size file generated by twoBitInfo and write the information to dict + return a dict + """ + f = open(chrom_sizes, 'r') + sizes = f.readlines() + sizes_dict = {} + for line in sizes: + chrom_info = line.rstrip().split('\t') + sizes_dict[chrom_info[0]] = chrom_info[1] + return sizes_dict + +def child_blocks(parent_field, parent_attr, gff3, child_type): + num = 0 + blockcount = int(parent_attr['blockcount']) + chromstart = parent_attr['chromstarts'].split(',') + blocksize = parent_attr['blocksizes'].split(',') + parent_start = parent_field['start'] + while num < blockcount: + child_attr = OrderedDict() + child_field = parent_field + child_field['type'] = child_type + child_field['start'] = int(chromstart[num]) + int(parent_start) + child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1 + child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1) + child_attr['Parent'] = parent_attr['ID'] + write_features(child_field, child_attr, gff3) + num = num + 1 + +def add_tracks_to_json(trackList_json, new_tracks, modify_type): + """ + Add to track configuration (trackList.json) + # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() + # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) + """ + with open(trackList_json, 'r+') as f: + data = json.load(f) + if modify_type == 'add_tracks': + data['tracks'].append(new_tracks) + elif modify_type == 'add_attr': + for k in new_tracks: + for track in data['tracks']: + if k.lower() in track['urlTemplate'].lower(): + attr = new_tracks[k] + for k, v in attr.items(): + track[k] = v + f.seek(0, 0) + f.write(json.dumps(data, separators=(',' , ':'), indent=4)) + f.truncate() + f.close() + + +def createBamIndex(bamfile): + subprocess.call(['samtools', 'index', bamfile]) + filename = bamfile + '.bai' + if os.path.exists(filename): + return filename + else: + raise ValueError('Did not find bai file') + +def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=True): + if "bed" in dataType: + fileType = "--bed" + elif "gff" in dataType: + fileType = "--gff" + else: + raise ValueError("%s is not a valid filetype for flatfile_to_json" % dataType) + + + array_call = ['flatfile-to-json.pl', + fileType, inputFile, + '--trackType', trackType, + '--trackLabel', trackLabel, + '--out', outputFolder] + if compress: + array_call.append('--compress') + if options: + config = options.get("config") + clientConfig = options.get("clientConfig") + renderClassName = options.get('renderClassName') + subfeatureClasses = options.get('subfeatureClasses') + load_type = options.get("type") + if clientConfig: + array_call.append('--clientConfig') + array_call.append(clientConfig) + if config: + array_call.append('--config') + array_call.append(config) + if load_type: + array_call.append('--type') + array_call.append(load_type) + if renderClassName: + array_call.append('--renderClassName') + array_call.append(renderClassName) + if subfeatureClasses: + array_call.append('--subfeatureClasses') + array_call.append(json.dumps(subfeatureClasses)) + + p = _handleExceptionAndCheckCall(array_call) + return p + +def bam_to_json(inputFile, trackLabel, outputFolder, options=None, compress=False): + + array_call = ['bam-to-json.pl', + '--bam', inputFile, + '--trackLabel', trackLabel, + '--out', outputFolder] + if compress: + array_call.append('--compress') + if options: + config = options.get('config') + clientConfig = options.get('clientConfig') + if clientConfig: + array_call.append('--clientConfig') + array_call.append(clientConfig) + if config: + array_call.append('--config') + array_call.append(config) + + p = _handleExceptionAndCheckCall(array_call) + return p + +def add_track_json(trackList, track_json): + track_json = json.dumps(track_json) + new_track = subprocess.Popen(['echo', track_json], stdout=subprocess.PIPE) + p = subprocess.call(['add-track-json.pl', trackList], stdin=new_track.stdout) + return p + +def prepare_refseqs(fasta_file_name, outputFolder): + array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder] + p = _handleExceptionAndCheckCall(array_call) + return p + +def generate_names(outputFolder): + array_call = ['generate-names.pl', '-v', '--out', outputFolder] + p = _handleExceptionAndCheckCall(array_call) + return p + +def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None): + """ + Call validateFiles on input_file, using chrom_sizes_file_name and file_type + :param input_file: + :param chrom_sizes_file_name: + :param file_type: + :return: + """ + + array_call = ['validateFiles', '-chromInfo=' + chrom_sizes_file_name, '-type='+ file_type, input_file] + if options: + tab = options.get("tab") + autoSql = options.get("autoSql") + logging.debug("tab: {0}".format(tab)) + logging.debug("autoSql: {0}".format(autoSql)) + if autoSql: + autoSql = ''.join(['-as=', autoSql]) + array_call.append(autoSql) + if tab: + array_call.append('-tab') + p = _handleExceptionAndCheckCall(array_call) + return p +