Mercurial > repos > rmarenco > hubarchivecreator
diff util/subtools.py @ 29:7e8a8b732db3 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1a81ebd0ddea950b84af3fc830e9267a4814b29f
author | yating-l |
---|---|
date | Wed, 16 May 2018 18:04:20 -0400 |
parents | df42241d3731 |
children |
line wrap: on
line diff
--- a/util/subtools.py Mon Jul 10 17:08:38 2017 -0400 +++ b/util/subtools.py Wed May 16 18:04:20 2018 -0400 @@ -13,6 +13,7 @@ import string import tempfile + class PopenError(Exception): def __init__(self, cmd, error, return_code): self.cmd = cmd @@ -20,10 +21,13 @@ self.return_code = return_code def __str__(self): - message = "The subprocess {0} has returned the error: {1}.".format(self.cmd, self.return_code) - message = ','.join((message, "Its error message is: {0}".format(self.error))) + message = "The subprocess {0} has returned the error: {1}.".format( + self.cmd, self.return_code) + message = ','.join( + (message, "Its error message is: {0}".format(self.error))) return repr(message) + def _handleExceptionAndCheckCall(array_call, **kwargs): """ This class handle exceptions and call the tool. @@ -41,12 +45,13 @@ # TODO: Check the value of array_call and <=[0] logging.debug("Calling {0}:".format(cmd)) - + logging.debug("%s", array_call) logging.debug("---------") # TODO: Use universal_newlines option from Popen? try: - p = subprocess.Popen(array_call, stdout=stdout, stderr=stderr, shell=shell) + p = subprocess.Popen(array_call, stdout=stdout, + stderr=stderr, shell=shell) # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate @@ -64,29 +69,35 @@ raise PopenError(cmd, error, p.returncode) else: # TODO: To Handle properly with a design behind, if we received a option as a file for the error - raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"\ + raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}" .format(cmd, stderr.name, p.returncode)) except OSError as e: - message = "The subprocess {0} has encountered an OSError: {1}".format(cmd, e.strerror) + message = "The subprocess {0} has encountered an OSError: {1}".format( + cmd, e.strerror) if e.filename: - message = '\n'.join((message, ", against this file: {0}".format(e.filename))) + message = '\n'.join( + (message, ", against this file: {0}".format(e.filename))) logging.error(message) sys.exit(-1) except PopenError as p: - message = "The subprocess {0} has returned the error: {1}.".format(p.cmd, p.return_code) - message = '\n'.join((message, "Its error message is: {0}".format(p.error))) + message = "The subprocess {0} has returned the error: {1}.".format( + p.cmd, p.return_code) + message = '\n'.join( + (message, "Its error message is: {0}".format(p.error))) logging.exception(message) sys.exit(p.return_code) except Exception as e: - message = "The subprocess {0} has encountered an unknown error: {1}".format(cmd, e) + message = "The subprocess {0} has encountered an unknown error: {1}".format( + cmd, e) logging.exception(message) sys.exit(-1) return p + def twoBitInfo(two_bit_file_name, two_bit_info_file): """ Call twoBitInfo and write the result into twoBit_info_file @@ -98,6 +109,7 @@ p = _handleExceptionAndCheckCall(array_call) return p + def faToTwoBit(fasta_file_name, twoBitFile): """ This function call faToTwoBit UCSC tool, and return the twoBitFile @@ -111,6 +123,7 @@ return twoBitFile + def gtfToGenePred(input_gtf_file_name, gene_pred_file_name): """ Call gtfToGenePred and write the result into gene_pred_file_name @@ -122,6 +135,7 @@ p = _handleExceptionAndCheckCall(array_call) return p + def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name): """ Call gff3ToGenePred and write the result into gene_pred_file_name @@ -129,12 +143,11 @@ :param gene_pred_file_name: :return: """ - valid_gff3_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".gff3") - validateGff(input_gff3_file_name, valid_gff3_file.name) - array_call = ['gff3ToGenePred', valid_gff3_file.name, gene_pred_file_name] + array_call = ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name] p = _handleExceptionAndCheckCall(array_call) return p + def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name): """ Call genePredToBigGenePred and write the result into unsorted_bigGenePred_file_name @@ -148,6 +161,7 @@ p = _handleExceptionAndCheckCall(array_call) return p + def genePredToBed(gene_pred_file_name, unsorted_bed_file_name): """ Call genePredToBed and write the result into unsorted_bed_file_name @@ -159,6 +173,7 @@ p = _handleExceptionAndCheckCall(array_call) return p + def sort(unsorted_bed_file_name, sorted_bed_file_name): """ Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name @@ -166,10 +181,12 @@ :param sorted_bed_file_name: :return: """ - array_call = ['sort', '-k', '1,1', '-k', '2,2n', unsorted_bed_file_name, '-o', sorted_bed_file_name] + array_call = ['sort', '-k', '1,1', '-k', '2,2n', + unsorted_bed_file_name, '-o', sorted_bed_file_name] p = _handleExceptionAndCheckCall(array_call) return p + def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name): """ Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name @@ -177,12 +194,13 @@ :param chrom_sizes_file_name: :return: """ - array_call = ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name] + array_call = ['sort', '-k2rn', two_bit_info_file_name, + '-o', chrom_sizes_file_name] p = _handleExceptionAndCheckCall(array_call) return p -def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, - typeOption=None, autoSql=None, tab=False, extraIndex=None): + +def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, options=None): """ Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name :param sorted_bed_file_name: @@ -191,28 +209,24 @@ :return: """ - # TODO: Move this into the _handleExceptionAndCheckCall function - # Parse the array - logging.debug("sorted_bed_file_name: {0}".format(sorted_bed_file_name)) - logging.debug("chrom_sizes_file_name: {0}".format(chrom_sizes_file_name)) - logging.debug("big_bed_file_name: {0}".format(big_bed_file_name)) - logging.debug("typeOption: {0}".format(typeOption)) - logging.debug("autoSql: {0}".format(autoSql)) - logging.debug("tab option: {0}".format(tab)) - - array_call = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name] - if typeOption: - typeOption = ''.join(['-type=', typeOption]) - array_call.append(typeOption) - if autoSql: - autoSql = ''.join(['-as=', autoSql]) - array_call.append(autoSql) - if tab: - array_call.append('-tab') - if extraIndex: - index = ''.join(['-extraIndex=', extraIndex]) - array_call.append(index) - + array_call = ['bedToBigBed', sorted_bed_file_name, + chrom_sizes_file_name, big_bed_file_name] + if options: + typeOption = options.get("typeOption") + autoSql = options.get("autoSql") + tab = options.get("tab") + extraIndex = options.get("extraIndex") + if typeOption: + typeOption = ''.join(['-type=', typeOption]) + array_call.append(typeOption) + if autoSql: + autoSql = ''.join(['-as=', autoSql]) + array_call.append(autoSql) + if tab: + array_call.append('-tab') + if extraIndex: + index = ''.join(['-extraIndex=', extraIndex]) + array_call.append(index) p = _handleExceptionAndCheckCall(array_call) return p @@ -223,10 +237,12 @@ :param output_sorted_bam_name: :return: """ - array_call = ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name] + array_call = ['samtools', 'sort', + input_bam_file_name, '-o', output_sorted_bam_name] p = _handleExceptionAndCheckCall(array_call) return p + def createBamIndex(input_sorted_bam_file_name, output_name_index_name): """ Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name @@ -234,10 +250,12 @@ :param output_name_index_name: :return: """ - array_call = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name] + array_call = ['samtools', 'index', + input_sorted_bam_file_name, output_name_index_name] p = _handleExceptionAndCheckCall(array_call) return p + def pslToBigPsl(input_psl_file_name, output_bed12_file_name): """ Call `pslToBigPsl` on input_psl_file_name and output the result in output_bed12_file_name @@ -251,30 +269,44 @@ p = _handleExceptionAndCheckCall(array_call) return p -#santitize trackName. Because track name must begin with a letter and +# santitize trackName. Because track name must begin with a letter and # contain only the following chars: [a-zA-Z0-9_]. # See the "track" Common settings at: -#https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments -def fixName(filename): - if filename == 'cytoBandIdeo': - return filename - valid_chars = "_%s%s" % (string.ascii_letters, string.digits) - sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename]) - sanitize_name = "gonramp_" + sanitize_name - return sanitize_name +# https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments -def validateGff(orig_gff3, valid_gff3): +def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None): + """ + Call validateFiles on input_file, using chrom_sizes_file_name and file_type + :param input_file: + :param chrom_sizes_file_name: + :param file_type: + :return: """ - Remove extra meta line: ##gff-version 3 + + array_call = ['validateFiles', '-chromInfo=' + chrom_sizes_file_name, '-type='+ file_type, input_file] + if options: + tab = options.get("tab") + autoSql = options.get("autoSql") + logging.debug("tab: {0}".format(tab)) + logging.debug("autoSql: {0}".format(autoSql)) + if autoSql: + autoSql = ''.join(['-as=', autoSql]) + array_call.append(autoSql) + if tab: + array_call.append('-tab') + p = _handleExceptionAndCheckCall(array_call) + return p + +def pslCheck(input_file, options=None): """ - valid = open(valid_gff3, 'w') - num = 0 - with open(orig_gff3, 'r') as f: - for line in f: - if '##gff-version 3' in line: - if num == 0: - num += 1 - else: - continue - valid.write(line) - + Call pslCheck on input_file + :param input_file: + :return: + """ + + array_call = ['pslCheck', input_file] + p = _handleExceptionAndCheckCall(array_call) + return p + + +