Mercurial > repos > yating-l > jbrowsearchivecreator
comparison util/subtools.py @ 6:237707a6b74d draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit a500f7ab2119cc5faaf80393bd87428389d06880-dirty
author | yating-l |
---|---|
date | Thu, 15 Feb 2018 17:05:05 -0500 |
parents | |
children | 5d5fdcb798da |
comparison
equal
deleted
inserted
replaced
5:e762f4b9e4bd | 6:237707a6b74d |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 This file include common used functions for converting file format to gff3 | |
5 """ | |
6 from collections import OrderedDict | |
7 import json | |
8 import subprocess | |
9 import os | |
10 import sys | |
11 import tempfile | |
12 import string | |
13 import logging | |
14 | |
class PopenError(Exception):
    """Raised when a subprocess launched through Popen exits with a non-zero code."""

    def __init__(self, cmd, error, return_code):
        # cmd: the executable name, error: captured stderr text,
        # return_code: the process exit status.
        self.cmd = cmd
        self.error = error
        self.return_code = return_code

    def __str__(self):
        parts = (
            "The subprocess {0} has returned the error: {1}.".format(
                self.cmd, self.return_code),
            "Its error message is: {0}".format(self.error),
        )
        return repr(','.join(parts))
27 | |
28 | |
29 def _handleExceptionAndCheckCall(array_call, **kwargs): | |
30 """ | |
31 This class handle exceptions and call the tool. | |
32 It maps the signature of subprocess.check_call: | |
33 See https://docs.python.org/2/library/subprocess.html#subprocess.check_call | |
34 """ | |
35 stdout = kwargs.get('stdout', subprocess.PIPE) | |
36 stderr = kwargs.get('stderr', subprocess.PIPE) | |
37 shell = kwargs.get('shell', False) | |
38 stdin = kwargs.get('stdin', None) | |
39 | |
40 cmd = array_call[0] | |
41 | |
42 output = None | |
43 error = None | |
44 | |
45 # TODO: Check the value of array_call and <=[0] | |
46 logging.debug("Calling {0}:".format(cmd)) | |
47 logging.debug("%s", array_call) | |
48 logging.debug("---------") | |
49 | |
50 # TODO: Use universal_newlines option from Popen? | |
51 try: | |
52 p = subprocess.Popen(array_call, stdout=stdout, | |
53 stderr=stderr, shell=shell, stdin=stdin) | |
54 | |
55 # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate | |
56 | |
57 output, error = p.communicate() | |
58 | |
59 if stdout == subprocess.PIPE: | |
60 logging.debug("\t{0}".format(output)) | |
61 else: | |
62 logging.debug("\tOutput in file {0}".format(stdout.name)) | |
63 # If we detect an error from the subprocess, then we raise an exception | |
64 # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process | |
65 # TODO: The responsability of returning a sys.exit() should not be there, but up in the app. | |
66 if p.returncode: | |
67 if stderr == subprocess.PIPE: | |
68 raise PopenError(cmd, error, p.returncode) | |
69 else: | |
70 # TODO: To Handle properly with a design behind, if we received a option as a file for the error | |
71 raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}" | |
72 .format(cmd, stderr.name, p.returncode)) | |
73 | |
74 except OSError as e: | |
75 message = "The subprocess {0} has encountered an OSError: {1}".format( | |
76 cmd, e.strerror) | |
77 if e.filename: | |
78 message = '\n'.join( | |
79 (message, ", against this file: {0}".format(e.filename))) | |
80 logging.error(message) | |
81 sys.exit(-1) | |
82 except PopenError as p: | |
83 message = "The subprocess {0} has returned the error: {1}.".format( | |
84 p.cmd, p.return_code) | |
85 message = '\n'.join( | |
86 (message, "Its error message is: {0}".format(p.error))) | |
87 | |
88 logging.exception(message) | |
89 | |
90 sys.exit(p.return_code) | |
91 except Exception as e: | |
92 message = "The subprocess {0} has encountered an unknown error: {1}".format( | |
93 cmd, e) | |
94 logging.exception(message) | |
95 | |
96 sys.exit(-1) | |
97 return p | |
98 | |
99 | |
def write_features(field, attribute, gff3):
    """
    Write a single feature line in gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
    field, attribute are ordered dictionaries
    gff3 is the file handler
    """
    # Tab-separated fixed columns first, then the ';'-joined attribute column.
    for column_value in field.values():
        gff3.write(str(column_value) + '\t')
    pairs = [str(name) + '=' + str(value) for name, value in attribute.items()]
    gff3.write(';'.join(pairs))
    gff3.write('\n')
114 | |
def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Call twoBitInfo and write the result into twoBit_info_file
    :param two_bit_file_name:
    :param two_bit_info_file:
    :return the subprocess.check_call return object:
    """
    return _handleExceptionAndCheckCall(
        ['twoBitInfo', two_bit_file_name, two_bit_info_file])
125 | |
126 | |
def faToTwoBit(fasta_file_name, twoBitFile):
    """
    This function call faToTwoBit UCSC tool, and return the twoBitFile
    :param fasta_file_name: input FASTA file path
    :param twoBitFile: output .2bit file path
    :return: the twoBitFile path
    """
    cmd = ['faToTwoBit', fasta_file_name, twoBitFile]
    _handleExceptionAndCheckCall(cmd)
    return twoBitFile
139 | |
def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name
    :param two_bit_info_file_name:
    :param chrom_sizes_file_name:
    :return:
    """
    cmd = ['sort', '-k2rn', two_bit_info_file_name,
           '-o', chrom_sizes_file_name]
    return _handleExceptionAndCheckCall(cmd)
151 | |
def getChromSizes(reference, tool_dir):
    """
    Build a chrom.sizes tempfile for a FASTA reference by running
    faToTwoBit then twoBitInfo.

    :param reference: path to the reference FASTA file
    :param tool_dir: directory shipping the faToTwoBit/twoBitInfo binaries
        (currently unused: the tools are resolved through PATH instead)
    :return: the open NamedTemporaryFile holding the chrom.sizes data
    """
    # TODO: find a better way instead of shipping the two exec files with the tool
    try:
        # NOTE(review): bufsize is the Python-2 spelling; Python 3 renamed it
        # to 'buffering' — confirm the target interpreter before porting.
        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False)
    except IOError as err:
        # Fixed: print statements were Python-2-only syntax; single-argument
        # print() calls behave identically on both interpreters.
        print("Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror))
    try:
        subprocess.call(['faToTwoBit', reference, twoBitFile.name])
    except OSError as err:
        print("Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror))
    try:
        subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name])
    except OSError as err:
        print("Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror))
    return chrom_sizes
170 | |
def sequence_region(chrom_sizes):
    """
    Read a chromosome size file generated by twoBitInfo into a dict.

    :param chrom_sizes: path to a tab-separated "name<TAB>size" file
    :return: dict mapping chromosome name -> size (kept as a string)
    """
    sizes_dict = {}
    # 'with' guarantees the handle is closed (the original leaked it), and
    # iterating the handle avoids loading the whole file via readlines().
    with open(chrom_sizes, 'r') as handle:
        for line in handle:
            chrom_info = line.rstrip().split('\t')
            sizes_dict[chrom_info[0]] = chrom_info[1]
    return sizes_dict
183 | |
def child_blocks(parent_field, parent_attr, gff3, child_type):
    """
    Write one child feature per BED block beneath a parent feature.

    :param parent_field: ordered dict of the parent's gff3 columns
    :param parent_attr: ordered dict holding 'ID', 'blockcount',
        'chromstarts' and 'blocksizes' (the latter two comma-separated)
    :param gff3: open file handle the features are written to
    :param child_type: gff3 'type' column value for the child features
    """
    blockcount = int(parent_attr['blockcount'])
    chromstart = parent_attr['chromstarts'].split(',')
    blocksize = parent_attr['blocksizes'].split(',')
    parent_start = parent_field['start']
    for num in range(blockcount):
        child_attr = OrderedDict()
        # Fixed: the original aliased parent_field here, so writing the child
        # columns clobbered the caller's dict (type/start/end) as a side
        # effect; the emitted lines themselves are unchanged by copying.
        child_field = OrderedDict(parent_field)
        child_field['type'] = child_type
        child_field['start'] = int(chromstart[num]) + int(parent_start)
        child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
        child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num + 1)
        child_attr['Parent'] = parent_attr['ID']
        write_features(child_field, child_attr, gff3)
200 | |
201 def add_tracks_to_json(trackList_json, new_tracks, modify_type): | |
202 """ | |
203 Add to track configuration (trackList.json) | |
204 # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() | |
205 # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) | |
206 """ | |
207 with open(trackList_json, 'r+') as f: | |
208 data = json.load(f) | |
209 if modify_type == 'add_tracks': | |
210 data['tracks'].append(new_tracks) | |
211 elif modify_type == 'add_attr': | |
212 for k in new_tracks: | |
213 for track in data['tracks']: | |
214 if k.lower() in track['urlTemplate'].lower(): | |
215 attr = new_tracks[k] | |
216 for k, v in attr.items(): | |
217 track[k] = v | |
218 f.seek(0, 0) | |
219 f.write(json.dumps(data, separators=(',' , ':'), indent=4)) | |
220 f.truncate() | |
221 f.close() | |
222 | |
223 | |
def createBamIndex(bamfile):
    """Index a BAM file with 'samtools index' and return the .bai path.

    :raises ValueError: if samtools did not produce the expected index file
    """
    subprocess.call(['samtools', 'index', bamfile])
    index_path = bamfile + '.bai'
    if not os.path.exists(index_path):
        raise ValueError('Did not find bai file')
    return index_path
231 | |
def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=True):
    """
    Load a bed/gff flat file into JBrowse via flatfile-to-json.pl.

    :param inputFile: path to the bed or gff input file
    :param dataType: string containing "bed" or "gff" to pick the input flag
    :param trackType: JBrowse track type passed to --trackType
    :param trackLabel: track label passed to --trackLabel
    :param outputFolder: JBrowse data directory passed to --out
    :param options: optional dict with 'config', 'clientConfig', 'type',
        'renderClassName' and 'subfeatureClasses' entries
    :param compress: when True, add --compress
    :return: the finished Popen object from _handleExceptionAndCheckCall
    :raises ValueError: if dataType is neither bed nor gff
    """
    if "bed" in dataType:
        input_flag = "--bed"
    elif "gff" in dataType:
        input_flag = "--gff"
    else:
        raise ValueError("%s is not a valid filetype for flatfile_to_json" % dataType)

    cmd = ['flatfile-to-json.pl',
           input_flag, inputFile,
           '--trackType', trackType,
           '--trackLabel', trackLabel,
           '--out', outputFolder]
    if compress:
        cmd.append('--compress')
    if options:
        # Map option keys to their flags, preserving the original flag order;
        # subfeatureClasses is a dict and must be JSON-encoded on the command line.
        for opt_key, flag in (('clientConfig', '--clientConfig'),
                              ('config', '--config'),
                              ('type', '--type'),
                              ('renderClassName', '--renderClassName'),
                              ('subfeatureClasses', '--subfeatureClasses')):
            value = options.get(opt_key)
            if value:
                if opt_key == 'subfeatureClasses':
                    value = json.dumps(value)
                cmd.extend([flag, value])

    return _handleExceptionAndCheckCall(cmd)
272 | |
def bam_to_json(inputFile, trackLabel, outputFolder, options=None, compress=False):
    """
    Load a BAM file into JBrowse via bam-to-json.pl.

    :param inputFile: path to the BAM file
    :param trackLabel: track label passed to --trackLabel
    :param outputFolder: JBrowse data directory passed to --out
    :param options: optional dict with 'config' and 'clientConfig' entries
    :param compress: when True, add --compress
    :return: the finished Popen object from _handleExceptionAndCheckCall
    """
    cmd = ['bam-to-json.pl',
           '--bam', inputFile,
           '--trackLabel', trackLabel,
           '--out', outputFolder]
    if compress:
        cmd.append('--compress')
    if options:
        client_config = options.get('clientConfig')
        track_config = options.get('config')
        if client_config:
            cmd.extend(['--clientConfig', client_config])
        if track_config:
            cmd.extend(['--config', track_config])

    return _handleExceptionAndCheckCall(cmd)
293 | |
def add_track_json(trackList, track_json):
    """
    Append a track definition to trackList.json via add-track-json.pl.

    :param trackList: path to the trackList.json file
    :param track_json: track definition (dict) to serialize and add
    :return: add-track-json.pl's exit code
    """
    payload = json.dumps(track_json)
    # Feed the JSON straight into the tool's stdin instead of spawning an
    # extra 'echo' process whose pipe handle was never closed.
    p = subprocess.Popen(['add-track-json.pl', trackList], stdin=subprocess.PIPE)
    p.communicate(payload.encode('utf-8'))
    return p.returncode
299 | |
def prepare_refseqs(fasta_file_name, outputFolder):
    """Load a FASTA reference into JBrowse via prepare-refseqs.pl and return the Popen object."""
    cmd = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder]
    return _handleExceptionAndCheckCall(cmd)
304 | |
def generate_names(outputFolder):
    """Build the JBrowse name-search index via generate-names.pl and return the Popen object."""
    cmd = ['generate-names.pl', '-v', '--out', outputFolder]
    return _handleExceptionAndCheckCall(cmd)
309 | |
def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None):
    """
    Call validateFiles on input_file, using chrom_sizes_file_name and file_type
    :param input_file:
    :param chrom_sizes_file_name:
    :param file_type:
    :param options: optional dict with 'tab' (bool) and 'autoSql' (path) entries
    :return:
    """
    cmd = ['validateFiles',
           '-chromInfo=' + chrom_sizes_file_name,
           '-type=' + file_type,
           input_file]
    if options:
        tab = options.get("tab")
        autoSql = options.get("autoSql")
        logging.debug("tab: {0}".format(tab))
        logging.debug("autoSql: {0}".format(autoSql))
        if autoSql:
            cmd.append('-as=' + autoSql)
        if tab:
            cmd.append('-tab')
    return _handleExceptionAndCheckCall(cmd)
332 |