changeset 0:804a93e87cc8 draft

planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
author yating-l
date Wed, 12 Apr 2017 17:41:55 -0400
parents
children ac83821b0e06
files TrackHub.py bedToGff3.py blastxmlToGff3.py jbrowse_hub.py jbrowse_hub.xml tool_dependencies.xml trackObject.py utils.py
diffstat 8 files changed, 1231 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TrackHub.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+import shutil
+import json
+import utils
+
+
+class TrackHub:
+    """Build a JBrowse data folder ("hub") from a reference genome and a list of tracks.
+
+    The JBrowse scripts called here (prepare-refseqs.pl, flatfile-to-json.pl,
+    generate-names.pl, add-track-json.pl) are resolved through PATH.
+    """
+
+    # Folder into which makeArchive symlinks the finished hub.
+    jbrowse_data_dir = '/var/www/html/JBrowse-1.12.1/data'
+
+    def __init__(self, inputFiles, reference, outputDirect, tool_dir, genome, extra_files_path, metaData, jbrowse_host):
+        """Record the settings and (re)create an empty <extra_files_path>/<genome>/json folder.
+
+        inputFiles: object whose ``tracks`` attribute is the iterable of track dicts.
+        reference: FASTA file handed to prepare-refseqs.pl.
+        outputDirect: path of the HTML report written by outHtml.
+        tool_dir: stored as-is; this class does not use it to locate scripts.
+        genome: name of the hub sub-folder created under extra_files_path.
+        extra_files_path: folder holding the hub and its zip archive.
+        metaData: dict mapping a track's 'false_path' to its display settings.
+        jbrowse_host: URL prefix of the "View JBrowse Hub" link.
+        """
+        self.input_files = inputFiles.tracks
+        self.outfile = outputDirect
+        self.outfolder = extra_files_path
+        self.out_path = os.path.join(extra_files_path, genome)
+        self.reference = reference
+        self.tool_dir = tool_dir
+        self.metaData = metaData
+        self.raw = os.path.join(self.out_path, 'raw')
+        self.json = os.path.join(self.out_path, 'json')
+        self.jbrowse_host = jbrowse_host
+        try:
+            if os.path.exists(self.json):
+                shutil.rmtree(self.json)
+            os.makedirs(self.json)
+        except OSError as e:
+            print("Cannot create json folder error({0}): {1}".format(e.errno, e.strerror))
+        else:
+            print("Create jbrowse folder {}".format(self.out_path))
+
+    def createHub(self):
+        """Run the whole pipeline: reference, tracks, name index, archive, HTML report."""
+        self.prepareRefseq()
+        for input_file in self.input_files:
+            self.addTrack(input_file)
+        self.indexName()
+        slink = self.makeArchive()
+        self.outHtml(slink)
+        print("Success!\n")
+
+    def prepareRefseq(self):
+        """Run prepare-refseqs.pl on the reference FASTA, writing into self.json."""
+        try:
+            subprocess.call(['prepare-refseqs.pl', '--fasta', self.reference, '--out', self.json])
+        except OSError as e:
+            print("Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror))
+
+    # TODO: hard coded the bam and bigwig tracks. Need to allow users to customize the settings
+    def addTrack(self, track):
+        """Add one track dict to the hub, dispatching on track['dataType'].
+
+        'bam' and 'bigwig' tracks are registered in trackList.json; every other
+        type is loaded from <raw>/<fileName> with flatfile-to-json.pl.
+        Missing display settings are filled in (in place) by SetMetadata first.
+        """
+        metadata = self.metaData.get(track['false_path'], {})
+        self.SetMetadata(track, metadata)
+        data_type = track['dataType']
+        if data_type == 'bam':
+            self.Bam(track, metadata)
+        elif data_type == 'bigwig':
+            self.BigWig(track, metadata)
+        else:
+            # Track-level configuration that depends on the input type.
+            config = {}
+            if data_type in ('bedSpliceJunctions', 'gtf', 'blastxml'):
+                config['glyph'] = "JBrowse/View/FeatureGlyph/Segments"
+            elif data_type == 'gff3_transcript':
+                config['transcriptType'] = "transcript"
+            config['category'] = metadata['category']
+            input_flag = '--bed' if data_type == 'bed' else '--gff'
+            # json.dumps escapes quotes and backslashes in the category/color values;
+            # plain "%s" string templates would let them break the JSON.
+            subprocess.call(['flatfile-to-json.pl', input_flag, os.path.join(self.raw, track['fileName']),
+                             '--trackType', metadata['type'], '--trackLabel', metadata['label'],
+                             '--Config', json.dumps(config),
+                             '--clientConfig', json.dumps({'color': metadata['color']}),
+                             '--out', self.json])
+
+    def indexName(self):
+        """Run generate-names.pl on the hub's json folder."""
+        subprocess.call(['generate-names.pl', '-v', '--out', self.json])
+        print("finished name index \n")
+
+    def makeArchive(self):
+        """Zip the hub folder, symlink it into jbrowse_data_dir and return the link path."""
+        shutil.make_archive(self.out_path, 'zip', self.out_path)
+        file_dir = os.path.abspath(self.outfile)
+        source_dir = os.path.dirname(file_dir)
+        folder_name = os.path.basename(self.outfolder)
+        source_name = os.path.basename(self.out_path)
+        source = os.path.join(source_dir, folder_name, source_name)
+        # The link name is the hub path with every '/' flattened to '_'.
+        slink = os.path.join(self.jbrowse_data_dir, source.replace('/', '_'))
+        try:
+            if os.path.islink(slink):
+                os.unlink(slink)
+        except OSError as oserror:
+            print("Cannot create symlink to the data({0}): {1}".format(oserror.errno, oserror.strerror))
+        os.symlink(source, slink)
+        return slink
+
+    # TODO: this will list all zip files in the filedir and sub-dirs. worked in Galaxy but all list zip files in test-data when
+    # run it locally. May need modify
+    def outHtml(self, slink):
+        """Write the HTML report: one download link per zip archive plus the JBrowse view link.
+
+        slink: symlink path returned by makeArchive; only its base name is used.
+        """
+        with open(self.outfile, 'w') as htmlfile:
+            htmlstr = 'The JBrowse Hub is created: <br>'
+            zipfiles = '<li><a href = "%s">Download</a></li>'
+            url = self.jbrowse_host + "/JBrowse-1.12.1/index.html?data=%s"
+            jbrowse_hub = '<li><a href = "%s" target="_blank">View JBrowse Hub</a></li>' % url
+            filedir_abs = os.path.abspath(self.outfile)
+            filedir = os.path.dirname(filedir_abs)
+            filedir = os.path.join(filedir, self.outfolder)
+            for root, dirs, files in os.walk(filedir):
+                for filename in files:
+                    if filename.endswith('.zip'):
+                        relative_directory = os.path.relpath(root, filedir)
+                        relative_file_path = os.path.join(relative_directory, filename)
+                        htmlstr += zipfiles % relative_file_path
+            link_name = os.path.basename(slink)
+            relative_path = os.path.join('data', link_name + '/json')
+            htmlstr += jbrowse_hub % relative_path
+            htmlfile.write(htmlstr)
+
+    def createTrackList(self):
+        """Create an empty trackList.json in self.json if it does not exist yet."""
+        trackList = os.path.join(self.json, "trackList.json")
+        if not os.path.exists(trackList):
+            # os.mknod is not available/permitted on every platform; opening in
+            # append mode creates the same empty regular file.
+            open(trackList, 'a').close()
+
+    def _addTrackJson(self, track_conf):
+        """Feed one track configuration dict, serialised as JSON, to add-track-json.pl."""
+        self.createTrackList()
+        json_file = os.path.join(self.json, "trackList.json")
+        adder = subprocess.Popen(['add-track-json.pl', json_file], stdin=subprocess.PIPE)
+        # communicate() writes the JSON, closes the pipe and waits for the script, so
+        # no un-reaped helper process or open pipe is left behind.
+        adder.communicate((json.dumps(track_conf) + "\n").encode('utf-8'))
+
+    def Bam(self, track, metadata):
+        """Register a BAM track; the data and index URLs point into ../raw."""
+        self._addTrackJson({
+            'type': 'JBrowse/View/Track/Alignments2',
+            'storeClass': 'JBrowse/Store/SeqFeature/BAM',
+            'urlTemplate': os.path.join('../raw', track['fileName']),
+            'baiUrlTemplate': os.path.join('../raw', track['index']),
+            'label': metadata['label'],
+            'category': metadata['category'],
+        })
+
+    def BigWig(self, track, metadata):
+        """Register a bigWig XYPlot track; the data URL points into ../raw."""
+        self._addTrackJson({
+            'urlTemplate': os.path.join('../raw', track['fileName']),
+            'type': 'JBrowse/View/Track/Wiggle/XYPlot',
+            'storeClass': 'JBrowse/Store/SeqFeature/BigWig',
+            'label': metadata['label'],
+            'style': metadata['style'],
+            'category': metadata['category'],
+        })
+
+    def SetMetadata(self, track, metadata):
+        """Fill in, in place, a default for every display setting that is missing or ''.
+
+        'type' is always overwritten: it is derived from track['dataType'].
+        """
+        if metadata.get('label', '') == '':
+            metadata['label'] = track['fileName']
+        if metadata.get('color', '') == '':
+            metadata['color'] = "#daa520"
+        if track['dataType'] == 'bigwig':
+            style = metadata.setdefault('style', {})
+            if style.get('pos_color', '') == '':
+                style['pos_color'] = "#FFA600"
+            if style.get('neg_color', '') == '':
+                style['neg_color'] = "#005EFF"
+        if metadata.get('category', '') == '':
+            metadata['category'] = "Default group"
+        if track['dataType'] == 'blastxml':
+            metadata['type'] = "G-OnRamp_plugin/BlastAlignment"
+        elif track['dataType'] in ('gff3_transcript', 'gff3_mrna'):
+            metadata['type'] = "G-OnRamp_plugin/GenePred"
+        else:
+            metadata['type'] = "CanvasFeatures"
+
+
+
+   
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedToGff3.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+'''
+Convert BED format to gff3
+'''
+import os
+from collections import OrderedDict
+import utils
+
+class bedToGff3():
+    """Convert a BED file to GFF3; the conversion runs as soon as the object is created."""
+
+    def __init__(self, inputBedFile, chrom_sizes, bed_type, output):
+        """Store the paths and run the converter selected by bed_type.
+
+        inputBedFile: path of the tab-separated BED file to read.
+        chrom_sizes: handed to utils.sequence_region to obtain the sequence lengths.
+        bed_type: "trfbig" or "regtools"; any other value converts nothing.
+        output: path of the GFF3 file to write.
+        """
+        self.input = inputBedFile
+        self.output = output
+        self.chrom_sizes = chrom_sizes
+        self.type = bed_type
+        if self.type == "trfbig":
+            self.trfbig_to_gff3()
+        elif self.type == "regtools":
+            self.splicejunctions_to_gff3()
+
+    def _convert(self, fill_record, with_child_blocks=False):
+        """Shared driver: write the GFF3 header, then one feature per BED line.
+
+        fill_record(li, field, attribute) fills the format-specific columns from
+        the split BED line ``li``. A "##sequence-region" pragma is written the
+        first time each seqid is seen. When with_child_blocks is true,
+        utils.child_blocks is called after each feature is written.
+        """
+        sizes_dict = utils.sequence_region(self.chrom_sizes)
+        seq_regions = dict()
+        # "with" closes both files even when a malformed line raises.
+        with open(self.output, 'w') as gff3, open(self.input, 'r') as bed:
+            gff3.write("##gff-version 3\n")
+            for line in bed:
+                if not line.strip():
+                    # A blank line has no columns to read; skip it instead of crashing.
+                    continue
+                li = line.rstrip().split("\t")
+                field = OrderedDict()
+                attribute = OrderedDict()
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                fill_record(li, field, attribute)
+                utils.write_features(field, attribute, gff3)
+                if with_child_blocks:
+                    utils.child_blocks(field, attribute, gff3)
+
+    def trfbig_to_gff3(self):
+        """Write every line of the input BED as a 'tandem_repeat' feature on the '+' strand."""
+        def fill(li, field, attribute):
+            field['source'] = li[3]
+            field['type'] = 'tandem_repeat'
+            # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = str(int(li[1]) + 1)
+            field['end'] = li[2]
+            field['score'] = li[9]
+            field['strand'] = '+'
+            field['phase'] = '.'
+            attribute['length of repeat unit'] = li[4]
+            attribute['mean number of copies of repeat'] = li[5]
+            attribute['length of consensus sequence'] = li[6]
+            attribute['percentage match'] = li[7]
+            attribute['percentage indel'] = li[8]
+            attribute['percent of a\'s in repeat unit'] = li[10]
+            attribute['percent of c\'s in repeat unit'] = li[11]
+            attribute['percent of g\'s in repeat unit'] = li[12]
+            attribute['percent of t\'s in repeat unit'] = li[13]
+            attribute['entropy'] = li[14]
+            attribute['sequence of repeat unit element'] = li[15]
+
+        self._convert(fill)
+
+    def splicejunctions_to_gff3(self):
+        """Write every line of the input BED as a 'junction' feature followed by its child blocks."""
+        def fill(li, field, attribute):
+            field['source'] = li[3]
+            field['type'] = 'junction'
+            # The first base in a chromosome is numbered 0 in BED format.
+            # NOTE(review): start stays an int here (trfbig_to_gff3 stores a string);
+            # presumably utils.child_blocks computes with it - confirm before unifying.
+            field['start'] = int(li[1]) + 1
+            field['end'] = li[2]
+            field['score'] = li[12]
+            field['strand'] = li[5]
+            field['phase'] = '.'
+            attribute['ID'] = li[3]
+            attribute['Name'] = li[3]
+            attribute['blockcount'] = li[9]
+            attribute['blocksizes'] = li[10]
+            attribute['chromstarts'] = li[11]
+
+        self._convert(fill, with_child_blocks=True)
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blastxmlToGff3.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+
+from Bio.Blast import NCBIXML
+from collections import OrderedDict
+import utils
+
+
+def align2cigar(hsp_query, hsp_reference):
+    """
+        Build the CIGAR-like gap string of an HSP from its two aligned sequences.
+        input:
+            hsp_query       aligned query sequence ('-' marks a gap)
+            hsp_reference   aligned subject sequence ('-' marks a gap)
+        output:
+            space-separated runs such as "M12 D1 M30"; "" for an empty alignment
+        Each alignment column is classified as:
+            'M' no gap in either sequence,
+            'I' gap in the reference sequence,
+            'D' gap in the query (delete from reference).
+        If the two strings differ in length, only the common prefix is used.
+        Some ideas of this function are coming from
+        https://gist.github.com/ozagordi/099bdb796507da8d9426
+    """
+    from itertools import groupby
+
+    def column_type(pair):
+        query_char, ref_char = pair
+        if query_char == '-':
+            return 'D'
+        if ref_char == '-':
+            return 'I'
+        return 'M'
+
+    # groupby yields only runs that really occur, so an alignment starting with a gap
+    # does not get a zero-length "M0" run in front of it.
+    runs = groupby(zip(hsp_query, hsp_reference), key=column_type)
+    return ' '.join('%s%d' % (op, sum(1 for _ in run)) for op, run in runs)
+
+def gff3_writer(blast_records, gff3_file):
+    """Write BLAST records to gff3_file as GFF3 'match' / 'match_part' features.
+
+    blast_records: iterable of BLAST records (blastxml2gff3 passes the result of
+        NCBIXML.parse).
+    gff3_file: output path. It is opened in append mode, so any existing content
+        is kept and the "##gff-version 3" header is written after it.
+
+    For every alignment one parent 'match' feature covering all of its HSPs is
+    written, followed by one 'match_part' per HSP ordered by (start, end). A
+    "##sequence-region" pragma is emitted the first time a contig is seen.
+    """
+    def strand_of(frame):
+        # Reading-frame pair -> strand:
+        # (+, +), (0, 0), (-, -) => +
+        # (+, -), (-, +) => -
+        # exactly one frame is 0 => sign of the other one
+        product = frame[0] * frame[1]
+        if product != 0:
+            return '+' if product > 0 else '-'
+        return '+' if frame[0] + frame[1] >= 0 else '-'
+
+    seq_regions = dict()
+    # "with" closes the output even if a record fails to convert.
+    with open(gff3_file, 'a') as gff3:
+        gff3.write("##gff-version 3\n")
+        for blast_record in blast_records:
+            query_name = blast_record.query.split(" ")[0]
+            source = blast_record.application
+            method = blast_record.matrix
+            for alignment in blast_record.alignments:
+                # The contig name is the last space-separated word of the hit title.
+                contig_name = alignment.title.split(" ")[-1]
+                length = alignment.length
+                parent_id = contig_name + '_' + query_name
+                parent_field = OrderedDict()
+                parent_field['seqid'] = contig_name
+                parent_field['source'] = source
+                parent_field['type'] = 'match'
+                parent_attribute = OrderedDict()
+                parent_attribute['ID'] = parent_id
+                parent_attribute['method'] = method
+                parent_attribute['length'] = length
+                if contig_name not in seq_regions:
+                    gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n')
+                    seq_regions[contig_name] = length
+                # Extent of the parent feature: smallest start / largest end over all HSPs.
+                span_start, span_end = length, 0
+                parts = []
+                for match_num, hsp in enumerate(alignment.hsps):
+                    field = OrderedDict()
+                    attribute = OrderedDict()
+                    field['seqid'] = contig_name
+                    field['source'] = source
+                    field['type'] = 'match_part'
+                    field['start'] = hsp.sbjct_start
+                    ref_length = len(hsp.sbjct.replace('-', ''))
+                    # if run tblastn, the actual length of reference should be multiplied by 3
+                    if source.lower() == "tblastn":
+                        ref_length *= 3
+                    field['end'] = field['start'] + ref_length - 1
+                    span_start = min(span_start, field['start'])
+                    span_end = max(span_end, field['end'])
+                    field['score'] = hsp.score
+                    field['strand'] = strand_of(hsp.frame)
+                    field['phase'] = '.'
+
+                    target_start = hsp.query_start
+                    target_len = len(hsp.query.replace('-', ''))
+                    # if run blastx, the actual length of query should be multiplied by 3
+                    if source.lower() == "blastx":
+                        target_len *= 3
+                    target_end = target_start + target_len - 1
+                    attribute['ID'] = parent_id + '_match_' + str(match_num)
+                    attribute['Parent'] = parent_id
+                    attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
+                    attribute['Gap'] = align2cigar(hsp.query, hsp.sbjct)
+                    # store the sequences and match string in the file in order to display
+                    # the alignment with the BlastAlignment plugin
+                    attribute['subject'] = hsp.sbjct
+                    attribute['query'] = hsp.query
+                    attribute['match'] = hsp.match
+                    # 'gaps' is the number of blanks in the match line
+                    attribute['gaps'] = attribute['match'].count(' ')
+                    similar = attribute['match'].count('+')
+                    attribute['identities'] = len(attribute['match']) - similar - attribute['gaps']
+                    attribute['positives'] = attribute['identities'] + similar
+                    attribute['expect'] = hsp.expect
+                    attribute['frame'] = hsp.frame[1]
+                    parts.append((field, attribute))
+                parent_field['start'] = span_start
+                parent_field['end'] = span_end
+                parent_field['score'] = parent_field['strand'] = parent_field['phase'] = '.'
+                parent_attribute['match_num'] = len(parts)
+                parts.sort(key=lambda part: (part[0]['start'], part[0]['end']))
+                utils.write_features(parent_field, parent_attribute, gff3)
+                # 'overlap' records "start,prev_end" when a part begins at or before the
+                # end of the part written just before it; otherwise it stays empty.
+                prev_end = -1
+                for field, attribute in parts:
+                    overlap = ''
+                    if field['start'] <= prev_end:
+                        overlap += str(field['start']) + ',' + str(prev_end)
+                    prev_end = field['end']
+                    attribute['overlap'] = overlap
+                    utils.write_features(field, attribute, gff3)
+
+def blastxml2gff3(xml_file, gff3_file):
+    """Parse the BLAST XML file xml_file and write its hits to gff3_file with gff3_writer."""
+    # Keep the input open while gff3_writer iterates over the parsed records, and
+    # close it afterwards (also when the conversion raises).
+    with open(xml_file) as result_handle:
+        blast_records = NCBIXML.parse(result_handle)
+        gff3_writer(blast_records, gff3_file)
+
+# Ad-hoc manual run: converts one hard-coded sample BLAST XML file into gff3.txt.
+if __name__ == "__main__":
+    blastxml2gff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt")
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowse_hub.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+import sys
+import argparse
+import json
+import utils
+import trackObject
+import TrackHub
+
+
+
+def main(argv):
+    """Command-line entry point: parse the options, collect the track files and build the hub.
+
+    argv: full argument vector including the program name (e.g. sys.argv). When it
+        is empty or None, argparse falls back to sys.argv.
+    Raises RuntimeError when no --fasta reference is given.
+    """
+    parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')
+
+    # Reference genome mandatory
+    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')
+
+    # Genome name
+    parser.add_argument('-g', '--genome_name', help='Name of reference genome')
+
+    # Output HTML file
+    parser.add_argument('-o', '--out', help='output html')
+
+    # Output folder
+    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')
+
+    # Tool directory
+    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')
+
+    # GFF3
+    parser.add_argument('--gff3', action='append', help='GFF3 format')
+
+    # GFF3 structure: gene->transcription->CDS
+    parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS')
+
+    # GFF3 structure: gene->mRNA->CDS
+    parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS')
+
+    # generic BED
+    parser.add_argument('--bed', action='append', help='BED format')
+
+    # trfBig simple repeats (BED 4+12)
+    parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as')
+
+    # regtools (BED 12+1)
+    parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as')
+
+    # tblastn alignment (blastxml)
+    parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')
+
+    # BAM format
+    parser.add_argument('--bam', action='append', help='BAM format from HISAT')
+
+    # BIGWIG format
+    parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')
+
+    # GTF format
+    parser.add_argument('--gtf', action='append', help='GTF format from StringTie')
+
+    # Metadata json format
+    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+
+    # JBrowse host
+    parser.add_argument('--jbrowse_host', help="JBrowse Host")
+
+    # Parse the vector that was passed in rather than silently ignoring it.
+    args = parser.parse_args(argv[1:] if argv else None)
+
+    if not args.fasta:
+        parser.print_help()
+        raise RuntimeError("No reference genome\n")
+    reference = args.fasta
+    # Missing or empty options fall back to these defaults.
+    genome = args.genome_name or 'unknown'
+    out_path = args.out or 'unknown.html'
+    extra_files_path = args.extra_files_path or '.'
+    jbrowse_host = args.jbrowse_host or ''
+    # tool_directory not work for Galaxy tool, all tools need to exist in the current PATH,
+    # deal with it with tool dependencies
+    tool_directory = args.tool_directory or '.'
+
+    # Calculate chromosome sizes using genome reference and UCSC tools
+    chrom_size = utils.getChromSizes(reference, tool_directory)
+
+    # Get the metadata of the inputs from the JSON string, if one was given
+    inputs_data = json.loads(args.data_json) if args.data_json else {}
+
+    # Initiate trackObject
+    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)
+
+    # Each option name doubles as the datatype key; only options that were
+    # actually supplied end up in the dictionary.
+    track_options = ('bam', 'bed', 'bedSimpleRepeats', 'bedSpliceJunctions', 'bigwig',
+                     'gff3', 'gff3_transcript', 'gff3_mrna', 'gtf', 'blastxml')
+    all_datatype_dictionary = dict()
+    for datatype in track_options:
+        inputfiles = getattr(args, datatype)
+        if inputfiles:
+            all_datatype_dictionary[datatype] = inputfiles
+
+    print("input tracks: \n" + str(all_datatype_dictionary))
+
+    for datatype, inputfiles in all_datatype_dictionary.items():
+        for f in inputfiles:
+            # Convert tracks into gff3 format
+            all_tracks.addToRaw(f, datatype)
+
+    jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host)
+    jbrowseHub.createHub()
+
+"""        
+def extractMetadata(array_inputs, inputs_data):
+    metadata_dict = {}
+    for input_false_path in array_inputs:
+        for key, data_value in inputs_data.items():
+            if key == input_false_path:
+                metadata_dict[input_false_path]
+"""
+
+# Script entry point: hand the full argument vector to main().
+if __name__ == "__main__":
+    main(sys.argv)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jbrowse_hub.xml	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,293 @@
+<tool id="jbrowse_hub" name="JBrowse Hub Creator" version="1.0">
+    <description>
+        This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse
+    </description>
+
+    <requirements>
+        <requirement type="package" version="1.2">samtools</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="1.68">biopython</requirement>
+        <requirement type="package" version="1.0">ucsc_tools_340</requirement>
+        <requirement type="package" version="1.12.1">jbrowse_tools</requirement>
+    </requirements>
+
+    <stdio>
+    </stdio>
+
+    <command detect_errors="exit_code"><![CDATA[
+        python $__tool_directory__/jbrowse_hub.py 
+        --fasta '$reference' 
+        --genome_name '$genome_name'
+
+        #set galaxy_url = str($GALAXY_URL)
+        #set $jbrowse_url = galaxy_url.replace("8080", "80")
+        --jbrowse_host '$jbrowse_url'
+        
+        ## json metadata recording from Remi's hub-archive-creator.xml
+        #import json
+        #set global data_parameter_dict = {}
+
+        ## Function to retrieve the data of the inputs
+        #def prepare_json($input_to_prepare, $extra_data_dict={})
+            #set false_path = str($input_to_prepare)
+            #set name = $input_to_prepare.name
+
+            #set data_dict = {"name": $name}
+            #silent data_dict.update($extra_data_dict)
+
+            #silent $data_parameter_dict.update({$false_path: $data_dict})
+
+        #end def
+
+        #for $g in $group
+        #for $f in $g.format
+            #set track_label =  str($f.formatChoice.label)
+            #set group_name = str($g.group_name)
+            #set extra_data_dict = {"label" : $track_label, "category" : $group_name} 
+            #if $f.formatChoice.format_select == 'bed'
+                #set track_color = str($f.formatChoice.track_color)
+                #silent extra_data_dict.update({"color" : $track_color})
+                #if $f.formatChoice.bedChoice.bed_select == 'bed_generic_option'
+                    --bed $f.formatChoice.bedChoice.BED_generic
+                    #silent $prepare_json($f.formatChoice.bedChoice.BED_generic, extra_data_dict)
+                #elif $f.formatChoice.bedChoice.bed_select == 'bed_simple_repeats_option'
+                    --bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats
+                    #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, extra_data_dict)
+                #elif $f.formatChoice.bedChoice.bed_select == 'bed_splice_junctions_option'
+                    --bedSpliceJunctions $f.formatChoice.bedChoice.BED_splice_junctions
+                    #silent $prepare_json($f.formatChoice.bedChoice.BED_splice_junctions, extra_data_dict)
+                #end if
+            #end if
+            #if $f.formatChoice.format_select == 'bam'
+                --bam $f.formatChoice.BAM
+                #silent $prepare_json($f.formatChoice.BAM, extra_data_dict)
+            #end if
+            #if $f.formatChoice.format_select == 'gff3'
+                #set track_color = str($f.formatChoice.track_color)
+                #silent extra_data_dict.update({"color" : $track_color})
+                #if $f.formatChoice.gff3Choice.gff3_select == 'gff3_generic'
+                    --gff3 $f.formatChoice.gff3Choice.GFF3_generic
+                    #silent $prepare_json($f.formatChoice.gff3Choice.GFF3_generic, extra_data_dict)
+                #elif $f.formatChoice.gff3Choice.gff3_select == 'gff3_transcript'
+                    --gff3_transcript $f.formatChoice.gff3Choice.GFF3_transcript
+                    #silent $prepare_json($f.formatChoice.gff3Choice.GFF3_transcript, extra_data_dict)
+                #elif $f.formatChoice.gff3Choice.gff3_select == 'gff3_mrna'
+                    --gff3_mrna $f.formatChoice.gff3Choice.GFF3_mrna
+                    #silent $prepare_json($f.formatChoice.gff3Choice.GFF3_mrna, extra_data_dict)
+                #end if
+            #end if
+            #if $f.formatChoice.format_select == 'blastxml'
+                --blastxml $f.formatChoice.BlastXML
+                #silent $prepare_json($f.formatChoice.BlastXML, extra_data_dict)
+            #end if
+            #if $f.formatChoice.format_select == 'gtf'
+                --gtf $f.formatChoice.GTF
+                #set track_color = str($f.formatChoice.track_color)
+                #silent extra_data_dict.update({"color" : $track_color})
+                #silent $prepare_json($f.formatChoice.GTF, extra_data_dict)
+            #end if
+            #if $f.formatChoice.format_select == 'bigwig'
+                --bigwig $f.formatChoice.BIGWIG
+                #set pos_color = str($f.formatChoice.pos_color)
+                #set neg_color = str($f.formatChoice.neg_color)
+                #silent $extra_data_dict.update({"style" : {"pos_color" : $pos_color, "neg_color" : $neg_color}})
+                #silent $prepare_json($f.formatChoice.BIGWIG, extra_data_dict)
+            #end if
+       #end for
+       #end for
+
+       #set all_data_json = json.dumps($data_parameter_dict)
+       -j '$all_data_json'
+       -e '$output.extra_files_path'
+       -o '$output'
+
+    ]]></command>
+
+    <inputs>
+        <param name="GALAXY_URL" type="baseurl" value="" />
+        <param name="reference" type="data" format="fasta" label="Reference Genome" />
+        <param name="genome_name" type="text" size="30" value="unknown" label="Genome name" />
+        <repeat name="group" title="New group">
+            <param type="text" name="group_name" label="Group name" value="Default group"/>
+            <repeat name="format" title="New track">
+                <conditional name="formatChoice">
+                    <param name="format_select" type="select" label="Format">
+                        <option value="bam" selected="true">BAM</option>
+                        <option value="bed">BED</option>
+                        <option value="blastxml">BLASTXML</option>
+                        <option value="bigwig">BIGWIG</option>
+                        <option value="gff3">GFF3</option>
+                        <option value="gtf">GTF</option>
+                    </param>
+
+                    <when value="bam">
+                        <param
+                                format="bam"
+                                name="BAM"
+                                type="data"
+                                label="BAM File"
+                        />
+                        <param name="label" type="text" size="30" value = "Sequence Alignment" label="Track name" />
+                    </when>     
+                    <when value="bed">
+                        <conditional name="bedChoice">
+                            <param name="bed_select" type="select" label="Bed Choice">
+                                <option value="bed_generic_option">BED format</option>
+                                <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option>
+                                <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option>
+                            </param>
+                            <when value="bed_generic_option">
+                                <param
+                                        format="bed"
+                                        name="BED_generic"
+                                        type="data"
+                                        label="Bed File"
+                                />
+                            </when>
+                            <when value="bed_simple_repeats_option">
+                                <param
+                                        format="bed"
+                                        name="BED_simple_repeats"
+                                        type="data"
+                                        label="Bed Simple Repeats (Bed4+12) File"
+                                />
+                            </when>
+                            <when value="bed_splice_junctions_option">
+                                <param
+                                        format="bed"
+                                        name="BED_splice_junctions"
+                                        type="data"
+                                        label="Bed Splice Junctions (Bed12+1) File"
+                                />
+                            </when>
+                        </conditional>
+                        <param name="label" type="text" size="30" value="BED file" label="Track name" />
+                        <param name="track_color" type="color" label="Track color" value="#daa520">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="blastxml">
+                        <param
+                                format="blastxml"
+                                name="BlastXML"
+                                type="data"
+                                label="Blast Alignments File"
+                        />
+                        <param name="label" type="text" size="30" value="Blast Alignment" label="Track name" />
+                    </when>
+                    <when value="bigwig">
+                        <param
+                                format="bigwig"
+                                name="BIGWIG"
+                                type="data"
+                                label="BIGWIG File"
+                        />
+                        <param name="label" type="text" size="30" value="Sequence Coverage" label="Track name" />
+                        <param name="pos_color" type="color" label="Positive Coverage Color" value="#FFA600">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                        <param name="neg_color" type="color" label="Negative Coverage Color" value="#005EFF">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="gff3">
+                        <conditional name="gff3Choice">
+                            <param name="gff3_select" type="select" label="gff3 type">
+                                <option value="gff3_generic">GFF3 format</option>
+                                <option value="gff3_transcript">GFF3 format output from gene prediction tools (e.g. Augustus), structure: gene->transcription->CDS</option>
+                                <option value="gff3_mrna">GFF3 format output from gene prediction tools (e.g. SNAP), structure: gene->mRNA->CDS</option>
+                            </param>
+                            <when value="gff3_generic">
+                                <param
+                                        format="gff3"
+                                        name="GFF3_generic"
+                                        type="data"
+                                        label="GFF3 File"
+                                />
+                            </when>
+                            <when value="gff3_transcript">
+                                <param
+                                        format="gff3"
+                                        name="GFF3_transcript"
+                                        type="data"
+                                        label="GFF3 File from gene prediction"
+                                />
+                            </when>
+                            <when value="gff3_mrna">
+                                <param
+                                        format="gff3"
+                                        name="GFF3_mrna"
+                                        type="data"
+                                        label="GFF3 File from gene prediction"
+                                />
+                            </when>
+                        </conditional>
+                        <param name="label" type="text" size="30" value="Gene Prediction" label="Track name" />
+                        <param name="track_color" type="color" label="Track color" value="#daa520">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
+                    <when value="gtf">
+                        <param
+                                format="gtf"
+                                name="GTF"
+                                type="data"
+                                label="GTF File"
+                        />
+                        <param name="label" type="text" size="30" value="Assembled Transcripts" label="Track name" />
+                        <param name="track_color" type="color" label="Track color" value="#daa520">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>  
+                </conditional>
+            </repeat>
+        </repeat>
+    </inputs>
+
+    <outputs>
+        <data format="html" name="output" label="${tool.name}" />
+    </outputs>
+    <tests>
+        <!-- Builds a hub from a single GFF3 track of the "gff3_transcript" flavour.
+             format_select has to be one of the options declared in <inputs>
+             (bam, bed, blastxml, bigwig, gff3, gtf); the GFF3 flavour and its
+             dataset are chosen through the nested gff3Choice conditional. -->
+        <test>
+            <param name="reference" value="dbia3/raw/dbia3.fa" />
+            <param name="output" value="hubtest" />
+            <repeat name="group">
+                <param name="group_name" value="Default group"/>
+                <repeat name="format">
+                    <conditional name="formatChoice">
+                        <param name="format_select" value="gff3"/>
+                        <conditional name="gff3Choice">
+                            <param name="gff3_select" value="gff3_transcript"/>
+                            <param name="GFF3_transcript" value="dbia3/raw/Augustus_on_data_3__GTF_GFF.gff3" />
+                        </conditional>
+                    </conditional>
+                </repeat>
+            </repeat>
+            <output name="output" file="hubtest" >
+            </output>
+        </test>
+    </tests>
+    <help>
+        This Galaxy tool creates a tar archive containing the raw datasets and the JSON datasets that can be used for
+        JBrowse visualization.
+    </help>
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,94 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="samtools" version="1.2">
+        <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+
+    <package name="numpy" version="1.9">
+        <repository changeset_revision="83d12e13dbbd" name="package_numpy_1_9" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+
+    <package name="biopython" version="1.68">
+        <readme>
+This package is based on package_biopython_1_67 owned by biopython. 
+https://toolshed.g2.bx.psu.edu/repository?user_id=fd5c6d0f82f315d8
+
+This Galaxy Tool Shed package installs Biopython from source, having
+first installed NumPy which is a build time dependency. This requires
+and assumes a standard C compiler is already installed, along with
+the Python header files.
+
+Development of this dependency definition is being done here on GitHub:
+https://github.com/biopython/galaxy_packages
+
+The PYTHONPATH for biopython can be accessed through PYTHONPATH_BIOPYTHON.
+        </readme>
+         <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://biopython.org/DIST/biopython-1.68.tar.gz</action>
+                <action type="set_environment_for_install">
+                    <repository changeset_revision="83d12e13dbbd" name="package_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu">
+                        <package name="numpy" version="1.9" />
+                    </repository>
+                </action>
+                <action type="make_directory">$INSTALL_DIR/lib/python</action>
+                <action type="shell_command">
+                    export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python &amp;&amp; 
+                    export PATH=$PATH:$PATH_NUMPY &amp;&amp; 
+                    export PYTHONPATH=$PYTHONPATH:$PYTHONPATH_NUMPY &amp;&amp; 
+                    python setup.py install --install-lib $INSTALL_DIR/lib/python
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable>
+                    <environment_variable action="append_to" name="PYTHONPATH">$ENV[PYTHONPATH_NUMPY]</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$ENV[PATH_NUMPY]</environment_variable>
+                    <environment_variable action="set_to" name="PYTHONPATH_BIOPYTHON">$INSTALL_DIR/lib/python</environment_variable>
+                </action>
+            </actions>
+        </install>
+    </package>
+    
+     <package name="ucsc_tools_340" version="1.0">
+        <install version="1.0">
+            <actions_group>
+                <actions architecture="x86_64" os="linux">
+                    <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action>
+                    <action type="move_directory_files">
+                        <source_directory>.</source_directory>                       
+                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                    </action>    
+                </actions>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions_group>
+         </install>
+         <readme>The well known UCSC tools from Jim Kent.</readme>
+    </package>
+    <package name="jbrowse_tools" version="1.12.1">
+        <install version="1.0">
+            <actions_group>
+                <actions architecture="x86_64" os="linux">
+                    <action type="download_by_url">http://jbrowse.org/wordpress/wp-content/plugins/download-monitor/download.php?id=105</action>
+                    <action type="make_directory">$INSTALL_DIR/jbrowse</action>
+                    <action type="shell_command">
+                        ./setup.sh
+                    </action>
+                    <action type="move_directory_files">
+                        <source_directory>.</source_directory>                       
+                        <destination_directory>$INSTALL_DIR/jbrowse</destination_directory>
+                    </action>    
+                    <action type="set_environment">
+                        <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/jbrowse</environment_variable>
+                        <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/jbrowse/bin</environment_variable>
+                        <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/jbrowse/src</environment_variable>
+                        <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/jbrowse/extlib</environment_variable>
+                    </action>
+                </actions>
+            </actions_group>
+         </install>
+         <readme>The perl scripts for converting flat files to json.</readme>
+    </package>
+
+    
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trackObject.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import os
+import shutil
+import utils
+import bedToGff3
+import blastxmlToGff3
+
+
+class trackObject:
+    def __init__(self, chrom_size, genome, extra_files_path):
+        self.chrom_size = chrom_size
+        outputDirect = os.path.join(extra_files_path, genome)
+        self.raw_folder = os.path.join(outputDirect, 'raw')
+        #Store metadata of the tracks
+        self.tracks = []
+        try:
+            if os.path.exists(self.raw_folder):
+                if os.path.isdir(self.raw_folder):
+                    shutil.rmtree(self.raw_folder)
+                else:
+                    os.remove(self.raw_folder)
+            os.makedirs(self.raw_folder)
+        except OSError as oserror:
+            print "Cannot create raw folder error({0}): {1}".format(oserror.errno, oserror.strerror)
+
+    def addToRaw(self, dataFile, dataType):
+        """
+        Convert gff3, BED, blastxml and gtf files into gff3 files 
+        and store converted files in folder 'raw'
+        """
+        false_path = os.path.abspath(dataFile)
+        fileName = os.path.basename(dataFile)
+        des_path = os.path.join(self.raw_folder, fileName)
+        track = {}
+        if dataType == 'bed' or dataType == 'gff3' or dataType == 'gff3_mrna' or dataType == 'gff3_transcript' or dataType == 'fasta' or dataType == 'bam' or dataType == 'bigwig':
+            if dataType == 'bam':
+                # JBrowse will raise error: not a BAM file if the filename hasn't .bam extension
+                extension = os.path.splitext(fileName)[1]
+                if extension != '.bam':
+                    fileName = fileName + '.bam'
+                des_path = os.path.join(self.raw_folder, fileName)
+                bam_index = utils.createBamIndex(dataFile)
+                indexname = os.path.basename(bam_index)
+                des_path_for_index = os.path.join(self.raw_folder, indexname)
+                shutil.copyfile(bam_index, des_path_for_index)  
+                track['index'] = indexname
+
+            try:
+                shutil.copyfile(dataFile, des_path)
+            except shutil.Error as err1:
+                print "Cannot move file, error({0}: {1})".format(err1.errno, err1.strerror)
+            except IOError as err2:
+                print "Cannot move file, error({0}: {1})".format(err2.errno, err2.strerror)
+        elif dataType == 'bedSimpleRepeats':
+            bedToGff3.bedToGff3(dataFile, self.chrom_size, 'trfbig', des_path)
+        elif dataType == 'bedSpliceJunctions':
+            bedToGff3.bedToGff3(dataFile, self.chrom_size, 'regtools', des_path)
+        elif dataType == 'blastxml':
+            blastxmlToGff3.blastxml2gff3(dataFile, des_path)
+        elif dataType == 'gtf':
+            utils.gtfToGff3(dataFile, des_path, self.chrom_size)
+        track['fileName'] = fileName
+        track['dataType'] = dataType
+        track['false_path'] = false_path
+        #self.SetMetadata(track, metaData)
+        self.tracks.append(track)
+
+    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Wed Apr 12 17:41:55 2017 -0400
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+
+"""
+This file includes commonly used functions for converting file formats to gff3
+"""
+from collections import OrderedDict
+import json
+import subprocess
+import os
+import tempfile
+import string
+
+def write_features(field, attribute, gff3):
+    """
+    The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
+    field, attribute are ordered dictionary 
+    gff3 is the file handler
+    """
+    attr = []
+    for v in field.values():
+        gff3.write(str(v) + '\t')
+    for k, v in attribute.items():
+        s = str(k) + '=' + str(v)
+        attr.append(s)
+    gff3.write(';'.join(attr))
+    gff3.write('\n')
+
+def getChromSizes(reference, tool_dir):
+    #TODO: find a better way instead of shipping the two exec files with the tool
+    faToTwoBit = os.path.join(tool_dir, 'faToTwoBit')
+    twoBitInfo = os.path.join(tool_dir, 'twoBitInfo')
+    try:
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False)
+    except IOError as err:
+        print "Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror)
+    try:
+        subprocess.call(['faToTwoBit', reference, twoBitFile.name])
+    except OSError as err:
+        print "Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror)
+    try:
+        subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name])
+    except OSError as err:
+        print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror)
+    return chrom_sizes
+
+def sequence_region(chrom_sizes):
+    """
+    This function read from a chromatin size file generated by twoBitInfo and write the information to dict
+    return a dict
+    """
+    f = open(chrom_sizes, 'r')
+    sizes = f.readlines()
+    sizes_dict = {}
+    for line in sizes:
+        chrom_info = line.rstrip().split('\t')
+        sizes_dict[chrom_info[0]] = chrom_info[1]
+    return sizes_dict
+
+def child_blocks(parent_field, parent_attr, gff3):
+    num = 0
+    blockcount = int(parent_attr['blockcount'])
+    chromstart = parent_attr['chromstarts'].split(',')
+    blocksize = parent_attr['blocksizes'].split(',')
+    while num < blockcount:
+        child_attr = OrderedDict()
+        child_field = parent_field
+        child_field['type'] = 'exon_junction'
+        child_field['start'] = int(chromstart[num]) + int(parent_field['start'])
+        child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
+        child_attr['ID'] = parent_attr['ID'] + '_exon_' + str(num+1)
+        child_attr['Parent'] = parent_attr['ID']
+        write_features(child_field, child_attr, gff3)
+        num = num + 1
+
+def add_tracks_to_json(trackList_json, new_tracks, modify_type):
+    """
+    Add to track configuration (trackList.json)
+    # modify_type =  'add_tracks': add a new track like bam or bigwig, new_track = dict()
+    # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict())
+    """
+    with open(trackList_json, 'r+') as f:
+        data = json.load(f)
+        if modify_type == 'add_tracks':
+            data['tracks'].append(new_tracks)
+        elif modify_type == 'add_attr':
+            for k in new_tracks:
+                for track in data['tracks']:
+                    if k.lower() in track['urlTemplate'].lower():
+                        attr = new_tracks[k]
+                        for k, v in attr.items():
+                            track[k] = v
+        f.seek(0, 0)
+        f.write(json.dumps(data, separators=(',' , ':'), indent=4))
+        f.truncate()
+        f.close()
+
+def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
+    """
+    Covert gtf file output from StringTie to gff3 format
+    """
+    gff3 = open(gff3_file, 'w')
+    gff3.write("##gff-version 3\n")
+    sizes_dict = sequence_region(chrom_sizes)
+    seq_regions = dict()
+    parents = dict()
+    with open(gtf_file, 'r') as gtf:
+        for line in gtf:
+            if line.startswith('#'):
+                continue
+            field = OrderedDict()
+            attribute = OrderedDict()
+            li = line.rstrip().split("\t")
+            #print li
+            field['seqid'] = li[0]
+            #print field['seqid']
+            if field['seqid'] not in seq_regions:
+                end_region = sizes_dict[field['seqid']]
+                gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                seq_regions[field['seqid']] = end_region
+            field['source'] = li[1]
+            field['type'] = li[2]
+                # The first base in a chromosome is numbered 0 in BED format
+            field['start'] = li[3]
+            field['end'] = li[4]
+            field['score'] = li[5]
+            field['strand'] = li[6]
+            field['phase'] = li[7]
+            attr_li = li[8].split(';')
+            gene_id = attr_li[0].split()[1].strip('"')
+            attribute['ID'] = gene_id + '_' + field['type'] + '_' + str(field['start']) + '_' + str(field['end'])
+            if field['type'] == 'transcript':
+                parents[gene_id] = attribute['ID']
+                attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
+                attribute['coverage'] = attr_li[2].split()[1].strip('"')
+                attribute['fpkm'] = attr_li[3].split()[1].strip('"')
+                attribute['tpm'] = attr_li[4].split()[1].strip('"')
+            elif field['type'] == 'exon':
+                attribute['Parent'] = parents[gene_id]
+                attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
+                attribute['coverage'] = attr_li[3].split()[1].strip('"')
+            write_features(field, attribute, gff3)
+    gff3.close()
+
+
+def sanitize_name(input_name):
+    """
+    Galaxy will name all the files and dirs as *.dat, 
+    the function can replace '.' to '_' for the dirs
+    """
+    validChars = "_-%s%s" % (string.ascii_letters, string.digits)
+    sanitized_name = ''.join([c if c in validChars else '_' for c in input_name])
+    return "gonramp_" + sanitized_name
+
+def createBamIndex(bamfile):
+    subprocess.call(['samtools', 'index', bamfile])
+    filename = bamfile + '.bai'
+    if os.path.exists(filename):
+        return filename
+    else:
+        raise ValueError('Did not find bai file')