Mercurial > repos > yating-l > jbrowsearchivecreator
view jbrowse_hub.py @ 3:eda851e52060 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 479fc6490e24ca0e5034ae6c3579882e97e095e6-dirty
author | yating-l |
---|---|
date | Wed, 31 May 2017 15:45:47 -0400 |
parents | 804a93e87cc8 |
children | 7e471cdd9e71 |
line wrap: on
line source
#!/usr/bin/env python
"""Command-line entry point that assembles a JBrowse hub.

Parses the reference genome and track-file options, registers every track
file with a ``trackObject`` and asks ``TrackHub`` to create the hub.
"""

import sys
import argparse
import json

import utils
import trackObject
import TrackHub

# Track options in the order they are collected.  Each name is both the
# argparse destination and the datatype key passed to trackObject.addToRaw.
_TRACK_DATATYPES = (
    'bam',
    'bed',
    'bedSimpleRepeats',
    'bedSpliceJunctions',
    'bigwig',
    'gff3',
    'gff3_transcript',
    'gff3_mrna',
    'gtf',
    'blastxml',
)


def _build_parser():
    """Return the argparse parser describing every option of this script."""
    parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')

    # Reference genome (mandatory; enforced in main() rather than by argparse)
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')

    # Genome name
    parser.add_argument('-g', '--genome_name', help='Name of reference genome')

    # Output HTML file
    parser.add_argument('-o', '--out', help='output html')

    # Output folder
    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')

    # Tool directory
    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')

    # GFF3
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GFF3 structure: gene->transcription->CDS
    parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS')

    # GFF3 structure: gene->mRNA->CDS
    parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS')

    # Generic BED
    parser.add_argument('--bed', action='append', help='BED format')

    # trfBig simple repeats (BED 4+12)
    parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as')

    # regtools (BED 12+1)
    parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as')

    # tblastn alignment (blastxml)
    parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')

    # BAM format
    parser.add_argument('--bam', action='append', help='BAM format from HISAT')

    # BIGWIG format
    parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')

    # GTF format
    parser.add_argument('--gtf', action='append', help='GTF format from StringTie')

    # Metadata in JSON format
    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # JBrowse host
    parser.add_argument('--jbrowse_host', help="JBrowse Host")

    return parser


def _collect_tracks(args):
    """Map each datatype that received at least one file to its list of files.

    Datatypes whose option was never given (argparse leaves them as None)
    are omitted, so every value in the returned dict is a non-empty list.
    """
    tracks = dict()
    for datatype in _TRACK_DATATYPES:
        files = getattr(args, datatype)
        if files:
            tracks[datatype] = files
    return tracks


def main(argv):
    """Build a JBrowse hub from command-line options.

    Args:
        argv: full argument vector including the program name (as in
            ``sys.argv``).  ``None`` falls back to ``sys.argv``.

    Raises:
        RuntimeError: if no reference genome (``--fasta``) was given; the
            usage text is printed first.
    """
    parser = _build_parser()
    # Honour the argv handed to main() instead of silently re-reading sys.argv.
    args = parser.parse_args(argv[1:] if argv is not None else None)

    if not args.fasta:
        parser.print_help()
        raise RuntimeError("No reference genome\n")

    reference = args.fasta
    # An option that is missing *or* empty falls back to its default.
    genome = args.genome_name or 'unknown'
    out_path = args.out or 'unknown.html'
    extra_files_path = args.extra_files_path or '.'
    # tool_directory does not work for the Galaxy tool: all tools need to be
    # on the current PATH, which is handled through tool dependencies.
    tool_directory = args.tool_directory or '.'
    jbrowse_host = args.jbrowse_host or ''

    # Calculate chromosome sizes using the genome reference and UCSC tools
    chrom_size = utils.getChromSizes(reference, tool_directory)

    # Metadata of the inputs, given as a JSON document on the command line
    inputs_data = json.loads(args.data_json) if args.data_json else {}

    # Initiate trackObject (chrom_size is presumably a file-like object;
    # only its .name is used here -- confirm against utils.getChromSizes)
    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)

    all_datatype_dictionary = _collect_tracks(args)
    # Single parenthesised argument: emits the same bytes under Python 2's
    # print statement and Python 3's print function.
    print("input tracks: \n%s" % (all_datatype_dictionary,))

    # Register every input file under its datatype.  Per-file metadata is not
    # looked up here; inputs_data is passed to TrackHub as a whole.
    for datatype, inputfiles in all_datatype_dictionary.items():
        for f in inputfiles:
            all_tracks.addToRaw(f, datatype)

    jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host)
    jbrowseHub.createHub()


if __name__ == "__main__":
    main(sys.argv)