Mercurial > repos > yating-l > jbrowsearchivecreator
diff jbrowse_hub.py @ 0:804a93e87cc8 draft
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
author | yating-l |
---|---|
date | Wed, 12 Apr 2017 17:41:55 -0400 |
parents | |
children | 7e471cdd9e71 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jbrowse_hub.py Wed Apr 12 17:41:55 2017 -0400 @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +import sys +import argparse +import json +import utils +import trackObject +import TrackHub + + + +def main(argv): + parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.') + + # Reference genome mandatory + parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)') + + # Genome name + parser.add_argument('-g', '--genome_name', help='Name of reference genome') + + # Output folder + parser.add_argument('-o', '--out', help='output html') + + # Output folder + parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder') + + #Tool Directory + parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools') + + #GFF3 + parser.add_argument('--gff3', action='append', help='GFF3 format') + + # GFF3 structure: gene->transcription->CDS + parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS') + + # GFF3 structure: gene->mRNA->CDS + parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS') + + # generic BED + parser.add_argument('--bed', action='append', help='BED format') + + # trfBig simple repeats (BED 4+12) + parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as') + + # regtools (BED 12+1) + parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as') + + # tblastn alignment (blastxml) + parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn') + + # BAM format + parser.add_argument('--bam', action='append', help='BAM format from HISAT') + + # BIGWIG format + parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage') + + # GTF format + parser.add_argument('--gtf', action='append', help='GTF format from StringTie') + + # Metadata json format + parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') + + #JBrowse host + parser.add_argument('--jbrowse_host', help="JBrowse Host") + + args = parser.parse_args() + all_datatype_dictionary = dict() + + + if not args.fasta: + parser.print_help() + raise RuntimeError("No reference genome\n") + reference = args.fasta + genome = 'unknown' + out_path = 'unknown.html' + extra_files_path = '.' + tool_directory = '.' + jbrowse_host = '' + if args.jbrowse_host: + jbrowse_host = args.jbrowse_host + if args.genome_name: + genome = args.genome_name + if args.out: + out_path = args.out + if args.extra_files_path: + extra_files_path = args.extra_files_path + + #tool_directory not work for Galaxy tool, all tools need to exist in the current PATH, deal with it with tool dependencies + if args.tool_directory: + tool_directory = args.tool_directory + + #Calculate chromsome sizes using genome reference and uscs tools + chrom_size = utils.getChromSizes(reference, tool_directory) + + #get metadata from json file + json_inputs_data = args.data_json + if json_inputs_data: + inputs_data = json.loads(json_inputs_data) + else: + inputs_data = {} + + #print inputs_data + + #Initate trackObject + all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) + + array_inputs_bam = args.bam + array_inputs_bed = args.bed + array_inputs_bed_simple_repeats = args.bedSimpleRepeats + array_inputs_bed_splice_junctions = args.bedSpliceJunctions + array_inputs_bigwig = args.bigwig + array_inputs_gff3 = args.gff3 + array_inputs_gff3_transcript = args.gff3_transcript + array_inputs_gff3_mrna = args.gff3_mrna + array_inputs_gtf = args.gtf + array_inputs_blastxml = args.blastxml + + if array_inputs_bam: + all_datatype_dictionary['bam'] = array_inputs_bam + if array_inputs_bed: + all_datatype_dictionary['bed'] = array_inputs_bed + if array_inputs_bed_simple_repeats: + all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats + if array_inputs_bed_splice_junctions: + all_datatype_dictionary['bedSpliceJunctions'] = array_inputs_bed_splice_junctions + if array_inputs_bigwig: + all_datatype_dictionary['bigwig'] = array_inputs_bigwig + if array_inputs_gff3: + all_datatype_dictionary['gff3'] = array_inputs_gff3 + if array_inputs_gff3_transcript: + all_datatype_dictionary['gff3_transcript'] = array_inputs_gff3_transcript + if array_inputs_gff3_mrna: + all_datatype_dictionary['gff3_mrna'] = array_inputs_gff3_mrna + if array_inputs_gtf: + all_datatype_dictionary['gtf'] = array_inputs_gtf + if array_inputs_blastxml: + all_datatype_dictionary['blastxml'] = array_inputs_blastxml + + print "input tracks: \n", all_datatype_dictionary + + for datatype, inputfiles in all_datatype_dictionary.items(): + try: + if not inputfiles: + raise ValueError('empty input, must provide track files!\n') + except IOError: + print 'Cannot open', datatype + else: + for f in inputfiles: + #metadata = {} + #print f + #if f in inputs_data.keys(): + # metadata = inputs_data[f] + #print metadata + #Convert tracks into gff3 format + all_tracks.addToRaw(f, datatype) + + jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host) + jbrowseHub.createHub() + +""" +def extractMetadata(array_inputs, inputs_data): + metadata_dict = {} + for input_false_path in array_inputs: + for key, data_value in inputs_data.items(): + if key == input_false_path: + metadata_dict[input_false_path] +""" + +if __name__ == "__main__": + main(sys.argv) +