Mercurial > repos > yating-l > jbrowsearchivecreator
comparison jbrowse_hub.py @ 0:804a93e87cc8 draft
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
author | yating-l |
---|---|
date | Wed, 12 Apr 2017 17:41:55 -0400 |
parents | |
children | 7e471cdd9e71 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:804a93e87cc8 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import argparse | |
5 import json | |
6 import utils | |
7 import trackObject | |
8 import TrackHub | |
9 | |
10 | |
11 | |
# Track options whose argparse destination name is also the datatype key
# handed to trackObject.addToRaw(). The order matches the original insertion
# order into the datatype dictionary.
_TRACK_DATATYPES = (
    'bam',
    'bed',
    'bedSimpleRepeats',
    'bedSpliceJunctions',
    'bigwig',
    'gff3',
    'gff3_transcript',
    'gff3_mrna',
    'gtf',
    'blastxml',
)


def _build_parser():
    """Build the command-line parser for the JBrowse hub creator."""
    parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')

    # Reference genome (mandatory). Not declared with required=True because
    # main() prints the help text and raises RuntimeError itself.
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')

    # Genome name
    parser.add_argument('-g', '--genome_name', help='Name of reference genome')

    # Output HTML file
    parser.add_argument('-o', '--out', help='output html')

    # Output folder
    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')

    # Tool directory
    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')

    # GFF3
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GFF3 structure: gene->transcription->CDS
    parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS')

    # GFF3 structure: gene->mRNA->CDS
    parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS')

    # generic BED
    parser.add_argument('--bed', action='append', help='BED format')

    # trfBig simple repeats (BED 4+12)
    parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as')

    # regtools (BED 12+1)
    parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as')

    # tblastn alignment (blastxml)
    parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')

    # BAM format
    parser.add_argument('--bam', action='append', help='BAM format from HISAT')

    # BIGWIG format
    parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')

    # GTF format
    parser.add_argument('--gtf', action='append', help='GTF format from StringTie')

    # Metadata json format
    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # JBrowse host
    parser.add_argument('--jbrowse_host', help="JBrowse Host")

    return parser


def main(argv):
    """Parse the command line and create a JBrowse hub from the given tracks.

    Args:
        argv: Full argument vector including the program name at index 0
            (the script passes ``sys.argv``). When ``None``, argparse falls
            back to ``sys.argv`` on its own.

    Raises:
        RuntimeError: If no reference genome (``--fasta``) was supplied.
    """
    parser = _build_parser()
    # Honour the argv parameter; previously it was ignored and argparse
    # always read sys.argv directly.
    args = parser.parse_args(argv[1:] if argv is not None else None)

    if not args.fasta:
        parser.print_help()
        raise RuntimeError("No reference genome\n")
    reference = args.fasta

    # Missing or empty options fall back to these defaults.
    genome = args.genome_name or 'unknown'
    out_path = args.out or 'unknown.html'
    extra_files_path = args.extra_files_path or '.'
    # tool_directory does not work for the Galaxy tool: all tools need to
    # exist in the current PATH (handled through tool dependencies).
    tool_directory = args.tool_directory or '.'
    jbrowse_host = args.jbrowse_host or ''

    # Calculate chromosome sizes using the reference genome and UCSC tools
    chrom_size = utils.getChromSizes(reference, tool_directory)

    # Get metadata from the JSON string given on the command line
    inputs_data = json.loads(args.data_json) if args.data_json else {}

    # Initiate trackObject
    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)

    # Keep only the datatypes that were actually supplied; argparse leaves
    # an unused 'append' option as None.
    all_datatype_dictionary = dict()
    for datatype in _TRACK_DATATYPES:
        inputfiles = getattr(args, datatype)
        if inputfiles:
            all_datatype_dictionary[datatype] = inputfiles

    # Single-argument form prints the same text under Python 2 and 3.
    print("input tracks: \n%s" % (all_datatype_dictionary,))

    # Every stored list is non-empty, so the former empty-input guard (a
    # ValueError raised inside an `except IOError` that could never match)
    # was unreachable and has been removed.
    for datatype, inputfiles in all_datatype_dictionary.items():
        for f in inputfiles:
            # Original note: "Convert tracks into gff3 format"; the
            # conversion itself lives in trackObject, not visible here.
            all_tracks.addToRaw(f, datatype)

    jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host)
    jbrowseHub.createHub()
159 | |
# TODO: unfinished helper sketch, previously parked in a no-op module-level
# string literal. Its innermost line only evaluates
# metadata_dict[input_false_path] without assigning or returning anything,
# so it must be completed before it can be enabled.
#
# def extractMetadata(array_inputs, inputs_data):
#     metadata_dict = {}
#     for input_false_path in array_inputs:
#         for key, data_value in inputs_data.items():
#             if key == input_false_path:
#                 metadata_dict[input_false_path]

# Script entry point: hand the full argument vector to main().
if __name__ == "__main__":
    main(sys.argv)
171 |