comparison jbrowse_hub.py @ 0:804a93e87cc8 draft

planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
author yating-l
date Wed, 12 Apr 2017 17:41:55 -0400
parents
children 7e471cdd9e71
comparison
equal deleted inserted replaced
-1:000000000000 0:804a93e87cc8
1 #!/usr/bin/env python
2
3 import sys
4 import argparse
5 import json
6 import utils
7 import trackObject
8 import TrackHub
9
10
11
def main(argv):
    """Build a JBrowse hub from a reference genome and optional track files.

    Parses the command line, computes chromosome sizes for the reference,
    registers every supplied track file with a ``trackObject`` and finally
    asks ``TrackHub`` to create the hub.

    Args:
        argv: Full command line with the program name first (normally
            ``sys.argv``). Options are read from ``argv[1:]``. ``None``
            makes argparse fall back to ``sys.argv[1:]``.

    Raises:
        RuntimeError: If no reference genome (``-f``/``--fasta``) is given.
    """
    parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')

    # Genome name
    parser.add_argument('-g', '--genome_name', help='Name of reference genome')

    # Output HTML file
    parser.add_argument('-o', '--out', help='output html')

    # Output folder
    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')

    # Tool directory
    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')

    # Track options: each one may be repeated, argparse collects the values
    # into a list (or leaves the attribute as None when the option is absent).

    # GFF3
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GFF3 structure: gene->transcription->CDS
    parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS')

    # GFF3 structure: gene->mRNA->CDS
    parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS')

    # generic BED
    parser.add_argument('--bed', action='append', help='BED format')

    # trfBig simple repeats (BED 4+12)
    parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as')

    # regtools (BED 12+1)
    parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as')

    # tblastn alignment (blastxml)
    parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')

    # BAM format
    parser.add_argument('--bam', action='append', help='BAM format from HISAT')

    # BIGWIG format
    parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')

    # GTF format
    parser.add_argument('--gtf', action='append', help='GTF format from StringTie')

    # Metadata in JSON format (the option value itself is the JSON text)
    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # JBrowse host
    parser.add_argument('--jbrowse_host', help="JBrowse Host")

    # Honour the argv handed to main() instead of silently re-reading the
    # global sys.argv; argv[0] is the program name and is skipped.
    args = parser.parse_args(argv[1:] if argv is not None else None)

    if not args.fasta:
        parser.print_help()
        raise RuntimeError("No reference genome\n")
    reference = args.fasta

    # Fall back to the defaults when an option is missing or empty.
    genome = args.genome_name or 'unknown'
    out_path = args.out or 'unknown.html'
    extra_files_path = args.extra_files_path or '.'
    jbrowse_host = args.jbrowse_host or ''

    # NOTE (original author): tool_directory does not work for the Galaxy
    # tool; all tools need to exist in the current PATH, which is handled
    # through tool dependencies.
    tool_directory = args.tool_directory or '.'

    # Calculate chromosome sizes using the genome reference and UCSC tools.
    # Only the ``name`` attribute of the returned object is used below
    # (presumably a file-like object -- confirm in utils.getChromSizes).
    chrom_size = utils.getChromSizes(reference, tool_directory)

    # Get the inputs' metadata from the JSON text, if any was supplied.
    inputs_data = json.loads(args.data_json) if args.data_json else {}

    # Initiate the track collection.
    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)

    # Datatype keys are identical to the argparse destination names. The
    # tuple order is the order in which tracks are registered.
    track_datatypes = (
        'bam',
        'bed',
        'bedSimpleRepeats',
        'bedSpliceJunctions',
        'bigwig',
        'gff3',
        'gff3_transcript',
        'gff3_mrna',
        'gtf',
        'blastxml',
    )

    # Keep only the datatypes that were actually supplied, so every stored
    # list is non-empty and needs no further emptiness check.
    all_datatype_dictionary = dict()
    for datatype in track_datatypes:
        inputfiles = getattr(args, datatype)
        if inputfiles:
            all_datatype_dictionary[datatype] = inputfiles

    # Single-argument form prints the same text on Python 2 and Python 3.
    print("input tracks: \n%s" % (all_datatype_dictionary,))

    # Hand every track file to the track collection with its datatype.
    for datatype, inputfiles in all_datatype_dictionary.items():
        for f in inputfiles:
            all_tracks.addToRaw(f, datatype)

    jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host)
    jbrowseHub.createHub()
159
160 """
161 def extractMetadata(array_inputs, inputs_data):
162 metadata_dict = {}
163 for input_false_path in array_inputs:
164 for key, data_value in inputs_data.items():
165 if key == input_false_path:
166 metadata_dict[input_false_path]
167 """
168
# Script entry point: forward the raw command line to main() only when this
# file is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main(sys.argv)
171