# HG changeset patch
# User yating-l
# Date 1499458677 14400
# Node ID 7e471cdd9e71f8629654e40bd0937d1847f881bc
# Parent eda851e52060c6e384d861d87ca631f3fb1b3ef7
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
diff -r eda851e52060 -r 7e471cdd9e71 TrackHub.py
--- a/TrackHub.py Wed May 31 15:45:47 2017 -0400
+++ b/TrackHub.py Fri Jul 07 16:17:57 2017 -0400
@@ -13,7 +13,7 @@
self.input_files = inputFiles.tracks
self.outfile = outputDirect
self.outfolder = extra_files_path
- self.out_path = os.path.join(extra_files_path, genome)
+ self.out_path = os.path.join(extra_files_path, 'myHub')
self.reference = reference
self.tool_dir = tool_dir
self.metaData = metaData
@@ -153,6 +153,8 @@
metadata['category'] = "Default group"
if track['dataType'] == 'blastxml':
metadata['type'] = "G-OnRamp_plugin/BlastAlignment"
+ elif track['dataType'] == 'bigpsl':
+ metadata['type'] = "G-OnRamp_plugin/BlatAlignment"
elif track['dataType'] == 'gff3_transcript' or track['dataType'] == 'gff3_mrna':
metadata['type'] = "G-OnRamp_plugin/GenePred"
else:
diff -r eda851e52060 -r 7e471cdd9e71 TrackHub.pyc
Binary file TrackHub.pyc has changed
diff -r eda851e52060 -r 7e471cdd9e71 bedToGff3.py
--- a/bedToGff3.py Wed May 31 15:45:47 2017 -0400
+++ b/bedToGff3.py Fri Jul 07 16:17:57 2017 -0400
@@ -2,6 +2,7 @@
'''
Convert BED format to gff3
+Reference for the GFF3 specification: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
'''
import os
from collections import OrderedDict
@@ -19,6 +20,8 @@
self.trfbig_to_gff3()
if self.type == "regtools":
self.splicejunctions_to_gff3()
+ if self.type == "blat":
+ self.bigpsl_to_gff3()
def trfbig_to_gff3(self):
gff3 = open(self.output, 'w')
@@ -81,12 +84,56 @@
field['score'] = li[12]
field['strand'] = li[5]
field['phase'] = '.'
- attribute['ID'] = li[3]
+ attribute['ID'] = li[0] + '_' + li[3]
attribute['Name'] = li[3]
attribute['blockcount'] = li[9]
attribute['blocksizes'] = li[10]
attribute['chromstarts'] = li[11]
utils.write_features(field, attribute, gff3)
- utils.child_blocks(field, attribute, gff3)
+ utils.child_blocks(field, attribute, gff3, 'exon_junction')
+ gff3.close()
+
+ def bigpsl_to_gff3(self):
+ gff3 = open(self.output, 'w')
+ gff3.write("##gff-version 3\n")
+ sizes_dict = utils.sequence_region(self.chrom_sizes)
+ seq_regions = dict()
+ with open(self.input, 'r') as bed:
+ for line in bed:
+ field = OrderedDict()
+ attribute = OrderedDict()
+ li = line.rstrip().split("\t")
+ field['seqid'] = li[0]
+ if field['seqid'] not in seq_regions:
+ end_region = sizes_dict[field['seqid']]
+ gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+ seq_regions[field['seqid']] = end_region
+ field['source'] = 'UCSC BLAT alignment tool'
+ field['type'] = 'match'
+ # BED starts are 0-based while GFF3 is 1-based, so shift the start by one; the BED end is exclusive and therefore needs no shift
+ field['start'] = str(int(li[1]) + 1)
+ field['end'] = li[2]
+ field['score'] = li[4]
+ field['strand'] = li[5]
+ field['phase'] = '.'
+ attribute['ID'] = li[0] + '_' + li[3]
+ attribute['Name'] = li[3]
+ attribute['blockcount'] = li[9]
+ attribute['blocksizes'] = li[10]
+ attribute['chromstarts'] = li[11]
+ attribute['ochrom_start'] = li[12]
+ attribute['ochrom_end'] = li[13]
+ attribute['ochrom_strand'] = li[14]
+ attribute['ochrom_size'] = li[15]
+ attribute['ochrom_starts'] = li[16]
+ attribute['sequence on other chromosome'] = li[17]
+ attribute['cds in ncbi format'] = li[18]
+ attribute['size of target chromosome'] = li[19]
+ attribute['number of bases matched'] = li[20]
+ attribute['number of bases that don\'t match'] = li[21]
+ attribute['number of bases that match but are part of repeats'] = li[22]
+ attribute['number of \'N\' bases'] = li[23]
+ utils.write_features(field, attribute, gff3)
+ utils.child_blocks(field, attribute, gff3, 'match_part')
gff3.close()
\ No newline at end of file
diff -r eda851e52060 -r 7e471cdd9e71 bedToGff3.pyc
Binary file bedToGff3.pyc has changed
diff -r eda851e52060 -r 7e471cdd9e71 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Fri Jul 07 16:17:57 2017 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r eda851e52060 -r 7e471cdd9e71 jbrowse_hub.py
--- a/jbrowse_hub.py Wed May 31 15:45:47 2017 -0400
+++ b/jbrowse_hub.py Fri Jul 07 16:17:57 2017 -0400
@@ -48,6 +48,9 @@
# tblastn alignment (blastxml)
parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')
+ # BLAT alignment (bigPsl: 12 standard BED columns plus 12 extra columns)
+ parser.add_argument('--bigpsl', action='append', help='bigpsl format from blat alignment')
+
# BAM format
parser.add_argument('--bam', action='append', help='BAM format from HISAT')
@@ -114,6 +117,7 @@
array_inputs_gff3_mrna = args.gff3_mrna
array_inputs_gtf = args.gtf
array_inputs_blastxml = args.blastxml
+ array_inputs_bigpsl = args.bigpsl
if array_inputs_bam:
all_datatype_dictionary['bam'] = array_inputs_bam
@@ -135,7 +139,8 @@
all_datatype_dictionary['gtf'] = array_inputs_gtf
if array_inputs_blastxml:
all_datatype_dictionary['blastxml'] = array_inputs_blastxml
-
+ if array_inputs_bigpsl:
+ all_datatype_dictionary['bigpsl'] = array_inputs_bigpsl
print "input tracks: \n", all_datatype_dictionary
for datatype, inputfiles in all_datatype_dictionary.items():
diff -r eda851e52060 -r 7e471cdd9e71 jbrowse_hub.xml
--- a/jbrowse_hub.xml Wed May 31 15:45:47 2017 -0400
+++ b/jbrowse_hub.xml Fri Jul 07 16:17:57 2017 -0400
@@ -1,4 +1,4 @@
-
+
This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse
@@ -80,6 +80,10 @@
--blastxml $f.formatChoice.BlastXML
#silent $prepare_json($f.formatChoice.BlastXML, extra_data_dict)
#end if
+ #if $f.formatChoice.format_select == 'bigpsl'
+ --bigpsl $f.formatChoice.BigPsl
+ #silent $prepare_json($f.formatChoice.BigPsl, extra_data_dict)
+ #end if
#if $f.formatChoice.format_select == 'gtf'
--gtf $f.formatChoice.GTF
#set track_color = str($f.formatChoice.track_color)
@@ -114,8 +118,9 @@
-
-
+
+
+
@@ -179,6 +184,15 @@
/>
+
+
+
+
-
+
diff -r eda851e52060 -r 7e471cdd9e71 trackObject.py
--- a/trackObject.py Wed May 31 15:45:47 2017 -0400
+++ b/trackObject.py Fri Jul 07 16:17:57 2017 -0400
@@ -10,7 +10,7 @@
class trackObject:
def __init__(self, chrom_size, genome, extra_files_path):
self.chrom_size = chrom_size
- outputDirect = os.path.join(extra_files_path, genome)
+ outputDirect = os.path.join(extra_files_path, 'myHub')
self.raw_folder = os.path.join(outputDirect, 'raw')
#Store metadata of the tracks
self.tracks = []
@@ -56,6 +56,8 @@
bedToGff3.bedToGff3(dataFile, self.chrom_size, 'trfbig', des_path)
elif dataType == 'bedSpliceJunctions':
bedToGff3.bedToGff3(dataFile, self.chrom_size, 'regtools', des_path)
+ elif dataType == 'bigpsl':
+ bedToGff3.bedToGff3(dataFile, self.chrom_size, 'blat', des_path)
elif dataType == 'blastxml':
blastxmlToGff3.blastxml2gff3(dataFile, des_path)
elif dataType == 'gtf':
diff -r eda851e52060 -r 7e471cdd9e71 trackObject.pyc
Binary file trackObject.pyc has changed
diff -r eda851e52060 -r 7e471cdd9e71 utils.py
--- a/utils.py Wed May 31 15:45:47 2017 -0400
+++ b/utils.py Fri Jul 07 16:17:57 2017 -0400
@@ -57,18 +57,19 @@
sizes_dict[chrom_info[0]] = chrom_info[1]
return sizes_dict
-def child_blocks(parent_field, parent_attr, gff3):
+def child_blocks(parent_field, parent_attr, gff3, child_type):
num = 0
blockcount = int(parent_attr['blockcount'])
chromstart = parent_attr['chromstarts'].split(',')
blocksize = parent_attr['blocksizes'].split(',')
+ parent_start = parent_field['start']
while num < blockcount:
child_attr = OrderedDict()
child_field = parent_field
- child_field['type'] = 'exon_junction'
- child_field['start'] = int(chromstart[num]) + int(parent_field['start'])
+ child_field['type'] = child_type
+ child_field['start'] = int(chromstart[num]) + int(parent_start)
child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
- child_attr['ID'] = parent_attr['ID'] + '_exon_' + str(num+1)
+ child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1)
child_attr['Parent'] = parent_attr['ID']
write_features(child_field, child_attr, gff3)
num = num + 1
diff -r eda851e52060 -r 7e471cdd9e71 utils.pyc
Binary file utils.pyc has changed