changeset 4:7e471cdd9e71 draft

planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
author yating-l
date Fri, 07 Jul 2017 16:17:57 -0400
parents eda851e52060
children e762f4b9e4bd
files TrackHub.py TrackHub.pyc bedToGff3.py bedToGff3.pyc datatypes_conf.xml jbrowse_hub.py jbrowse_hub.xml trackObject.py trackObject.pyc utils.py utils.pyc
diffstat 11 files changed, 92 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/TrackHub.py	Wed May 31 15:45:47 2017 -0400
+++ b/TrackHub.py	Fri Jul 07 16:17:57 2017 -0400
@@ -13,7 +13,7 @@
         self.input_files = inputFiles.tracks
         self.outfile = outputDirect
         self.outfolder = extra_files_path
-        self.out_path = os.path.join(extra_files_path, genome)
+        self.out_path = os.path.join(extra_files_path, 'myHub')
         self.reference = reference
         self.tool_dir = tool_dir
         self.metaData = metaData
@@ -153,6 +153,8 @@
             metadata['category'] = "Default group"
         if track['dataType'] == 'blastxml':
             metadata['type'] = "G-OnRamp_plugin/BlastAlignment"
+        elif track['dataType'] == 'bigpsl':
+            metadata['type'] = "G-OnRamp_plugin/BlatAlignment"
         elif track['dataType'] == 'gff3_transcript' or track['dataType'] == 'gff3_mrna':
             metadata['type'] = "G-OnRamp_plugin/GenePred"
         else:
Binary file TrackHub.pyc has changed
--- a/bedToGff3.py	Wed May 31 15:45:47 2017 -0400
+++ b/bedToGff3.py	Fri Jul 07 16:17:57 2017 -0400
@@ -2,6 +2,7 @@
 
 '''
 Convert BED format to gff3
+reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
 '''
 import os
 from collections import OrderedDict
@@ -19,6 +20,8 @@
             self.trfbig_to_gff3()
         if self.type == "regtools":
             self.splicejunctions_to_gff3()
+        if self.type == "blat":
+            self.bigpsl_to_gff3()
 
     def trfbig_to_gff3(self):
         gff3 = open(self.output, 'w')
@@ -81,12 +84,56 @@
                 field['score'] = li[12]
                 field['strand'] = li[5]
                 field['phase'] = '.'
-                attribute['ID'] = li[3]
+                attribute['ID'] = li[0] + '_' + li[3]
                 attribute['Name'] = li[3]
                 attribute['blockcount'] = li[9]
                 attribute['blocksizes'] = li[10]
                 attribute['chromstarts'] = li[11]
                 utils.write_features(field, attribute, gff3)
-                utils.child_blocks(field, attribute, gff3)
+                utils.child_blocks(field, attribute, gff3, 'exon_junction')
+        gff3.close()
+
+    def bigpsl_to_gff3(self):
+        gff3 = open(self.output, 'w')
+        gff3.write("##gff-version 3\n")
+        sizes_dict = utils.sequence_region(self.chrom_sizes)
+        seq_regions = dict()
+        with open(self.input, 'r') as bed:
+            for line in bed:
+                field = OrderedDict()
+                attribute = OrderedDict()
+                li = line.rstrip().split("\t")
+                field['seqid'] = li[0]
+                if field['seqid'] not in seq_regions:
+                    end_region = sizes_dict[field['seqid']]
+                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
+                    seq_regions[field['seqid']] = end_region
+                field['source'] = 'UCSC BLAT alignment tool'
+                field['type'] = 'match'
+                # The first base in a chromosome is numbered 0 in BED format
+                field['start'] = str(int(li[1]) + 1)
+                field['end'] = li[2]
+                field['score'] = li[4]
+                field['strand'] = li[5]
+                field['phase'] = '.'
+                attribute['ID'] = li[0] + '_' + li[3]
+                attribute['Name'] = li[3]
+                attribute['blockcount'] = li[9]
+                attribute['blocksizes'] = li[10]
+                attribute['chromstarts'] = li[11]
+                attribute['ochrom_start'] = li[12]
+                attribute['ochrom_end'] = li[13]
+                attribute['ochrom_strand'] = li[14]
+                attribute['ochrom_size'] = li[15]
+                attribute['ochrom_starts'] = li[16]
+                attribute['sequence on other chromosome'] = li[17]
+                attribute['cds in ncbi format'] = li[18]
+                attribute['size of target chromosome'] = li[19]
+                attribute['number of bases matched'] = li[20]
+                attribute['number of bases that don\'t match'] = li[21]
+                attribute['number of bases that match but are part of repeats'] = li[22]
+                attribute['number of \'N\' bases'] = li[23]
+                utils.write_features(field, attribute, gff3)
+                utils.child_blocks(field, attribute, gff3, 'match_part')
         gff3.close()
         
\ No newline at end of file
Binary file bedToGff3.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Fri Jul 07 16:17:57 2017 -0400
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<datatypes>
+  <registration converters_path="lib/galaxy/datatypes/converters" display_path="display_applications">
+    <datatype extension="jbrowsehub" type="galaxy.datatypes.tracks:UCSCTrackHub" display_in_upload="true">
+        <display file="jbrowse/jbrowsehub.xml" />
+    </datatype>
+  </registration>
+</datatypes>
\ No newline at end of file
--- a/jbrowse_hub.py	Wed May 31 15:45:47 2017 -0400
+++ b/jbrowse_hub.py	Fri Jul 07 16:17:57 2017 -0400
@@ -48,6 +48,9 @@
     # tblastn alignment (blastxml)
     parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')
 
+    # blat alignment (bigpsl 12+12)
+    parser.add_argument('--bigpsl', action='append', help='bigpsl format from blat alignment')
+
     # BAM format
     parser.add_argument('--bam', action='append', help='BAM format from HISAT')
 
@@ -114,6 +117,7 @@
     array_inputs_gff3_mrna = args.gff3_mrna
     array_inputs_gtf = args.gtf
     array_inputs_blastxml = args.blastxml
+    array_inputs_bigpsl = args.bigpsl
 
     if array_inputs_bam:
         all_datatype_dictionary['bam'] = array_inputs_bam
@@ -135,7 +139,8 @@
         all_datatype_dictionary['gtf'] = array_inputs_gtf
     if array_inputs_blastxml:
         all_datatype_dictionary['blastxml'] = array_inputs_blastxml
-    
+    if array_inputs_bigpsl:
+        all_datatype_dictionary['bigpsl'] =  array_inputs_bigpsl    
     print "input tracks: \n", all_datatype_dictionary
 
     for datatype, inputfiles in all_datatype_dictionary.items():
--- a/jbrowse_hub.xml	Wed May 31 15:45:47 2017 -0400
+++ b/jbrowse_hub.xml	Fri Jul 07 16:17:57 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="1.0">
+<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="1.0.0">
     <description>
         This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse
     </description>
@@ -80,6 +80,10 @@
                 --blastxml $f.formatChoice.BlastXML
                 #silent $prepare_json($f.formatChoice.BlastXML, extra_data_dict)
             #end if
+            #if $f.formatChoice.format_select == 'bigpsl'
+                --bigpsl $f.formatChoice.BigPsl
+                #silent $prepare_json($f.formatChoice.BigPsl, extra_data_dict)
+            #end if
             #if $f.formatChoice.format_select == 'gtf'
                 --gtf $f.formatChoice.GTF
                 #set track_color = str($f.formatChoice.track_color)
@@ -114,8 +118,9 @@
                     <param name="format_select" type="select" label="Format">
                         <option value="bam" selected="true">BAM</option>
                         <option value="bed">BED</option>
-                        <option value="blastxml">BLASTXML</option>
-                        <option value="bigwig">BIGWIG</option>
+                        <option value="blastxml">BlastXML</option>
+                        <option value="bigpsl">BigPsl</option>
+                        <option value="bigwig">BigWig</option>
                         <option value="gff3">GFF3</option>
                         <option value="gtf">GTF</option>
                     </param>
@@ -179,6 +184,15 @@
                         />
                         <param name="label" type="text" size="30" value="Blast Alignment" label="Track name" />
                     </when>
+                    <when value="bigpsl">
+                        <param
+                                format="bigpsl"
+                                name="BigPsl"
+                                type="data"
+                                label="Blat Alignments File"
+                        />
+                        <param name="label" type="text" size="30" value="Blat Alignment" label="Track name" />
+                    </when>
                     <when value="bigwig">
                         <param
                                 format="bigwig"
@@ -265,7 +279,7 @@
     </inputs>
 
     <outputs>
-        <data format="html" name="output" label="${tool.name}" />
+        <data format="jbrowsehub" name="output" label="${tool.name}" />
     </outputs>
     <tests>
         <test>
--- a/trackObject.py	Wed May 31 15:45:47 2017 -0400
+++ b/trackObject.py	Fri Jul 07 16:17:57 2017 -0400
@@ -10,7 +10,7 @@
 class trackObject:
     def __init__(self, chrom_size, genome, extra_files_path):
         self.chrom_size = chrom_size
-        outputDirect = os.path.join(extra_files_path, genome)
+        outputDirect = os.path.join(extra_files_path, 'myHub')
         self.raw_folder = os.path.join(outputDirect, 'raw')
         #Store metadata of the tracks
         self.tracks = []
@@ -56,6 +56,8 @@
             bedToGff3.bedToGff3(dataFile, self.chrom_size, 'trfbig', des_path)
         elif dataType == 'bedSpliceJunctions':
             bedToGff3.bedToGff3(dataFile, self.chrom_size, 'regtools', des_path)
+        elif dataType == 'bigpsl':
+            bedToGff3.bedToGff3(dataFile, self.chrom_size, 'blat', des_path)
         elif dataType == 'blastxml':
             blastxmlToGff3.blastxml2gff3(dataFile, des_path)
         elif dataType == 'gtf':
Binary file trackObject.pyc has changed
--- a/utils.py	Wed May 31 15:45:47 2017 -0400
+++ b/utils.py	Fri Jul 07 16:17:57 2017 -0400
@@ -57,18 +57,19 @@
         sizes_dict[chrom_info[0]] = chrom_info[1]
     return sizes_dict
 
-def child_blocks(parent_field, parent_attr, gff3):
+def child_blocks(parent_field, parent_attr, gff3, child_type):
     num = 0
     blockcount = int(parent_attr['blockcount'])
     chromstart = parent_attr['chromstarts'].split(',')
     blocksize = parent_attr['blocksizes'].split(',')
+    parent_start = parent_field['start']
     while num < blockcount:
         child_attr = OrderedDict()
         child_field = parent_field
-        child_field['type'] = 'exon_junction'
-        child_field['start'] = int(chromstart[num]) + int(parent_field['start'])
+        child_field['type'] = child_type
+        child_field['start'] = int(chromstart[num]) + int(parent_start)
         child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
-        child_attr['ID'] = parent_attr['ID'] + '_exon_' + str(num+1)
+        child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1)
         child_attr['Parent'] = parent_attr['ID']
         write_features(child_field, child_attr, gff3)
         num = num + 1
Binary file utils.pyc has changed