# HG changeset patch
# User jjohnson
# Date 1394453341 14400
# Node ID 77151afcd323f573e6fc6dc94b5a7900c9b89bfb
Uploaded
diff -r 000000000000 -r 77151afcd323 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Mon Mar 10 08:09:01 2014 -0400
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 77151afcd323 rsem.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem.py Mon Mar 10 08:09:01 2014 -0400
@@ -0,0 +1,128 @@
+"""
+RSEM datatypes
+"""
+import os,os.path,re,sys
+from galaxy.datatypes.data import get_file_peek
+from galaxy.datatypes.images import Html
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.sniff import get_headers
+
+import logging
+
+log = logging.getLogger(__name__)
+
+class RsemIsoformsResults( Tabular ):
+    """RSEM isoform-level results: a tabular file whose first line names the columns.
+
+    required columns: transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct
+    optional columns: pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
+    """
+    file_ext = "rsem.isoforms.results"
+    def __init__(self, **kwd):
+        """Initialize the datatype; the column-header row counts as one comment line."""
+        Tabular.__init__( self, **kwd )
+        self.comment_lines = 1
+    def sniff( self, filename ):
+        """Return True when the first line of filename has the expected tab-separated column names."""
+        headers = get_headers( filename, '\t', count=1 )  # split on tabs so each column name is its own field
+        return len(headers) > 0 and len(headers[0]) >= 8 and headers[0][0] == "transcript_id" and headers[0][1] == "gene_id" and headers[0][6] == "FPKM"
+    def set_meta( self, dataset, **kwd ):
+        Tabular.set_meta( self, dataset, skip=None, **kwd )
+
+class RsemGenesResults( Tabular ):
+    """RSEM gene-level results: a tabular file whose first line names the columns.
+
+    required columns: gene_id transcript_id(s) length effective_length expected_count TPM FPKM
+    optional columns: pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
+    """
+    file_ext = "rsem.genes.results"
+    def __init__(self, **kwd):
+        """Initialize the datatype; the column-header row counts as one comment line."""
+        Tabular.__init__( self, **kwd )
+        self.comment_lines = 1
+    def sniff( self, filename ):
+        """Return True when the first line of filename has the expected tab-separated column names."""
+        headers = get_headers( filename, '\t', count=1 )  # split on tabs so each column name is its own field
+        return len(headers) > 0 and len(headers[0]) >= 7 and headers[0][0] == "gene_id" and headers[0][1].startswith("transcript_id") and headers[0][6] == "FPKM"
+    def set_meta( self, dataset, **kwd ):
+        Tabular.set_meta( self, dataset, skip=None, **kwd )
+
+class RsemReference( Html ):
+    """Composite datatype describing an RSEM reference.
+
+    The primary file is an HTML index of the files in extra_files_path; each
+    component file is named <reference_name><suffix>.
+    Expecting files:
+        .grp  .ti  .seq  .transcripts.fa
+    Optionally includes files:
+        .chrlist  .idx.fa  .1.ebwt  .2.ebwt  .3.ebwt  .4.ebwt  .rev.1.ebwt  .rev.2.ebwt
+    """
+    MetadataElement( name='reference_name', default='rsem_ref' , desc='RSEM Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='rsem_ref' )
+    file_ext = 'rsem_ref'
+    allow_datatype_change = False
+    composite_type = 'auto_primary_file'
+
+    def __init__( self, **kwd ):
+        """Register the required and optional component files of the composite dataset."""
+        Html.__init__(self, **kwd)
+        self.add_composite_file( '%s.grp', description = 'Group File', substitute_name_with_metadata = 'reference_name', is_binary = False )
+        self.add_composite_file( '%s.ti', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
+        self.add_composite_file( '%s.seq', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
+        self.add_composite_file( '%s.transcripts.fa', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
+        self.add_composite_file( '%s.chrlist', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False, optional=True )
+        self.add_composite_file( '%s.idx.fa', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False, optional=True )
+        self.add_composite_file( '%s.1.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
+        self.add_composite_file( '%s.2.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
+        self.add_composite_file( '%s.3.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
+        self.add_composite_file( '%s.4.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
+        self.add_composite_file( '%s.rev.1.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
+        self.add_composite_file( '%s.rev.2.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
+
+    def generate_primary_file( self, dataset = None ):
+        """
+        This is called only at upload to write the file
+        cannot rename the datasets here - they come with the default unfortunately
+        Nothing is written here (returns None); regenerate_primary_file() builds the page.
+        """
+
+    def regenerate_primary_file(self,dataset):
+        """
+        Rewrite the primary file as an HTML index of the files in extra_files_path.
+
+        Cannot do this until we are setting metadata: the page heading uses
+        dataset.metadata.reference_name. Files whose extension is in link_to_exts
+        are listed first as links; the remaining files follow as plain entries.
+        """
+        link_to_exts = ['.grp','.ti','.seq','.fa','.chrlist','.log']
+        ref_name = dataset.metadata.reference_name
+        linked = []
+        unlinked = []
+        # sorted() keeps the listing stable; os.listdir() returns names in arbitrary order
+        for fname in sorted(os.listdir(dataset.extra_files_path)):
+            if os.path.splitext(fname)[1] in link_to_exts:
+                linked.append( '<li><a href="%s">%s</a></li>' % ( fname, fname ) )
+            else:
+                unlinked.append( '<li>%s</li>' % fname )
+        # NOTE(review): markup reconstructed from a tag-stripped copy of this patch - confirm against upstream
+        rval = ['<html><head><title>%s</title></head><body><p/>RSEM Reference %s files:<p/><ul>' % (dataset.name,ref_name)]
+        rval += linked + unlinked
+        rval.append( '</ul></body></html>' )
+        with open(dataset.file_name,'w') as fh:
+            fh.write("\n".join( rval ))
+            fh.write('\n')
+
+    def set_meta( self, dataset, **kwd ):
+        """
+        Set metadata, then rebuild the primary HTML file.
+
+        reference_name is taken from the first .grp file (in sorted order) found in
+        extra_files_path, minus the '.grp' suffix; it is left untouched when there is none.
+        """
+        Html.set_meta( self, dataset, **kwd )
+        for fname in sorted(os.listdir(dataset.extra_files_path)):
+            if fname.endswith('.grp'):
+                dataset.metadata.reference_name = fname[:-4]
+                break
+        self.regenerate_primary_file(dataset)
+