# HG changeset patch
# User jjohnson
# Date 1394453341 14400
# Node ID 77151afcd323f573e6fc6dc94b5a7900c9b89bfb
# Uploaded
#
# diff -r 000000000000 -r 77151afcd323 datatypes_conf.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/datatypes_conf.xml Mon Mar 10 08:09:01 2014 -0400
# @@ -0,0 +1,14 @@
# (14 added lines; their content is not present in this copy)
#
# diff -r 000000000000 -r 77151afcd323 rsem.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/rsem.py Mon Mar 10 08:09:01 2014 -0400
# @@ -0,0 +1,128 @@
"""
RSEM datatypes
"""
import logging
import os
import os.path
import re
import sys

from galaxy.datatypes.data import get_file_peek
from galaxy.datatypes.images import Html
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.sniff import get_headers
from galaxy.datatypes.tabular import Tabular

log = logging.getLogger(__name__)


class RsemIsoformsResults(Tabular):
    """
    Tabular datatype for RSEM isoform-level results.

    required columns:
    transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct
    optional columns:
    pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.isoforms.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # presumably accounts for the single column-name header row -- confirm
        self.comment_lines = 1

    def sniff(self, filename):
        """
        Return True when the first line of `filename` has at least 8 columns
        with "transcript_id", "gene_id" and "FPKM" in columns 1, 2 and 7.
        """
        # Split on tabs: splitting a single line on '\n' yields one field,
        # which can never satisfy the column-count check below.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        columns = headers[0]
        return (len(columns) >= 8
                and columns[0] == "transcript_id"
                and columns[1] == "gene_id"
                and columns[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Set tabular metadata for `dataset`, always passing skip=None."""
        # Drop a caller-supplied 'skip' so the explicit skip=None below
        # cannot raise "got multiple values for keyword argument".
        kwd.pop('skip', None)
        Tabular.set_meta(self, dataset, skip=None, **kwd)


class RsemGenesResults(Tabular):
    """
    Tabular datatype for RSEM gene-level results.

    required columns:
    gene_id transcript_id(s) length effective_length expected_count TPM FPKM
    optional columns:
    pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.genes.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # presumably accounts for the single column-name header row -- confirm
        self.comment_lines = 1

    def sniff(self, filename):
        """
        Return True when the first line of `filename` has at least 7 columns
        with "gene_id" first, a second column starting with "transcript_id",
        and "FPKM" in column 7.
        """
        # Split on tabs: splitting a single line on '\n' yields one field,
        # which can never satisfy the column-count check below.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        columns = headers[0]
        return (len(columns) >= 7
                and columns[0] == "gene_id"
                and columns[1].startswith("transcript_id")
                and columns[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Set tabular metadata for `dataset`, always passing skip=None."""
        # Drop a caller-supplied 'skip' so the explicit skip=None below
        # cannot raise "got multiple values for keyword argument".
        kwd.pop('skip', None)
        Tabular.set_meta(self, dataset, skip=None, **kwd)


class RsemReference(Html):
    """Class describing an RSEM reference"""
    MetadataElement(name='reference_name', default='rsem_ref', desc='RSEM Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='rsem_ref')
    file_ext = 'rsem_ref'
    allow_datatype_change = False
    composite_type = 'auto_primary_file'

    # (name template, description, is_binary, optional), in registration order.
    _COMPOSITE_FILES = (
        ('%s.grp', 'Group File', False, False),
        ('%s.ti', '', False, False),
        ('%s.seq', '', False, False),
        ('%s.transcripts.fa', '', False, False),
        ('%s.chrlist', '', False, True),
        ('%s.idx.fa', '', False, True),
        ('%s.1.ebwt', '', True, True),
        ('%s.2.ebwt', '', True, True),
        ('%s.3.ebwt', '', True, True),
        ('%s.4.ebwt', '', True, True),
        ('%s.rev.1.ebwt', '', True, True),
        ('%s.rev.2.ebwt', '', True, True),
    )

    def __init__(self, **kwd):
        """
        Register the composite files of an RSEM reference.

        Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
        """
        Html.__init__(self, **kwd)
        for name, description, is_binary, optional in self._COMPOSITE_FILES:
            options = dict(description=description,
                           substitute_name_with_metadata='reference_name',
                           is_binary=is_binary)
            # Only pass `optional` where the original calls did.
            if optional:
                options['optional'] = True
            self.add_composite_file(name, **options)

    def generate_primary_file(self, dataset=None):
        """
        This is called only at upload to write the file
        cannot rename the datasets here - they come with the default unfortunately
        """

    def regenerate_primary_file(self, dataset):
        """
        Rewrite the primary file as a listing of the files found in
        dataset.extra_files_path.

        Files whose extension is in `link_to_exts` are listed first, then all
        other files. Cannot do this until we are setting metadata.
        """
        link_to_exts = ('.grp', '.ti', '.seq', '.fa', '.chrlist', '.log')
        ref_name = dataset.metadata.reference_name

        linked = []
        unlinked = []
        # Sorted so the generated listing does not depend on directory order.
        for fname in sorted(os.listdir(dataset.extra_files_path)):
            sfname = os.path.split(fname)[-1]
            ext = os.path.splitext(fname)[1]
            if ext in link_to_exts:
                linked.append('<li><a href="%s">%s</a></li>' % (sfname, sfname))
            else:
                unlinked.append('<li>%s</li>' % (sfname,))

        # NOTE(review): the copy of this changeset under review has the markup
        # stripped from these literals (only the text and %s placeholders
        # survive); the tags here are reconstructed -- confirm against the
        # upstream changeset.
        lines = ['<html><head><title>%s</title></head><body><p/>RSEM Reference %s files:<p/><ul>' % (dataset.name, ref_name)]
        lines.extend(linked)
        lines.extend(unlinked)
        lines.append('</ul></body></html>')

        # `open` in a with-block: closes the handle even if a write fails,
        # and unlike the `file` builtin it is not limited to Python 2.
        with open(dataset.file_name, 'w') as fh:
            fh.write("\n".join(lines))
            fh.write('\n')

    def set_meta(self, dataset, **kwd):
        """
        Set metadata, take reference_name from the first `.grp` file found in
        dataset.extra_files_path, then regenerate the primary file.
        """
        Html.set_meta(self, dataset, **kwd)
        # Sorted so the choice is stable if more than one .grp file exists.
        for fname in sorted(os.listdir(dataset.extra_files_path)):
            if fname.endswith('.grp'):
                dataset.metadata.reference_name = fname[:-4]
                break
        self.regenerate_primary_file(dataset)