Mercurial > repos > jjohnson > rsem_datatypes
comparison rsem.py @ 0:77151afcd323
Uploaded
| author | jjohnson |
|---|---|
| date | Mon, 10 Mar 2014 08:09:01 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:77151afcd323 |
|---|---|
| 1 """ | |
| 2 RSEM datatypes | |
| 3 """ | |
| 4 import os,os.path,re,sys | |
| 5 from galaxy.datatypes.data import get_file_peek | |
| 6 from galaxy.datatypes.images import Html | |
| 7 from galaxy.datatypes.tabular import Tabular | |
| 8 from galaxy.datatypes.metadata import MetadataElement | |
| 9 from galaxy.datatypes.sniff import get_headers | |
| 10 | |
| 11 import logging | |
| 12 | |
| 13 log = logging.getLogger(__name__) | |
| 14 | |
class RsemIsoformsResults( Tabular ):
    """
    RSEM isoform-level results table.

    required columns:
    transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct
    optional columns:
    pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.isoforms.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__( self, **kwd )
        # NOTE(review): presumably marks the single column-name header row
        # as a comment line for the Tabular machinery -- confirm.
        self.comment_lines = 1

    def sniff( self, filename ):
        """
        Return True when the first line of ``filename`` carries the
        isoforms-results column names: at least 8 columns, with
        "transcript_id", "gene_id" and "FPKM" at positions 0, 1 and 6.
        """
        # get_headers splits every line on the separator it is given, so the
        # separator must be the column delimiter (tab).  Splitting on '\n'
        # yields a single-element row and the column checks can never pass.
        headers = get_headers( filename, '\t', count=1 )
        if not headers:
            return False
        header = headers[0]
        return len(header) >= 8 and header[0] == "transcript_id" and header[1] == "gene_id" and header[6] == "FPKM"

    def set_meta( self, dataset, **kwd ):
        """Delegate to Tabular.set_meta, explicitly passing skip=None."""
        Tabular.set_meta( self, dataset, skip=None, **kwd )
| 32 | |
class RsemGenesResults( Tabular ):
    """
    RSEM gene-level results table.

    required columns:
    gene_id transcript_id(s) length effective_length expected_count TPM FPKM
    optional columns:
    pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.genes.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__( self, **kwd )
        # NOTE(review): presumably marks the single column-name header row
        # as a comment line for the Tabular machinery -- confirm.
        self.comment_lines = 1

    def sniff( self, filename ):
        """
        Return True when the first line of ``filename`` carries the
        genes-results column names: at least 7 columns, with "gene_id" at
        position 0, a name starting with "transcript_id" at position 1 and
        "FPKM" at position 6.
        """
        # get_headers splits every line on the separator it is given, so the
        # separator must be the column delimiter (tab).  Splitting on '\n'
        # yields a single-element row and the column checks can never pass.
        headers = get_headers( filename, '\t', count=1 )
        if not headers:
            return False
        header = headers[0]
        return len(header) >= 7 and header[0] == "gene_id" and header[1].startswith("transcript_id") and header[6] == "FPKM"

    def set_meta( self, dataset, **kwd ):
        """Delegate to Tabular.set_meta, explicitly passing skip=None."""
        Tabular.set_meta( self, dataset, skip=None, **kwd )
| 50 | |
class RsemReference( Html ):
    """Class describing an RSEM reference"""
    MetadataElement( name='reference_name', default='rsem_ref' , desc='RSEM Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='rsem_ref' )
    file_ext = 'rsem_ref'
    allow_datatype_change = False
    composite_type = 'auto_primary_file'

    def __init__( self, **kwd ):
        """
        Register the composite files that make up an RSEM reference.

        Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
        """
        Html.__init__(self, **kwd)
        # Required text files: ( name pattern, description ).
        required_files = [
            ( '%s.grp', 'Group File' ),
            ( '%s.ti', '' ),
            ( '%s.seq', '' ),
            ( '%s.transcripts.fa', '' ),
        ]
        for pattern, description in required_files:
            self.add_composite_file( pattern, description = description, substitute_name_with_metadata = 'reference_name', is_binary = False )
        # Optional files: ( name pattern, is_binary ).
        optional_files = [
            ( '%s.chrlist', False ),
            ( '%s.idx.fa', False ),
            ( '%s.1.ebwt', True ),
            ( '%s.2.ebwt', True ),
            ( '%s.3.ebwt', True ),
            ( '%s.4.ebwt', True ),
            ( '%s.rev.1.ebwt', True ),
            ( '%s.rev.2.ebwt', True ),
        ]
        for pattern, is_binary in optional_files:
            self.add_composite_file( pattern, description = '', substitute_name_with_metadata = 'reference_name', is_binary = is_binary, optional=True )

    def generate_primary_file( self, dataset = None ):
        """
        This is called only at upload to write the file
        cannot rename the datasets here - they come with the default unfortunately
        """

    def regenerate_primary_file(self,dataset):
        """
        Rewrite the primary HTML file as an index of the reference files.

        Lists every entry of dataset.extra_files_path: entries whose last
        extension is in ``link_to_exts`` are written as links, all other
        entries follow as plain list items.
        cannot do this until we are setting metadata
        """
        link_to_exts = ['.grp','.ti','.seq','.fa','.chrlist','.log']
        ref_name = dataset.metadata.reference_name
        efp = dataset.extra_files_path
        # os.listdir returns entries in arbitrary order; sort them so the
        # generated page is the same on every regeneration.
        flist = sorted(os.listdir(efp))
        rval = ['<html><head><title>%s</title></head><body><p/>RSEM Reference %s files:<p/><ul>' % (dataset.name,ref_name)]
        rvalb = []
        for fname in flist:
            sfname = os.path.split(fname)[-1]
            ext = os.path.splitext(fname)[1]
            if ext in link_to_exts:
                rval.append( '<li><a href="%s">%s</a></li>' % ( sfname, sfname) )
            else:
                rvalb.append( '<li>%s</li>' % (sfname) )
        # Non-linked entries are listed after all linked ones.
        rval.extend( rvalb )
        rval.append( '</ul></body></html>' )
        # open() replaces the Python-2-only file() builtin; the with-block
        # closes the handle even if a write fails.
        with open(dataset.file_name,'w') as fh:
            fh.write("\n".join( rval ))
            fh.write('\n')

    def set_meta( self, dataset, **kwd ):
        """
        Set metadata, derive reference_name from the ``.grp`` file found in
        dataset.extra_files_path (if any), then regenerate the primary file.
        """
        Html.set_meta( self, dataset, **kwd )
        efp = dataset.extra_files_path
        # Sorted so the choice is deterministic if several .grp files exist.
        for fname in sorted(os.listdir(efp)):
            if fname.endswith('.grp'):
                # Strip the 4-character '.grp' suffix to get the reference name.
                dataset.metadata.reference_name = fname[:-4]
                break
        self.regenerate_primary_file(dataset)
| 128 |
