annotate rsem.py @ 1:e5082fcdc0bd default tip

correct Tabular path
author Jim Johnson <jj@umn.edu>
date Fri, 18 Apr 2014 11:03:01 -0500
parents 77151afcd323
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
1 """
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
2 RSEM datatypes
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
3 """
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
4 import os,os.path,re,sys
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
5 from galaxy.datatypes.data import get_file_peek
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
6 from galaxy.datatypes.images import Html
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
7 from galaxy.datatypes.tabular import Tabular
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
8 from galaxy.datatypes.metadata import MetadataElement
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
9 from galaxy.datatypes.sniff import get_headers
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
10
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
11 import logging
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
12
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
13 log = logging.getLogger(__name__)
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
14
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
class RsemIsoformsResults( Tabular ):
    """
    Tabular datatype for RSEM isoform-level results.

    required columns:
    transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct
    optional columns:
    pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.isoforms.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__( self, **kwd )
        # One leading line (the column-name row that sniff() inspects) is
        # counted as a comment line rather than data.
        self.comment_lines = 1

    def sniff( self, filename ):
        """
        Return True if the first line of ``filename`` is an RSEM isoforms header.

        The line must have at least 8 tab-separated fields, with
        ``transcript_id`` in column 0, ``gene_id`` in column 1 and ``FPKM``
        in column 6.
        """
        # Split on tabs: get_headers() splits each line on the separator it
        # is given. The previous '\n' separator never occurs inside a line,
        # so every header came back as a single field and the column checks
        # below could never succeed.
        headers = get_headers( filename, '\t', count=1 )
        if not headers:
            return False
        fields = headers[0]
        return (
            len(fields) >= 8
            and fields[0] == "transcript_id"
            and fields[1] == "gene_id"
            and fields[6] == "FPKM"
        )

    def set_meta( self, dataset, **kwd ):
        """Set tabular metadata by delegating to Tabular.set_meta with ``skip=None``."""
        # NOTE(review): skip=None presumably lets Tabular decide how many
        # leading lines to skip - confirm against Tabular.set_meta.
        Tabular.set_meta( self, dataset, skip=None, **kwd )
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
32
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
class RsemGenesResults( Tabular ):
    """
    Tabular datatype for RSEM gene-level results.

    required columns:
    gene_id transcript_id(s) length effective_length expected_count TPM FPKM
    optional columns:
    pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.genes.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__( self, **kwd )
        # One leading line (the column-name row that sniff() inspects) is
        # counted as a comment line rather than data.
        self.comment_lines = 1

    def sniff( self, filename ):
        """
        Return True if the first line of ``filename`` is an RSEM genes header.

        The line must have at least 7 tab-separated fields, with ``gene_id``
        in column 0, a name starting with ``transcript_id`` in column 1 and
        ``FPKM`` in column 6.
        """
        # Split on tabs: get_headers() splits each line on the separator it
        # is given. The previous '\n' separator never occurs inside a line,
        # so every header came back as a single field and the column checks
        # below could never succeed.
        headers = get_headers( filename, '\t', count=1 )
        if not headers:
            return False
        fields = headers[0]
        return (
            len(fields) >= 7
            and fields[0] == "gene_id"
            and fields[1].startswith("transcript_id")
            and fields[6] == "FPKM"
        )

    def set_meta( self, dataset, **kwd ):
        """Set tabular metadata by delegating to Tabular.set_meta with ``skip=None``."""
        # NOTE(review): skip=None presumably lets Tabular decide how many
        # leading lines to skip - confirm against Tabular.set_meta.
        Tabular.set_meta( self, dataset, skip=None, **kwd )
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
50
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
class RsemReference( Html ):
    """Class describing an RSEM reference"""
    MetadataElement( name='reference_name', default='rsem_ref' , desc='RSEM Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='rsem_ref' )
    file_ext = 'rsem_ref'
    allow_datatype_change = False
    composite_type = 'auto_primary_file'

    def __init__( self, **kwd ):
        """
        Register the composite files that make up an RSEM reference.

        Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
        """
        Html.__init__(self, **kwd)
        # Required text files.
        self.add_composite_file( '%s.grp', description = 'Group File', substitute_name_with_metadata = 'reference_name', is_binary = False )
        self.add_composite_file( '%s.ti', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
        self.add_composite_file( '%s.seq', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
        self.add_composite_file( '%s.transcripts.fa', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
        # Optional text files.
        self.add_composite_file( '%s.chrlist', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False, optional=True )
        self.add_composite_file( '%s.idx.fa', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False, optional=True )
        # Optional binary .ebwt index files.
        self.add_composite_file( '%s.1.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
        self.add_composite_file( '%s.2.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
        self.add_composite_file( '%s.3.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
        self.add_composite_file( '%s.4.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
        self.add_composite_file( '%s.rev.1.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )
        self.add_composite_file( '%s.rev.2.ebwt', description = '', substitute_name_with_metadata = 'reference_name', is_binary = True, optional=True )

    def generate_primary_file( self, dataset = None ):
        """
        This is called only at upload to write the file
        cannot rename the datasets here - they come with the default unfortunately
        """
        # NOTE(review): this has no body beyond the docstring, so it returns
        # None. The real primary file is written later by
        # regenerate_primary_file(); confirm that callers accept None here.

    def regenerate_primary_file(self,dataset):
        """
        Rewrite the primary HTML file as an index of the extra files.

        cannot do this until we are setting metadata

        Every entry in ``dataset.extra_files_path`` becomes a list item.
        Entries whose final extension is in ``link_to_exts`` are hyperlinked
        and listed first; all others follow as plain text. The page is
        written to ``dataset.file_name``.
        """
        link_to_exts = ['.grp','.ti','.seq','.fa','.chrlist','.log']
        ref_name = dataset.metadata.reference_name
        linked = []
        unlinked = []
        for fname in os.listdir(dataset.extra_files_path):
            sfname = os.path.split(fname)[-1]
            # Only the last extension counts, so both 'x.transcripts.fa'
            # and 'x.idx.fa' match '.fa'.
            if os.path.splitext(fname)[1] in link_to_exts:
                linked.append( '<li><a href="%s">%s</a></li>' % ( sfname, sfname) )
            else:
                unlinked.append( '<li>%s</li>' % (sfname) )
        rval = ['<html><head><title>%s</title></head><body><p/>RSEM Reference %s files:<p/><ul>' % (dataset.name,ref_name)]
        rval += linked
        rval += unlinked
        rval.append( '</ul></body></html>' )
        # open() replaces the Python 2-only file() builtin, and the with
        # block closes the handle even if a write raises.
        with open(dataset.file_name,'w') as fh:
            fh.write("\n".join( rval ))
            fh.write('\n')

    def set_meta( self, dataset, **kwd ):
        """
        Set metadata, then rebuild the primary HTML file.

        ``reference_name`` is taken from the first ``*.grp`` entry found in
        ``dataset.extra_files_path`` (file name minus the ``.grp`` suffix).
        If there is none, the existing metadata value is left untouched.
        """
        Html.set_meta( self, dataset, **kwd )
        for fname in os.listdir(dataset.extra_files_path):
            if fname.endswith('.grp'):
                # Strip the 4-character '.grp' suffix.
                dataset.metadata.reference_name = fname[:-4]
                break
        self.regenerate_primary_file(dataset)
77151afcd323 Uploaded
jjohnson
parents:
diff changeset
128