Mercurial > repos > artbio > rsem
comparison rsem.py @ 0:e5e836936d60 draft
planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/rsem commit d84a0359354698a4b29df12ab581c2618bffcf80
author | artbio |
---|---|
date | Sat, 31 Mar 2018 21:30:07 -0400 |
parents | |
children | 49795544dac7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e5e836936d60 |
---|---|
1 """ | |
2 RSEM datatypes | |
3 """ | |
4 import logging | |
5 import os | |
6 import os.path | |
7 | |
8 from galaxy.datatypes.images import Html | |
9 from galaxy.datatypes.metadata import MetadataElement | |
10 from galaxy.datatypes.sniff import get_headers | |
11 from galaxy.datatypes.tabular import Tabular | |
12 | |
13 | |
14 log = logging.getLogger(__name__) | |
15 | |
16 | |
class RsemIsoformsResults(Tabular):
    """Tabular datatype for RSEM isoform-level results.

    required columns:
        transcript_id gene_id length effective_length expected_count TPM
        FPKM IsoPct
    optional columns:
        pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM
        TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound
        FPKM_ci_upper_bound
    """
    file_ext = "rsem.isoforms.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # The single column-name header row is declared as one comment line.
        self.comment_lines = 1

    def sniff(self, filename):
        """Return True when the first line is an RSEM isoforms header.

        The header must have at least 8 tab-separated fields, with
        ``transcript_id`` first, ``gene_id`` second and ``FPKM`` seventh.
        """
        # Split on tabs: splitting on '\n' would leave each row as a single
        # field, so the column checks below could never succeed.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        fields = headers[0]
        return (len(fields) >= 8 and
                fields[0] == "transcript_id" and
                fields[1] == "gene_id" and
                fields[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Set tabular metadata on *dataset*, always passing ``skip=None``."""
        # Force the value through the dict so that a caller-supplied ``skip``
        # cannot trigger a duplicate-keyword TypeError.
        kwd['skip'] = None
        Tabular.set_meta(self, dataset, **kwd)
41 | |
42 | |
class RsemGenesResults(Tabular):
    """Tabular datatype for RSEM gene-level results.

    required columns:
        gene_id transcript_id(s) length effective_length expected_count TPM
        FPKM
    optional columns:
        pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound
        TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.genes.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # The single column-name header row is declared as one comment line.
        self.comment_lines = 1

    def sniff(self, filename):
        """Return True when the first line is an RSEM genes header.

        The header must have at least 7 tab-separated fields, with
        ``gene_id`` first, a second field starting with ``transcript_id``
        and ``FPKM`` seventh.
        """
        # Split on tabs: splitting on '\n' would leave each row as a single
        # field, so the column checks below could never succeed.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        fields = headers[0]
        return (len(fields) >= 7 and
                fields[0] == "gene_id" and
                fields[1].startswith("transcript_id") and
                fields[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Set tabular metadata on *dataset*, always passing ``skip=None``."""
        # Force the value through the dict so that a caller-supplied ``skip``
        # cannot trigger a duplicate-keyword TypeError.
        kwd['skip'] = None
        Tabular.set_meta(self, dataset, **kwd)
67 | |
68 | |
class RsemReference(Html):
    """Class describing an RSEM reference

    Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
    Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
    """
    MetadataElement(name='reference_name', default='rsem_ref',
                    desc='RSEM Reference Name', readonly=True, visible=True,
                    set_in_upload=True, no_value='rsem_ref')
    file_ext = 'rsem_ref'
    allow_datatype_change = False
    composite_type = 'auto_primary_file'

    def __init__(self, **kwd):
        """Register the composite files that make up an RSEM reference.

        Every file name is a ``%s`` pattern substituted with the
        ``reference_name`` metadata value. Registration order is: the four
        required text files, then the optional text and binary index files.
        """
        Html.__init__(self, **kwd)
        required_text_files = (
            ('%s.grp', 'Group File'),
            ('%s.ti', ''),
            ('%s.seq', ''),
            ('%s.transcripts.fa', ''),
        )
        for pattern, description in required_text_files:
            self.add_composite_file(
                pattern, description=description,
                substitute_name_with_metadata='reference_name',
                is_binary=False)
        # (pattern, is_binary) pairs for files that may be absent.
        optional_files = (
            ('%s.chrlist', False),
            ('%s.idx.fa', False),
            ('%s.1.ebwt', True),
            ('%s.2.ebwt', True),
            ('%s.3.ebwt', True),
            ('%s.4.ebwt', True),
            ('%s.rev.1.ebwt', True),
            ('%s.rev.2.ebwt', True),
        )
        for pattern, is_binary in optional_files:
            self.add_composite_file(
                pattern, description='',
                substitute_name_with_metadata='reference_name',
                is_binary=is_binary, optional=True)

    def generate_primary_file(self, dataset=None):
        """
        This is called only at upload to write the file
        cannot rename the datasets here - they come with
        the default unfortunately

        Intentionally has no body and returns None; the HTML index is
        written by regenerate_primary_file(), which set_meta() calls.
        """

    def regenerate_primary_file(self, dataset):
        """Rewrite the dataset's primary file as an HTML index of its files.

        cannot do this until we are setting metadata

        Lists every entry of ``dataset.extra_files_path``: entries whose
        extension is in ``link_to_exts`` are emitted first as hyperlinks,
        all remaining entries follow as plain list items. The result is
        written to ``dataset.file_name``.
        """
        link_to_exts = ['.grp', '.ti', '.seq', '.fa', '.chrlist', '.log']
        ref_name = dataset.metadata.reference_name
        linked_items = []
        plain_items = []
        for fname in os.listdir(dataset.extra_files_path):
            sfname = os.path.split(fname)[-1]
            extension = os.path.splitext(fname)[1]
            # NOTE(review): names are interpolated into HTML unescaped, as
            # in the original implementation - confirm they are trusted.
            if extension in link_to_exts:
                linked_items.append(
                    '<li><a href="%s">%s</a></li>' % (sfname, sfname))
            else:
                plain_items.append('<li>%s</li>' % (sfname))
        header = ('<html><head><title>%s</title></head><body><p/>RSEM '
                  'Reference %s files:<p/><ul>') % (dataset.name, ref_name)
        rval = [header]
        rval.extend(linked_items)
        rval.extend(plain_items)
        rval.append('</ul></body></html>')
        # open() replaces the Python 2-only file() builtin; the context
        # manager closes the handle even if a write fails.
        with open(dataset.file_name, 'w') as fh:
            fh.write("\n".join(rval))
            fh.write('\n')

    def set_meta(self, dataset, **kwd):
        """Set metadata, derive the reference name, and rebuild the index.

        ``reference_name`` is taken from the first ``.grp`` file that
        ``os.listdir`` reports in the extra files directory (name minus the
        extension); when there is none the metadata value is left untouched.
        """
        Html.set_meta(self, dataset, **kwd)
        for fname in os.listdir(dataset.extra_files_path):
            if fname.endswith('.grp'):
                # Strip the 4-character '.grp' suffix.
                dataset.metadata.reference_name = fname[:-4]
                break
        self.regenerate_primary_file(dataset)