Mercurial > repos > artbio > rsem
comparison rsem.py @ 0:e5e836936d60 draft
planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/rsem commit d84a0359354698a4b29df12ab581c2618bffcf80
| author | artbio |
|---|---|
| date | Sat, 31 Mar 2018 21:30:07 -0400 |
| parents | |
| children | 49795544dac7 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e5e836936d60 |
|---|---|
| 1 """ | |
| 2 RSEM datatypes | |
| 3 """ | |
| 4 import logging | |
| 5 import os | |
| 6 import os.path | |
| 7 | |
| 8 from galaxy.datatypes.images import Html | |
| 9 from galaxy.datatypes.metadata import MetadataElement | |
| 10 from galaxy.datatypes.sniff import get_headers | |
| 11 from galaxy.datatypes.tabular import Tabular | |
| 12 | |
| 13 | |
| 14 log = logging.getLogger(__name__) | |
| 15 | |
| 16 | |
class RsemIsoformsResults(Tabular):
    """Tabular datatype for RSEM isoform-level results files.

    required columns:
      transcript_id gene_id length effective_length expected_count TPM
      FPKM IsoPct
    optional columns:
      pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM
      TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound
      FPKM_ci_upper_bound
    """
    file_ext = "rsem.isoforms.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # Presumably tells Tabular that the first line is a header row rather
        # than data -- confirm against the Tabular base class.
        self.comment_lines = 1

    def sniff(self, filename):
        """Return True if the first line of ``filename`` is an isoforms header.

        The header must have at least 8 columns, with ``transcript_id`` and
        ``gene_id`` in the first two positions and ``FPKM`` in the seventh.
        """
        # get_headers splits every line on the given separator. The columns
        # are tab-delimited, so the separator must be a tab; splitting on
        # '\n' leaves the whole line as a single field and the column checks
        # below could never succeed.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        header = headers[0]
        return (len(header) >= 8 and
                header[0] == "transcript_id" and
                header[1] == "gene_id" and
                header[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Delegate to Tabular.set_meta, forcing ``skip=None``."""
        Tabular.set_meta(self, dataset, skip=None, **kwd)
| 41 | |
| 42 | |
class RsemGenesResults(Tabular):
    """Tabular datatype for RSEM gene-level results files.

    required columns:
      gene_id transcript_id(s) length effective_length expected_count TPM FPKM
    optional columns:
      pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound
      TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.genes.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # Presumably tells Tabular that the first line is a header row rather
        # than data -- confirm against the Tabular base class.
        self.comment_lines = 1

    def sniff(self, filename):
        """Return True if the first line of ``filename`` is a genes header.

        The header must have at least 7 columns: ``gene_id`` first, a second
        column starting with ``transcript_id`` and ``FPKM`` in the seventh.
        """
        # get_headers splits every line on the given separator. The columns
        # are tab-delimited, so the separator must be a tab; splitting on
        # '\n' leaves the whole line as a single field and the column checks
        # below could never succeed.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        header = headers[0]
        return (len(header) >= 7 and
                header[0] == "gene_id" and
                header[1].startswith("transcript_id") and
                header[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Delegate to Tabular.set_meta, forcing ``skip=None``."""
        Tabular.set_meta(self, dataset, skip=None, **kwd)
| 67 | |
| 68 | |
class RsemReference(Html):
    """Class describing an RSEM reference"""
    MetadataElement(name='reference_name', default='rsem_ref',
                    desc='RSEM Reference Name', readonly=True, visible=True,
                    set_in_upload=True, no_value='rsem_ref')
    file_ext = 'rsem_ref'
    allow_datatype_change = False
    composite_type = 'auto_primary_file'

    def __init__(self, **kwd):
        """Register the composite files that make up an RSEM reference.

        Expecting files:
          extra_files_path/<reference_name>.grp
          extra_files_path/<reference_name>.ti
          extra_files_path/<reference_name>.seq
          extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
          extra_files_path/<reference_name>.chrlist
          extra_files_path/<reference_name>.idx.fa
          extra_files_path/<reference_name>.1.ebwt
          extra_files_path/<reference_name>.2.ebwt
          extra_files_path/<reference_name>.3.ebwt
          extra_files_path/<reference_name>.4.ebwt
          extra_files_path/<reference_name>.rev.1.ebwt
          extra_files_path/<reference_name>.rev.2.ebwt
        """
        Html.__init__(self, **kwd)
        # (file name pattern, description, is_binary, optional), registered
        # in this order.
        composite_files = (
            ('%s.grp', 'Group File', False, False),
            ('%s.ti', '', False, False),
            ('%s.seq', '', False, False),
            ('%s.transcripts.fa', '', False, False),
            ('%s.chrlist', '', False, True),
            ('%s.idx.fa', '', False, True),
            ('%s.1.ebwt', '', True, True),
            ('%s.2.ebwt', '', True, True),
            ('%s.3.ebwt', '', True, True),
            ('%s.4.ebwt', '', True, True),
            ('%s.rev.1.ebwt', '', True, True),
            ('%s.rev.2.ebwt', '', True, True),
        )
        for pattern, description, is_binary, optional in composite_files:
            # Only pass ``optional`` for optional files so required files
            # keep relying on add_composite_file's own default.
            extra = {'optional': True} if optional else {}
            self.add_composite_file(
                pattern, description=description,
                substitute_name_with_metadata='reference_name',
                is_binary=is_binary, **extra)

    def generate_primary_file(self, dataset=None):
        """
        This is called only at upload to write the file
        cannot rename the datasets here - they come with
        the default unfortunately
        """
        # NOTE(review): this intentionally does nothing and returns None;
        # confirm the upload code path tolerates a None primary file.

    def regenerate_primary_file(self, dataset):
        """Rewrite the primary HTML file as an index of the extra files.

        cannot do this until we are setting metadata

        Files in ``dataset.extra_files_path`` whose extension is listed in
        ``link_to_exts`` are rendered as links; all other files are listed
        after them as plain text. The result is written to
        ``dataset.file_name``.
        """
        link_to_exts = ('.grp', '.ti', '.seq', '.fa', '.chrlist', '.log')
        ref_name = dataset.metadata.reference_name
        linked = []
        unlinked = []
        # os.listdir returns entries in arbitrary order; sort so that the
        # generated page is deterministic.
        for fname in sorted(os.listdir(dataset.extra_files_path)):
            sfname = os.path.split(fname)[-1]
            ext = os.path.splitext(fname)[1]
            if ext in link_to_exts:
                linked.append(
                    '<li><a href="%s">%s</a></li>' % (sfname, sfname))
            else:
                unlinked.append('<li>%s</li>' % (sfname))
        rval = ['<html><head><title>%s</title></head><body><p/>RSEM '
                'Reference %s files:<p/><ul>' % (dataset.name, ref_name)]
        rval.extend(linked)
        rval.extend(unlinked)
        rval.append('</ul></body></html>')
        # open() works on both Python 2 and 3 (the ``file`` builtin is
        # Python 2 only); the context manager closes the handle on error.
        with open(dataset.file_name, 'w') as fh:
            fh.write("\n".join(rval))
            fh.write('\n')

    def set_meta(self, dataset, **kwd):
        """Set metadata, derive ``reference_name`` and rebuild the index page.

        The reference name is taken from the first ``.grp`` file (in sorted
        order) found in ``dataset.extra_files_path``, with the extension
        stripped. If there is none, the existing metadata value is kept.
        """
        Html.set_meta(self, dataset, **kwd)
        for fname in sorted(os.listdir(dataset.extra_files_path)):
            if fname.endswith('.grp'):
                dataset.metadata.reference_name = fname[:-4]
                break
        self.regenerate_primary_file(dataset)
