Mercurial > repos > iuc > snpsift_dbnsfp_datatypes
comparison snpsift_dbnsfp.py @ 0:0c4372b93e85 draft default tip
Uploaded
author | iuc |
---|---|
date | Thu, 22 Jan 2015 08:04:59 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0c4372b93e85 |
---|---|
1 """ | |
2 SnpSift dbNSFP datatypes | |
3 """ | |
4 import os,os.path,re,sys,gzip,logging | |
5 import traceback | |
6 import galaxy.datatypes.data | |
7 from galaxy.datatypes.data import Text | |
8 from galaxy.datatypes.metadata import MetadataElement | |
9 | |
10 log = logging.getLogger(__name__) | |
11 | |
class SnpSiftDbNSFP(Text):
    """Class describing a dbNSFP database prepared for use by SnpSift dbnsfp.

    The datatype is a composite dataset: the primary file is a small text
    summary generated by this class, and the files found in the dataset's
    extra files directory are inspected by ``set_meta`` to fill in the
    ``bgzip``, ``index`` and ``annotation`` metadata.
    """
    # NOTE(review): 'dbSNFP' looks like a transposition of 'dbNSFP'. It is left
    # untouched because it is a runtime value that is also referenced by the
    # composite file declarations in __init__ -- confirm before changing.
    MetadataElement(name='reference_name', default='dbSNFP', desc='Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='dbSNFP')
    MetadataElement(name="bgzip", default=None, desc="dbNSFP bgzip", readonly=True, visible=True, no_value=None)
    MetadataElement(name="index", default=None, desc="Tabix Index File", readonly=True, visible=True, no_value=None)
    MetadataElement(name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[])
    file_ext = "snpsiftdbnsfp"
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    # The dbNSFP file is a tabular file with 1 header line.
    # The first 4 columns are required to be: chrom pos ref alt
    # These match columns 1,2,4,5 of the VCF file.
    # SnpSift requires the file to be block-gzipped and then indexed with
    # samtools tabix.
    # Example:
    #   Compress using block-gzip algorithm
    #     bgzip dbNSFP2.3.txt
    #   Create tabix index
    #     tabix -s 1 -b 2 -e 2 dbNSFP2.3.txt.gz

    def __init__(self, **kwd):
        """Initialise the datatype and declare its composite files.

        All keyword arguments are forwarded unchanged to ``Text.__init__``.
        """
        Text.__init__(self, **kwd)
        # NOTE(review): the '.grp' / '.ti' patterns do not correspond to the
        # '.gz' / '.tbi' files that set_meta() looks for -- confirm that these
        # declarations are intended.
        self.add_composite_file('%s.grp', description='Group File', substitute_name_with_metadata='reference_name', is_binary=False)
        self.add_composite_file('%s.ti', description='', substitute_name_with_metadata='reference_name', is_binary=False)

    def init_meta(self, dataset, copy_from=None):
        """Delegate metadata initialisation to ``Text.init_meta``."""
        Text.init_meta(self, dataset, copy_from=copy_from)

    def generate_primary_file(self, dataset=None):
        """
        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        # Must be invoked through ``self``: the method is not a module-level
        # function, so a bare call raises NameError.
        self.regenerate_primary_file(dataset)

    def regenerate_primary_file(self, dataset):
        """Append a summary of the current metadata to the primary file.

        Writes the bgzip file name (when set) followed by a line listing the
        annotation names. The file at ``dataset.file_name`` is opened in
        append mode, so repeated calls add further lines.

        cannot do this until we are setting metadata
        """
        annotations = "dbNSFP Annotations: %s\n" % ','.join(dataset.metadata.annotation)
        # ``with`` guarantees the handle is closed even if a write fails.
        with open(dataset.file_name, 'a') as f:
            if dataset.metadata.bgzip:
                f.write(dataset.metadata.bgzip)
                f.write('\n')
            f.write(annotations)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Populate metadata from the files in the dataset's extra files path.

        For every ``*.gz`` file the name is stored in ``metadata.bgzip`` and
        the header line (first line of the first 5000 decompressed bytes) is
        split on tabs; every column after the first four becomes an annotation
        name. For every ``*.tbi`` file the name is stored in
        ``metadata.index``. Afterwards the primary file is regenerated.

        Errors are logged and swallowed so that a problem with one file does
        not abort metadata setting.
        """
        try:
            efp = dataset.extra_files_path
            if os.path.exists(efp):
                for fname in os.listdir(efp):
                    if fname.endswith('.gz'):
                        dataset.metadata.bgzip = fname
                        # Initialised up front so that ``finally`` never hits
                        # an unbound name when gzip.open() itself fails.
                        fh = None
                        try:
                            fh = gzip.open(os.path.join(efp, fname), 'r')
                            buf = fh.read(5000)
                            lines = buf.splitlines()
                            header = lines[0]
                            # On Python 3 gzip yields bytes; on Python 2 this
                            # is already ``str`` and the branch is skipped.
                            if not isinstance(header, str):
                                header = header.decode('utf-8')
                            headers = header.split('\t')
                            # Columns 0-3 are chrom/pos/ref/alt.
                            dataset.metadata.annotation = headers[4:]
                        except Exception as e:
                            log.warn("set_meta fname: %s %s" % (fname, str(e)))
                            # print_exc reports the caught exception's
                            # traceback (print_stack only shows the caller).
                            traceback.print_exc(file=sys.stderr)
                        finally:
                            if fh is not None:
                                fh.close()
                    if fname.endswith('.tbi'):
                        dataset.metadata.index = fname
            self.regenerate_primary_file(dataset)
        except Exception as e:
            # NOTE(review): 'Unkwown' is a typo in the emitted log text; kept
            # as-is in case anything matches on the message.
            log.warn("set_meta fname: %s %s" % (dataset.file_name if dataset and dataset.file_name else 'Unkwown', str(e)))
            traceback.print_exc(file=sys.stderr)
81 | |
if __name__ == '__main__':
    # When executed directly as a script, run the doctests found in this
    # module's docstrings.
    import doctest
    doctest.testmod(m=sys.modules[__name__])
85 |