Mercurial > repos > miller-lab > genome_diversity
comparison specify_restriction_enzymes.py @ 0:2c498d40ecde
Uploaded
author | miller-lab |
---|---|
date | Mon, 09 Apr 2012 12:03:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2c498d40ecde |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import sys | |
5 from optparse import OptionParser | |
6 import genome_diversity as gd | |
7 | |
8 def main_function( parse_arguments=None ): | |
9 if parse_arguments is None: | |
10 parse_arguments = lambda arguments: ( None, arguments ) | |
11 def main_decorator( to_decorate ): | |
12 def decorated_main( arguments=None ): | |
13 if arguments is None: | |
14 arguments = sys.argv | |
15 options, arguments = parse_arguments( arguments ) | |
16 rc = 1 | |
17 try: | |
18 rc = to_decorate( options, arguments ) | |
19 except Exception, err: | |
20 sys.stderr.write( 'ERROR: %s\n' % str( err ) ) | |
21 traceback.print_exc() | |
22 finally: | |
23 sys.exit( rc ) | |
24 return decorated_main | |
25 return main_decorator | |
26 | |
27 def parse_arguments( arguments ): | |
28 parser = OptionParser() | |
29 parser.add_option('--input', | |
30 type='string', dest='input', | |
31 help='file of selected SNPs') | |
32 parser.add_option('--output', | |
33 type='string', dest='output', | |
34 help='output file') | |
35 parser.add_option('--primers_loc', | |
36 type='string', dest='primers_loc', | |
37 help='primers .loc file') | |
38 parser.add_option('--scaffold_col', | |
39 type="int", dest='scaffold_col', | |
40 help='scaffold column in the input file') | |
41 parser.add_option('--pos_col', | |
42 type="int", dest='pos_col', | |
43 help='position column in the input file') | |
44 parser.add_option('--enzyme_list', | |
45 type="string", dest='enzyme_list_string', | |
46 help='comma separated list of enzymes') | |
47 parser.add_option('--species', | |
48 type="string", dest='species', | |
49 help='species') | |
50 return parser.parse_args( arguments[1:] ) | |
51 | |
52 | |
53 @main_function( parse_arguments ) | |
54 def main( options, arguments ): | |
55 if not options.input: | |
56 raise RuntimeError( 'missing --input option' ) | |
57 if not options.output: | |
58 raise RuntimeError( 'missing --output option' ) | |
59 if not options.primers_loc: | |
60 raise RuntimeError( 'missing --primers_loc option' ) | |
61 if not options.scaffold_col: | |
62 raise RuntimeError( 'missing --scaffold_col option' ) | |
63 if not options.pos_col: | |
64 raise RuntimeError( 'missing --pos_col option' ) | |
65 if not options.enzyme_list_string: | |
66 raise RuntimeError( 'missing --enzyme_list option' ) | |
67 if not options.species: | |
68 raise RuntimeError( 'missing --species option' ) | |
69 | |
70 snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) ) | |
71 | |
72 out_fh = gd._openfile( options.output, 'w' ) | |
73 | |
74 enzyme_dict = {} | |
75 for enzyme in options.enzyme_list_string.split( ',' ): | |
76 enzyme = enzyme.strip() | |
77 if enzyme: | |
78 enzyme_dict[enzyme] = 1 | |
79 | |
80 primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc ) | |
81 file_root, file_ext = os.path.splitext( primer_data_file ) | |
82 primer_index_file = file_root + ".cdb" | |
83 primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file ) | |
84 | |
85 comments_printed = False | |
86 | |
87 while snps.next(): | |
88 seq, pos = snps.get_seq_pos() | |
89 enzyme_list = primers.get_enzymes( seq, pos ) | |
90 for enzyme in enzyme_list: | |
91 if enzyme in enzyme_dict: | |
92 if not comments_printed: | |
93 for comment in snps.comments: | |
94 out_fh.write( "%s\n" % comment ) | |
95 comments_printed = True | |
96 out_fh.write( "%s\n" % snps.line ) | |
97 break | |
98 | |
99 out_fh.close() | |
100 | |
101 if __name__ == "__main__": | |
102 main() | |
103 |