Mercurial > repos > devteam > dgidb_annotator
comparison dgidb_annotator.py @ 0:8c6dc9da6c89 draft
Uploaded
author | devteam |
---|---|
date | Wed, 27 Nov 2013 23:51:48 -0500 |
parents | |
children | 8cc7cf4bd833 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8c6dc9da6c89 |
---|---|
1 ''' | |
2 Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database. | |
3 ''' | |
4 | |
5 import optparse, json, urllib2, sys | |
6 | |
_DGIDB_INTERACTIONS_URL = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json'


def _read_rows(input_file, gene_name_col):
    '''
    Read a tab-separated stream into a list of (line, gene) pairs.

    input_file    -- iterable of text lines.
    gene_name_col -- zero-based index of the column holding the gene name.

    Only the line terminator is removed from each line: stripping all
    surrounding whitespace would also drop leading/trailing tabs and shift
    the columns. The gene name is stripped once here so that the very same
    value is used both for the query and for the later lookup. A line with
    too few columns gets an empty gene name instead of raising IndexError.
    '''
    rows = []
    for raw_line in input_file:
        line = raw_line.rstrip('\r\n')
        fields = line.split('\t')
        if gene_name_col < len(fields):
            gene = fields[gene_name_col].strip()
        else:
            gene = ''
        rows.append((line, gene))
    return rows


def _fetch_matched_terms(genes, expert_curated):
    '''
    Query the interactions endpoint and return the response's 'matchedTerms'.

    genes          -- iterable of non-empty gene names.
    expert_curated -- when true, add the 'Expert curated' trust-level filter.

    Raises whatever urlopen / json.loads raise on network or parse failure,
    and KeyError if the response has no 'matchedTerms' entry.
    '''
    # Imported locally so the block works on both Python 2 and Python 3
    # regardless of which urllib flavour the module header imports.
    try:
        from urllib import quote
        from urllib2 import urlopen
    except ImportError:
        from urllib.parse import quote
        from urllib.request import urlopen

    # Escape every gene name: characters such as ' ', '&' or '#' would
    # otherwise corrupt the query string. Sorting keeps the URL deterministic.
    gene_param = ','.join(quote(gene, safe='') for gene in sorted(genes))
    query_str = '%s?genes=%s' % (_DGIDB_INTERACTIONS_URL, gene_param)
    if expert_curated:
        query_str += '&source_trust_levels=Expert%20curated'

    response = urlopen(query_str)
    try:
        payload = response.read()
    finally:
        response.close()
    return json.loads(payload.decode('utf-8'))['matchedTerms']


def _index_interactions(matched_terms):
    '''
    Map each search term to its list of tab-joined annotation strings.

    Every annotation holds six fields: gene name, gene long name,
    comma-joined gene categories, interaction type, drug name and source.
    A term without interactions maps to an empty list.
    '''
    annotations = {}
    for term in matched_terms:
        gene_fields = [
            term['geneName'],
            term['geneLongName'],
            ','.join(term['geneCategories']),
        ]
        annotations[term['searchTerm']] = [
            '\t'.join(gene_fields + [
                interaction['interactionType'],
                interaction['drugName'],
                interaction['source'],
            ])
            for interaction in term['interactions']
        ]
    return annotations


def __main__():
    '''
    Annotate a tabular file with drug-gene interactions.

    Reads tab-separated lines from the file named by the first positional
    argument (or from stdin), looks up the gene names found in the column
    given by -g/--gene-name-col (1-based), and writes one output line per
    interaction: the input line followed by the six annotation fields.
    With -a/--print-all, lines that yield no annotation are written
    unchanged; without it they are dropped.

    Exits through parser.error() (status 2) when the gene column option is
    missing or is not a positive integer.
    '''
    # -- Parse command line. --
    parser = optparse.OptionParser()
    parser.add_option('-g', '--gene-name-col', dest='gene_name_col', help='column of gene names')
    parser.add_option('-a', '--print-all', dest='print_all', action='store_true', help='print all lines, even though without a result')
    parser.add_option('-e', '--expert-curated', dest='expert_curated', action='store_true', help='use only expert curated results')
    (options, args) = parser.parse_args()

    if options.gene_name_col is None:
        parser.error('the gene name column (-g/--gene-name-col) is required')
    try:
        gene_name_col = int(options.gene_name_col) - 1
    except ValueError:
        parser.error('the gene name column must be an integer: %r' % options.gene_name_col)
    if gene_name_col < 0:
        # A zero or negative column would silently index from the end.
        parser.error('the gene name column must be 1 or greater')

    # -- Read input, closing the file if we opened it. --
    if args:
        with open(args[0], 'r') as input_file:
            rows = _read_rows(input_file, gene_name_col)
    else:
        rows = _read_rows(sys.stdin, gene_name_col)

    # -- Make connection and get results. --
    # Blank gene names are not queried, and no request is made at all when
    # there is nothing to look up.
    genes = set(gene for _, gene in rows if gene)
    if genes:
        annotations = _index_interactions(
            _fetch_matched_terms(genes, options.expert_curated))
    else:
        annotations = {}

    # -- Annotate input file and produce output. --
    # Annotations live in their own dict (not in the raw JSON response), so a
    # gene name can never collide with a response key such as 'matchedTerms'.
    for line, gene in rows:
        results = annotations.get(gene)
        if results:
            for result in results:
                sys.stdout.write(line + '\t' + result + '\n')
        elif options.print_all:
            # Also covers matched genes that have no interactions.
            sys.stdout.write(line + '\n')
63 | |
# Run the annotator only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()