comparison dgidb_annotator.py @ 0:8c6dc9da6c89 draft

Uploaded
author devteam
date Wed, 27 Nov 2013 23:51:48 -0500
parents
children 8cc7cf4bd833
comparison
equal deleted inserted replaced
-1:000000000000 0:8c6dc9da6c89
1 '''
2 Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database.
3 '''
4
5 import optparse, json, urllib2, sys
6
def _read_gene_rows(input_file, gene_name_col):
    '''
    Read tab-separated rows and extract the gene name from each one.

    Only the line terminator is removed from a row, so empty leading or
    trailing columns are preserved and column positions stay aligned between
    the lookup and the printed output.

    input_file -- iterable of text lines.
    gene_name_col -- 0-based index of the column holding the gene name.

    Returns a list of (row, gene) tuples. gene is the whitespace-stripped
    column value, or '' when the row has fewer columns than requested.
    '''
    rows = []
    for line in input_file:
        row = line.rstrip('\r\n')
        fields = row.split('\t')
        if gene_name_col < len(fields):
            gene = fields[gene_name_col].strip()
        else:
            # Short or blank row: no gene to look up.
            gene = ''
        rows.append((row, gene))
    return rows


def _build_query_url(gene_names, expert_curated):
    '''
    Build the DGIdb interactions query URL for the given gene names.

    Duplicate names are collapsed and the names are sorted so the URL is
    deterministic; each name is percent-encoded so that spaces and reserved
    characters cannot corrupt the query string.

    gene_names -- iterable of gene name strings.
    expert_curated -- when true, append the expert-curated trust level filter.
    '''
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    encoded_names = [quote(name, safe='') for name in sorted(set(gene_names))]
    query_str = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json?genes=%s' % ','.join(encoded_names)
    if expert_curated:
        query_str += '&source_trust_levels=Expert%20curated'
    return query_str


def _index_interactions(matched_terms):
    '''
    Turn the 'matchedTerms' list of a response into annotation rows.

    Returns a dict mapping each entry's 'searchTerm' to a list of
    tab-joined strings, one per interaction, holding: gene name, gene long
    name, comma-joined gene categories, interaction type, drug name, source.
    '''
    annotations = {}
    for term in matched_terms:
        gene_fields = [
            term['geneName'],
            term['geneLongName'],
            ','.join(term['geneCategories']),
        ]
        annotations[term['searchTerm']] = [
            '\t'.join(gene_fields + [
                interaction['interactionType'],
                interaction['drugName'],
                interaction['source'],
            ])
            for interaction in term['interactions']
        ]
    return annotations


def __main__():
    '''
    Annotate a tabular file with drug-gene interactions and print the result.

    Reads rows from the file named by the first positional argument, or from
    standard input when none is given. The gene names found in the column
    selected with -g/--gene-name-col (1-based) are sent in a single query;
    every input row whose gene has interactions is printed once per
    interaction with the annotation columns appended. With -a/--print-all,
    rows without any interaction are printed unchanged.

    Exits with a usage error when the gene column is missing, not an integer,
    or smaller than 1. No query is made when the input contains no gene names.
    '''
    # -- Parse command line. --
    parser = optparse.OptionParser()
    parser.add_option('-g', '--gene-name-col', dest='gene_name_col', help='column of gene names')
    parser.add_option('-a', '--print-all', dest='print_all', action='store_true', help='print all lines, even though without a result')
    parser.add_option('-e', '--expert-curated', dest='expert_curated', action='store_true', help='use only expert curated results')
    (options, args) = parser.parse_args()
    try:
        gene_name_col = int(options.gene_name_col) - 1
    except (TypeError, ValueError):
        # TypeError: option omitted (None); ValueError: not a number.
        parser.error('--gene-name-col must be given as a 1-based column number')
    if gene_name_col < 0:
        # A zero or negative column would silently index from the row's end.
        parser.error('--gene-name-col must be 1 or greater')

    # -- Read input, closing the file if this function opened it. --
    if len(args) > 0:
        input_file = open(args[0], 'r')
    else:
        input_file = sys.stdin
    try:
        rows = _read_gene_rows(input_file, gene_name_col)
    finally:
        if input_file is not sys.stdin:
            input_file.close()

    # -- Make connection and get results. --
    gene_names = set(gene for _, gene in rows if gene)
    # Annotations live in their own dict so that a gene name can never
    # collide with a top-level key of the decoded response.
    annotations = {}
    if gene_names:
        try:
            from urllib2 import urlopen  # Python 2
        except ImportError:
            from urllib.request import urlopen  # Python 3

        response = urlopen(_build_query_url(gene_names, options.expert_curated))
        try:
            payload = response.read()
        finally:
            response.close()
        results_dict = json.loads(payload.decode('utf-8'))
        annotations = _index_interactions(results_dict['matchedTerms'])

    # -- Annotate input file and produce output. --
    for row, gene in rows:
        gene_annotations = annotations.get(gene)
        if gene_annotations:
            for annotation in gene_annotations:
                print(row + '\t' + annotation)
        elif options.print_all:
            # Also covers genes that matched but have no interactions.
            print(row)
63
# Run the annotator only when this file is executed as a script.
if __name__ == "__main__":
    __main__()