comparison cravat_annotate/cravat_annotate.py @ 19:5de40a6dd491 draft

Uploaded
author in_silico
date Wed, 18 Jul 2018 10:17:29 -0400
parents
children
comparison
equal deleted inserted replaced
18:cde9e74d9fdf 19:5de40a6dd491
"""
A Galaxy wrapper for the /rest/service/query API endpoint on Cravat.
"""
4
from __future__ import print_function

import argparse
import json
import os
import re
import sys

import requests
11
12
# The endpoint that Cravat queries are submitted to.
endpoint = 'http://www.cravat.us/CRAVAT/rest/service/query'


# Newline and delimiter values used in the output file.
delimiter = "\t"
newline = "\n"
20
21
# Default column indices for interpreting a Cravat file's row of data as a query.
# Each value is a zero-based position in the tab-split line; position 0 is not
# referenced by this mapping.
cr_mapping = {
    'chromosome': 1,
    'position': 2,
    'strand': 3,
    'reference': 4,
    'alternate': 5
}


# The necessary attributes needed to submit a query.
query_keys = [
    'chromosome', 'position', 'strand', 'reference', 'alternate'
]
36
37
# Expected response keys from server. Ordered in a list so that the Galaxy output has
# uniform column ordering run-to-run.
# If the Cravat server returns additional keys, they are appended to this list and
# included in the output, so this must remain a mutable list.
# NOTE(review): "gnomAD AF (Amrican)" looks like a typo for "American" - confirm against
# the key the server actually returns before changing it.
ordered_keys = [
    "Chromosome", "Position", "Strand", "Reference base(s)", "Alternate base(s)",
    "HUGO symbol", "S.O. transcript", "Sequence ontology protein change", "Sequence ontology",
    "S.O. all transcripts", "gnomAD AF", "gnomAD AF (African)", "gnomAD AF (Amrican)",
    "gnomAD AF (Ashkenazi Jewish)", "gnomAD AF (East Asian)", "gnomAD AF (Finnish)",
    "gnomAD AF (Non-Finnish European)", "gnomAD AF (Other)", "gnomAD AF (South Asian)",
    "1000 Genomes AF", "ESP6500 AF (average)", "ESP6500 AF (European American)",
    "ESP6500 AF (African American)", "COSMIC transcript", "COSMIC protein change",
    "COSMIC variant count [exact nucleotide change]", "cosmic_site_nt", "CGL driver class",
    "TARGET", "dbSNP", "cgc_role", "cgc_inheritance", "cgc_tumor_type_somatic",
    "cgc_tumor_type_germline", "ClinVar", "ClinVar disease identifier", "ClinVar XRef",
    "GWAS Phenotype (GRASP)", "GWAS PMID (GRASP)", "Protein 3D variant"
]
53
54
def get_args(argv=None):
    """ : Parse the command line arguments for this wrapper.
        : argv is an optional list of argument strings; when it is None,
        : argparse reads the arguments from sys.argv[1:].
        : Returns the argparse Namespace with 'input' (required) and
        : 'output' (None when not given) attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input',
                        '-i',
                        required=True,
                        help='Input path to a cravat file for querying')
    parser.add_argument('--output',
                        '-o',
                        default=None,
                        help='Output path to write results from query')
    return parser.parse_args(argv)
66
67
def format_chromosome(chrom):
    """ : Ensure chromosome entry is properly formatted for use as querying attribute.
        : chrom may be a string or a number; it is converted to a string and
        : stripped of surrounding whitespace before the prefix is checked.
        : Returns the value carrying a lowercase 'chr' prefix,
        : e.g. 1 -> 'chr1', 'X' -> 'chrX', 'CHR2' -> 'chr2'.
    """
    # Convert first: slicing a non-string (e.g. an int) would raise TypeError.
    chrom = str(chrom).strip()
    # Match the prefix case-insensitively so 'Chr1' does not become 'chrChr1'.
    if chrom[:3].lower() == 'chr':
        return 'chr' + chrom[3:]
    return 'chr' + chrom
73
74
def get_query_string(row):
    """ : From a row dict, return a query string for the Cravat server.
        : The row dict is cravat headers associated to their values of that row.
        : The values are joined with '_' in the order chromosome, position,
        : strand, reference, alternate.
        : Raises KeyError when the row lacks one of those headers.
    """
    fields = ('chromosome', 'position', 'strand', 'reference', 'alternate')
    # str() so a non-string value (e.g. an int position) does not make join() raise.
    return '_'.join([str(row[name]) for name in fields])
80
81
def _format_value(val):
    """ : Return val as the string that is written to the output file.
        : A value that float() accepts is rendered with four decimal places;
        : anything else (including None) is rendered with str().
    """
    try:
        return format(float(val), ".4f")
    except (TypeError, ValueError, OverflowError):
        return str(val)


def query(in_path, out_path, timeout=60):
    """ : From the Cravat file at in_path, query each line on the Cravat server.
        : Write the response values to file at out_path, one delimited row per
        : successfully queried line, with columns in the order of ordered_keys.
        : timeout is the number of seconds to wait on the server for each request.
        : Blank lines are skipped. A line that cannot be parsed or queried is
        : reported on stderr and skipped, so one bad line does not end the run.
        : Response keys that are missing from ordered_keys are appended to that
        : module-level list, so later rows may have more columns than earlier ones.
    """
    with open(in_path, 'r') as in_file, \
            open(out_path, 'w') as out_file:
        for line in in_file:
            # Drop only the line terminator; a full strip() would also remove leading
            # or trailing tabs and shift every column when an edge field is empty.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            try:
                fields = line.split('\t')
                # row is a dict of cravat column headers to their values in this line
                row = {header: fields[index].strip() for header, index in cr_mapping.items()}
                row['chromosome'] = format_chromosome(row['chromosome'])
                query_string = get_query_string(row)
                # Without a timeout a stalled server would block the run forever.
                call = requests.get(endpoint, params={'mutation': query_string}, timeout=timeout)
                if call.status_code != 200 or call.text == "":
                    raise requests.RequestException('Bad server response for query="{}". Respone code: "{}", Response Text: "{}"'
                                                    .format(query_string, call.status_code, call.text))
                json_response = json.loads(call.text)
                # Reject non-object payloads before they can add junk to ordered_keys.
                if not isinstance(json_response, dict):
                    raise ValueError('Unexpected response for query="{}": "{}"'
                                     .format(query_string, call.text))
                # See if server returned additional json key-vals not expected in ordered_keys
                for key in json_response:
                    if key not in ordered_keys:
                        ordered_keys.append(key)
                # Build the whole row before writing, so a failure while formatting
                # cannot leave a partial row without its newline in the output.
                values = [_format_value(json_response.get(key)) for key in ordered_keys]
                out_file.write(delimiter.join(values) + newline)
            except Exception as e:
                # Deliberate best-effort: report the failing line and carry on.
                print(e, file=sys.stderr)
                continue
125
126
if __name__ == "__main__":
    cli_args = get_args()
    if cli_args.output is None:
        # No output path given: write "cravat_converted.txt" in the input file's directory.
        cli_args.output = os.path.join(os.path.dirname(cli_args.input), "cravat_converted.txt")
    query(cli_args.input, cli_args.output)