annotate polyphen_datasets.py @ 0:40dd2e7ee63a default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 16:00:12 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
1 import os
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
2 import re
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
3 import csv
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
4 import operator
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
5 from galaxy import config
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
6
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
# Work out which directory holds the PolyPhen dataset files.
# Start from the path Galaxy's configuration resolves for "tool-data";
# hgvs_annotate.loc is looked up inside it.
configur = config.Configuration()
kitDir = configur.resolve_path("tool-data")

locPath = os.path.join(kitDir, "hgvs_annotate.loc")
if not os.path.exists(locPath):
    # No .loc file present: fall back to the hard-coded default location.
    kitDir = "/export/geno_tmp/achri/dbs/"
else:
    # The .loc file is read as tab-separated rows; a row whose first column
    # is 'pathways_dbs' supplies the dataset directory in its second column.
    with open(locPath, "r") as tsv:
        for line in csv.reader(tsv, delimiter="\t"):
            # Blank or single-column rows carry no usable value; skipping
            # them avoids the IndexError that line[0] / line[1] would raise.
            if len(line) > 1 and line[0] == 'pathways_dbs':
                kitDir = line[1]  # no break: the last matching row wins

# Directory scanned by PolyPhen_fileOptions().
polyPhenDir = kitDir
40dd2e7ee63a initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
def PolyPhen_fileOptions(directory=None):
    """Build the list of selectable PolyPhen datasets.

    Lists *directory* and, for every entry named ``polyphen_<name>.txt.gz``,
    emits one option tuple for ``<name>``.

    Args:
        directory: Directory to scan. When omitted or ``None`` the
            module-level ``polyPhenDir`` is used, so existing zero-argument
            callers behave as before.

    Returns:
        A list of ``(name, name, False)`` tuples ordered by file name.
        NOTE(review): presumably Galaxy's (label, value, selected) option
        triple -- confirm against the tool XML that calls this.

    Raises:
        OSError: If the directory cannot be listed.
    """
    if directory is None:
        directory = polyPhenDir

    # Raw string with both dots escaped so only a literal ".txt.gz" suffix
    # is accepted.
    pattern = re.compile(r'polyphen_(.*)\.txt\.gz$')

    datasets = []
    for entry in sorted(os.listdir(directory)):
        # os.path.join copes with a directory given without a trailing
        # separator; plain string concatenation would build a path that
        # never exists and silently yield an empty option list.
        if not os.path.exists(os.path.join(directory, entry)):
            continue
        found = pattern.match(entry)
        if found:
            datasets.append((found.group(1), found.group(1), False))
    return datasets