Mercurial > repos > saskia-hiltemann > cgatools_v17
annotate tools/cgatools17/vcf_to_listVariants.py @ 1:3a2e0f376f26 draft
Minor change to tv2vcf.xml to allow for workflow automation
author | dgdekoning |
---|---|
date | Wed, 21 Oct 2015 10:09:15 -0400 |
parents | |
children |
rev | line source |
---|---|
1
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
1 #!/usr/bin/env python |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
2 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
3 """[License: GNU General Public License v3 (GPLv3)] |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
4 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
5 This is free software: you can redistribute it and/or modify |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
6 it under the terms of the GNU General Public License as published by |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
7 the Free Software Foundation, either version 3 of the License, or |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
8 (at your option) any later version. |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
9 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
10 This is distributed in the hope that it will be useful, |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
13 GNU General Public License for more details. |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
14 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
15 You should have received a copy of the GNU General Public License |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
16 along with this program. If not, see <http://www.gnu.org/licenses/>. |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
17 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
18 Documentation as defined by: |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
19 <http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms> |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
20 """ |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
21 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
22 import sys,os,os.path,argparse,textwrap,datetime |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
23 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
def convert_vcf_to_listVariants(filename_in, filename_out):
    """Convert the single-base substitutions of a VCF file to a listVariants table.

    The output is tab-separated and starts with the header line
    C{variantId, chromosome, begin, end, varType, reference, alleleSeq, xRef}.
    One row is written per accepted VCF record; rows are numbered
    consecutively from 1 in the order they are written.

    Only records whose REF and ALT columns are both exactly one character
    long are written (as varType C{snp}); every other record is skipped.
    C{begin} is the VCF position minus one and C{end} is the VCF position
    itself.

    @param filename_in: path of the VCF file to read
    @param filename_out: path of the file to write, or C{"-"} for stdout
    @raise ValueError: if the position column of an accepted record is not
        an integer
    """
    to_stdout = (filename_out == "-")
    fh_out = sys.stdout if to_stdout else open(filename_out, "w")

    try:
        fh_out.write("variantId\tchromosome\tbegin\tend\tvarType\treference\talleleSeq\txRef\n")

        variant_id = 1
        with open(filename_in, 'r') as fh_in:
            for line in fh_in:
                line_s = line.strip()

                # Skip blank/too-short lines and VCF meta/header lines.
                if len(line_s) < 6 or line_s[0] == "#":
                    continue

                # Drop only the line terminator so that a record ending at
                # the ALT column does not carry a newline in its last field.
                params = line.rstrip("\r\n").split("\t")

                # CHROM, POS, ID, REF and ALT are all needed; skip
                # truncated records instead of failing with an IndexError.
                if len(params) < 5:
                    continue

                chrom, pos, xref, ref, alt = params[:5]

                # A substitution needs a single base on both sides; a
                # multi-base REF with a one-base ALT is a deletion and
                # would get a wrong begin/end interval if written as snp.
                if len(ref) != 1 or len(alt) != 1:
                    continue

                row = [
                    str(variant_id),    # id
                    chrom,              # chr
                    str(int(pos) - 1),  # begin
                    pos,                # end
                    "snp",
                    ref,                # reference
                    alt,                # alleleSeq
                    xref,               # dbsnpid / annotation id
                ]
                fh_out.write("\t".join(row) + "\n")
                variant_id += 1
    finally:
        # Release the output file even when conversion fails part-way;
        # stdout is not ours to close.
        if not to_stdout:
            fh_out.close()
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
57 |
3a2e0f376f26
Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse the input/output paths and run the
    # conversion.
    parser = argparse.ArgumentParser()

    # The input path is mandatory; without it the converter would be handed
    # None and fail with a traceback instead of a usage message.
    parser.add_argument("-i", "--input", required=True, help="input file (VCF)")
    parser.add_argument("-o", "--output", help="output filename; '-' for stdout", default="-")

    args = parser.parse_args()

    convert_vcf_to_listVariants(args.input, args.output)