annotate tools/cgatools17/vcf_to_listVariants.py @ 1:3a2e0f376f26 draft

Minor change to tv2vcf.xml to allow for workflow automation
author dgdekoning
date Wed, 21 Oct 2015 10:09:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
1 #!/usr/bin/env python
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
2
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
3 """[License: GNU General Public License v3 (GPLv3)]
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
4
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
5 This is free software: you can redistribute it and/or modify
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
6 it under the terms of the GNU General Public License as published by
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
7 the Free Software Foundation, either version 3 of the License, or
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
8 (at your option) any later version.
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
9
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
10 This is distributed in the hope that it will be useful,
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
13 GNU General Public License for more details.
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
14
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
15 You should have received a copy of the GNU General Public License
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
17
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
18 Documentation as defined by:
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
19 <http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
20 """
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
21
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
22 import sys,os,os.path,argparse,textwrap,datetime
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
23
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
def convert_vcf_to_listVariants(filename_in, filename_out):
    """Write the single-base substitutions of a VCF file as a listVariants table.

    The output is tab separated and starts with the header
    ``variantId chromosome begin end varType reference alleleSeq xRef``.
    Each accepted VCF record becomes one row: a running id starting at 1,
    CHROM, POS - 1 (begin), POS (end), the literal ``snp``, REF, ALT and the
    VCF ID column.

    Only records whose REF *and* ALT are each exactly one nucleotide
    (A, C, G, T or N, any case) are emitted.  Indels, multi-allelic records,
    symbolic alleles and the ``.`` / ``*`` placeholders are skipped, as are
    comment/header lines (starting with ``#``), lines shorter than six
    characters, and lines with fewer than five tab-separated fields.

    @param filename_in: path of the VCF file to read.
    @param filename_out: path of the file to write, or ``"-"`` for stdout.
    @raise ValueError: if the POS column of an accepted record is not an
        integer.
    @raise IOError: if either file cannot be opened.
    """
    to_stdout = (filename_out == "-")
    fh_out = sys.stdout if to_stdout else open(filename_out, "w")

    # A set (not a string) so that "" and multi-character alleles never match.
    snp_bases = frozenset("ACGTNacgtn")
    variant_id = 1

    try:
        fh_out.write("variantId\tchromosome\tbegin\tend\tvarType\treference\talleleSeq\txRef\n")

        with open(filename_in, 'r') as fh_in:
            for line in fh_in:
                stripped = line.strip()
                if len(stripped) < 6 or stripped[0] == "#":
                    continue  # blank / junk line or VCF header

                # Drop only the line terminator: a record with exactly five
                # columns must not carry the newline in its ALT field.
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 5:
                    continue  # not a usable VCF record

                chrom, pos, xref, ref, alt = fields[:5]

                # A substitution needs one base on both sides; checking ALT
                # alone would label deletions such as AT -> A as "snp".
                if ref not in snp_bases or alt not in snp_bases:
                    continue

                # VCF POS is 1-based; begin is POS - 1 and end is POS.
                record = [str(variant_id), chrom, str(int(pos) - 1), pos,
                          "snp", ref, alt, xref]
                fh_out.write("\t".join(record) + "\n")
                variant_id += 1
    finally:
        # Never close stdout; always release a file we opened ourselves,
        # even when reading or parsing fails part-way through.
        if not to_stdout:
            fh_out.close()
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
57
3a2e0f376f26 Minor change to tv2vcf.xml to allow for workflow automation
dgdekoning
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: read a VCF file and write its single-base
    # substitutions as a listVariants table.
    parser = argparse.ArgumentParser()

    # The input path is mandatory: without it the converter would be handed
    # None and fail with a traceback instead of a usage message.
    parser.add_argument("-i", "--input", required=True,
                        help="input file (VCF)")
    parser.add_argument("-o", "--output", default="-",
                        help="output filename; '-' for stdout")

    args = parser.parse_args()

    convert_vcf_to_listVariants(args.input, args.output)