Mercurial > repos > geert-vandeweyer > vcf_to_variantdb
annotate VCF_to_VariantDB.pl @ 11:fb815517e918 draft default tip
Added support for 23andMe VCF files generated by ArrogantRobot
author | geert-vandeweyer |
---|---|
date | Fri, 20 Feb 2015 05:49:54 -0500 |
parents | 3b27cae9b359 |
children |
rev | line source |
---|---|
1 | 1 #!/usr/bin/perl |
2 | |
3 # load modules | |
4 use Getopt::Std; | |
5 | |
6 ########################## | |
7 # COMMAND LINE ARGUMENTS # | |
8 ########################## | |
9 # v = (v)cf file to load | |
10 # V = (V)CF file encoded id | |
11 # u = (u)ser email from galaxy | |
12 # n = sample (n)ame | |
13 # a = sample (a)nnotation | |
14 # g = sample (g)ender | |
15 # o = (o)utput file (simple text file) | |
16 # b = (b)am file (optional) | |
17 # B = (B)am index , needed if b is specified | |
18 # c = encoded id of bam file (optional) | |
19 # C = encoded id of Bam index , needed if b is specified => NOT POSSIBLE YET, NEEDS INDEXING ON VARIANTDB SERVER ! | |
20 # S = (S)erver address to send data to. | |
21 # R = (R)oot of galaxy web server (/home/galaxyuser/galaxy-dist) | |
22 # H = (H)ost of the galaxy web server (http://my.galaxy.server/galaxy/) | |
9
3b27cae9b359
New Version. VariantDB checks for Genome Build matching using the Galaxy dbkey variable.
geert-vandeweyer
parents:
8
diff
changeset
|
23 # G = Genome build (dbkey) |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
24 # F = (F)ormat of input file |
9
3b27cae9b359
New Version. VariantDB checks for Genome Build matching using the Galaxy dbkey variable.
geert-vandeweyer
parents:
8
diff
changeset
|
25 getopts('v:u:n:a:g:o:b:B:V:c:S:R:H:G:F:', \%opts); # option are in %opts |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
26 $|++; |
1 | 27 ################# |
28 ## CHECK INPUT ## | |
29 ################# | |
30 if (!exists($opts{'v'})) { | |
31 die('No VCF File Specified'); | |
32 } | |
33 if (!-e $opts{'v'}) { | |
34 die('VCF File not found'); | |
35 } | |
36 if (!exists($opts{'u'})) { | |
37 die('No user specified'); | |
38 } | |
39 if (!exists($opts{'S'})) { | |
40 die('No VariantDB server specified'); | |
41 } | |
42 if (!exists($opts{'H'})) { | |
43 die('The Galaxy source-server is not specified'); | |
44 } | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
45 if (!exists($opts{'F'})) { |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
46 die('VCF format not specified.'); |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
47 } |
1 | 48 |
49 ################ | |
50 # open outfile # | |
51 ################ | |
52 open OUT, ">$opts{'o'}"; | |
53 | |
54 ############################### | |
55 ## TEST CONNECTION TO SERVER ## | |
56 ############################### | |
57 use LWP::UserAgent; | |
58 my $url = $opts{'S'}."/"; | |
59 $url =~ s/\/\/$/\//; | |
60 $url .= "cgi-bin/galaxy_communication.cgi"; | |
61 my $conn = LWP::UserAgent->new(); | |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
62 $conn->timeout(1800); |
1 | 63 my $response = $conn->post( $url, {'HelloWorld' => 1} ); |
64 my $content = $response->decoded_content(); | |
65 | |
66 if ($content eq 'HelloGalaxy') { | |
67 print OUT "Testing connection to $opts{'S'} : OK.\n"; | |
68 } | |
69 else { | |
70 die("Could not connect to the specified server : $content"); | |
71 } | |
72 | |
73 | |
74 ################## | |
75 ## TEST USER ID ## | |
76 ################## | |
77 $email = $opts{'u'}; | |
9
3b27cae9b359
New Version. VariantDB checks for Genome Build matching using the Galaxy dbkey variable.
geert-vandeweyer
parents:
8
diff
changeset
|
78 $dbkey = $opts{'G'}; |
3b27cae9b359
New Version. VariantDB checks for Genome Build matching using the Galaxy dbkey variable.
geert-vandeweyer
parents:
8
diff
changeset
|
79 my $response = $conn->post( $url, {'CheckUser' => $email,'dbkey' => $dbkey} ); |
1 | 80 my $content = $response->decoded_content(); |
81 | |
82 if ($content eq 'OK') { | |
9
3b27cae9b359
New Version. VariantDB checks for Genome Build matching using the Galaxy dbkey variable.
geert-vandeweyer
parents:
8
diff
changeset
|
83 print OUT "Testing User-existence and Genome Build: OK.\n"; |
1 | 84 } |
85 else { | |
86 die("ERROR: $content"); | |
87 } | |
88 | |
5 | 89 print $opts{'H'}. " was specified as galaxy host\n"; |
90 | |
1 | 91 ############################################### |
92 ## SEND THE VCF AND BAM FILES FOR PROCESSING ## | |
93 ############################################### | |
94 # filepaths | |
95 my $vcfpath = $opts{'v'}; | |
96 my $bampath = $opts{'b'}; | |
97 my $baipath = $opts{'B'}; | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
98 # input VCF format |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
99 $format = $opts{'F'}; |
1 | 100 # make output directory in (galaxy/static/) working dir |
101 my $rand = int(rand(1000)); | |
102 our $wd = $opts{'R'}."/static/VCF_parser.".$rand; #int(rand(1000)); | |
103 our $dd = $opts{'H'}."/static/VCF_parser.".$rand; | |
104 while (-d $wd) { | |
105 my $rand = int(rand(1000)); | |
106 $wd = $opts{'R'}."/static/VCF_parser.".$rand;#int(rand(1000)); | |
107 $dd = $opts{'H'}."/static/VCF_parser.".$rand; | |
108 | |
109 } | |
110 $result = system("mkdir $wd"); | |
111 | |
112 | |
113 ## link files | |
114 $vcfurl = "$dd/data.vcf"; | |
8 | 115 $vcfurl =~ s/\s//g; |
1 | 116 system ("ln -s $vcfpath $wd/data.vcf"); |
117 if (exists($opts{'b'})) { | |
118 $bamurl = "$dd/data.bam"; | |
8 | 119 $bamurl =~ s/\s//g; |
1 | 120 $bamidxurl = "$dd/data.bai"; |
8 | 121 $bamidxurl =~ s/\s//g; |
1 | 122 system ("ln -s $bampath $wd/data.bam"); |
123 system ("ln -s $baipath $wd/data.bai"); | |
124 } | |
125 $sample = $opts{'n'}; | |
126 $gender = $opts{'g'}; | |
127 # post form to the variantDB host. | |
128 if (exists($opts{'b'})) { | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
129 $response = $conn->post( $url, {'VCFurl1' => "$vcfurl", 'BAMurl1' => "$bamurl", 'BAIurl1' => "$bamidxurl",'storedata1' => 1, 'name1' => "$sample", 'gender1' => "$gender", 'User' => $email, 'GalaxyUpload' => 1,'Format1' => $format} ); |
1 | 130 } |
131 else { | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
132 $response = $conn->post( $url, {'VCFurl1' => "$vcfurl", 'name1' => "$sample", 'gender1' => "$gender", 'User' => $email, 'GalaxyUpload' =>1, 'Format1' => $format } ); |
1 | 133 } |
134 my $content = $response->decoded_content(); | |
135 chomp($content); | |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
136 ## check if upload went ok. |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
137 if (substr($content,0,2) ne 'OK') { |
1 | 138 die("ERROR: $content"); |
139 } | |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
140 ## extract wd from content. |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
141 print OUT "Uploading datafiles to VariantDB : OK.\n"; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
142 $content =~ m/OK-(.+)$/; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
143 $rwd = $1; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
144 if ($rwd eq '') { |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
145 die("ERROR : No remote working directory provided to check status."); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
146 } |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
147 ## now wait for the import to finish. |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
148 $status = 0; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
149 while ($status == 0) { |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
150 my $response = $conn->post( $url, {'CheckStatus' => 1,'rwd' => $rwd}) ; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
151 $content = $response->decoded_content(); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
152 chomp($content); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
153 if (substr($content,0,2) ne 'OK') { |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
154 die("ERROR: $content"); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
155 } |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
156 $status = substr($content,3,1); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
157 sleep 10; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
158 } |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
159 |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
160 ## Loading OK |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
161 # latest response : OK-1-Content |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
162 $content = substr($content,5); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
163 print OUT "Processing Datafiles : OK.\n"; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
164 print OUT "\n$content\n"; |
1 | 165 close OUT; |
166 | |
167 # clean up | |
168 system("rm -Rf '$wd'"); | |
169 |