Mercurial > repos > geert-vandeweyer > vcf_to_variantdb
annotate VCF_to_VariantDB.pl @ 7:04e3bba317f4 draft
Added selection to differentiate in VCF format
author | geert-vandeweyer |
---|---|
date | Thu, 05 Jun 2014 04:02:28 -0400 |
parents | 6ddc1c6472ef |
children | db44ff975de8 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/perl |
2 | |
3 # load modules | |
4 use Getopt::Std; | |
5 | |
6 ########################## | |
7 # COMMAND LINE ARGUMENTS # | |
8 ########################## | |
9 # v = (v)cf file to load | |
10 # V = (V)CF file encoded id | |
11 # u = (u)ser email from galaxy | |
12 # n = sample (n)ame | |
13 # a = sample (a)nnotation | |
14 # g = sample (g)ender | |
15 # o = (o)utput file (simple text file) | |
16 # b = (b)am file (optional) | |
17 # B = (B)am index , needed if b is specified | |
18 # c = encoded id of bam file (optional) | |
19 # C = encoded id of Bam index , needed if b is specified => NOT POSSIBLE YET, NEEDS INDEXING ON VARIANTDB SERVER ! | |
20 # S = (S)erver address to send data to. | |
21 # R = (R)oot of galaxy web server (/home/galaxyuser/galaxy-dist) | |
22 # H = (H)ost of the galaxy web server (http://my.galaxy.server/galaxy/) | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
23 # F = (F)ormat of input file |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
# Parse the single-letter command line switches (documented in the option
# list above) into the global hash %opts.
getopts('v:u:n:a:g:o:b:B:V:c:S:R:H:F:', \%opts); # options are in %opts

# Switch on autoflush for the currently selected output handle.
$|++;

#################
## CHECK INPUT ##
#################
# Abort before any network traffic or file system changes happen when a
# switch that the rest of the script depends on is missing.
if (!exists($opts{'v'})) {
    die('No VCF File Specified');
}
if (!-e $opts{'v'}) {
    die('VCF File not found');
}
if (!exists($opts{'u'})) {
    die('No user specified');
}
if (!exists($opts{'S'})) {
    die('No VariantDB server specified');
}
if (!exists($opts{'H'})) {
    die('The Galaxy source-server is not specified');
}
if (!exists($opts{'F'})) {
    die('VCF format not specified.');
}
# -R is the prefix of the working directory created further down; without
# it the directory would be created relative to the file system root.
if (!exists($opts{'R'})) {
    die('The Galaxy root directory is not specified');
}
# A BAM file is optional, but when it is given its index is linked next to
# it, so the index path must be supplied as well.
if (exists($opts{'b'}) && !exists($opts{'B'})) {
    die('BAM file specified without BAM index');
}
1 | 48 |
################
# open outfile #
################
# OUT receives the progress report of this run. The path is checked first
# because a three-argument open on an undefined name would silently open an
# anonymous temporary file instead of failing.
if (!defined($opts{'o'}) || $opts{'o'} eq '') {
    die('No output file specified');
}
# Three-argument open: the file name is never parsed for mode characters,
# and a failure stops the script instead of discarding all later output.
open(OUT, '>', $opts{'o'}) or die("Could not open output file $opts{'o'} : $!");
53 | |
###############################
## TEST CONNECTION TO SERVER ##
###############################
use LWP::UserAgent;

# Endpoint = server address + "/" + CGI script. The substitution collapses
# the double slash that appears when -S already ends in a slash.
my $url = $opts{'S'} . "/";
$url =~ s/\/\/$/\//;
$url = $url . "cgi-bin/galaxy_communication.cgi";

# One user agent is shared by every request in this script.
my $conn = LWP::UserAgent->new();
$conn->timeout(1800);

# Handshake: the server is expected to answer 'HelloGalaxy'.
my $response = $conn->post( $url, { 'HelloWorld' => 1 } );
my $content  = $response->decoded_content();

# Anything other than the expected greeting is fatal; the reply body is
# passed on in the error message.
if ($content ne 'HelloGalaxy') {
    die("Could not connect to the specified server : $content");
}
print OUT "Testing connection to $opts{'S'} : OK.\n";
72 | |
73 | |
##################
## TEST USER ID ##
##################
# Ask the server whether the given e-mail address is known to it.
# $response and $content are the variables declared for the connection
# test; they are reused here instead of being declared a second time in the
# same scope.
$email = $opts{'u'};
$response = $conn->post( $url, {'CheckUser' => $email} );
$content = $response->decoded_content();

if ($content eq 'OK') {
    print OUT "Testing User-existence : OK.\n";
}
else {
    # any other reply is passed on as the error text
    die("ERROR: $content");
}

# This line goes to standard output, not to the OUT report file.
print $opts{'H'}. " was specified as galaxy host\n";
89 | |
###############################################
## SEND THE VCF AND BAM FILES FOR PROCESSING ##
###############################################
# filepaths
my $vcfpath = $opts{'v'};
my $bampath = $opts{'b'};
my $baipath = $opts{'B'};
# input VCF format
$format = $opts{'F'};

# make output directory in (galaxy/static/) working dir
#   $wd : directory on disk, below the Galaxy root given with -R
#   $dd : the same directory name appended to the Galaxy host given with -H
our $wd;
our $dd;
my $attempts = 0;
while (1) {
    my $rand = int(rand(1000));
    $wd = $opts{'R'}."/static/VCF_parser.".$rand;
    $dd = $opts{'H'}."/static/VCF_parser.".$rand;
    # mkdir is both the existence test and the creation step, so two runs
    # starting at the same moment cannot end up sharing one directory.
    last if mkdir($wd);
    my $name_taken = $!{EEXIST};
    my $reason     = "$!";
    $attempts++;
    # A name that is already in use just means "draw again"; every other
    # failure (missing parent, permissions, ...) will not fix itself. The
    # attempt limit prevents an endless loop when all names are taken.
    if (!$name_taken || $attempts >= 10000) {
        die("Could not create working directory $wd : $reason");
    }
}
110 | |
111 | |
## link files
# The input files are symlinked into the working directory under fixed
# names; the matching addresses below $dd are what is sent to the server.
# symlink() is used instead of a shell "ln -s" so that paths containing
# spaces or shell metacharacters are handled literally and a failed link
# stops the run here.
$vcfurl = "$dd/data.vcf";
symlink($vcfpath, "$wd/data.vcf") or die("Could not link VCF file into $wd : $!");
if (exists($opts{'b'})) {
    $bamurl = "$dd/data.bam";
    $bamidxurl = "$dd/data.bai";
    symlink($bampath, "$wd/data.bam") or die("Could not link BAM file into $wd : $!");
    symlink($baipath, "$wd/data.bai") or die("Could not link BAM index into $wd : $!");
}
# sample name and gender as given on the command line
$sample = $opts{'n'};
$gender = $opts{'g'};
# post form to the variantDB host.
# Fields that are sent for every upload.
my %form = (
    'VCFurl1'      => "$vcfurl",
    'name1'        => "$sample",
    'gender1'      => "$gender",
    'User'         => $email,
    'GalaxyUpload' => 1,
    'Format1'      => $format,
);
# Extra fields that are only sent when a BAM file was supplied.
if (exists($opts{'b'})) {
    $form{'BAMurl1'}    = "$bamurl";
    $form{'BAIurl1'}    = "$bamidxurl";
    $form{'storedata1'} = 1;
}
$response = $conn->post( $url, \%form );
# $content is the variable declared for the earlier server checks.
$content = $response->decoded_content();
chomp($content);
## check if upload went ok.
if (substr($content,0,2) ne 'OK') {
    die("ERROR: $content");
}
## extract wd from content.
print OUT "Uploading datafiles to VariantDB : OK.\n";
# The reply is expected to be "OK-<remote working directory>". $1 is only
# read when the match succeeded, so a stale capture can never be used.
if ($content =~ m/OK-(.+)$/) {
    $rwd = $1;
}
else {
    $rwd = '';
}
if ($rwd eq '') {
    die("ERROR : No remote working directory provided to check status.");
}
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
## now wait for the import to finish.
# The server is polled with the remote working directory until the status
# character (4th character of the reply) is no longer numerically zero.
$status = 0;
while (1) {
    my $response = $conn->post( $url, {'CheckStatus' => 1,'rwd' => $rwd}) ;
    $content = $response->decoded_content();
    chomp($content);
    if (substr($content,0,2) ne 'OK') {
        die("ERROR: $content");
    }
    $status = substr($content,3,1);
    # Leave as soon as the import is reported finished; the pause is only
    # needed between two polls, not after the final one.
    last if ($status != 0);
    sleep 10;
}

## Loading OK
# latest response : OK-1-Content
# drop the 5 character "OK-1-" prefix and keep the message text
$content = substr($content,5);
print OUT "Processing Datafiles : OK.\n";
print OUT "\n$content\n";
close OUT;
162 | |
# clean up
# Remove the local working directory and the links inside it. The list
# form of system() runs rm directly, without a shell, so the path is passed
# as one literal argument whatever characters it contains; "--" keeps it
# from being read as an option.
system('rm', '-Rf', '--', $wd);
165 |