Mercurial > repos > marpiech > norwich_tools_docking
comparison tools/rdock/bin/ht_protocol_finder.pl @ 3:b02d74d22d05 draft default tip
planemo upload
| author | marpiech |
|---|---|
| date | Mon, 29 Aug 2016 08:23:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:bd50f811878f | 3:b02d74d22d05 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 | |
| 3 # Script that simulates the result of a high throughput protocol. | |
| 4 | |
| 5 if (@ARGV < 4) | |
| 6 { | |
| 7 print "Script that simulates the result of a high throughput protocol.\n"; | |
| 8 print "1st) exhaustive docking of a small representative part of the\n"; | |
| 9 print " whole library.\n"; | |
| 10 print "2nd) Store the result of sdreport -t over that exhaustive dock.\n"; | |
| 11 print " in file <sdreport_file> that will be the input of this\n"; | |
| 12 print " script.\n"; | |
| 13 print "3rd) ht_protocol_finder.pl <sdreport_file> <output_file> <thr1max>"; | |
| 14 print " <thr1min> <ns1> <ns2>\n"; | |
| 15 print " <ns1> and <ns2> are the number of steps in stage 1 and in\n"; | |
| 16 print " stage 2. If not present, the default values are 5 and 15\n"; | |
| 17 print " <thrmax> and <thrmin> setup the range of thresholds that will\n"; | |
| 18 print " be simulated in stage 1. The threshold of stage 2 depends\n"; | |
| 19 print " on the value of the threshold of stage 1.\n"; | |
| 20 print " An input of -22 -24 will try protocols:\n"; | |
| 21 print " 5 -22 15 -27\n"; | |
| 22 print " 5 -22 15 -28\n"; | |
| 23 print " 5 -22 15 -29\n"; | |
| 24 print " 5 -23 15 -28\n"; | |
| 25 print " 5 -23 15 -29\n"; | |
| 26 print " 5 -23 15 -30\n"; | |
| 27 print " 5 -24 15 -29\n"; | |
| 28 print " 5 -24 15 -30\n"; | |
| 29 print " 5 -24 15 -31\n"; | |
| 30 print " Output of the program is a 7 column values. First column\n"; | |
| 31 print " represents the time. This is a percentage of the time it\n"; | |
| 32 print " would take to do the docking in exhaustive mode, i.e. \n"; | |
| 33 print " docking each ligand 100 times. Anything\n"; | |
| 34 print " above 12 is too long.\n"; | |
| 35 print " Second column is the first percentage. Percentage of\n"; | |
| 36 print " ligands that pass the first stage.\n"; | |
| 37 print " Third column is the second percentage. Percentage of\n"; | |
| 38 print " ligands that pass the second stage.\n"; | |
| 39 print " The four last columns represent the protocol.\n"; | |
| 40 print " All the protocols tried are written at the end.\n"; | |
| 41 print " The ones for which time is less than 12%, perc1 is\n"; | |
| 42 print " less than 30% and perc2 is less than 5% but bigger than 1%\n"; | |
| 43 print " will have a series of *** after, to indicate they are good choices\n"; | |
| 44 print " WARNING! This is a simulation based in a small set.\n"; | |
| 45 print " The numbers are an indication, not factual values.\n"; | |
| 46 exit (0); | |
| 47 } | |
| 48 open (IFILE, $ARGV[0]) || die "cannot open $ARGV[0] for reading: $!"; | |
| 49 open (OFILE, ">$ARGV[1]") || die "cannot open $ARGV[1] for writing: $!"; | |
| 50 <IFILE>; # read first line | |
| 51 $line = <IFILE>; | |
| 52 $line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/; | |
| 53 $prevname = $1; | |
| 54 $inter = $2; | |
| 55 push @tmp, $inter; | |
| 56 $i = 0; | |
| 57 while ($line = <IFILE>) | |
| 58 { | |
| 59 $line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/; | |
| 60 $name = $1; | |
| 61 $inter = $2; | |
| 62 if ($name ne $prevname) | |
| 63 { | |
| 64 $prevname = $name; | |
| 65 push @inters, [ @tmp ]; | |
| 66 $runsperligand[$i++] = @tmp; | |
| 67 $#tmp = -1; | |
| 68 } | |
| 69 push @tmp, $inter; | |
| 70 } | |
| 71 push @inters, [ @tmp ]; | |
| 72 $runsperligand[$i++] = @tmp; | |
| 73 $totLigands = @inters; | |
| 74 $ta = $ARGV[2]; | |
| 75 $tb = $ARGV[3]; | |
| 76 if (@ARGV > 4) | |
| 77 { | |
| 78 $n1 = $ARGV[4]; | |
| 79 } | |
| 80 else | |
| 81 { | |
| 82 $n1 = 5; | |
| 83 } | |
| 84 if (@ARGV > 5) | |
| 85 { | |
| 86 $n2 = $ARGV[5]; | |
| 87 } | |
| 88 else | |
| 89 { | |
| 90 $n2 = 15; | |
| 91 } | |
| 92 printf OFILE "Command line args:\n\t"; | |
| 93 foreach $arg (@ARGV) | |
| 94 { | |
| 95 printf OFILE "%s\t", $arg; | |
| 96 } | |
| 97 printf OFILE "\n\n TIME PERC1 PERC2 N1 THR1 N2 THR2\n"; | |
| 98 for ($t1 = $ta ; $t1 >= $tb ; $t1--) | |
| 99 { | |
| 100 $tc = $t1 - 5; | |
| 101 for ($t2 = $tc ; $t2 >= ($tc - 2) ; $t2--) | |
| 102 { | |
| 103 ($cref, $dref) = | |
| 104 getTables($t1, $t2); | |
| 105 @ligBelowThr1 = @$cref; | |
| 106 @ligBelowThr2 = @$dref; | |
| 107 ($time,$p1,$p2) = simulation($n1,$t1,$n2,$t2); | |
| 108 if (($time < 12.0) && ($p1 < 30.0) && ($p2 < 5.0) && ($p2 > 1.0)) | |
| 109 { | |
| 110 printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i ***\n", | |
| 111 $time, $p1, $p2, $n1,$t1,$n2,$t2; | |
| 112 } | |
| 113 else | |
| 114 { | |
| 115 printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i\n", | |
| 116 $time, $p1, $p2, $n1,$t1,$n2,$t2; | |
| 117 } | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 | |
| 122 sub simulation | |
| 123 { | |
| 124 my(@params); | |
| 125 my(@lt0, @lt1); | |
| 126 my($thr1, $thr2,$ns1,$ns2,$tottime,$totnumHits,$total,$totnruns); | |
| 127 @params = @_; | |
| 128 $ns1 = $params[0]; | |
| 129 $thr1 = $params[1]; | |
| 130 $ns2 = $params[2]; | |
| 131 $thr2 = $params[3]; | |
| 132 $ntrials = 100; | |
| 133 $tottime = 0; | |
| 134 $totnumHitss1 = 0; | |
| 135 $totnumHitss2 = 0; | |
| 136 $total = $totLigands; | |
| 137 $totnruns = 0; | |
| 138 for ($i = 0 ; $i < $ntrials ; $i++) | |
| 139 { | |
| 140 $numHitss1 = 0; | |
| 141 $numHitss2 = 0; | |
| 142 $totnruns = 0; | |
| 143 for ($j = 0 ; $j < $totLigands ; $j++) | |
| 144 { | |
| 145 ($passStage1, $nruns1) = stage($ns1, $ligBelowThr1[$j]); | |
| 146 if ($passStage1) | |
| 147 { | |
| 148 $numHitss1++; | |
| 149 ($passStage2, $nruns2) = stage($ns2, $ligBelowThr2[$j]); | |
| 150 if ($passStage2) | |
| 151 { | |
| 152 $numHitss2++; | |
| 153 } | |
| 154 $totnruns += $nruns2; | |
| 155 } | |
| 156 $totnruns += $nruns1; | |
| 157 } | |
| 158 $time = $totnruns / $total; | |
| 159 $tottime += $time; | |
| 160 $totnumHitss1 += $numHitss1; | |
| 161 $totnumHitss2 += $numHitss2; | |
| 162 } | |
| 163 $tottime /= $ntrials; | |
| 164 $totnumHitss1 /= $ntrials; | |
| 165 $totnumHitss2 /= $ntrials; | |
| 166 $p1 = $totnumHitss1 * 100.0 / $totLigands; | |
| 167 $p2 = $totnumHitss2 * 100.0 / $totLigands; | |
| 168 return ($tottime, $p1, $p2); | |
| 169 } | |
| 170 | |
| 171 sub getTables | |
| 172 { | |
| 173 my(@params); | |
| 174 my($totLigands, @lt1, @lt2); | |
| 175 my($row,$inter, $belowThr1, $belowThr2); | |
| 176 my($thr0, $thr1); | |
| 177 @params = @_; | |
| 178 $thr1 = $params[0]; | |
| 179 $thr2 = $params[1]; | |
| 180 $totLigands = 0; | |
| 181 $belowThr1 = 0; | |
| 182 $belowThr2 = 0; | |
| 183 $totLigands = 0; | |
| 184 for $i (0 .. $#inters) | |
| 185 { | |
| 186 $row = $inters[$i]; | |
| 187 for $j (0 .. $#{$row}) | |
| 188 { | |
| 189 $inter = $row->[$j]; | |
| 190 if ($inter <= $thr1) | |
| 191 { | |
| 192 $belowThr1++; | |
| 193 } | |
| 194 if ($inter <= $thr2) | |
| 195 { | |
| 196 $belowThr2++; | |
| 197 } | |
| 198 } | |
| 199 $lt1[$totLigands] = | |
| 200 $belowThr1 / $runsperligand[$i]; | |
| 201 $lt2[$totLigands] = | |
| 202 $belowThr2 / $runsperligand[$i]; | |
| 203 $totLigands++; | |
| 204 $belowThr1 = 0; | |
| 205 $belowThr2 = 0; | |
| 206 } | |
| 207 return (\@lt1, \@lt2); | |
| 208 } | |
| 209 | |
| 210 | |
| 211 | |
| 212 sub stage | |
| 213 { | |
| 214 my($nruns, $p, $i, $r); | |
| 215 $nruns = $_[0]; | |
| 216 $p = $_[1]; | |
| 217 if ($nruns == 0) | |
| 218 { | |
| 219 return (1, 0); | |
| 220 } | |
| 221 for ($i = 0 ; $i < $nruns ; $i++) | |
| 222 { | |
| 223 $r = rand; | |
| 224 if ($r < $p) | |
| 225 { | |
| 226 return (1, $i); | |
| 227 } | |
| 228 } | |
| 229 return (0, $nruns); | |
| 230 } | |
| 231 | |
| 232 |
