Mercurial > repos > fubar > microsatbed
comparison seqrequester/scripts/search.pl @ 1:1085e094cf5f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
author | fubar |
---|---|
date | Sat, 13 Jul 2024 12:39:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:dd71d3167476 | 1:1085e094cf5f |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 use strict; | |
4 use Time::HiRes qw(usleep nanosleep); | |
5 | |
6 my $order = 0; | |
7 my $weight = 0; | |
8 | |
9 while (scalar(@ARGV) > 0) { | |
10 my $arg = shift @ARGV; | |
11 | |
12 if ($arg eq "-order") { | |
13 $order = shift @ARGV; | |
14 } | |
15 | |
16 if ($arg eq "-weight") { | |
17 $weight = shift @ARGV; | |
18 } | |
19 } | |
20 if ($order == 0) { | |
21 die "usage: $0 -order <order>\n"; | |
22 } | |
23 | |
24 # This scales at 4^k * 4^k -- (state vectors) * (cycle length) | |
25 # | |
26 # time seqrequester shift -search -fast -tapmin 300000000000 -tapmax 300033333333 -order 12 -report 0.9 | |
27 # | |
28 # The SV MUST NOT start with 0 or 1. We generate the new SR state as | |
29 # | |
30 # out = rightmost 2 bits of SR | |
31 # mul = SV * out (* in GF4, operating on 2-bit tuples) | |
32 # SR = (SR >> 2) ^ mul | |
33 # | |
34 # When the SV starts with 0, the resulting SR will always start with zero - | |
35 # it is like we simply reduce the order by one. The longest cycle in order | |
36 # 12 here is 4194304, exactly 1/4 the maximum expected, and exactly the | |
37 # length of the cycle for order 11. | |
38 # | |
39 # When the SV starts with 1, we are simply moving the output symbol to the | |
40 # input - we turn the shift register into a cyclic shift register. The longest | |
41 # cycle in order 12 here is 'exactly' 1/3 the maximum - 5592406 out of 16777216. | |
42 # I do not know why. | |
43 # | |
44 # 3's and 2's seem interchangeable; swapping all 3's and 2's in a vector | |
45 # seems to generate the same cycle lengths. This probably just 'reverses' | |
46 # the sequence. NOT RIGOROUSLY TESTED. | |
47 # | |
48 # | |
49 # | |
50 # valgrind --tool=callgrind --dump-instr=yes --simulate-cache=yes --collect-jumps=yes \ | |
51 # seqrequester shift -search -fast -tapmin 300000000000 -tapmax 300003333333 -order 12 -report 0.9 | |
52 # callgrind_control -h | |
53 # | |
54 # Hardcoded job splits. | |
55 # order <= 10 - 0 digits - 1 job | |
56 # order == 11 - 1 digit - 4 jobs | |
57 # order == 12 - 2 digits - 16 jobs | |
58 # order == 13 - 4 digits - 256 jobs | |
59 # order == 14 - 6 digits - 4096 jobs | |
60 # order == 15 - 8 digits - 65536 jobs | |
61 # order == 16 - 11 digits - 4194304 jobs | |
62 # | |
63 # Times are on Ryzen 7 3700X unless noted. | |
64 # Times are significantly worse if multiple jobs run. | |
65 # | |
66 # order == 12 - with prefix 5 took 1.43 minutes for 30000........ (actual) - 1024 prefixes -> 24 CPU hours | |
67 # order == 12 - with prefix 5 took 0.93 minutes for 30000........ (actual) - 1024 prefixes -> no detect version | |
68 # order == 12 - with prefix 4 took 5.75 minutes for 3000......... (actual) - 256 prefixes -> 24 CPU hours | |
69 # order == 12 - with prefix 4 took 14.03 minutes for 3000......... (actual) - 256 prefixes -> 60 CPU hours (on d) | |
70 # order == 12 - with prefix 4 took 14.03 minutes for 3000......... (actual) - 256 prefixes -> but 90 minutes/job if all 24 cores are used! | |
71 # order == 12 - with prefix 3 took minutes for 300.......... (estimated) - 64 prefixes -> | |
72 # order == 12 - with prefix 2 took minutes for 30........... (estimated) - 16 prefixes -> | |
73 # | |
74 # order == 13 - with prefix 6 took 9 minutes for 300000....... (actual) - 4096 prefixes -> 614 CPU hours | |
75 # order == 13 - with prefix 5 took 36 minutes for 30000........ (estimated) - 1024 prefixes -> | |
76 # order == 13 - with prefix 4 took 144 minutes for 3000......... (estimated) - 256 prefixes -> | |
77 # | |
78 # order == 14 - with prefix 9 took 8 minutes for 300000000..... (actual) - 262144 prefixes -> | |
79 # order == 14 - with prefix 8 took 32 minutes for 30000000...... (actual) - 65536 prefixes -> 35,000 CPU hours | |
80 # order == 14 - with prefix 7 took 128 minutes for 3000000....... (estimated) - 16384 prefixes -> | |
81 # | |
82 # order == 15 - with prefix 12 took 2.96 minutes for 300000000000... (actual) - 16777216 prefixes -> 827,675 CPU hours | |
83 # order == 15 - with prefix 12 took 0.28 minutes for 300000000000... (actual) - 16777216 prefixes -> no detect version | |
84 # order == 15 - with prefix 11 took 9.13 minutes for 30000000000.... (actual) - 4194304 prefixes -> 638,233 CPU hours | |
85 # order == 15 - with prefix 10 took 39.38 minutes for 3000000000..... (actual) - 1048576 prefixes -> 688,273 CPU hours (78.6 years) | |
86 # order == 15 - with prefix 10 took 3.76 minutes for 3000000000..... (actual) - 1048576 prefixes -> no detect version -- | |
87 # order == 15 - with prefix 10 took 4.11 minutes for 3000000000..... (actual) - 1048576 prefixes -> no detect version -- with 16 copies running | |
88 # order == 15 - with prefix 9 took minutes for 300000000...... (estimated) - 262144 prefixes -> | |
89 # | |
90 # CL time | |
91 # | |
92 # BD time | |
93 # time seqrequester shift -search -fast -order 21 -report 0.0 -tapmin 300000000000000000000 -tapmax 300000000000000003333 -weight 3 | |
94 # 66069.844u 0.283s 18:21:14.56 99.9% 177+817k 0+0io 7pf+0w | |
95 # | |
96 # order = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | |
97 my @plen = ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 9, 11, 13, 15, 17, 19, 0 ); | |
98 | |
99 # prefix 5 for 15 is way too big. this resulted in 256 jobs (1024 prefixes, | |
100 # but only the ones starting with 3 are computed). each job is taking a bit | |
101 # more than 100 hours. | |
102 # wallclock 415000s (115 hours x 256 jobs = 29440 cpu hours) | |
103 # user 381110s (remarkably constant) | |
104 # | |
105 # job scaling should be x4 for each search, and there are x4 more vectors to try. | |
106 # so for constant job time, we need to increase the prefix length by 2 for each. | |
107 | |
108 # prefix 9 for 16 resulted in the last job 12550u 3:30 on bv (x 65536 jobs = 229376 cpu hours). | |
109 | |
110 | |
111 print STDERR "Building job names with $plen[$order] components.\n"; | |
112 | |
113 my @alljobs = ( "0", "1", "2", "3" ); | |
114 my @subjobs; | |
115 | |
116 for (my $p=1; $p < $plen[$order]; $p++) { | |
117 my @tmpjobs = @alljobs; | |
118 undef @alljobs; | |
119 | |
120 foreach my $e (@tmpjobs) { | |
121 push @alljobs, "0$e"; | |
122 push @alljobs, "1$e"; | |
123 push @alljobs, "2$e"; | |
124 push @alljobs, "3$e"; | |
125 } | |
126 } | |
127 | |
128 print STDERR "Filtering job names.\n"; | |
129 | |
130 foreach my $pp (@alljobs) { | |
131 push @subjobs, $pp if (($pp =~ m/^0/) && ($order < 14)); # Ignore 0, no complete cycles in here. | |
132 push @subjobs, $pp if (($pp =~ m/^1/) && ($order < 14)); # Ignore 1, no complete cycles in here. | |
133 push @subjobs, $pp if (($pp =~ m/^2/) && ($order < 14)); # Ignore 2, seems to be just a dual of 3. | |
134 push @subjobs, $pp if (($pp =~ m/^3/) && ($order < 22)); # Always use 3. | |
135 } | |
136 | |
137 @subjobs = sort { $b cmp $a } @subjobs; | |
138 | |
139 my $tJobs = scalar(@alljobs); | |
140 my $sJobs = scalar(@subjobs); | |
141 | |
142 print STDERR "Created $tJobs jobs.\n"; | |
143 print STDERR "Submitting $sJobs jobs.\n"; | |
144 | |
145 | |
146 my $bgn = "0" x ($order - $plen[$order]); | |
147 my $end = "3" x ($order - $plen[$order]); | |
148 | |
149 open(F, "> o${order}w${weight}.dat"); | |
150 | |
151 foreach my $pp (@subjobs) { | |
152 print F "$pp\n"; | |
153 } | |
154 | |
155 close(F); | |
156 | |
157 | |
158 | |
159 open(F, "> o${order}w${weight}.sh"); | |
160 print F "#!/bin/sh\n"; | |
161 print F "\n"; | |
162 print F "\n"; | |
163 print F "if [ x\$SGE_TASK_ID = x -o x\$SGE_TASK_ID = xundefined -o x\$SGE_TASK_ID = x0 ]; then\n"; | |
164 print F " j=\$1\n"; | |
165 print F "else\n"; | |
166 print F " j=\$SGE_TASK_ID\n"; | |
167 print F "fi\n"; | |
168 print F "if [ x\$j = x ]; then\n"; | |
169 print F " echo Error: I need SGE_TASK_ID set, or a job index on the command line.\n"; | |
170 print F " exit\n"; | |
171 print F "fi\n"; | |
172 print F "\n"; | |
173 print F "\n"; | |
174 print F "p=`head -n \$j o${order}w${weight}.dat | tail -n 1`"; | |
175 print F "\n"; | |
176 print F "\n"; | |
177 print F "if [ -e \"o${order}w${weight}-out/o${order}w${weight}p\$p.out\" ]; then\n"; | |
178 print F " echo \"o${order}w${weight}p\$p.out exists, it's done!\"\n"; | |
179 print F " exit 0\n"; | |
180 print F "fi\n"; | |
181 print F "\n"; | |
182 print F "if [ ! -d \"o${order}w${weight}-out\" ]; then\n"; | |
183 print F " mkdir -p o${order}w${weight}-out\n"; | |
184 print F "fi\n"; | |
185 print F "\n"; | |
186 print F "/work/seqrequester/FreeBSD-amd64/bin/seqrequester \\\n"; | |
187 print F " shift -search -fast \\\n"; | |
188 print F " -order $order \\\n"; | |
189 print F " -weight $weight \\\n"; | |
190 print F " -report 0.0 \\\n"; | |
191 print F " -tapmin \${p}$bgn \\\n"; | |
192 print F " -tapmax \${p}$end \\\n"; | |
193 print F "> o${order}w${weight}-out/o${order}w${weight}p\$p.out.WORKING \\\n"; | |
194 print F "&& \\\n"; | |
195 print F "mv o${order}w${weight}-out/o${order}w${weight}p\$p.out.WORKING o${order}w${weight}-out/o${order}w${weight}p\$p.out\n"; | |
196 print F "\n"; | |
197 print F "\n"; | |
198 print F "#\n"; | |
199 print F "# qsub -h -cwd -j y -o o${order}w${weight}-err/o${order}w${weight}-\\\$TASK_ID.err -l memory=1g -t 1-$sJobs -N o${order}w${weight} ./o${order}w${weight}.sh\n"; | |
200 print F "#\n"; | |
201 print F "\n"; | |
202 print F "exit 0\n"; | |
203 print F "\n"; | |
204 | |
205 close(F); | |
206 | |
207 chmod(0755, "o${order}w${weight}.sh"); | |
208 | |
209 system("mkdir o${order}w${weight}-err"); | |
210 system("mkdir o${order}w${weight}-out"); | |
211 | |
212 print "qsub -h -cwd -j y -o o${order}w${weight}-err/o${order}w${weight}-\\\$TASK_ID.err -l memory=1g -t 1-$sJobs -N o${order}w${weight} ./o${order}w${weight}.sh\n"; | |
213 system("qsub -h -cwd -j y -o o${order}w${weight}-err/o${order}w${weight}-\\\$TASK_ID.err -l memory=1g -t 1-$sJobs -N o${order}w${weight} ./o${order}w${weight}.sh") if (1); | |
214 | |
215 exit; |