Mercurial > repos > xuebing > sharplabtool
comparison tools/filters/randomlines.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 # Kanwei Li, 2010 | |
3 # Selects N random lines from a file and outputs to another file | |
4 | |
5 import random, sys | |
6 | |
def main():
    """Select N random lines from a file and write them to another file.

    Command-line arguments (read from ``sys.argv``):
        1: path of the input file
        2: number of lines to select (must be >= 1)
        3: path of the output file

    The input is streamed once using reservoir sampling, so only the
    selected lines are held in memory. The selected lines are written
    joined by newlines, without a trailing newline.

    On invalid input (a count below 1, or a count larger than the number
    of lines in the input) a message is written to stderr and the process
    exits with status 1 without writing the output file.
    """
    total_lines = int(sys.argv[2])

    if total_lines < 1:
        sys.stderr.write("Must select at least one line.")
        # Non-zero status so callers can tell the run failed.
        sys.exit(1)

    kept = []
    n = 0
    # The context manager guarantees the input handle is closed, even if
    # reading fails part-way through.
    with open(sys.argv[1], 'r') as infile:
        for line in infile:
            line = line.rstrip("\n")
            n += 1
            if n <= total_lines:
                # Fill the reservoir with the first total_lines lines.
                kept.append(line)
            elif random.randint(1, n) <= total_lines:
                # Line n is accepted with probability total_lines / n and
                # evicts one uniformly chosen line already in the reservoir.
                kept.pop(random.randint(0, total_lines - 1))
                kept.append(line)

    if n < total_lines:
        sys.stderr.write("Error: asked to select more lines than there were in the file.")
        sys.exit(1)

    # Close the output explicitly so the data is flushed to disk before
    # the function returns rather than whenever the handle is collected.
    with open(sys.argv[3], 'w') as outfile:
        outfile.write("\n".join(kept))


if __name__ == "__main__":
    main()