0
|
1 #!/usr/bin/env python
|
|
2 # Kanwei Li, 2010
|
|
3 # Selects N random lines from a file and outputs to another file
|
|
4
|
|
5 import random, sys
|
|
6
|
|
def main():
    """Select N random lines from a file and write them to another file.

    Command-line arguments (read from ``sys.argv``):
        1: path of the input file
        2: number of lines to select (an integer, at least 1)
        3: path of the output file

    The selected lines are written joined by newline characters, with no
    trailing newline after the last one.

    Exits with status 1, after writing a message to stderr, when fewer
    than one line is requested or when the input holds fewer lines than
    requested. The output file is not touched in either case.
    """
    # The context manager closes the input on every path, including the
    # early exits below (SystemExit propagates through the with-block).
    with open(sys.argv[1], 'r') as infile:
        total_lines = int(sys.argv[2])

        if total_lines < 1:
            sys.stderr.write( "Must select at least one line." )
            sys.exit(1)

        kept, n = _sample_lines(infile, total_lines)

    if n < total_lines:
        sys.stderr.write( "Error: asked to select more lines than there were in the file." )
        sys.exit(1)

    # Explicitly close the output so the data is flushed deterministically
    # rather than whenever the file object happens to be collected.
    with open(sys.argv[3], 'w') as outfile:
        outfile.write( "\n".join(kept) )


def _sample_lines(lines, total_lines):
    """Pick ``total_lines`` random items from ``lines`` in a single pass.

    Returns a ``(kept, n)`` tuple: ``kept`` is the list of selected lines
    with their trailing newline removed, and ``n`` is the number of lines
    read. When ``n`` is smaller than ``total_lines``, ``kept`` simply holds
    every line that was read.
    """
    kept = []
    n = 0
    for line in lines:
        line = line.rstrip("\n")
        n += 1
        if n <= total_lines:
            # Fill phase: the first total_lines lines are always kept.
            kept.append(line)
        elif random.randint(1, n) <= total_lines:
            # Line n is accepted with probability total_lines / n; it then
            # replaces a uniformly chosen line among those already kept.
            kept.pop(random.randint(0, total_lines - 1))
            kept.append(line)
    return kept, n
|
|
31
|
|
# Run the line sampler only when this file is executed as a script;
# importing it as a module must not trigger any work.
if __name__ == "__main__":
    main()
|