0
|
1 #!/usr/bin/env python2.7
|
|
2 # tsv.py: a module for reading and writing TSV (tab-separated value) files
|
|
3 """
|
|
4 This module defines two classes: a TsvWriter, which can be constructed on a
|
|
5 stream to allow writing TSV data lines and #-delimited comments to that stream,
|
|
6 and a TsvReader, which can be constructed on a stream and iterated over to
|
|
7 obtain lists of the values from each non-comment line in the stream.
|
|
8
|
|
9 TSV is most useful as the basis for other, more tightly specified file formats.
|
|
10
|
|
11 """
|
|
12
|
|
class TsvWriter(object):
    """
    Represents a writer for tab-separated value files containing #-delimited
    comments.

    The writer wraps a stream object and writes directly to it; it only
    relies on the stream's write() and close() methods.
    """

    def __init__(self, stream):
        """
        Make a new TsvWriter for writing TSV data to the given stream.

        stream is stored as-is on the instance; write() and close() are
        called on it by the other methods.
        """

        # This holds the stream
        self.stream = stream

    def line(self, *args):
        """
        Write the given values to the file, as a TSV line. Args holds a list of
        all arguments passed. Any argument that stringifies to a string legal as
        a TSV data item can be written.

        Calling this with no arguments writes nothing (see list_line).
        """

        self.list_line(args)

    def list_line(self, line):
        """
        Write the given iterable of values (line) to the file as items on the
        same line. Any argument that stringifies to a string legal as a TSV data
        item can be written.

        line may be any iterable, including a one-shot generator or iterator;
        it is consumed exactly once. An empty iterable writes nothing at all
        (not even a newline).

        Does not copy the line or build a big string in memory.
        """

        items = iter(line)

        try:
            # Pull the first item separately so that tabs only ever appear
            # *between* items, and so emptiness can be detected without len().
            first = next(items)
        except StopIteration:
            # Nothing to write; emit no blank line.
            return

        write = self.stream.write
        write(str(first))

        for item in items:
            write("\t")
            write(str(item))

        write("\n")

    def comment(self, text):
        """
        Write the given text as a TSV comment. text must be a string containing
        no newlines.

        The text is written after a "# " prefix and followed by a newline. No
        check is made for embedded newlines; that is up to the caller.
        """

        self.stream.write("# {}\n".format(text))

    def close(self):
        """
        Close the underlying stream.
        """

        self.stream.close()
|
|
74
|
|
class TsvReader(object):
    """
    Represents a reader for tab-separated value files. Skips over comments
    starting with #. Can be iterated over.

    Field values consisting of only whitespace are not allowed.
    """

    def __init__(self, stream):
        """
        Make a new TsvReader to read from the given stream.

        stream is stored as-is; it is iterated over line by line by
        __iter__ and closed by close().
        """

        self.stream = stream

    def __iter__(self):
        """
        Yields lists of all fields on each line, as strings, until all lines are
        exhausted. Strips whitespace around field contents.

        Lines that are empty after stripping, or whose first non-whitespace
        character is #, are skipped. Because the whole line is stripped before
        it is split on tabs, empty fields at the very start or end of a line
        are dropped.
        """

        for line in self.stream:
            line = line.strip()
            if not line or line.startswith("#"):
                # Skip comments and empty lines
                continue

            # Build a real list (map() is lazy on Python 3) and call strip()
            # on each field itself so both str and unicode lines work.
            yield [field.strip() for field in line.split("\t")]

    def close(self):
        """
        Close the underlying stream.
        """

        self.stream.close()
|