Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/chardet/cli/chardetect.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 Script which takes one or more file paths and reports on their detected | |
3 encodings | |
4 | |
5 Example:: | |
6 | |
7 % chardetect somefile someotherfile | |
8 somefile: windows-1252 with confidence 0.5 | |
9 someotherfile: ascii with confidence 1.0 | |
10 | |
11 If no paths are provided, it takes its input from stdin. | |
12 | |
13 """ | |
14 | |
15 from __future__ import absolute_import, print_function, unicode_literals | |
16 | |
17 import argparse | |
18 import sys | |
19 | |
20 from chardet import __version__ | |
21 from chardet.compat import PY2 | |
22 from chardet.universaldetector import UniversalDetector | |
23 | |
24 | |
def description_of(lines, name='stdin'):
    """
    Describe the most likely encoding of a sequence of byte chunks.

    Every chunk is fed to a ``UniversalDetector`` until the input is
    exhausted or the detector reports that it is done.

    :param lines: The chunks (typically the lines of a file) to inspect.
    :type lines: Iterable of bytes
    :param name: Label for the input; used as the prefix of the result.
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'``, or
              ``'<name>: no result'`` when no encoding was detected.
    :rtype: str
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # Stop early once the detector has made up its mind (for example
        # after reading a BOM) so the rest of the input is not read.
        if detector.done:
            break
    detector.close()
    outcome = detector.result

    if PY2:
        # On Python 2 the name arrives as bytes; decode it for formatting.
        name = name.decode(sys.getfilesystemencoding(), 'ignore')

    encoding = outcome['encoding']
    if not encoding:
        return '{}: no result'.format(name)
    return '{}: {} with confidence {}'.format(name, encoding,
                                              outcome['confidence'])
51 | |
52 | |
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Every input file is run through ``description_of`` and the resulting
    line is printed to stdout.  Files opened while parsing the arguments
    are closed before returning; the process's own stdin is left open.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their "
                    "detected encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to '
                             'determine. (default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {}'.format(__version__))
    args = parser.parse_args(argv)

    # Streams owned by the interpreter; these must never be closed here.
    std_streams = (sys.stdin, getattr(sys.stdin, 'buffer', None))

    try:
        for f in args.input:
            if f.isatty():
                print("You are running chardetect interactively. Press " +
                      "CTRL-D twice at the start of a blank line to signal the " +
                      "end of your input. If you want help, run chardetect " +
                      "--help\n", file=sys.stderr)
            print(description_of(f, f.name))
    finally:
        # argparse.FileType opens every path during parsing and never closes
        # it, so release the handles explicitly, even if detection failed.
        for f in args.input:
            if not any(f is stream for stream in std_streams):
                f.close()
82 | |
# Run the command-line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()