Mercurial > repos > immport-devteam > txt_diagnosis
changeset 0:e1f0194cf8fc draft
Uploaded
author | immport-devteam |
---|---|
date | Mon, 27 Feb 2017 13:07:11 -0500 |
parents | |
children | b94872d65050 |
files | txt_diagnosis/test-data/input_error.txt txt_diagnosis/test-data/input_noerror.txt txt_diagnosis/test-data/output_error.txt txt_diagnosis/test-data/output_noerror.txt txt_diagnosis/txtDiagnosis.xml txt_diagnosis/txtdiagnosis.py |
diffstat | 6 files changed, 179 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/txt_diagnosis/test-data/input_error.txt Mon Feb 27 13:07:11 2017 -0500 @@ -0,0 +1,16 @@ +Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA +289 56 438 0 626 nan +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 nba +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259 +765 1023 71 239 bkl 253 +762 957 143 158 271 255 +406 191 513 122 646 264 +695 1023 168 251 234 283 +336 178 0 146 128 we +668 1023 167 306 302 253
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/txt_diagnosis/test-data/input_noerror.txt Mon Feb 27 13:07:11 2017 -0500 @@ -0,0 +1,16 @@ +Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA +289 56 438 0 626 0 +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 111 +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259 +765 1023 71 239 54 253 +762 957 143 158 271 255 +406 191 513 122 646 264 +695 1023 168 251 234 283 +336 178 0 146 128 35 +668 1023 167 306 302 253
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/txt_diagnosis/test-data/output_error.txt Mon Feb 27 13:07:11 2017 -0500 @@ -0,0 +1,3 @@ +WARNING: line 6 in input_error.txt contains non-numeric results +WARNING: line 11 in input_error.txt contains non-numeric results +WARNING: line 15 in input_error.txt contains non-numeric results
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/txt_diagnosis/test-data/output_noerror.txt Mon Feb 27 13:07:11 2017 -0500 @@ -0,0 +1,1 @@ +No errors in the file.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/txt_diagnosis/txtDiagnosis.xml Mon Feb 27 13:07:11 2017 -0500 @@ -0,0 +1,75 @@ +<tool id="txt_diagnosis" name="Check data" version="1.1"> + <description> in txt-converted FCS files.</description> + <requirements> + <requirement type="package" version="0.17.1">pandas</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + python $__tool_directory__/txtdiagnosis.py -i "${input}" -o "${output}" -n "${input.name}" + ]]> + </command> + <inputs> + <param format="flowtext" name="input" type="data" label="Text file to check"/> + </inputs> + <outputs> + <data format="txt" name="output" label="Report on ${input.name}"/> + </outputs> + <tests> + <test> + <param name="input" value="input_error.txt"/> + <output name="output" file="output_error.txt"> + <assert_contents> + <has_text_matching text="WARNING: line 6 in .* contains non-numeric results"/> + </assert_contents> + </output> + </test> + <test> + <param name="input" value="input_noerror.txt"/> + <output name="output" file="output_noerror.txt"> + <assert_contents> + <has_text text="No errors in the file."/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ + This tool looks for potential errors in txt-converted FCS files. + +----- + +**Input** + +This diagnosis tools reads in text files, and checks that the data is all numeric. + +**Output** + +The output is a report with the errors and corresponding line numbers. + +----- + +**Example** + +*Input*:: + + Marker1 Marker2 Marker3 + 34 45 12 + NaN 65 10 + 34 45 12 + 33 NaN 10 + 34 45 12 + 33 65 10 + 34 45 12 + 33 65 NaN + 34 45 12 + 33 65 10 + +*Output*:: + + WARNING: line 2 in example_file.txt contains non-numeric results + WARNING: line 4 in example_file.txt contains non-numeric results + WARNING: line 8 in example_file.txt contains non-numeric results + ]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/txt_diagnosis/txtdiagnosis.py Mon Feb 27 13:07:11 2017 -0500 @@ -0,0 +1,68 @@ +#!/usr/bin/env python +###################################################################### +# Copyright (c) 2016 Northrop Grumman. +# All rights reserved. +###################################################################### +from __future__ import print_function +from __future__ import division +import pandas as pd +from argparse import ArgumentParser +import sys + + +def is_number(s): + try: + float(s) + return True + except ValueError: + return False + + +def error_report(input_file, fname, output_file): + errors = 0 + df = pd.read_table(input_file) + with open(output_file, "w") as outf: + for cols in df.columns.values: + if df[cols].count() != len(df[cols]): + with open(input_file, "r") as checkfile: + fl = checkfile.readline() + count_lines = 1 + for checklines in checkfile: + to_check = checklines.strip().split("\t") + count_lines += 1 + for item in to_check: + if not is_number(item): + errors += 1 + outf.write(" ".join(["WARNING: line", str(count_lines), "in", fname, "contains non-numeric results\n"])) + if errors == 0: + outf.write("No errors in the file.\n") + return + + +if __name__ == "__main__": + parser = ArgumentParser( + prog="txtDiagnosis", + description="Reports potential errors in text-converted FCS files") + + parser.add_argument( + '-i', + dest="input_file", + required=True, + help="File location for the text file.") + + parser.add_argument( + '-n', + dest="filename", + required=True, + help="Filename location for the text file.") + + parser.add_argument( + '-o', + dest="output_file", + required=True, + help="Name of the output file.") + + args = parser.parse_args() + + error_report(args.input_file, args.filename, args.output_file) + sys.exit(0)