Mercurial > repos > proteore > proteore_filter_keywords_values

--- a/filter_kw_val.py	Tue Feb 05 08:22:47 2019 -0500
+++ b/filter_kw_val.py	Tue Mar 05 07:37:10 2019 -0500
@@ -11,10 +11,11 @@
                         boolean value if the keyword should be filtered in exact ["filename,ncol,true/false"]
         --value         The value to be filtered, the column number where this filter applies and the
                         operation symbol ["value,ncol,=/>/>=/</<=/!="]
-        --values_range  range of values to be keep, example : --values_range 5 20 c1 true
+        --values_range  range of values to keep, example : --values_range 5 20 c1 true
+        --operation     'keep' or 'discard' lines concerned by filter(s)
         --operator      The operator used to filter with several keywords/values : AND or OR
         --o --output    The output filename
-        --filtered_file    The file contains removed lines
+        --discarded_lines    The file containing the discarded lines
         -s --sort_col   Used column to sort the file, ",true" for reverse sorting, ",false" otherwise example : c1,false
     """
     parser = argparse.ArgumentParser()
@@ -23,12 +24,14 @@
     parser.add_argument("--kw_file", nargs="+", action="append", help="")
     parser.add_argument("--value", nargs="+", action="append", help="")
     parser.add_argument("--values_range", nargs="+", action="append", help="")
+    parser.add_argument("--operation", default="keep", type=str, choices=['keep','discard'],help='')
     parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='')
     parser.add_argument("-o", "--output", default="output.txt")
-    parser.add_argument("--filtered_file", default="filtered_output.txt")
+    parser.add_argument("--discarded_lines", default="filtered_output.txt")
     parser.add_argument("-s","--sort_col", help="")

     args = parser.parse_args()
+
     filters(args)

 def str_to_bool(v):
@@ -62,6 +65,7 @@
     header = str_to_bool(args.input.split(",")[1])
     csv_file = blank_to_NA(read_file(filename))
     results_dict = {}
+    operator_dict = { "Equal" : "=" , "Higher" : ">" , "Equal-or-higher" : ">=" , "Lower" : "<" , "Equal-or-lower" : "<=" , "Different" : "!=" }

     if args.kw:
         keywords = args.kw
@@ -79,6 +83,7 @@
     if args.value:
         for v in args.value:
             v[0] = v[0].replace(",",".")
+            v[2] = operator_dict[v[2]]
             if is_number("float", v[0]):
                 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header)
                 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2])
@@ -123,6 +128,12 @@
         reverse=str_to_bool(args.sort_col.split(",")[1])
         remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header)
         filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header)
+
+    #swap lists of lines (files) if 'keep' option selected
+    if args.operation == "keep" :
+        swap = remaining_lines, filtered_lines
+        remaining_lines = swap[1]
+        filtered_lines = swap[0]

     # Write results to output
     with open(args.output,"w") as output :
@@ -130,7 +141,7 @@
         writer.writerows(remaining_lines)

     # Write filtered lines to filtered_output
-    with open(args.filtered_file,"w") as filtered_output :
+    with open(args.discarded_lines,"w") as filtered_output :
         writer = csv.writer(filtered_output,delimiter="\t")
         writer.writerows(filtered_lines)
--- a/filter_kw_val.xml	Tue Feb 05 08:22:47 2019 -0500
+++ b/filter_kw_val.xml	Tue Mar 05 07:37:10 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.02.05">
+<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.03.05">
     <description></description>
     <requirements>
     </requirements>
@@ -8,8 +8,9 @@
     <command><![CDATA[
         python $__tool_directory__/filter_kw_val.py
         -i "$input1,$header"
-        -o "$output1"
-        --filtered_file "$filtered_file"
+        -o "$kept_lines"
+        --discarded_lines "$discarded_lines"
+        --operation "$operation"
         --operator "$operator"

         ## Keywords
@@ -25,21 +26,8 @@

         ## value to filter
         #for $val in $value
-            #if $val.v.val != "None"
-                --value
-                #if $val.v.val == "Equal"
-                    $val.v.equal "$val.ncol" "="
-                #else if $val.v.val == "Higher"
-                    $val.v.higher "$val.ncol" ">"
-                #else if $val.v.val == "Equal or higher"
-                    $val.v.equal_higher "$val.ncol" ">="
-                #else if $val.v.val == "Lower"
-                    $val.v.lower "$val.ncol" "<"
-                #else if $val.v.val == "Equal or lower"
-                    $val.v.equal_lower "$val.ncol" "<="
-                #else
-                    $val.v.different "$val.ncol" "!="
-                #end if
+            #if $val.value != "None"
+                --value $val.value $val.ncol $val.operator
             #end if
         #end for

@@ -50,21 +38,27 @@
             #end if
         #end for

-        #if $sort_column != ""
-            --sort_col "$sort_column,$reversed_sort"
+        #if $sort.sort_bool == "true"
+            --sort_col "$sort.sort_column,$sort.reversed_sort"
         #end if

     ]]></command>
     <inputs>
         <param type="data" name="input1" format="txt,tabular" label="Input file" />
         <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+        <param name="operation" type="select" label="Operation" help="keep or discard word(s) or value(s) that match filters ?">
+            <option value="keep">Keep</option>
+            <option value="discard">Discard</option>
+        </param>
         <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" >
             <option value="OR" selected="True">OR</option>
             <option value="AND">AND</option>
         </param>

         <repeat name="keyword" title="Filter by keywords" >
-            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
+            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek keywords in the first column (and keep or discard them)'>
+                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+            </param>
             <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' />
             <conditional name="k" >
                 <param name="kw" type="select" label="Enter keywords" >
@@ -72,7 +66,7 @@
                     <option value="file">File containing keywords</option>
                 </param>
                 <when value="text" >
-                    <param name="txt" type="text" label="Copy/paste keywords to be filtered out" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
+                    <param name="txt" type="text" label="Copy/paste keywords to find (keep or discard)" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
                         <sanitizer>
                         <valid initial="string.printable">
                             <remove value="&apos;"/>
@@ -91,56 +85,53 @@
             </conditional>
         </repeat>
         <repeat name="value" title="Filter by numerical value" >
-            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
-            <conditional name="v" >
-                <param name="val" type="select" label="Select operator" >
-                    <option value="Equal">=</option>
-                    <option value="Higher">&gt;</option>
-                    <option value="Equal or higher">&gt;=</option>
-                    <option value="Lower">&lt;</option>
-                    <option value="Equal or lower">&lt;=</option>
-                    <option value="Different">!=</option>
-                </param>
-                <when value="Equal" >
-                    <param name="equal" type="float" value="" label="Value" />
-                </when>
-                <when value="Higher" >
-                    <param type="float" name="higher" value="" label="Value" />
-                </when>
-                <when value="Equal or higher" >
-                    <param type="float" name="equal_higher" value="" label="Value" />
-                </when>
-                <when value="Lower" >
-                    <param type="float" name="lower" value="" label="Value" />
-                </when>
-                <when value="Equal or lower" >
-                    <param type="float" name="equal_lower" value="" label="Value" />
-                </when>
-                <when value="Different">
-                    <param type="float" name="different" value="" label="Value"/>
-                </when>
-            </conditional>
+            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
+                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+            </param>
+            <param name="operator" type="select" label="Select operator" >
+                <option value="Equal">=</option>
+                <option value="Higher">&gt;</option>
+                <option value="Equal-or-higher">&gt;=</option>
+                <option value="Lower">&lt;</option>
+                <option value="Equal-or-lower">&lt;=</option>
+                <option value="Different">!=</option>
+            </param>
+            <param name="value" type="float" value="" label="Value"></param>
         </repeat>
         <repeat name="values_range" title="Filter by range of numerical values">
-            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
+            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
+                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+            </param>
             <param name="bottom_value" type="float" value="" label="Enter the bottom value" />
             <param name="top_value" type="float" value="" label="Enter the top value" />
             <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" />
         </repeat>
-        <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values" />
-        <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>
+        <conditional name="sort">
+            <param name="sort_bool" type="boolean" label="Sort by column ?" checked="false" truevalue="true" falsevalue="false" />
+            <when value="true">
+                <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values">
+                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]</validator>
+                </param>
+                <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>
+            </when>
+            <when value="false"/>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="output1" format="tsv" label="${tool.name} on ${input1.name}" />
-        <data name="filtered_file" format="tsv" label="${tool.name} on ${input1.name} - Filtered lines" />
+        <data name="kept_lines" format="tsv" label="Filtered_${input1.name}" />
+        <data name="discarded_lines" format="tsv" label="Filtered_${input1.name} - discarded_lines" />
     </outputs>
     <tests>
         <test>
             <param name="input1" value="Lacombe_et_al_2017_OK.txt" />
             <param name="header" value="true" />
             <param name="operator" value="OR"/>
+            <param name="operation" value="discard"/>
             <param name="sort_column" value="c1"/>
-            <param name="reversed_sort" value="false"/>
+            <conditional name="sort">
+                <param name="sort_bool" value="false"/>
+                <param name="reversed_sort" value="false"/>
+            </conditional>
             <repeat name="keyword">
                 <param name="ncol" value="c1" />
                 <param name="match" value="True" />
@@ -151,19 +142,17 @@
             </repeat>
             <repeat name="value">
                 <param name="ncol" value="c3"/>
-                <conditional name="v">
-                    <param name="val" value="Higher"/>
-                    <param name="higher" value="20" />
-                </conditional>
+                <param name="operator" value="Higher"/>
+                <param name="value" value="20" />
             </repeat>
-            <output name="output1" file="output.csv" />
-            <output name="filtered_file" file="filtered_output.csv" />
+            <output name="kept_lines" file="output.tsv" />
+            <output name="discarded_lines" file="discarded_lines.tsv" />
         </test>
     </tests>
     <help><![CDATA[
 **Description**

-This tool allows to filter out data according to different criteria such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
+This tool allows you to keep or discard rows from your dataset according to different filters such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
 A boolean operator "OR/AND" allows to combine different type of filters making this tool very powerful.

 -----
@@ -176,12 +165,21 @@

 **Parameters**

-**AND/OR operator**
+**Operation**
+
+- **Keep**: only keep lines with keyword(s) and/or value(s) concerned by defined filter(s)
+- **Discard**: only keep lines with keyword(s) and/or value(s) NOT concerned by defined filter(s)
+
+.. class:: infomark

-As many filters as needed can be combined, you can choose how filters apply on your data by using the following boolean operators:
+Two output files are created, one with kept lines and the other one with discarded lines.
+
+**Select an operator to combine your filters (if more than one)**

-- OR: only one filter must be satisfied to remove one row
-- AND: all filters must be satisfied to remove one row
+Many filters (criteria) can be combined in a single execution making this tool quite powerful; this can be achieved using the following boolean operators:
+
+- **OR**: at least one of the filters must be satisfied to keep/discard a row
+- **AND**: all filters must be satisfied to keep/discard one row

 -----

@@ -205,7 +203,7 @@

 "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option:

-- If you choose **Yes**, only the fields that contains exactly the same content will be removed.
+- If you choose **Yes**, only the fields that contain exactly the same content will be removed (i.e. using the "discard" mode).

 - If you choose **No**, all the fields containing the keyword will be removed.

@@ -231,18 +229,17 @@
 - >= (greater than or equal to)

 Then enter the numerical threshold to apply by filling the "Value" box.
-If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that correspond to your settings will be filtered out.
+If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that corresponds to your settings will be kept or discarded (based on the operation parameter).

 -----

 **Filter by a range of values**: You can also set a range of values to filter your file.
-Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be filtered out.
+Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be discarded (or the other way around based on operation parameter).

 -----

-**Sort results files**
-
-You can sort your results by column in ascending (default value) or descending by entering the column number on which to sort the data.
+**Sort by column ?**
+Clicking the "Yes" button lets you sort the result files by a column: enter the column number on which to sort the data. Sorting can be done in ascending (default) or descending order.

 -----

@@ -250,9 +247,9 @@

 The tool returns two output files.

-* A text file containing the results that pass your filters
+* A text file containing the kept lines: the rows that satisfy your filters in "keep" mode, or the rows that do not satisfy them in "discard" mode.

-* A text file containing the rows removed from the input file (i.e. containing data taht do not pass your filter(s).
+* A text file containing the discarded lines, i.e. the rows removed from the input file.

 -----

@@ -260,7 +257,7 @@

 **Authors**

-T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/discarded_lines.tsv	Tue Mar 05 07:37:10 2019 -0500
@@ -0,0 +1,21 @@
+Protein accession number (UniProt)	Protein name	Number of peptides (razor + unique)
+P15924	Desmoplakin	69
+P02538	Keratin, type II cytoskeletal 6A	53
+P02768	Serum albumin	44
+P08779	Keratin, type I cytoskeletal 16	29
+Q02413	Desmoglein-1	24
+P07355	Annexin A2;Putative annexin A2-like protein	22
+P14923	Junction plakoglobin	22
+P02788	Lactotransferrin	21
+Q9HC84	Mucin-5B	21
+P04745	Alpha-amylase 1	23
+P04264	Keratin, type II cytoskeletal 1	61
+P35908	Keratin, type II cytoskeletal 2 epidermal	40
+P13645	Keratin, type I cytoskeletal 10	40
+Q5D862	Filaggrin-2	14
+Q5T749	Keratinocyte proline-rich protein	13
+Q8IW75	Serpin A12	3
+P81605	Dermcidin	3
+P22531	Small proline-rich protein 2E	3
+P59666	Neutrophil defensin 3	2
+P78386	Keratin, type II cuticular Hb5	2
--- a/test-data/filtered_output.csv	Tue Feb 05 08:22:47 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-Protein accession number (UniProt)	Protein name	Number of peptides (razor + unique)
-P02538	Keratin, type II cytoskeletal 6A	53
-P02768	Serum albumin	44
-P02788	Lactotransferrin	21
-P04264	Keratin, type II cytoskeletal 1	61
-P04745	Alpha-amylase 1	23
-P07355	Annexin A2;Putative annexin A2-like protein	22
-P08779	Keratin, type I cytoskeletal 16	29
-P13645	Keratin, type I cytoskeletal 10	40
-P14923	Junction plakoglobin	22
-P15924	Desmoplakin	69
-P22531	Small proline-rich protein 2E	3
-P35908	Keratin, type II cytoskeletal 2 epidermal	40
-P59666	Neutrophil defensin 3	2
-P78386	Keratin, type II cuticular Hb5	2
-P81605	Dermcidin	3
-Q02413	Desmoglein-1	24
-Q5D862	Filaggrin-2	14
-Q5T749	Keratinocyte proline-rich protein	13
-Q8IW75	Serpin A12	3
-Q9HC84	Mucin-5B	21
--- a/test-data/output.csv	Tue Feb 05 08:22:47 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,142 +0,0 @@
-Protein accession number (UniProt)	Protein name	Number of peptides (razor + unique)
-A8K2U0	Alpha-2-macroglobulin-like protein 1	3
-O43707	Alpha-actinin-4;Alpha-actinin-1	6
-O60911	Cathepsin L2	4
-O75223	Gamma-glutamylcyclotransferase	6
-O75342	Arachidonate 12-lipoxygenase, 12R-type	4
-O75369	Filamin-B	5
-O75635	Serpin B7	7
-O95274	Ly6/PLAUR domain-containing protein 3	2
-P00338	L-lactate dehydrogenase A chain	6
-P00441	Superoxide dismutase [Cu-Zn]	5
-P00491	Purine nucleoside phosphorylase	2
-P00558	Phosphoglycerate kinase 1	4
-P00738	Haptoglobin	3
-P01009	Alpha-1-antitrypsin;Short peptide from AAT	6
-P01011	Alpha-1-antichymotrypsin	3
-P01036	Cystatin-S	5
-P01037	Cystatin-SN	2
-P01040	Cystatin-A	8
-P01623	Ig kappa chain V-III region	3
-P01625	Ig kappa chain V-IV region Len	2
-P01765	Ig heavy chain V-III region TIL	2
-P01766	Ig heavy chain V-III region BRO	2
-P01833	Polymeric immunoglobulin receptor	15
-P01834	Ig kappa chain C region	8
-P01857	Ig gamma-1 chain C region	7
-P01860	Ig gamma-3 chain C region	2
-P01871	Ig mu chain C region	2
-P01876	Ig alpha-1 chain C region	16
-P01877	Ig alpha-2 chain C region	3
-P02545	Prelamin-A/C;Lamin-A/C	10
-P02763	Alpha-1-acid glycoprotein 1	3
-P02787	Serotransferrin	9
-P04040	Catalase	9
-P04075	Fructose-bisphosphate aldolase A	4
-P04080	Cystatin-B	2
-P04083	Annexin A1	10
-P04259	Keratin, type II cytoskeletal 6B	10
-P04406	Glyceraldehyde-3-phosphate dehydrogenase	8
-P04792	Heat shock protein beta-1	5
-P05089	Arginase-1	8
-P05090	Apolipoprotein D	2
-P06396	Gelsolin	3
-P06733	Alpha-enolase	15
-P06870	Kallikrein-1	2
-P07339	Cathepsin D	6
-P07384	Calpain-1 catalytic subunit	4
-P07858	Cathepsin B	2
-P07900	Heat shock protein HSP 90-alpha	6
-P08865	40S ribosomal protein SA	2
-P09211	Glutathione S-transferase P	4
-P09228	Cystatin-SA	2
-P09972	Fructose-bisphosphate aldolase C	2
-P0CG05	Ig lambda-2 chain C regions	4
-P0DMV9	Heat shock 70 kDa protein 1B	8
-P10599	Thioredoxin	6
-P11021	78 kDa glucose-regulated protein	10
-P11142	Heat shock cognate 71 kDa protein	5
-P11279	Lysosome-associated membrane glycoprotein 1	2
-P12273	Prolactin-inducible protein	6
-P13473	Lysosome-associated membrane glycoprotein 2	2
-P13639	Elongation factor 2	8
-P13646	Keratin, type I cytoskeletal 13	11
-P14618	Pyruvate kinase PKM	9
-P14735	Insulin-degrading enzyme	3
-P18206	Vinculin	4
-P18510	Interleukin-1 receptor antagonist protein	3
-P19012	Keratin, type I cytoskeletal 15	2
-P19013	Keratin, type II cytoskeletal 4	13
-P19971	Thymidine phosphorylase	2
-P20930	Filaggrin	2
-P20933	N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase	3
-P22528	Cornifin-B	3
-P23284	Peptidyl-prolyl cis-trans isomerase B	2
-P23396	40S ribosomal protein S3	2
-P25311	Zinc-alpha-2-glycoprotein	15
-P25705	ATP synthase subunit alpha, mitochondrial	2
-P25788	Proteasome subunit alpha type-3	3
-P26641	Elongation factor 1-gamma	3
-P27482	Calmodulin-like protein 3	2
-P29508	Serpin B3	20
-P30740	Leukocyte elastase inhibitor	3
-P31025	Lipocalin-1	4
-P31151	Protein S100-A7	9
-P31944	Caspase-14	15
-P31947	14-3-3 protein sigma	9
-P31949	Protein S100-A11	2
-P35579	Myosin-9	8
-P36952	Serpin B5	3
-P40121	Macrophage-capping protein	2
-P40926	Malate dehydrogenase, mitochondrial	3
-P42357	Histidine ammonia-lyase	2
-P47756	F-actin-capping protein subunit beta	2
-P47929	Galectin-7	5
-P48594	Serpin B4	4
-P48637	Glutathione synthetase	2
-P49720	Proteasome subunit beta type-3	2
-P50395	Rab GDP dissociation inhibitor beta	2
-P58107	Epiplakin	5
-P59998	Actin-related protein 2/3 complex subunit 4	2
-P60174	Triosephosphate isomerase	9
-P60842	Eukaryotic initiation factor 4A-I	5
-P61160	Actin-related protein 2	2
-P61626	Lysozyme C	7
-P61916	Epididymal secretory protein E1	2
-P62258	14-3-3 protein epsilon	4
-P62937	Peptidyl-prolyl cis-trans isomerase A	5
-P62987	Ubiquitin-60S ribosomal protein L40	6
-P63104	14-3-3 protein zeta/delta	5
-P63261	Actin, cytoplasmic 2	19
-P68363	Tubulin alpha-1B chain	7
-P68371	Tubulin beta-4B chain	8
-P68871	Hemoglobin subunit beta	4
-P80188	Neutrophil gelatinase-associated lipocalin	3
-Q01469	Fatty acid-binding protein 5, epidermal	15
-Q04695	Keratin, type I cytoskeletal 17	18
-Q06830	Peroxiredoxin-1	9
-Q08188	Protein-glutamine gamma-glutamyltransferase E	12
-Q13867	Bleomycin hydrolase	5
-Q14574	Desmocollin-3	4
-Q15149	Plectin	15
-Q15828	Cystatin-M	3
-Q5T750	Skin-specific protein 32	4
-Q6KB66	Keratin, type II cytoskeletal 80	13
-Q6P4A8	Phospholipase B-like 1	5
-Q6UWP8	Suprabasin	4
-Q86YZ3	Hornerin	11
-Q8N1N4	Keratin, type II cytoskeletal 78	18
-Q8TAX7	Mucin-7	2
-Q8WVV4	Protein POF1B	8
-Q92820	Gamma-glutamyl hydrolase	5
-Q96DA0	Zymogen granule protein 16 homolog B	5
-Q96FX8	p53 apoptosis effector related to PMP-22	2
-Q96P63	Serpin B12	9
-Q9C075	Keratin, type I cytoskeletal 23	4
-Q9HCY8	Protein S100-A14	3
-Q9NZH8	Interleukin-36 gamma	6
-Q9NZT1	Calmodulin-like protein 5	8
-Q9UGM3	Deleted in malignant brain tumors 1 protein	6
-Q9UI42	Carboxypeptidase A4	6
-Q9UIV8	Serpin B13	2
-Q9Y6R7	IgGFc-binding protein	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tsv	Tue Mar 05 07:37:10 2019 -0500
@@ -0,0 +1,142 @@
+Protein accession number (UniProt)	Protein name	Number of peptides (razor + unique)
+P29508	Serpin B3	20
+P63261	Actin, cytoplasmic 2	19
+Q8N1N4	Keratin, type II cytoskeletal 78	18
+Q04695	Keratin, type I cytoskeletal 17	18
+P01876	Ig alpha-1 chain C region	16
+Q01469	Fatty acid-binding protein 5, epidermal	15
+P31944	Caspase-14	15
+P01833	Polymeric immunoglobulin receptor	15
+P06733	Alpha-enolase	15
+P25311	Zinc-alpha-2-glycoprotein	15
+Q15149	Plectin	15
+P19013	Keratin, type II cytoskeletal 4	13
+Q6KB66	Keratin, type II cytoskeletal 80	13
+Q08188	Protein-glutamine gamma-glutamyltransferase E	12
+P13646	Keratin, type I cytoskeletal 13	11
+Q86YZ3	Hornerin	11
+P04259	Keratin, type II cytoskeletal 6B	10
+P02545	Prelamin-A/C;Lamin-A/C	10
+P04083	Annexin A1	10
+P11021	78 kDa glucose-regulated protein	10
+P02787	Serotransferrin	9
+P04040	Catalase	9
+P31151	Protein S100-A7	9
+P31947	14-3-3 protein sigma	9
+Q96P63	Serpin B12	9
+P14618	Pyruvate kinase PKM	9
+P60174	Triosephosphate isomerase	9
+Q06830	Peroxiredoxin-1	9
+P01040	Cystatin-A	8
+P05089	Arginase-1	8
+P01834	Ig kappa chain C region	8
+P04406	Glyceraldehyde-3-phosphate dehydrogenase	8
+P0DMV9	Heat shock 70 kDa protein 1B	8
+P13639	Elongation factor 2	8
+P35579	Myosin-9	8
+P68371	Tubulin beta-4B chain	8
+Q8WVV4	Protein POF1B	8
+O75635	Serpin B7	7
+P01857	Ig gamma-1 chain C region	7
+P61626	Lysozyme C	7
+P68363	Tubulin alpha-1B chain	7
+P01009	Alpha-1-antitrypsin;Short peptide from AAT	6
+P07900	Heat shock protein HSP 90-alpha	6
+Q9NZH8	Interleukin-36 gamma	6
+O43707	Alpha-actinin-4;Alpha-actinin-1	6
+O75223	Gamma-glutamylcyclotransferase	6
+P00338	L-lactate dehydrogenase A chain	6
+P07339	Cathepsin D	6
+P62987	Ubiquitin-60S ribosomal protein L40	6
+P10599	Thioredoxin	6
+Q9UGM3	Deleted in malignant brain tumors 1 protein	6
+Q9UI42	Carboxypeptidase A4	6
+P47929	Galectin-7	5
+Q13867	Bleomycin hydrolase	5
+Q6P4A8	Phospholipase B-like 1	5
+O75369	Filamin-B	5
+P00441	Superoxide dismutase [Cu-Zn]	5
+P04792	Heat shock protein beta-1	5
+P11142	Heat shock cognate 71 kDa protein	5
+P58107	Epiplakin	5
+P60842	Eukaryotic initiation factor 4A-I	5
+P62937	Peptidyl-prolyl cis-trans isomerase A	5
+P63104	14-3-3 protein zeta/delta	5
+Q92820	Gamma-glutamyl hydrolase	5
+O75342	Arachidonate 12-lipoxygenase, 12R-type	4
+P09211	Glutathione S-transferase P	4
+P31025	Lipocalin-1	4
+P48594	Serpin B4	4
+Q14574	Desmocollin-3	4
+Q5T750	Skin-specific protein 32	4
+Q6UWP8	Suprabasin	4
+O60911	Cathepsin L2	4
+P00558	Phosphoglycerate kinase 1	4
+P04075	Fructose-bisphosphate aldolase A	4
+P07384	Calpain-1 catalytic subunit	4
+P0CG05	Ig lambda-2 chain C regions	4
+P18206	Vinculin	4
+P62258	14-3-3 protein epsilon	4
+P68871	Hemoglobin subunit beta	4
+Q9C075	Keratin, type I cytoskeletal 23	4
+A8K2U0	Alpha-2-macroglobulin-like protein 1	3
+P00738	Haptoglobin	3
+P01011	Alpha-1-antichymotrypsin	3
+P02763	Alpha-1-acid glycoprotein 1	3
+P18510	Interleukin-1 receptor antagonist protein	3
+P22528	Cornifin-B	3
+P30740	Leukocyte elastase inhibitor	3
+P80188	Neutrophil gelatinase-associated lipocalin	3
+Q15828	Cystatin-M	3
+Q9HCY8	Protein S100-A14	3
+P01623	Ig kappa chain V-III region	3
+P01877	Ig alpha-2 chain C region	3
+P06396	Gelsolin	3
+P14735	Insulin-degrading enzyme	3
+P20933	N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase	3
+P25788	Proteasome subunit alpha type-3	3
+P26641	Elongation factor 1-gamma	3
+P36952	Serpin B5	3
+P40926	Malate dehydrogenase, mitochondrial	3
+Q9Y6R7	IgGFc-binding protein	3
+O95274	Ly6/PLAUR domain-containing protein 3	2
+P00491	Purine nucleoside phosphorylase	2
+P04080	Cystatin-B	2
+P09972	Fructose-bisphosphate aldolase C	2
+P19012	Keratin, type I cytoskeletal 15	2
+P20930	Filaggrin	2
+Q96FX8	p53 apoptosis effector related to PMP-22	2
+Q9UIV8	Serpin B13	2
+P01625	Ig kappa chain V-IV region Len	2
+P01765	Ig heavy chain V-III region TIL	2
+P01766	Ig heavy chain V-III region BRO	2
+P01860	Ig gamma-3 chain C region	2
+P01871	Ig mu chain C region	2
+P05090	Apolipoprotein D	2
+P06870	Kallikrein-1	2
+P07858	Cathepsin B	2
+P08865	40S ribosomal protein SA	2
+P11279	Lysosome-associated membrane glycoprotein 1	2
+P13473	Lysosome-associated membrane glycoprotein 2	2
+P19971	Thymidine phosphorylase	2
+P23284	Peptidyl-prolyl cis-trans isomerase B	2
+P23396	40S ribosomal protein S3	2
+P25705	ATP synthase subunit alpha, mitochondrial	2
+P27482	Calmodulin-like protein 3	2
+P31949	Protein S100-A11	2
+P40121	Macrophage-capping protein	2
+P42357	Histidine ammonia-lyase	2
+P47756	F-actin-capping protein subunit beta	2
+P48637	Glutathione synthetase	2
+P49720	Proteasome subunit beta type-3	2
+P50395	Rab GDP dissociation inhibitor beta	2
+P59998	Actin-related protein 2/3 complex subunit 4	2
+P61160	Actin-related protein 2	2
+P61916	Epididymal secretory protein E1	2
+Q9NZT1	Calmodulin-like protein 5	8
+P12273	Prolactin-inducible protein	6
+Q96DA0	Zymogen granule protein 16 homolog B	5
+P01036	Cystatin-S	5
+Q8TAX7	Mucin-7	2
+P01037	Cystatin-SN	2
+P09228	Cystatin-SA	2