Mercurial > repos > proteore > proteore_filter_keywords_values
changeset 5:33ca9ba2495a draft
planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty
author | proteore |
---|---|
date | Tue, 05 Mar 2019 07:37:10 -0500 |
parents | 2080e2a4f209 |
children | b4641c0f8a82 |
files | filter_kw_val.py filter_kw_val.xml test-data/discarded_lines.tsv test-data/filtered_output.csv test-data/output.csv test-data/output.tsv |
diffstat | 6 files changed, 252 insertions(+), 244 deletions(-) [+] |
line wrap: on
line diff
--- a/filter_kw_val.py Tue Feb 05 08:22:47 2019 -0500 +++ b/filter_kw_val.py Tue Mar 05 07:37:10 2019 -0500 @@ -11,10 +11,11 @@ boolean value if the keyword should be filtered in exact ["filename,ncol,true/false"] --value The value to be filtered, the column number where this filter applies and the operation symbol ["value,ncol,=/>/>=/</<=/!="] - --values_range range of values to be keep, example : --values_range 5 20 c1 true + --values_range range of values to be keep, example : --values_range 5 20 c1 true + --operation 'keep' or 'discard' lines concerned by filter(s) --operator The operator used to filter with several keywords/values : AND or OR --o --output The output filename - --filtered_file The file contains removed lines + --discarded_lines The file contains removed lines -s --sort_col Used column to sort the file, ",true" for reverse sorting, ",false" otherwise example : c1,false """ parser = argparse.ArgumentParser() @@ -23,12 +24,14 @@ parser.add_argument("--kw_file", nargs="+", action="append", help="") parser.add_argument("--value", nargs="+", action="append", help="") parser.add_argument("--values_range", nargs="+", action="append", help="") + parser.add_argument("--operation", default="keep", type=str, choices=['keep','discard'],help='') parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='') parser.add_argument("-o", "--output", default="output.txt") - parser.add_argument("--filtered_file", default="filtered_output.txt") + parser.add_argument("--discarded_lines", default="filtered_output.txt") parser.add_argument("-s","--sort_col", help="") args = parser.parse_args() + filters(args) def str_to_bool(v): @@ -62,6 +65,7 @@ header = str_to_bool(args.input.split(",")[1]) csv_file = blank_to_NA(read_file(filename)) results_dict = {} + operator_dict = { "Equal" : "=" , "Higher" : ">" , "Equal-or-higher" : ">=" , "Lower" : "<" , "Equal-or-lower" : "<=" , "Different" : "!=" } if args.kw: keywords = args.kw @@ -79,6 +83,7 @@ if args.value: for v in args.value: v[0] = v[0].replace(",",".") + v[2] = operator_dict[v[2]] if is_number("float", v[0]): csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header) results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2]) @@ -123,6 +128,12 @@ reverse=str_to_bool(args.sort_col.split(",")[1]) remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header) filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header) + + #swap lists of lines (files) if 'keep' option selected + if args.operation == "keep" : + swap = remaining_lines, filtered_lines + remaining_lines = swap[1] + filtered_lines = swap[0] # Write results to output with open(args.output,"w") as output : @@ -130,7 +141,7 @@ writer.writerows(remaining_lines) # Write filtered lines to filtered_output - with open(args.filtered_file,"w") as filtered_output : + with open(args.discarded_lines,"w") as filtered_output : writer = csv.writer(filtered_output,delimiter="\t") writer.writerows(filtered_lines)
--- a/filter_kw_val.xml Tue Feb 05 08:22:47 2019 -0500 +++ b/filter_kw_val.xml Tue Mar 05 07:37:10 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.02.05"> +<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.03.05"> <description></description> <requirements> </requirements> @@ -8,8 +8,9 @@ <command><![CDATA[ python $__tool_directory__/filter_kw_val.py -i "$input1,$header" - -o "$output1" - --filtered_file "$filtered_file" + -o "$kept_lines" + --discarded_lines "$discarded_lines" + --operation "$operation" --operator "$operator" ## Keywords @@ -25,21 +26,8 @@ ## value to filter #for $val in $value - #if $val.v.val != "None" - --value - #if $val.v.val == "Equal" - $val.v.equal "$val.ncol" "=" - #else if $val.v.val == "Higher" - $val.v.higher "$val.ncol" ">" - #else if $val.v.val == "Equal or higher" - $val.v.equal_higher "$val.ncol" ">=" - #else if $val.v.val == "Lower" - $val.v.lower "$val.ncol" "<" - #else if $val.v.val == "Equal or lower" - $val.v.equal_lower "$val.ncol" "<=" - #else - $val.v.different "$val.ncol" "!=" - #end if + #if $val.value != "None" + --value $val.value $val.ncol $val.operator #end if #end for @@ -50,21 +38,27 @@ #end if #end for - #if $sort_column != "" - --sort_col "$sort_column,$reversed_sort" + #if $sort.sort_bool == "true" + --sort_col "$sort.sort_column,$sort.reversed_sort" #end if ]]></command> <inputs> <param type="data" name="input1" format="txt,tabular" label="Input file" /> <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> + <param name="operation" type="select" label="Operation" help="keep or discard word(s) or value(s) that match filters ?"> + <option value="keep">Keep</option> + <option value="discard">Discard</option> + </param> <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" > <option value="OR" selected="True">OR</option> <option value="AND">AND</option> </param> <repeat name="keyword" title="Filter by keywords" > - <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> + <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek keywords in the first column (and keep or discard them)'> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> + </param> <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' /> <conditional name="k" > <param name="kw" type="select" label="Enter keywords" > @@ -72,7 +66,7 @@ <option value="file">File containing keywords</option> </param> <when value="text" > - <param name="txt" type="text" label="Copy/paste keywords to be filtered out" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' > + <param name="txt" type="text" label="Copy/paste keywords to find (keep or discard)" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' > <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -91,56 +85,53 @@ </conditional> </repeat> <repeat name="value" title="Filter by numerical value" > - <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> - <conditional name="v" > - <param name="val" type="select" label="Select operator" > - <option value="Equal">=</option> - <option value="Higher">></option> - <option value="Equal or higher">>=</option> - <option value="Lower"><</option> - <option value="Equal or lower"><=</option> - <option value="Different">!=</option> - </param> - <when value="Equal" > - <param name="equal" type="float" value="" label="Value" /> - </when> - <when value="Higher" > - <param type="float" name="higher" value="" label="Value" /> - </when> - <when value="Equal or higher" > - <param type="float" name="equal_higher" value="" label="Value" /> - </when> - <when value="Lower" > - <param type="float" name="lower" value="" label="Value" /> - </when> - <when value="Equal or lower" > - <param type="float" name="equal_lower" value="" label="Value" /> - </when> - <when value="Different"> - <param type="float" name="different" value="" label="Value"/> - </when> - </conditional> + <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> + </param> + <param name="operator" type="select" label="Select operator" > + <option value="Equal">=</option> + <option value="Higher">></option> + <option value="Equal-or-higher">>=</option> + <option value="Lower"><</option> + <option value="Equal-or-lower"><=</option> + <option value="Different">!=</option> + </param> + <param name="value" type="float" value="" label="Value"></param> </repeat> <repeat name="values_range" title="Filter by range of numerical values"> - <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> + <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> + </param> <param name="bottom_value" type="float" value="" label="Enter the bottom value" /> <param name="top_value" type="float" value="" label="Enter the top value" /> <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" /> </repeat> - <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values" /> - <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/> + <conditional name="sort"> + <param name="sort_bool" type="boolean" label="Sort by column ?" checked="false" truevalue="true" falsevalue="false" /> + <when value="true"> + <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values"> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]</validator> + </param> + <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/> + </when> + <when value="false"/> + </conditional> </inputs> <outputs> - <data name="output1" format="tsv" label="${tool.name} on ${input1.name}" /> - <data name="filtered_file" format="tsv" label="${tool.name} on ${input1.name} - Filtered lines" /> + <data name="kept_lines" format="tsv" label="Filtered_${input1.name}" /> + <data name="discarded_lines" format="tsv" label="Filtered_${input1.name} - discarded_lines" /> </outputs> <tests> <test> <param name="input1" value="Lacombe_et_al_2017_OK.txt" /> <param name="header" value="true" /> <param name="operator" value="OR"/> + <param name="operation" value="discard"/> <param name="sort_column" value="c1"/> - <param name="reversed_sort" value="false"/> + <conditional name="sort"> + <param name="sort_bool" value="false"/> + <param name="reversed_sort" value="false"/> + </conditional> <repeat name="keyword"> <param name="ncol" value="c1" /> <param name="match" value="True" /> @@ -151,19 +142,17 @@ </repeat> <repeat name="value"> <param name="ncol" value="c3"/> - <conditional name="v"> - <param name="val" value="Higher"/> - <param name="higher" value="20" /> - </conditional> + <param name="operator" value="Higher"/> + <param name="value" value="20" /> </repeat> - <output name="output1" file="output.csv" /> - <output name="filtered_file" file="filtered_output.csv" /> + <output name="kept_lines" file="output.tsv" /> + <output name="discarded_lines" file="discarded_lines.tsv" /> </test> </tests> <help><![CDATA[ **Description** -This tool allows to filter out data according to different criteria such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold). +This tool allows to keep/discard rows from your dataset according to different filter such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold). A boolean operator "OR/AND" allows to combine different type of filters making this tool very powerful. ----- @@ -176,12 +165,21 @@ **Parameters** -**AND/OR operator** +**Operation** + +- **Keep**: only keep lines with keyword(s) and/or value(s) concerned by defined filter(s) +- **Discard**: only keep lines with keyword(s) and/or value(s) NOT concerned by defined filter(s) + +.. class:: infomark -As many filters as needed can be combined, you can choose how filters apply on your data by using the following boolean operators: +Two output files are created, one with kept lines and the other one with discarded lines. + +**Select an operator to combine your filters (if more than one)** -- OR: only one filter must be satisfied to remove one row -- AND: all filters must be satisfied to remove one row +Many filters (criteria) can be combined in a single execution making this tool quite powerful; this can be achieved using the following boolean operators: + +- **OR**: only one of filters must be satisfied to keep/discard one row +- **AND**: all filters must be satisfied to keep/discard one row ----- @@ -205,7 +203,7 @@ "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option: -- If you choose **Yes**, only the fields that contains exactly the same content will be removed. +- If you choose **Yes**, only the fields that contains exactly the same content will be removed (i.e. using the "discard" mode). - If you choose **No**, all the fields containing the keyword will be removed. @@ -231,18 +229,17 @@ - >= (greater than or equal to) Then enter the numerical threshold to apply by filling the "Value" box. -If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that correspond to your settings will be filtered out. +If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that correspond to your settings will be kept or discarded (based on operation parameter). ----- **Filter by a range of values**: You can also set a range of values to filter your file. -Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be filtered out. +Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be discarded (or the other way around based on operation parameter). ----- -**Sort results files** - -You can sort your results by column in ascending (default value) or descending by entering the column number on which to sort the data. +**Sort by column ?** +click on the "Yes" button allows to "Sort result files by:" a column number. this can be done in ascending (default value) or descending order by entering the column number on which to sort the data. ----- @@ -250,9 +247,9 @@ The tool returns two output files. -* A text file containing the results that pass your filters +* A text file containing the results that satisfy your filters (i.e. "keep" mode). -* A text file containing the rows removed from the input file (i.e. containing data taht do not pass your filter(s). +* A text file containing the rows removed from the input file (i.e. "discard" mode). ----- @@ -260,7 +257,7 @@ **Authors** -T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/discarded_lines.tsv Tue Mar 05 07:37:10 2019 -0500 @@ -0,0 +1,21 @@ +Protein accession number (UniProt) Protein name Number of peptides (razor + unique) +P15924 Desmoplakin 69 +P02538 Keratin, type II cytoskeletal 6A 53 +P02768 Serum albumin 44 +P08779 Keratin, type I cytoskeletal 16 29 +Q02413 Desmoglein-1 24 +P07355 Annexin A2;Putative annexin A2-like protein 22 +P14923 Junction plakoglobin 22 +P02788 Lactotransferrin 21 +Q9HC84 Mucin-5B 21 +P04745 Alpha-amylase 1 23 +P04264 Keratin, type II cytoskeletal 1 61 +P35908 Keratin, type II cytoskeletal 2 epidermal 40 +P13645 Keratin, type I cytoskeletal 10 40 +Q5D862 Filaggrin-2 14 +Q5T749 Keratinocyte proline-rich protein 13 +Q8IW75 Serpin A12 3 +P81605 Dermcidin 3 +P22531 Small proline-rich protein 2E 3 +P59666 Neutrophil defensin 3 2 +P78386 Keratin, type II cuticular Hb5 2
--- a/test-data/filtered_output.csv Tue Feb 05 08:22:47 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -Protein accession number (UniProt) Protein name Number of peptides (razor + unique) -P02538 Keratin, type II cytoskeletal 6A 53 -P02768 Serum albumin 44 -P02788 Lactotransferrin 21 -P04264 Keratin, type II cytoskeletal 1 61 -P04745 Alpha-amylase 1 23 -P07355 Annexin A2;Putative annexin A2-like protein 22 -P08779 Keratin, type I cytoskeletal 16 29 -P13645 Keratin, type I cytoskeletal 10 40 -P14923 Junction plakoglobin 22 -P15924 Desmoplakin 69 -P22531 Small proline-rich protein 2E 3 -P35908 Keratin, type II cytoskeletal 2 epidermal 40 -P59666 Neutrophil defensin 3 2 -P78386 Keratin, type II cuticular Hb5 2 -P81605 Dermcidin 3 -Q02413 Desmoglein-1 24 -Q5D862 Filaggrin-2 14 -Q5T749 Keratinocyte proline-rich protein 13 -Q8IW75 Serpin A12 3 -Q9HC84 Mucin-5B 21
--- a/test-data/output.csv Tue Feb 05 08:22:47 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,142 +0,0 @@ -Protein accession number (UniProt) Protein name Number of peptides (razor + unique) -A8K2U0 Alpha-2-macroglobulin-like protein 1 3 -O43707 Alpha-actinin-4;Alpha-actinin-1 6 -O60911 Cathepsin L2 4 -O75223 Gamma-glutamylcyclotransferase 6 -O75342 Arachidonate 12-lipoxygenase, 12R-type 4 -O75369 Filamin-B 5 -O75635 Serpin B7 7 -O95274 Ly6/PLAUR domain-containing protein 3 2 -P00338 L-lactate dehydrogenase A chain 6 -P00441 Superoxide dismutase [Cu-Zn] 5 -P00491 Purine nucleoside phosphorylase 2 -P00558 Phosphoglycerate kinase 1 4 -P00738 Haptoglobin 3 -P01009 Alpha-1-antitrypsin;Short peptide from AAT 6 -P01011 Alpha-1-antichymotrypsin 3 -P01036 Cystatin-S 5 -P01037 Cystatin-SN 2 -P01040 Cystatin-A 8 -P01623 Ig kappa chain V-III region 3 -P01625 Ig kappa chain V-IV region Len 2 -P01765 Ig heavy chain V-III region TIL 2 -P01766 Ig heavy chain V-III region BRO 2 -P01833 Polymeric immunoglobulin receptor 15 -P01834 Ig kappa chain C region 8 -P01857 Ig gamma-1 chain C region 7 -P01860 Ig gamma-3 chain C region 2 -P01871 Ig mu chain C region 2 -P01876 Ig alpha-1 chain C region 16 -P01877 Ig alpha-2 chain C region 3 -P02545 Prelamin-A/C;Lamin-A/C 10 -P02763 Alpha-1-acid glycoprotein 1 3 -P02787 Serotransferrin 9 -P04040 Catalase 9 -P04075 Fructose-bisphosphate aldolase A 4 -P04080 Cystatin-B 2 -P04083 Annexin A1 10 -P04259 Keratin, type II cytoskeletal 6B 10 -P04406 Glyceraldehyde-3-phosphate dehydrogenase 8 -P04792 Heat shock protein beta-1 5 -P05089 Arginase-1 8 -P05090 Apolipoprotein D 2 -P06396 Gelsolin 3 -P06733 Alpha-enolase 15 -P06870 Kallikrein-1 2 -P07339 Cathepsin D 6 -P07384 Calpain-1 catalytic subunit 4 -P07858 Cathepsin B 2 -P07900 Heat shock protein HSP 90-alpha 6 -P08865 40S ribosomal protein SA 2 -P09211 Glutathione S-transferase P 4 -P09228 Cystatin-SA 2 -P09972 Fructose-bisphosphate aldolase C 2 -P0CG05 Ig lambda-2 chain C regions 4 -P0DMV9 Heat shock 70 kDa protein 1B 8 -P10599 Thioredoxin 6 -P11021 78 kDa glucose-regulated protein 10 -P11142 Heat shock cognate 71 kDa protein 5 -P11279 Lysosome-associated membrane glycoprotein 1 2 -P12273 Prolactin-inducible protein 6 -P13473 Lysosome-associated membrane glycoprotein 2 2 -P13639 Elongation factor 2 8 -P13646 Keratin, type I cytoskeletal 13 11 -P14618 Pyruvate kinase PKM 9 -P14735 Insulin-degrading enzyme 3 -P18206 Vinculin 4 -P18510 Interleukin-1 receptor antagonist protein 3 -P19012 Keratin, type I cytoskeletal 15 2 -P19013 Keratin, type II cytoskeletal 4 13 -P19971 Thymidine phosphorylase 2 -P20930 Filaggrin 2 -P20933 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase 3 -P22528 Cornifin-B 3 -P23284 Peptidyl-prolyl cis-trans isomerase B 2 -P23396 40S ribosomal protein S3 2 -P25311 Zinc-alpha-2-glycoprotein 15 -P25705 ATP synthase subunit alpha, mitochondrial 2 -P25788 Proteasome subunit alpha type-3 3 -P26641 Elongation factor 1-gamma 3 -P27482 Calmodulin-like protein 3 2 -P29508 Serpin B3 20 -P30740 Leukocyte elastase inhibitor 3 -P31025 Lipocalin-1 4 -P31151 Protein S100-A7 9 -P31944 Caspase-14 15 -P31947 14-3-3 protein sigma 9 -P31949 Protein S100-A11 2 -P35579 Myosin-9 8 -P36952 Serpin B5 3 -P40121 Macrophage-capping protein 2 -P40926 Malate dehydrogenase, mitochondrial 3 -P42357 Histidine ammonia-lyase 2 -P47756 F-actin-capping protein subunit beta 2 -P47929 Galectin-7 5 -P48594 Serpin B4 4 -P48637 Glutathione synthetase 2 -P49720 Proteasome subunit beta type-3 2 -P50395 Rab GDP dissociation inhibitor beta 2 -P58107 Epiplakin 5 -P59998 Actin-related protein 2/3 complex subunit 4 2 -P60174 Triosephosphate isomerase 9 -P60842 Eukaryotic initiation factor 4A-I 5 -P61160 Actin-related protein 2 2 -P61626 Lysozyme C 7 -P61916 Epididymal secretory protein E1 2 -P62258 14-3-3 protein epsilon 4 -P62937 Peptidyl-prolyl cis-trans isomerase A 5 -P62987 Ubiquitin-60S ribosomal protein L40 6 -P63104 14-3-3 protein zeta/delta 5 -P63261 Actin, cytoplasmic 2 19 -P68363 Tubulin alpha-1B chain 7 -P68371 Tubulin beta-4B chain 8 -P68871 Hemoglobin subunit beta 4 -P80188 Neutrophil gelatinase-associated lipocalin 3 -Q01469 Fatty acid-binding protein 5, epidermal 15 -Q04695 Keratin, type I cytoskeletal 17 18 -Q06830 Peroxiredoxin-1 9 -Q08188 Protein-glutamine gamma-glutamyltransferase E 12 -Q13867 Bleomycin hydrolase 5 -Q14574 Desmocollin-3 4 -Q15149 Plectin 15 -Q15828 Cystatin-M 3 -Q5T750 Skin-specific protein 32 4 -Q6KB66 Keratin, type II cytoskeletal 80 13 -Q6P4A8 Phospholipase B-like 1 5 -Q6UWP8 Suprabasin 4 -Q86YZ3 Hornerin 11 -Q8N1N4 Keratin, type II cytoskeletal 78 18 -Q8TAX7 Mucin-7 2 -Q8WVV4 Protein POF1B 8 -Q92820 Gamma-glutamyl hydrolase 5 -Q96DA0 Zymogen granule protein 16 homolog B 5 -Q96FX8 p53 apoptosis effector related to PMP-22 2 -Q96P63 Serpin B12 9 -Q9C075 Keratin, type I cytoskeletal 23 4 -Q9HCY8 Protein S100-A14 3 -Q9NZH8 Interleukin-36 gamma 6 -Q9NZT1 Calmodulin-like protein 5 8 -Q9UGM3 Deleted in malignant brain tumors 1 protein 6 -Q9UI42 Carboxypeptidase A4 6 -Q9UIV8 Serpin B13 2 -Q9Y6R7 IgGFc-binding protein 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tsv Tue Mar 05 07:37:10 2019 -0500 @@ -0,0 +1,142 @@ +Protein accession number (UniProt) Protein name Number of peptides (razor + unique) +P29508 Serpin B3 20 +P63261 Actin, cytoplasmic 2 19 +Q8N1N4 Keratin, type II cytoskeletal 78 18 +Q04695 Keratin, type I cytoskeletal 17 18 +P01876 Ig alpha-1 chain C region 16 +Q01469 Fatty acid-binding protein 5, epidermal 15 +P31944 Caspase-14 15 +P01833 Polymeric immunoglobulin receptor 15 +P06733 Alpha-enolase 15 +P25311 Zinc-alpha-2-glycoprotein 15 +Q15149 Plectin 15 +P19013 Keratin, type II cytoskeletal 4 13 +Q6KB66 Keratin, type II cytoskeletal 80 13 +Q08188 Protein-glutamine gamma-glutamyltransferase E 12 +P13646 Keratin, type I cytoskeletal 13 11 +Q86YZ3 Hornerin 11 +P04259 Keratin, type II cytoskeletal 6B 10 +P02545 Prelamin-A/C;Lamin-A/C 10 +P04083 Annexin A1 10 +P11021 78 kDa glucose-regulated protein 10 +P02787 Serotransferrin 9 +P04040 Catalase 9 +P31151 Protein S100-A7 9 +P31947 14-3-3 protein sigma 9 +Q96P63 Serpin B12 9 +P14618 Pyruvate kinase PKM 9 +P60174 Triosephosphate isomerase 9 +Q06830 Peroxiredoxin-1 9 +P01040 Cystatin-A 8 +P05089 Arginase-1 8 +P01834 Ig kappa chain C region 8 +P04406 Glyceraldehyde-3-phosphate dehydrogenase 8 +P0DMV9 Heat shock 70 kDa protein 1B 8 +P13639 Elongation factor 2 8 +P35579 Myosin-9 8 +P68371 Tubulin beta-4B chain 8 +Q8WVV4 Protein POF1B 8 +O75635 Serpin B7 7 +P01857 Ig gamma-1 chain C region 7 +P61626 Lysozyme C 7 +P68363 Tubulin alpha-1B chain 7 +P01009 Alpha-1-antitrypsin;Short peptide from AAT 6 +P07900 Heat shock protein HSP 90-alpha 6 +Q9NZH8 Interleukin-36 gamma 6 +O43707 Alpha-actinin-4;Alpha-actinin-1 6 +O75223 Gamma-glutamylcyclotransferase 6 +P00338 L-lactate dehydrogenase A chain 6 +P07339 Cathepsin D 6 +P62987 Ubiquitin-60S ribosomal protein L40 6 +P10599 Thioredoxin 6 +Q9UGM3 Deleted in malignant brain tumors 1 protein 6 +Q9UI42 Carboxypeptidase A4 6 +P47929 Galectin-7 5 +Q13867 Bleomycin hydrolase 5 +Q6P4A8 Phospholipase B-like 1 5 +O75369 Filamin-B 5 +P00441 Superoxide dismutase [Cu-Zn] 5 +P04792 Heat shock protein beta-1 5 +P11142 Heat shock cognate 71 kDa protein 5 +P58107 Epiplakin 5 +P60842 Eukaryotic initiation factor 4A-I 5 +P62937 Peptidyl-prolyl cis-trans isomerase A 5 +P63104 14-3-3 protein zeta/delta 5 +Q92820 Gamma-glutamyl hydrolase 5 +O75342 Arachidonate 12-lipoxygenase, 12R-type 4 +P09211 Glutathione S-transferase P 4 +P31025 Lipocalin-1 4 +P48594 Serpin B4 4 +Q14574 Desmocollin-3 4 +Q5T750 Skin-specific protein 32 4 +Q6UWP8 Suprabasin 4 +O60911 Cathepsin L2 4 +P00558 Phosphoglycerate kinase 1 4 +P04075 Fructose-bisphosphate aldolase A 4 +P07384 Calpain-1 catalytic subunit 4 +P0CG05 Ig lambda-2 chain C regions 4 +P18206 Vinculin 4 +P62258 14-3-3 protein epsilon 4 +P68871 Hemoglobin subunit beta 4 +Q9C075 Keratin, type I cytoskeletal 23 4 +A8K2U0 Alpha-2-macroglobulin-like protein 1 3 +P00738 Haptoglobin 3 +P01011 Alpha-1-antichymotrypsin 3 +P02763 Alpha-1-acid glycoprotein 1 3 +P18510 Interleukin-1 receptor antagonist protein 3 +P22528 Cornifin-B 3 +P30740 Leukocyte elastase inhibitor 3 +P80188 Neutrophil gelatinase-associated lipocalin 3 +Q15828 Cystatin-M 3 +Q9HCY8 Protein S100-A14 3 +P01623 Ig kappa chain V-III region 3 +P01877 Ig alpha-2 chain C region 3 +P06396 Gelsolin 3 +P14735 Insulin-degrading enzyme 3 +P20933 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase 3 +P25788 Proteasome subunit alpha type-3 3 +P26641 Elongation factor 1-gamma 3 +P36952 Serpin B5 3 +P40926 Malate dehydrogenase, mitochondrial 3 +Q9Y6R7 IgGFc-binding protein 3 +O95274 Ly6/PLAUR domain-containing protein 3 2 +P00491 Purine nucleoside phosphorylase 2 +P04080 Cystatin-B 2 +P09972 Fructose-bisphosphate aldolase C 2 +P19012 Keratin, type I cytoskeletal 15 2 +P20930 Filaggrin 2 +Q96FX8 p53 apoptosis effector related to PMP-22 2 +Q9UIV8 Serpin B13 2 +P01625 Ig kappa chain V-IV region Len 2 +P01765 Ig heavy chain V-III region TIL 2 +P01766 Ig heavy chain V-III region BRO 2 +P01860 Ig gamma-3 chain C region 2 +P01871 Ig mu chain C region 2 +P05090 Apolipoprotein D 2 +P06870 Kallikrein-1 2 +P07858 Cathepsin B 2 +P08865 40S ribosomal protein SA 2 +P11279 Lysosome-associated membrane glycoprotein 1 2 +P13473 Lysosome-associated membrane glycoprotein 2 2 +P19971 Thymidine phosphorylase 2 +P23284 Peptidyl-prolyl cis-trans isomerase B 2 +P23396 40S ribosomal protein S3 2 +P25705 ATP synthase subunit alpha, mitochondrial 2 +P27482 Calmodulin-like protein 3 2 +P31949 Protein S100-A11 2 +P40121 Macrophage-capping protein 2 +P42357 Histidine ammonia-lyase 2 +P47756 F-actin-capping protein subunit beta 2 +P48637 Glutathione synthetase 2 +P49720 Proteasome subunit beta type-3 2 +P50395 Rab GDP dissociation inhibitor beta 2 +P59998 Actin-related protein 2/3 complex subunit 4 2 +P61160 Actin-related protein 2 2 +P61916 Epididymal secretory protein E1 2 +Q9NZT1 Calmodulin-like protein 5 8 +P12273 Prolactin-inducible protein 6 +Q96DA0 Zymogen granule protein 16 homolog B 5 +P01036 Cystatin-S 5 +Q8TAX7 Mucin-7 2 +P01037 Cystatin-SN 2 +P09228 Cystatin-SA 2