# HG changeset patch # User proteore # Date 1527865847 14400 # Node ID 6f32c1e12572fa1c1aeff77bb3863e0d67992c85 # Parent c6ba1e6f686955126f67e085adeab232604f1357 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337 diff -r c6ba1e6f6869 -r 6f32c1e12572 README.rst --- a/README.rst Fri Apr 20 09:07:23 2018 -0400 +++ b/README.rst Fri Jun 01 11:10:47 2018 -0400 @@ -3,7 +3,7 @@ **Authors** -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform @@ -15,9 +15,7 @@ This tool allows to remove unneeded data (e.g. contaminants, non-significant values) from a proteomics results file (e.g. MaxQuant or Proline output). -**For each row, if there are more than one protein IDs/protein names/gene names, only the first one will be considered in the output** - -**Filter the file by keywords** +**Filter by keyword(s)** Several options can be used. For each option, you can fill in the field or upload a file which contains the keywords. @@ -45,11 +43,55 @@ **No** option (partial match) for "kinase": not only lines which contain "kinase" but also lines with "alpha-kinase" (and so on) are removed. -**Filter the file by values** +------------------------------------------------------- + +**Filter by values** + +You can filter your data by a column of numerical values. +Enter the column to be use and select one operator in the list : + +- "=" +- "!=" +- "<" +- "<=" +- ">" +- ">=" + +Then enter the value to filter and specify the column to apply that option. +If a row contains a value that correspond to your settings, it will be filtered. + +------------------------------------------------------- + +**Filter by a range of values** + +You can also set a range of values to filter your file. +In opposition to value filter, rows with values inside of the defined range are kept. -You can choose to use one or more options (e.g. to filter out peptides of low intensity value, by q-value, etc.). +Rows with values outside of the defined range will be filtered. + +------------------------------------------------------- + +**AND/OR operator** + +Since you can add as many filters as you want, you can choose how filters apply on your data. + +AND or OR operator option works on all filters : + +- OR : only one filter to be satisfied to remove one row +- AND : all filters must be satisfied to remove one row -* For each option, you can choose between "=", ">", ">=", "<" and "<=", then enter the value to filter and specify the column to apply that option. +------------------------------------------------------- + +**Sort the results files** + +You can sort the result file if you wish, it can help you to check results. + +In order to do so : enter the column to be used, all columns will be sorted according to the one filled in. + +Rows stay intact, just in different order like excel. +You can also choose ascending or descending order, by default descending order is set. + +------------------------------------------------------- **Output** diff -r c6ba1e6f6869 -r 6f32c1e12572 filter_kw_val.py --- a/filter_kw_val.py Fri Apr 20 09:07:23 2018 -0400 +++ b/filter_kw_val.py Fri Jun 01 11:10:47 2018 -0400 @@ -1,38 +1,46 @@ -import argparse -import re - +import argparse, re, csv def options(): """ Parse options: -i, --input Input filename and boolean value if the file contains header ["filename,true/false"] - -m, --match if the keywords should be filtered in exact --kw Keyword to be filtered, the column number where this filter applies, boolean value if the keyword should be filtered in exact ["keyword,ncol,true/false"]. This option can be repeated: --kw "kw1,c1,true" --kw "kw2,c1,false" --kw "kw3,c2,true" --kwfile A file that contains keywords to be filter, the column where this filter applies and boolean value if the keyword should be filtered in exact ["filename,ncol,true/false"] --value The value to be filtered, the column number where this filter applies and the - operation symbol ["value,ncol,=/>/>=//>=/ 1 : #if there's more than just a header or 1 row + if header is True : + head=tab[0] + tab=tab[1:] + + if is_number("int",tab[0][sort_col]) : + tab = sorted(tab, key=lambda row: int(row[sort_col]), reverse=reverse) + elif is_number("float",tab[0][sort_col]) : + tab = sorted(tab, key=lambda row: float(row[sort_col]), reverse=reverse) + else : + tab = sorted(tab, key=lambda row: row[sort_col], reverse=reverse) + + if header is True : tab = [head]+tab + + return tab + +#Read the keywords file to extract the list of keywords +def read_option(filename): + with open(filename, "r") as f: + filter_list=f.read().splitlines() + filter_list=[key for key in filter_list if len(key.replace(' ',''))!=0] + filters=";".join(filter_list) + return filters -def readMQ(MQfilename): - # Read input file - mqfile = open(MQfilename, "r") - mq = mqfile.readlines() +# Read input file +def read_file(filename): + with open(filename,"r") as f : + reader=csv.reader(f,delimiter="\t") + tab=list(reader) + # Remove empty lines (contain only space or new line or "") - [mq.remove(blank) for blank in mq if blank.isspace() or blank == ""] - return mq + #[tab.remove(blank) for blank in tab if blank.isspace() or blank == ""] + tab=[line for line in tab if len("".join(line).replace(" ","")) !=0 ] + + return tab + +#seek for keywords in rows of csvfile, return a dictionary of boolean (true if keyword found, false otherwise) +def filter_keyword(csv_file, header, results_dict, keywords, ncol, match): + match=str_to_bool(match) + ncol=column_from_txt(ncol) + + keywords = keywords.upper().split(";") # Split list of filter keyword + [keywords.remove(blank) for blank in keywords if blank.isspace() or blank == ""] # Remove blank keywords + keywords = [k.strip() for k in keywords] # Remove space from 2 heads of keywords + + for id_line,line in enumerate(csv_file): + if header is True and id_line == 0 : continue + #line = line.replace("\n", "") + keyword_inline = line[ncol].replace('"', "").split(";") + #line = line + "\n" + + #Perfect match or not + if match is True : + found_in_line = any(pid.upper() in keywords for pid in keyword_inline) + else: + found_in_line = any(ft in pid.upper() for pid in keyword_inline for ft in keywords) + + #if the keyword is found in line + if id_line in results_dict : results_dict[id_line].append(found_in_line) + else : results_dict[id_line]=[found_in_line] + + return results_dict + +#filter ba determined value in rows of csvfile, return a dictionary of boolean (true if value filtered, false otherwise) +def filter_value(csv_file, header, results_dict, filter_value, ncol, opt): + + filter_value = float(filter_value) + ncol=column_from_txt(ncol) -def filter_keyword(MQfile, header, filtered_lines, ids, ncol, match): - mq = MQfile - if isnumber("int", ncol.replace("c", "")): - id_index = int(ncol.replace("c", "")) - 1 + for id_line,line in enumerate(csv_file): + if header is True and id_line == 0 : continue + value = line[ncol].replace('"', "").strip() + if value.replace(".", "", 1).isdigit(): + to_filter=value_compare(value,filter_value,opt) + + #adding the result to the dictionary + if id_line in results_dict : results_dict[id_line].append(to_filter) + else : results_dict[id_line]=[to_filter] + + return results_dict + +#filter ba determined value in rows of csvfile, return a dictionary of boolean (true if value filtered, false otherwise) +def filter_values_range(csv_file, header, results_dict, bottom_value, top_value, ncol, inclusive): + inclusive=str_to_bool(inclusive) + bottom_value = float(bottom_value) + top_value=float(top_value) + ncol=column_from_txt(ncol) + + for id_line, line in enumerate(csv_file): + if header is True and id_line == 0 : continue + value = line[ncol].replace('"', "").strip() + if value.replace(".", "", 1).isdigit(): + value=float(value) + if inclusive is True: + in_range = not (bottom_value <= value <= top_value) + else : + in_range = not (bottom_value < value < top_value) + + #adding the result to the dictionary + if id_line in results_dict : results_dict[id_line].append(in_range) + else : results_dict[id_line]=[in_range] + + return results_dict + +def column_from_txt(ncol): + if is_number("int", ncol.replace("c", "")): + ncol = int(ncol.replace("c", "")) - 1 else: raise ValueError("Please specify the column where " "you would like to apply the filter " "with valid format") - - # Split list of filter IDs - ids = ids.upper().split(";") - # Remove blank IDs - [ids.remove(blank) for blank in ids if blank.isspace() or blank == ""] - # Remove space from 2 heads of IDs - ids = [id.strip() for id in ids] - - - if header == "true": - header = mq[0] - content = mq[1:] - else: - header = "" - content = mq[:] - - if not filtered_lines: # In case there is already some filtered lines from other filters - filtered_lines = [] - if header != "": - filtered_lines.append(header) + return ncol - for line in content: - line = line.replace("\n", "") - id_inline = line.split("\t")[id_index].replace('"', "").split(";") - # Take only first IDs - #one_id_line = line.replace(line.split("\t")[id_index], id_inline[0]) - line = line + "\n" - - if match != "false": - # Filter protein IDs - if any(pid.upper() in ids for pid in id_inline): - filtered_lines.append(line) - mq.remove(line) - #else: - # mq[mq.index(line)] = one_id_line - else: - if any(ft in pid.upper() for pid in id_inline for ft in ids): - filtered_lines.append(line) - mq.remove(line) - #else: - # mq[mq.index(line)] = one_id_line - return mq, filtered_lines +#return True if value is in the determined values, false otherwise +def value_compare(value,filter_value,opt): + test_value=False -def filter_value(MQfile, header, filtered_prots, filter_value, ncol, opt): - mq = MQfile - if ncol and isnumber("int", ncol.replace("c", "")): - index = int(ncol.replace("c", "")) - 1 - else: - raise ValueError("Please specify the column where " - "you would like to apply the filter " - "with valid format") - if header == "true": - header = mq[0] - content = mq[1:] - else: - header = "" - content = mq[:] - if not filtered_prots: # In case there is already some filtered lines from other filters - filtered_prots = [] - if header != "": - filtered_prots.append(header) + if opt == "<": + if float(value) < filter_value: + test_value = True + elif opt == "<=": + if float(value) <= filter_value: + test_value = True + elif opt == ">": + if float(value) > filter_value: + test_value = True + elif opt == ">=": + if float(value) >= filter_value: + test_value = True + elif opt == "=": + if float(value) == filter_value: + test_value = True + elif opt == "!=": + if float(value) != filter_value: + test_value = True - for line in content: - prot = line.replace("\n","") - filter_value = float(filter_value) - pep = prot.split("\t")[index].replace('"', "") - if pep.replace(".", "", 1).isdigit(): - if opt == "<": - if float(pep) >= filter_value: - filtered_prots.append(line) - mq.remove(line) - elif opt == "<=": - if float(pep) > filter_value: - filtered_prots.append(line) - mq.remove(line) - elif opt == ">": - #print(prot.number_of_prots, filter_value, int(prot.number_of_prots) > filter_value) - if float(pep) <= filter_value: - filtered_prots.append(line) - mq.remove(line) - elif opt == ">=": - if float(pep) < filter_value: - filtered_prots.append(line) - mq.remove(line) - else: - if float(pep) != filter_value: - filtered_prots.append(line) - mq.remove(line) - return mq, filtered_prots + return test_value if __name__ == "__main__": options() diff -r c6ba1e6f6869 -r 6f32c1e12572 filter_kw_val.xml --- a/filter_kw_val.xml Fri Apr 20 09:07:23 2018 -0400 +++ b/filter_kw_val.xml Fri Jun 01 11:10:47 2018 -0400 @@ -9,10 +9,11 @@ python $__tool_directory__/filter_kw_val.py -i "$input1,$header" -o "$output1" - --trash_file "$trash_file" + --filtered_file "$filtered_file" + --operator "$operator" ## Keywords - #for $i, $key in enumerate($keyword) + #for $key in $keyword #if $key.k.kw != "None" #if $key.k.kw == "text" --kw "$key.k.txt" "$key.ncol" "$key.match" @@ -22,8 +23,8 @@ #end if #end for - ## Number of proteins - #for $i, $val in enumerate($value) + ## value to filter + #for $val in $value #if $val.v.val != "None" --value #if $val.v.val == "Equal" @@ -34,16 +35,35 @@ $val.v.equal_higher "$val.ncol" ">=" #else if $val.v.val == "Lower" $val.v.lower "$val.ncol" "<" - #else + #else if $val.v.val == "Equal or lower" $val.v.equal_lower "$val.ncol" "<=" + #else + $val.v.different "$val.ncol" "!=" #end if #end if #end for + ##range of values to keep + #for $vr in $values_range + #if vr + --values_range $vr.bottom_value $vr.top_value $vr.ncol $vr.inclusive + #end if + #end for + + #if $sort_column != "" + --sort_col "$sort_column,$reversed_sort" + #end if + ]]> + + + + + + @@ -71,7 +91,6 @@ - @@ -82,6 +101,7 @@ + @@ -100,18 +120,29 @@ + + + - + + + + + + - + + + + @@ -120,16 +151,21 @@ - - + + + + + + + + + " +- ">=" + +Then enter the value to filter and specify the column to apply that option. +If a row contains a value that correspond to your settings, it will be filtered. + +----- + +**Filter by a range of values** + +You can also set a range of values to filter your file. +In opposition to value filter, rows with values inside of the defined range are kept. -You can choose to use one or more options (e.g. to filter out peptides of low intensity value, by q-value, etc.). +Rows with values outside of the defined range will be filtered. + +----- + +**AND/OR operator** + +Since you can add as many filters as you want, you can choose how filters apply on your data. + +AND or OR operator option works on all filters : + +- OR : only one filter to be satisfied to remove one row +- AND : all filters must be satisfied to remove one row -* For each option, you can choose between "=", ">", ">=", "<" and "<=", then enter the value to filter and specify the column to apply that option. +----- + +**Sort the results files** + +You can sort the result file if you wish, it can help you to check results. + +In order to do so : enter the column to be used, all columns will be sorted according to the one filled in. + +Rows stay intact, just in different order like excel. +You can also choose ascending or descending order, by default descending order is set. + +----- **Output** @@ -169,7 +249,7 @@ * A text file containing the resulting filtered input file. -* A text file containing the rows that have been filtered from the input file. +* A text file containing the rows removed from the input file. ----- @@ -177,7 +257,7 @@ **Authors** -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR diff -r c6ba1e6f6869 -r 6f32c1e12572 test-data/FKW_Lacombe_et_al_2017_OK.txt --- a/test-data/FKW_Lacombe_et_al_2017_OK.txt Fri Apr 20 09:07:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,154 +0,0 @@ -Protein accession number (UniProt) Protein name Number of peptides (razor + unique) - -P15924 Desmoplakin 69 -P02538 Keratin, type II cytoskeletal 6A 53 -P02768 Serum albumin 44 -P08779 Keratin, type I cytoskeletal 16 29 -Q02413 Desmoglein-1 24 -P07355 "Annexin A2;Putative annexin A2-like protein" 22 -P14923 Junction plakoglobin 22 -P02788 Lactotransferrin 21 -Q9HC84 Mucin-5B 21 -P29508 Serpin B3 20 -P63261 Actin, cytoplasmic 2 19 -Q8N1N4 Keratin, type II cytoskeletal 78 18 -Q04695 Keratin, type I cytoskeletal 17 18 -P01876 Ig alpha-1 chain C region 16 -Q01469 Fatty acid-binding protein 5, epidermal 15 -P31944 Caspase-14 15 -P01833 Polymeric immunoglobulin receptor 15 -P06733 Alpha-enolase 15 -P25311 Zinc-alpha-2-glycoprotein 15 -Q15149 Plectin 15 -P19013 Keratin, type II cytoskeletal 4 13 -Q6KB66 Keratin, type II cytoskeletal 80 13 -Q08188 Protein-glutamine gamma-glutamyltransferase E 12 -P13646 Keratin, type I cytoskeletal 13 11 -Q86YZ3 Hornerin 11 -P04259 Keratin, type II cytoskeletal 6B 10 -P02545 "Prelamin-A/C;Lamin-A/C" 10 -P04083 Annexin A1 10 -P11021 78 kDa glucose-regulated protein 10 -P02787 Serotransferrin 9 -P04040 Catalase 9 -P31151 Protein S100-A7 9 -P31947 14-3-3 protein sigma 9 -Q96P63 Serpin B12 9 -P14618 Pyruvate kinase PKM 9 -P60174 Triosephosphate isomerase 9 -Q06830 Peroxiredoxin-1 9 -P01040 Cystatin-A 8 -P05089 Arginase-1 8 -P01834 Ig kappa chain C region 8 -P04406 Glyceraldehyde-3-phosphate dehydrogenase 8 -P0DMV9 Heat shock 70 kDa protein 1B 8 -P13639 Elongation factor 2 8 -P35579 Myosin-9 8 -P68371 Tubulin beta-4B chain 8 -Q8WVV4 Protein POF1B 8 -O75635 Serpin B7 7 -P01857 Ig gamma-1 chain C region 7 -P61626 Lysozyme C 7 -P68363 Tubulin alpha-1B chain 7 -P01009 "Alpha-1-antitrypsin;Short peptide from AAT" 6 -P07900 Heat shock protein HSP 90-alpha 6 -Q9NZH8 Interleukin-36 gamma 6 -O43707 "Alpha-actinin-4;Alpha-actinin-1" 6 -O75223 Gamma-glutamylcyclotransferase 6 -P00338 L-lactate dehydrogenase A chain 6 -P07339 Cathepsin D 6 -P62987 Ubiquitin-60S ribosomal protein L40 6 -P10599 Thioredoxin 6 -Q9UGM3 Deleted in malignant brain tumors 1 protein 6 -Q9UI42 Carboxypeptidase A4 6 -P47929 Galectin-7 5 -Q13867 Bleomycin hydrolase 5 -Q6P4A8 Phospholipase B-like 1 5 -O75369 Filamin-B 5 -P00441 Superoxide dismutase [Cu-Zn] 5 -P04792 Heat shock protein beta-1 5 -P11142 Heat shock cognate 71 kDa protein 5 -P58107 Epiplakin 5 -P60842 Eukaryotic initiation factor 4A-I 5 -P62937 Peptidyl-prolyl cis-trans isomerase A 5 -P63104 14-3-3 protein zeta/delta 5 -Q92820 Gamma-glutamyl hydrolase 5 -O75342 Arachidonate 12-lipoxygenase, 12R-type 4 -P09211 Glutathione S-transferase P 4 -P31025 Lipocalin-1 4 -P48594 Serpin B4 4 -Q14574 Desmocollin-3 4 -Q5T750 Skin-specific protein 32 4 -Q6UWP8 Suprabasin 4 -O60911 Cathepsin L2 4 -P00558 Phosphoglycerate kinase 1 4 -P04075 Fructose-bisphosphate aldolase A 4 -P07384 Calpain-1 catalytic subunit 4 -P0CG05 Ig lambda-2 chain C regions 4 -P18206 Vinculin 4 -P62258 14-3-3 protein epsilon 4 -P68871 Hemoglobin subunit beta 4 -Q9C075 Keratin, type I cytoskeletal 23 4 -A8K2U0 Alpha-2-macroglobulin-like protein 1 3 -P00738 Haptoglobin 3 -P01011 Alpha-1-antichymotrypsin 3 -P02763 Alpha-1-acid glycoprotein 1 3 -P18510 Interleukin-1 receptor antagonist protein 3 -P22528 Cornifin-B 3 -P30740 Leukocyte elastase inhibitor 3 -P80188 Neutrophil gelatinase-associated lipocalin 3 -Q15828 Cystatin-M 3 -Q9HCY8 Protein S100-A14 3 -P01623 Ig kappa chain V-III region 3 -P01877 Ig alpha-2 chain C region 3 -P06396 Gelsolin 3 -P14735 Insulin-degrading enzyme 3 -P20933 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase 3 -P25788 Proteasome subunit alpha type-3 3 -P26641 Elongation factor 1-gamma 3 -P36952 Serpin B5 3 -P40926 Malate dehydrogenase, mitochondrial 3 -Q9Y6R7 IgGFc-binding protein 3 -O95274 Ly6/PLAUR domain-containing protein 3 2 -P00491 Purine nucleoside phosphorylase 2 -P04080 Cystatin-B 2 -P09972 Fructose-bisphosphate aldolase C 2 -P19012 Keratin, type I cytoskeletal 15 2 -P20930 Filaggrin 2 -Q96FX8 p53 apoptosis effector related to PMP-22 2 -Q9UIV8 Serpin B13 2 -P01625 Ig kappa chain V-IV region Len 2 -P01765 Ig heavy chain V-III region TIL 2 -P01766 Ig heavy chain V-III region BRO 2 -P01860 Ig gamma-3 chain C region 2 -P01871 Ig mu chain C region 2 -P05090 Apolipoprotein D 2 -P06870 Kallikrein-1 2 -P07858 Cathepsin B 2 -P08865 40S ribosomal protein SA 2 -P11279 Lysosome-associated membrane glycoprotein 1 2 -P13473 Lysosome-associated membrane glycoprotein 2 2 -P19971 Thymidine phosphorylase 2 -P23284 Peptidyl-prolyl cis-trans isomerase B 2 -P23396 40S ribosomal protein S3 2 -P25705 ATP synthase subunit alpha, mitochondrial 2 -P27482 Calmodulin-like protein 3 2 -P31949 Protein S100-A11 2 -P40121 Macrophage-capping protein 2 -P42357 Histidine ammonia-lyase 2 -P47756 F-actin-capping protein subunit beta 2 -P48637 Glutathione synthetase 2 -P49720 Proteasome subunit beta type-3 2 -P50395 Rab GDP dissociation inhibitor beta 2 -P59998 Actin-related protein 2/3 complex subunit 4 2 -P61160 Actin-related protein 2 2 -P61916 Epididymal secretory protein E1 2 -P04745 Alpha-amylase 1 23 -Q9NZT1 Calmodulin-like protein 5 8 -P12273 Prolactin-inducible protein 6 -Q96DA0 Zymogen granule protein 16 homolog B 5 -P01036 Cystatin-S 5 -Q8TAX7 Mucin-7 2 -P01037 Cystatin-SN 2 -P09228 Cystatin-SA 2 - \ No newline at end of file diff -r c6ba1e6f6869 -r 6f32c1e12572 test-data/Lacombe_et_al_2017_OK.txt diff -r c6ba1e6f6869 -r 6f32c1e12572 test-data/Trash_FKW_Lacombe_et_al_2017_OK.txt --- a/test-data/Trash_FKW_Lacombe_et_al_2017_OK.txt Fri Apr 20 09:07:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -Protein accession number (UniProt) Protein name Number of peptides (razor + unique) - -P04264 Keratin, type II cytoskeletal 1 61 -P35908 Keratin, type II cytoskeletal 2 epidermal 40 -P13645 Keratin, type I cytoskeletal 10 40 -Q5D862 Filaggrin-2 14 -Q5T749 Keratinocyte proline-rich protein 13 -Q8IW75 Serpin A12 3 -P81605 Dermcidin 3 -P22531 Small proline-rich protein 2E 3 -P59666 Neutrophil defensin 3 2 -P78386 Keratin, type II cuticular Hb5 2 \ No newline at end of file diff -r c6ba1e6f6869 -r 6f32c1e12572 test-data/filtered_output.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered_output.csv Fri Jun 01 11:10:47 2018 -0400 @@ -0,0 +1,21 @@ +Protein accession number (UniProt) Protein name Number of peptides (razor + unique) +P02538 Keratin, type II cytoskeletal 6A 53 +P02768 Serum albumin 44 +P02788 Lactotransferrin 21 +P04264 Keratin, type II cytoskeletal 1 61 +P04745 Alpha-amylase 1 23 +P07355 Annexin A2;Putative annexin A2-like protein 22 +P08779 Keratin, type I cytoskeletal 16 29 +P13645 Keratin, type I cytoskeletal 10 40 +P14923 Junction plakoglobin 22 +P15924 Desmoplakin 69 +P22531 Small proline-rich protein 2E 3 +P35908 Keratin, type II cytoskeletal 2 epidermal 40 +P59666 Neutrophil defensin 3 2 +P78386 Keratin, type II cuticular Hb5 2 +P81605 Dermcidin 3 +Q02413 Desmoglein-1 24 +Q5D862 Filaggrin-2 14 +Q5T749 Keratinocyte proline-rich protein 13 +Q8IW75 Serpin A12 3 +Q9HC84 Mucin-5B 21 diff -r c6ba1e6f6869 -r 6f32c1e12572 test-data/output.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.csv Fri Jun 01 11:10:47 2018 -0400 @@ -0,0 +1,142 @@ +Protein accession number (UniProt) Protein name Number of peptides (razor + unique) +A8K2U0 Alpha-2-macroglobulin-like protein 1 3 +O43707 Alpha-actinin-4;Alpha-actinin-1 6 +O60911 Cathepsin L2 4 +O75223 Gamma-glutamylcyclotransferase 6 +O75342 Arachidonate 12-lipoxygenase, 12R-type 4 +O75369 Filamin-B 5 +O75635 Serpin B7 7 +O95274 Ly6/PLAUR domain-containing protein 3 2 +P00338 L-lactate dehydrogenase A chain 6 +P00441 Superoxide dismutase [Cu-Zn] 5 +P00491 Purine nucleoside phosphorylase 2 +P00558 Phosphoglycerate kinase 1 4 +P00738 Haptoglobin 3 +P01009 Alpha-1-antitrypsin;Short peptide from AAT 6 +P01011 Alpha-1-antichymotrypsin 3 +P01036 Cystatin-S 5 +P01037 Cystatin-SN 2 +P01040 Cystatin-A 8 +P01623 Ig kappa chain V-III region 3 +P01625 Ig kappa chain V-IV region Len 2 +P01765 Ig heavy chain V-III region TIL 2 +P01766 Ig heavy chain V-III region BRO 2 +P01833 Polymeric immunoglobulin receptor 15 +P01834 Ig kappa chain C region 8 +P01857 Ig gamma-1 chain C region 7 +P01860 Ig gamma-3 chain C region 2 +P01871 Ig mu chain C region 2 +P01876 Ig alpha-1 chain C region 16 +P01877 Ig alpha-2 chain C region 3 +P02545 Prelamin-A/C;Lamin-A/C 10 +P02763 Alpha-1-acid glycoprotein 1 3 +P02787 Serotransferrin 9 +P04040 Catalase 9 +P04075 Fructose-bisphosphate aldolase A 4 +P04080 Cystatin-B 2 +P04083 Annexin A1 10 +P04259 Keratin, type II cytoskeletal 6B 10 +P04406 Glyceraldehyde-3-phosphate dehydrogenase 8 +P04792 Heat shock protein beta-1 5 +P05089 Arginase-1 8 +P05090 Apolipoprotein D 2 +P06396 Gelsolin 3 +P06733 Alpha-enolase 15 +P06870 Kallikrein-1 2 +P07339 Cathepsin D 6 +P07384 Calpain-1 catalytic subunit 4 +P07858 Cathepsin B 2 +P07900 Heat shock protein HSP 90-alpha 6 +P08865 40S ribosomal protein SA 2 +P09211 Glutathione S-transferase P 4 +P09228 Cystatin-SA 2 +P09972 Fructose-bisphosphate aldolase C 2 +P0CG05 Ig lambda-2 chain C regions 4 +P0DMV9 Heat shock 70 kDa protein 1B 8 +P10599 Thioredoxin 6 +P11021 78 kDa glucose-regulated protein 10 +P11142 Heat shock cognate 71 kDa protein 5 +P11279 Lysosome-associated membrane glycoprotein 1 2 +P12273 Prolactin-inducible protein 6 +P13473 Lysosome-associated membrane glycoprotein 2 2 +P13639 Elongation factor 2 8 +P13646 Keratin, type I cytoskeletal 13 11 +P14618 Pyruvate kinase PKM 9 +P14735 Insulin-degrading enzyme 3 +P18206 Vinculin 4 +P18510 Interleukin-1 receptor antagonist protein 3 +P19012 Keratin, type I cytoskeletal 15 2 +P19013 Keratin, type II cytoskeletal 4 13 +P19971 Thymidine phosphorylase 2 +P20930 Filaggrin 2 +P20933 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase 3 +P22528 Cornifin-B 3 +P23284 Peptidyl-prolyl cis-trans isomerase B 2 +P23396 40S ribosomal protein S3 2 +P25311 Zinc-alpha-2-glycoprotein 15 +P25705 ATP synthase subunit alpha, mitochondrial 2 +P25788 Proteasome subunit alpha type-3 3 +P26641 Elongation factor 1-gamma 3 +P27482 Calmodulin-like protein 3 2 +P29508 Serpin B3 20 +P30740 Leukocyte elastase inhibitor 3 +P31025 Lipocalin-1 4 +P31151 Protein S100-A7 9 +P31944 Caspase-14 15 +P31947 14-3-3 protein sigma 9 +P31949 Protein S100-A11 2 +P35579 Myosin-9 8 +P36952 Serpin B5 3 +P40121 Macrophage-capping protein 2 +P40926 Malate dehydrogenase, mitochondrial 3 +P42357 Histidine ammonia-lyase 2 +P47756 F-actin-capping protein subunit beta 2 +P47929 Galectin-7 5 +P48594 Serpin B4 4 +P48637 Glutathione synthetase 2 +P49720 Proteasome subunit beta type-3 2 +P50395 Rab GDP dissociation inhibitor beta 2 +P58107 Epiplakin 5 +P59998 Actin-related protein 2/3 complex subunit 4 2 +P60174 Triosephosphate isomerase 9 +P60842 Eukaryotic initiation factor 4A-I 5 +P61160 Actin-related protein 2 2 +P61626 Lysozyme C 7 +P61916 Epididymal secretory protein E1 2 +P62258 14-3-3 protein epsilon 4 +P62937 Peptidyl-prolyl cis-trans isomerase A 5 +P62987 Ubiquitin-60S ribosomal protein L40 6 +P63104 14-3-3 protein zeta/delta 5 +P63261 Actin, cytoplasmic 2 19 +P68363 Tubulin alpha-1B chain 7 +P68371 Tubulin beta-4B chain 8 +P68871 Hemoglobin subunit beta 4 +P80188 Neutrophil gelatinase-associated lipocalin 3 +Q01469 Fatty acid-binding protein 5, epidermal 15 +Q04695 Keratin, type I cytoskeletal 17 18 +Q06830 Peroxiredoxin-1 9 +Q08188 Protein-glutamine gamma-glutamyltransferase E 12 +Q13867 Bleomycin hydrolase 5 +Q14574 Desmocollin-3 4 +Q15149 Plectin 15 +Q15828 Cystatin-M 3 +Q5T750 Skin-specific protein 32 4 +Q6KB66 Keratin, type II cytoskeletal 80 13 +Q6P4A8 Phospholipase B-like 1 5 +Q6UWP8 Suprabasin 4 +Q86YZ3 Hornerin 11 +Q8N1N4 Keratin, type II cytoskeletal 78 18 +Q8TAX7 Mucin-7 2 +Q8WVV4 Protein POF1B 8 +Q92820 Gamma-glutamyl hydrolase 5 +Q96DA0 Zymogen granule protein 16 homolog B 5 +Q96FX8 p53 apoptosis effector related to PMP-22 2 +Q96P63 Serpin B12 9 +Q9C075 Keratin, type I cytoskeletal 23 4 +Q9HCY8 Protein S100-A14 3 +Q9NZH8 Interleukin-36 gamma 6 +Q9NZT1 Calmodulin-like protein 5 8 +Q9UGM3 Deleted in malignant brain tumors 1 protein 6 +Q9UI42 Carboxypeptidase A4 6 +Q9UIV8 Serpin B13 2 +Q9Y6R7 IgGFc-binding protein 3