Mercurial > repos > galaxyp > filter_by_fasta_ids
comparison filter_by_fasta_ids.py @ 5:dff7df6fcab5 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/filter_by_fasta_ids commit f608f41d45664d04d3124c6ebc791bf8a566b3c5
author | galaxyp |
---|---|
date | Wed, 15 May 2019 03:18:11 -0400 |
parents | cd22452edec2 |
children |
comparison
equal
deleted
inserted
replaced
4:cd22452edec2 | 5:dff7df6fcab5 |
---|---|
59 parser.add_argument('-i', required=True, help='Path to input FASTA file') | 59 parser.add_argument('-i', required=True, help='Path to input FASTA file') |
60 parser.add_argument('-o', required=True, help='Path to output FASTA file') | 60 parser.add_argument('-o', required=True, help='Path to output FASTA file') |
61 parser.add_argument('-d', help='Path to discarded entries file') | 61 parser.add_argument('-d', help='Path to discarded entries file') |
62 header_criteria = parser.add_mutually_exclusive_group() | 62 header_criteria = parser.add_mutually_exclusive_group() |
63 header_criteria.add_argument('--id_list', help='Path to the ID list file') | 63 header_criteria.add_argument('--id_list', help='Path to the ID list file') |
64 parser.add_argument('--pattern', help='regex earch attern for ID in FASTA entry') | 64 parser.add_argument('--pattern', help='regex search pattern for ID in FASTA entry') |
65 header_criteria.add_argument('--header_regexp', help='Regular expression pattern the header should match') | 65 header_criteria.add_argument('--header_regexp', help='Regular expression pattern the header should match') |
66 sequence_criteria = parser.add_mutually_exclusive_group() | 66 sequence_criteria = parser.add_mutually_exclusive_group() |
67 sequence_criteria.add_argument('--min_length', type=int, help='Minimum sequence length') | 67 sequence_criteria.add_argument('--min_length', type=int, help='Minimum sequence length') |
68 sequence_criteria.add_argument('--sequence_regexp', help='Regular expression pattern the header should match') | 68 sequence_criteria.add_argument('--sequence_regexp', help='Regular expression pattern the sequence should match') |
69 parser.add_argument('--max_length', type=int, help='Maximum sequence length') | 69 parser.add_argument('--max_length', type=int, help='Maximum sequence length') |
70 parser.add_argument('--dedup', action='store_true', default=False, help='Whether to remove duplicate sequences') | 70 parser.add_argument('--dedup', action='store_true', default=False, help='Whether to remove duplicate sequences') |
71 options = parser.parse_args() | 71 options = parser.parse_args() |
72 | 72 |
73 if options.pattern: | 73 if options.pattern: |
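… (file lines 74–86 are not shown in this comparison) | … (file lines 74–86 are not shown in this comparison) |
87 if options.dedup: | 87 if options.dedup: |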
88 used_sequences = set() | 88 used_sequences = set() |
89 work_summary['duplicates'] = 0 | 89 work_summary['duplicates'] = 0 |
90 | 90 |
91 if options.id_list: | 91 if options.id_list: |
92 targets = [] | 92 targets = set() |
93 with open(options.id_list) as f_target: | 93 with open(options.id_list) as f_target: |
94 for line in f_target: | 94 for line in f_target: |
95 targets.append(line.strip().upper()) | 95 targets.add(line.strip().upper()) |
96 work_summary['wanted'] = len(targets) | 96 work_summary['wanted'] = len(targets) |
97 | 97 |
98 homd_db = FASTAReader_gen(options.i) | 98 homd_db = FASTAReader_gen(options.i) |
99 if options.d: | 99 if options.d: |
100 discarded = open(options.d, 'w') | 100 discarded = open(options.d, 'w') |