comparison filter_by_fasta_ids.py @ 5:dff7df6fcab5 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/filter_by_fasta_ids commit f608f41d45664d04d3124c6ebc791bf8a566b3c5
author galaxyp
date Wed, 15 May 2019 03:18:11 -0400
parents cd22452edec2
children
comparison
equal deleted inserted replaced
4:cd22452edec2 5:dff7df6fcab5
59 parser.add_argument('-i', required=True, help='Path to input FASTA file') 59 parser.add_argument('-i', required=True, help='Path to input FASTA file')
60 parser.add_argument('-o', required=True, help='Path to output FASTA file') 60 parser.add_argument('-o', required=True, help='Path to output FASTA file')
61 parser.add_argument('-d', help='Path to discarded entries file') 61 parser.add_argument('-d', help='Path to discarded entries file')
62 header_criteria = parser.add_mutually_exclusive_group() 62 header_criteria = parser.add_mutually_exclusive_group()
63 header_criteria.add_argument('--id_list', help='Path to the ID list file') 63 header_criteria.add_argument('--id_list', help='Path to the ID list file')
64 parser.add_argument('--pattern', help='regex earch attern for ID in FASTA entry') 64 parser.add_argument('--pattern', help='regex search pattern for ID in FASTA entry')
65 header_criteria.add_argument('--header_regexp', help='Regular expression pattern the header should match') 65 header_criteria.add_argument('--header_regexp', help='Regular expression pattern the header should match')
66 sequence_criteria = parser.add_mutually_exclusive_group() 66 sequence_criteria = parser.add_mutually_exclusive_group()
67 sequence_criteria.add_argument('--min_length', type=int, help='Minimum sequence length') 67 sequence_criteria.add_argument('--min_length', type=int, help='Minimum sequence length')
68 sequence_criteria.add_argument('--sequence_regexp', help='Regular expression pattern the header should match') 68 sequence_criteria.add_argument('--sequence_regexp', help='Regular expression pattern the sequence should match')
69 parser.add_argument('--max_length', type=int, help='Maximum sequence length') 69 parser.add_argument('--max_length', type=int, help='Maximum sequence length')
70 parser.add_argument('--dedup', action='store_true', default=False, help='Whether to remove duplicate sequences') 70 parser.add_argument('--dedup', action='store_true', default=False, help='Whether to remove duplicate sequences')
71 options = parser.parse_args() 71 options = parser.parse_args()
72 72
73 if options.pattern: 73 if options.pattern:
87 if options.dedup: 87 if options.dedup:
88 used_sequences = set() 88 used_sequences = set()
89 work_summary['duplicates'] = 0 89 work_summary['duplicates'] = 0
90 90
91 if options.id_list: 91 if options.id_list:
92 targets = [] 92 targets = set()
93 with open(options.id_list) as f_target: 93 with open(options.id_list) as f_target:
94 for line in f_target: 94 for line in f_target:
95 targets.append(line.strip().upper()) 95 targets.add(line.strip().upper())
96 work_summary['wanted'] = len(targets) 96 work_summary['wanted'] = len(targets)
97 97
98 homd_db = FASTAReader_gen(options.i) 98 homd_db = FASTAReader_gen(options.i)
99 if options.d: 99 if options.d:
100 discarded = open(options.d, 'w') 100 discarded = open(options.d, 'w')