Mercurial > repos > artbio > cherry_pick_fasta
comparison cherry_pick_fasta.py @ 6:d8fa616a228a draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit 8384f1bb5378232bbe78319e06a3522674c7c1fe"
author | artbio |
---|---|
date | Fri, 08 Apr 2022 16:56:42 +0000 |
parents | ba6c4aeb22ea |
children | 6c0aefd9fee3 |
comparison
equal
deleted
inserted
replaced
5:144b856e926c | 6:d8fa616a228a |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 # Cherry pick of fasta sequences satisfying a query string in their header/name | |
4 import argparse | 1 import argparse |
5 | |
6 from Bio import SeqIO | |
7 | 2 |
8 | 3 |
9 def Parser(): | 4 def Parser(): |
10 the_parser = argparse.ArgumentParser( | 5 the_parser = argparse.ArgumentParser( |
11 description='Cherry pick fasta sequences') | 6 description='Cherry pick fasta sequences') |
66 querylist.append(line.rstrip()) | 61 querylist.append(line.rstrip()) |
67 return querylist | 62 return querylist |
68 | 63 |
69 | 64 |
70 def buid_fasta_dict(fasta): | 65 def buid_fasta_dict(fasta): |
71 seq_dict = {rec.id: rec.seq for rec in SeqIO.parse(fasta, "fasta")} | 66 seq_dict = dict() |
67 f = open(fasta, 'r') | |
68 content = f.read() | |
69 segmented_content = content.split('>') | |
70 segmented_content = segmented_content[1:] | |
71 for seq in segmented_content: | |
72 sliced_seq = seq.split('\n') | |
73 header = sliced_seq[0] | |
74 sliced_seq = sliced_seq[1:] | |
75 sequence = ''.join(sliced_seq) | |
76 seq_dict[header] = sequence | |
72 return seq_dict | 77 return seq_dict |
73 | 78 |
74 | 79 |
75 def write_fasta_result(fasta_dict, file): | 80 def write_fasta_result(fasta_dict, file): |
76 line_length = 60 | 81 line_length = 60 |