comparison fasta_report_sequence_lengths.py @ 0:4b01f0d7b350 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sshmm/ commit b578a90031fd7061fbdaef48b6a66d895ac077c3
author rnateam
date Fri, 06 Jul 2018 09:01:40 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4b01f0d7b350
#!/usr/bin/env python
"""
Report the length of every sequence in a FASTA file.

Input: FASTA file
Output: Print sequence ID and corresponding sequence length
        (tab-separated, one record per line)

Example output:
chr1    248956422
chr2    242193529
chr3    198295559
...

"""

import sys


def iter_sequence_lengths(lines):
    """Yield ``(sequence_id, length)`` for each record in FASTA *lines*.

    *lines* is any iterable of text lines (an open file works).  A line
    starting with ">" opens a new record; its ID is the rest of that line
    with surrounding whitespace removed.  Every other line adds its
    stripped length to the current record.

    Records whose accumulated length is zero are not yielded.  Residues
    that appear before the first header are reported under the
    placeholder ID "id".
    """
    seq_id = "id"
    seq_len = 0
    for line in lines:
        if line.startswith(">"):
            # A new header closes the previous record; emit it only if it
            # actually collected residues.
            if seq_len:
                yield seq_id, seq_len
            seq_id = line[1:].strip()
            seq_len = 0
        else:
            seq_len += len(line.strip())
    # The last record has no following header to flush it.
    if seq_len:
        yield seq_id, seq_len


def main(argv=None):
    """Print ``<id>\\t<length>`` for each sequence in the given FASTA file.

    *argv* defaults to ``sys.argv``; it must hold the program name and
    exactly one FASTA path.  Otherwise the usage message is written to
    stderr and the process exits with a non-zero status.
    """
    if argv is None:
        argv = sys.argv
    # Check input.
    if len(argv) != 2:
        # sys.exit rather than the site-provided exit(), which is intended
        # for the interactive shell and is not guaranteed to exist.
        sys.exit("Usage: fasta_report_sequence_lengths.py <fasta_file>")

    fasta_file = argv[1]

    # Go through FASTA file, extract sequence lengths.
    with open(fasta_file) as f:
        for seq_id, seq_len in iter_sequence_lengths(f):
            print("%s\t%i" % (seq_id, seq_len))


if __name__ == "__main__":
    main()