view fasta_report_sequence_lengths.py @ 0:4b01f0d7b350 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sshmm/ commit b578a90031fd7061fbdaef48b6a66d895ac077c3
author rnateam
date Fri, 06 Jul 2018 09:01:40 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python

import sys

"""
Input: FASTA file
Output: Print sequence ID and corresponding sequence length

Example output:
chr1	248956422
chr2	242193529
chr3	198295559
...

"""

# Placeholder ID used for residues that appear before the first ">" header
# (kept so that output for such malformed input stays what it always was).
_PLACEHOLDER_ID = "id"


def iter_sequence_lengths(lines):
    """Yield ``(sequence_id, sequence_length)`` for each FASTA record.

    ``lines`` is any iterable of text lines (e.g. an open file handle).
    The sequence ID is the full header line without the leading ">" and
    without surrounding whitespace.  The length is the number of characters
    on the record's sequence lines after stripping surrounding whitespace
    from each line.

    Records are yielded in file order.  A header that is followed by no
    sequence characters is reported with length 0 rather than being dropped.
    Sequence characters found before the first header are reported under
    the placeholder ID "id".
    """
    seq_id = None  # None means "no header seen yet".
    seq_len = 0
    for line in lines:
        if line.startswith(">"):
            # A new header closes the previous record.  Nothing is emitted
            # if this is the first header and no stray residues preceded it.
            if seq_id is not None or seq_len:
                yield (_PLACEHOLDER_ID if seq_id is None else seq_id), seq_len
            seq_id = line[1:].strip()
            seq_len = 0
        else:
            seq_len += len(line.strip())
    # Emit the last record (there is no following header to close it).
    if seq_id is not None or seq_len:
        yield (_PLACEHOLDER_ID if seq_id is None else seq_id), seq_len


def main(argv=None):
    """Print "<sequence ID><TAB><sequence length>" for each FASTA record.

    ``argv`` defaults to ``sys.argv`` and must hold exactly the program name
    and the FASTA file path; otherwise the process exits with a usage
    message (written to stderr, exit status 1).
    """
    if argv is None:
        argv = sys.argv

    # Check input.
    if len(argv) != 2:
        # sys.exit() instead of the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit("Usage: fasta_report_sequence_lengths.py <fasta_file>")

    # Go through FASTA file, report sequence lengths.
    with open(argv[1]) as fasta:
        for seq_id, seq_len in iter_sequence_lengths(fasta):
            print("%s\t%i" % (seq_id, seq_len))


if __name__ == "__main__":
    main()