Mercurial > repos > rnateam > sshmm
diff fasta_report_sequence_lengths.py @ 0:4b01f0d7b350 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sshmm/ commit b578a90031fd7061fbdaef48b6a66d895ac077c3
author | rnateam |
---|---|
date | Fri, 06 Jul 2018 09:01:40 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""
Report the length of every sequence in a FASTA file.

Input: FASTA file
Output: Print sequence ID and corresponding sequence length
        (one record per line, ID and length separated by a tab)

Example output:
chr1 248956422
chr2 242193529
chr3 198295559
...

"""

import sys

# Placeholder ID reported for residues that appear before the first header.
DEFAULT_SEQ_ID = "id"


def iter_sequence_lengths(lines):
    """Yield (sequence ID, sequence length) pairs from FASTA-formatted lines.

    lines: iterable of text lines, e.g. an open file handle.

    The ID is the whole header line without the leading ">" and without
    surrounding whitespace. The length is the sum of the whitespace-stripped
    lengths of the lines that follow the header.

    Records whose length is zero are not yielded, and residues found before
    the first header are reported under the placeholder ID "id".
    """
    seq_id = DEFAULT_SEQ_ID
    seq_len = 0
    for line in lines:
        if line.startswith(">"):
            # A new header closes the record collected so far.
            if seq_len:
                yield seq_id, seq_len
            seq_id = line[1:].strip()
            seq_len = 0
        else:
            seq_len += len(line.strip())
    # The final record has no following header to close it.
    if seq_len:
        yield seq_id, seq_len


def main(argv=None):
    """Print "<ID><TAB><length>" for each sequence of the given FASTA file.

    argv: full argument vector including the program name; defaults to
    sys.argv.

    Raises SystemExit carrying the usage message unless exactly one
    argument (the FASTA file path) follows the program name.
    """
    if argv is None:
        argv = sys.argv

    # Check input.
    if len(argv) != 2:
        # sys.exit rather than the bare exit() helper, which is injected by
        # the site module and is not guaranteed to exist.
        sys.exit("Usage: fasta_report_sequence_lengths.py <fasta_file>")

    # Go through FASTA file, report sequence lengths.
    with open(argv[1]) as fasta:
        for seq_id, seq_len in iter_sequence_lengths(fasta):
            print("%s\t%i" % (seq_id, seq_len))


if __name__ == "__main__":
    main()