comparison get_chrom_sizes/calculating_chrom.sizes.py @ 3:cfdf764b9226 draft

Uploaded
author jackcurragh
date Thu, 21 Apr 2022 10:39:18 +0000
parents 27f3669eda60
children c6a297d05c8e
comparison
equal deleted inserted replaced
2:a244b29ce89a 3:cfdf764b9226
1 # input a genome file and return a file genome.chrom.sizes to be associated with the custom build (or just have it as an output to be used later in the history). 1 # input a genome file and return a file genome.chrom.sizes to be associated with the custom build (or just have it as an output to be used later in the history).
2 # adapted from https://bioexpressblog.wordpress.com/2014/04/15/calculate-length-of-all-sequences-in-an-multi-fasta-file/ 2 # adapted from https://bioexpressblog.wordpress.com/2014/04/15/calculate-length-of-all-sequences-in-an-multi-fasta-file/
3 from sys import argv 3 from sys import argv
4 # python calculating_chrom.sizes.py genome_input.fa output.chrom.sizes 4 # python calculating_chrom.sizes.py genome_input.fa prefix output.chrom.sizes
5 genome = str(argv[1]) 5 genome = str(argv[1])
6 output = str(argv[2]) 6 prefix = str(argv[2])
7 output = str(argv[3])
7 # genome = 'test-data/test.fasta' 8 # genome = 'test-data/test.fasta'
8 # output = "test-data/test_chrom.sizes" 9 # output = "test-data/test_chrom.sizes"
9 10
10 chromSizesoutput = open(output,"w") 11 chromSizesoutput = open(output,"w")
11 12
18 record = [line.strip("\n").split(' ')[0][1:], 0] 19 record = [line.strip("\n").split(' ')[0][1:], 0]
19 20
20 else: 21 else:
21 sequence = line.strip('\n') 22 sequence = line.strip('\n')
22 record[1] += len(sequence) 23 record[1] += len(sequence)
23 24
25 if record not in records:
26 records.append(record)
27
24 for seq_record in records: 28 for seq_record in records:
25 output_line = '%s\t%i\n' % (seq_record[0], seq_record[1]) 29 output_line = f"{prefix}{seq_record[0]}\t{seq_record[1]}\n"
26 chromSizesoutput.write(output_line) 30 chromSizesoutput.write(output_line)
27 31
28 chromSizesoutput.close() 32 chromSizesoutput.close()