comparison get_chrom_sizes/calculating_chrom.sizes.py @ 18:06c2eaff31e5 draft

Uploaded
author jackcurragh
date Tue, 17 May 2022 14:00:40 +0000
parents f87a4b64e6ef
children cc021a23613b
comparison
equal deleted inserted replaced
17:f87a4b64e6ef 18:06c2eaff31e5
2 # adapted from https://bioexpressblog.wordpress.com/2014/04/15/calculate-length-of-all-sequences-in-an-multi-fasta-file/ 2 # adapted from https://bioexpressblog.wordpress.com/2014/04/15/calculate-length-of-all-sequences-in-an-multi-fasta-file/
3 from sys import argv 3 from sys import argv
4 # python calculating_chrom.sizes.py genome_input.fa output.chrom.sizes 4 # python calculating_chrom.sizes.py genome_input.fa output.chrom.sizes
5 fasta_source = str(argv[1]) 5 fasta_source = str(argv[1])
6 prefix = str(argv[2]) 6 prefix = str(argv[2])
7 output = str(argv[3]) 7 # output = str(argv[3])
8 genome = str(argv[4]) 8 genome = str(argv[3])
9 builtin = str(argv[5]) 9 builtin = str(argv[4])
10 10
11 # genome = 'test-data/test.fasta' 11 # genome = 'test-data/test.fasta'
12 # output = "test-data/test_chrom.sizes" 12 # output = "test-data/test_chrom.sizes"
13 if fasta_source == 'builtin': 13 if fasta_source == 'builtin':
14 genome = builtin 14 genome = builtin
15 15
16 chromSizesoutput = open(output,"w") 16 chromSizesoutput = open('output',"w")
17 17
18 records = [] 18 records = []
19 record = False 19 record = False
20 for line in open(genome, 'r').readlines(): 20 for line in open(genome, 'r').readlines():
21 if line[0] == '>': 21 if line[0] == '>':