Mercurial > repos > greg > lrn_risk
diff lrn_risk.py @ 1:f98c92618a6c draft
Uploaded
author | greg |
---|---|
date | Fri, 28 Apr 2023 15:06:29 +0000 |
parents | 99e04eba4033 |
children | 8dc6d4aa17ec |
line wrap: on
line diff
```diff
--- a/lrn_risk.py	Thu Apr 27 19:22:36 2023 +0000
+++ b/lrn_risk.py	Fri Apr 28 15:06:29 2023 +0000
@@ -11,15 +11,17 @@
     # get GTDB species
     # assumes there is one genome in the GTDB-Tk output file
     with open(f, 'r') as fh:
-        for line in fh:
-            if line.find('user_genome') < 0:
-                items = line.split('\t')
-                tax = items[1].strip()
-                tax = tax.split(';')[-1].strip()
-                # split on GTDB species tag
-                tax = tax.split('s__')[1].strip()
-                if len(tax) == 0:
-                    tax = '(Unknown Species)'
+        for i, line in enumerate(fh):
+            if i == 0:
+                # Skip header.
+                continue
+            items = line.split('\t')
+            tax = items[1].strip()
+            tax = tax.split(';')[-1].strip()
+            # split on GTDB species tag
+            tax = tax.split('s__')[1].strip()
+            if len(tax) == 0:
+                tax = '(Unknown Species)'
     return tax
```