compare_humann2_output: compare_humann2

comparison compare_humann2_output.py @ 2:05766022dfc4 draft

"planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit dc55dc3b5275d1d6aac390698c0c6e0ab8fbf2f7"

author	bebatut
date	Mon, 14 Sep 2020 13:50:30 +0000
parents	9959fa526f1a
children	eaa95ea1195c

comparison

equal deleted inserted replaced

-:c1aca37cb1fc
+:05766022dfc4
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import sys
-import os
 import argparse
-import re
def extract_abundances(fp, nb_charact_to_extract):
    """Read a tab-separated abundance table and rank its characteristics.

    The first line of the file is treated as a header and skipped. For every
    other non-blank line, the first tab-separated field is used as the
    characteristic identifier and the second one is parsed as a float
    abundance.

    Args:
        fp: path of the abundance file to read.
        nb_charact_to_extract: maximum number of identifiers to return in
            the ranking; zero or a negative value yields an empty ranking.

    Returns:
        A tuple ``(abundances, more_abund_charact)``. ``abundances`` maps each
        identifier to 100 times the value found in the file.
        ``more_abund_charact`` lists the identifiers with the highest
        abundances, in decreasing order of abundance.

    Raises:
        IndexError: if a non-blank data line has fewer than two fields.
        ValueError: if the second field cannot be parsed as a float.
    """
    abundances = {}
    with open(fp, 'r') as abundance_f:
        # Skip the header without loading the whole file in memory.
        next(abundance_f, None)
        for line in abundance_f:
            # Strip only the line terminator: the last line may not have
            # one, and slicing off a character would corrupt its value.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            split_line = line.split('\t')
            abundances[split_line[0]] = 100 * float(split_line[1])

    # Rank on the stored (scaled) values so every comparison uses the same
    # unit, whatever the order of the rows in the input file.
    ranked = sorted(abundances, key=abundances.get, reverse=True)
    return abundances, ranked[:max(nb_charact_to_extract, 0)]
 def format_characteristic_name(all_name):
     """Split a characteristic label into an identifier and a cleaned name.

     The label is cut at its first colon: the text before it is the
     identifier, the text after it (minus one separating space, if any) is
     the name. A label without a colon is returned as the identifier with an
     empty name.

     The name is then cleaned: ``/`` and ``-`` become spaces, apostrophes are
     removed, and the first parenthesised group is dropped.

     Args:
         all_name: the full label, e.g. ``"ID: some name"``.

     Returns:
         A tuple ``(charact_id, char_name)`` of strings.
     """
     # partition() keeps everything after the first colon, so names that
     # themselves contain a colon are not truncated.
     charact_id, _, char_name = all_name.partition(':')
     # Only drop the separator when it is really a space.
     if char_name.startswith(' '):
         char_name = char_name[1:]

     char_name = char_name.replace('/', ' ')
     char_name = char_name.replace('-', ' ')
     char_name = char_name.replace("'", '')

     open_bracket = char_name.find('(')
     if open_bracket != -1:
         # Look for the closing bracket after the opening one; a stray ')'
         # earlier in the name must not be used as the end of the group.
         close_bracket = char_name.find(')', open_bracket)
         if close_bracket != -1:
             char_name = (
                 char_name[:open_bracket] + char_name[close_bracket + 1:])
     return charact_id, char_name
def write_more_abundant_charat(abundances, more_abund_charact, output_fp):
    """Write a table of the selected characteristics across all samples.

    The output is tab-separated: a header ``id``, ``name`` and one column per
    key of ``abundances``, then one row per entry of ``more_abund_charact``.
    A characteristic missing from a sample is reported as ``0``.

    Args:
        abundances: mapping of sample name to a mapping of characteristic
            label to abundance.
        more_abund_charact: iterable of characteristic labels to report.
        output_fp: path of the file to write.
    """
    samples = list(abundances)
    with open(output_fp, 'w') as out:
        out.write('id\tname\t%s\n' % '\t'.join(samples))
        for label in more_abund_charact:
            ident, name = format_characteristic_name(label)
            cells = [ident, name]
            for sample in samples:
                cells.append('%s' % abundances[sample].get(label, 0))
            out.write('\t'.join(cells) + '\n')
def extract_similar_characteristics(abund, sim_output_fp, output_files):
    """Report the characteristics shared by all samples and the others.

    Two kinds of tab-separated files are written:

    * ``sim_output_fp``: one row per characteristic found in every sample,
      with columns ``id``, ``name`` and one abundance column per sample.
    * ``output_files[i]``: for the i-th sample of ``abund``, one row per
      characteristic of that sample that is not shared by all samples, with
      columns ``id``, ``name`` and ``abundances``.

    Rows are written in sorted label order so that the output is
    reproducible. Summary counts are printed to standard output.

    Args:
        abund: mapping of sample name to a mapping of characteristic label
            to abundance.
        sim_output_fp: path of the file for the shared characteristics.
        output_files: sequence of paths, one per sample, in the iteration
            order of ``abund``.

    Returns:
        The set of characteristic labels present in every sample (empty when
        ``abund`` has no sample).

    Raises:
        IndexError: if ``output_files`` has fewer entries than ``abund``.
    """
    abund_keys = list(abund)
    if abund_keys:
        sim_characteristics = set(abund[abund_keys[0]])
        for sample in abund_keys[1:]:
            sim_characteristics.intersection_update(abund[sample])
    else:
        # No sample at all: nothing can be shared.
        sim_characteristics = set()
    print('Similar between all samples: %s' % len(sim_characteristics))

    with open(sim_output_fp, 'w') as sim_output_f:
        sim_output_f.write('id\tname\t%s\n' % '\t'.join(abund_keys))
        for charact in sorted(sim_characteristics):
            charact_id, charact_name = format_characteristic_name(charact)
            row = [charact_id, charact_name]
            row.extend('%s' % abund[sample][charact] for sample in abund_keys)
            sim_output_f.write('\t'.join(row) + '\n')

    print('Specific to samples:')
    for i, sample in enumerate(abund_keys):
        sample_abund = abund[sample]
        specific = set(sample_abund) - sim_characteristics
        # Guard against a sample without any characteristic.
        if sample_abund:
            perc = 100. * len(specific) / len(sample_abund)
        else:
            perc = 0.
        print(' %s' % sample)
        print('    All: %s' % len(sample_abund))
        print('    Number of specific characteristics: %s' % len(specific))
        print('    Percentage of specific characteristics: %s' % perc)

        relative_abundance = 0
        with open(output_files[i], 'w') as output_f:
            output_f.write('id\tname\tabundances\n')
            for charact in sorted(specific):
                charact_id, charact_name = format_characteristic_name(charact)
                # Three tab-separated fields, matching the header above.
                output_f.write('%s\t%s\t%s\n' % (
                    charact_id, charact_name, sample_abund[charact]))
                relative_abundance += sample_abund[charact]
        print('    Relative abundance of specific characteristics: %s'
              % relative_abundance)

    return sim_characteristics
 def compare_humann2_output(args):
     """Load every sample and write the comparison tables.

     For each sample name, the matching input file is read with
     ``extract_abundances``. The most abundant characteristics of all
     samples are merged (duplicates removed, first occurrence kept) and
     written with ``write_more_abundant_charat``. The shared and
     sample-specific characteristics are then written with
     ``extract_similar_characteristics``.

     Args:
         args: object with the attributes ``sample_name``,
             ``charact_input_fp``, ``most_abundant_characteristics_to_extract``,
             ``more_abundant_output_fp``, ``similar_output_fp`` and
             ``specific_output_fp``, as produced by the argument parser of
             this script.
     """
     abund = {}
     more_abund_charact = []
     # De-duplicate while keeping first-seen order: going through a set
     # alone would make the row order of the output vary between runs.
     seen = set()

     for i, sample_name in enumerate(args.sample_name):
         abund[sample_name], mac = extract_abundances(
             args.charact_input_fp[i],
             args.most_abundant_characteristics_to_extract)
         for charact in mac:
             if charact not in seen:
                 seen.add(charact)
                 more_abund_charact.append(charact)

     write_more_abundant_charat(
         abund,
         more_abund_charact,
         args.more_abundant_output_fp)
     extract_similar_characteristics(
         abund,
         args.similar_output_fp,
         args.specific_output_fp)
 if __name__ == '__main__':
     # Command-line entry point: declare the options, check that the
     # repeatable options were given the same number of times, then run the
     # comparison.
     cli = argparse.ArgumentParser()
     for repeated in ('--sample_name', '--charact_input_fp'):
         cli.add_argument(repeated, required=True, action='append')
     cli.add_argument(
         '--most_abundant_characteristics_to_extract',
         required=True,
         type=int)
     for single in ('--more_abundant_output_fp', '--similar_output_fp'):
         cli.add_argument(single, required=True)
     cli.add_argument('--specific_output_fp', required=True, action='append')
     args = cli.parse_args()

     # Each of these options must be given once per --sample_name.
     per_sample = (
         ('--charact_input_fp', args.charact_input_fp),
         ('--specific_output_fp', args.specific_output_fp),
     )
     for option, values in per_sample:
         if len(args.sample_name) != len(values):
             raise ValueError(
                 "Same number of values (in same order) are expected for "
                 "--sample_name and " + option)

     compare_humann2_output(args)

Mercurial > repos > bebatut > compare_humann2_output

comparison compare_humann2_output.py @ 2:05766022dfc4 draft