changeset 31:944e15aa970a draft

Uploaded
author bimib
date Tue, 15 Oct 2019 12:22:43 -0400
parents e88efefbd015
children b795e3e163e0
files Desktop/Marea/marea.py Desktop/Marea/marea.xml Desktop/Marea/marea_cluster.py Desktop/Marea/marea_cluster.xml Desktop/Marea/marea_macros.xml Marea/marea.py Marea/marea.xml Marea/marea_cluster.py Marea/marea_cluster.xml
diffstat 9 files changed, 87 insertions(+), 1769 deletions(-) [+]
line wrap: on
line diff
--- a/Desktop/Marea/marea.py	Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,861 +0,0 @@
-from __future__ import division
-import sys
-import pandas as pd
-import itertools as it
-import scipy.stats as st
-import collections
-import lxml.etree as ET
-import shutil
-import pickle as pk
-import math
-import os
-import argparse
-from svglib.svglib import svg2rlg
-from reportlab.graphics import renderPDF
-
-########################## argparse ##########################################
-
-def process_args(args):
-    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
-                                     description = 'process some value\'s'+
-                                     ' genes to create a comparison\'s map.')
-    parser.add_argument('-rs', '--rules_selector', 
-                        type = str,
-                        default = 'HMRcore',
-                        choices = ['HMRcore', 'Recon', 'Custom'], 
-                        help = 'chose which type of dataset you want use')
-    parser.add_argument('-cr', '--custom',
-                        type = str,
-                        help='your dataset if you want custom rules')
-    parser.add_argument('-na', '--names', 
-                        type = str,
-                        nargs = '+', 
-                        help = 'input names')
-    parser.add_argument('-n', '--none',
-                        type = str,
-                        default = 'true',
-                        choices = ['true', 'false'], 
-                        help = 'compute Nan values')
-    parser.add_argument('-pv' ,'--pValue', 
-                        type = float, 
-                        default = 0.05, 
-                        help = 'P-Value threshold (default: %(default)s)')
-    parser.add_argument('-fc', '--fChange', 
-                        type = float, 
-                        default = 1.5, 
-                        help = 'Fold-Change threshold (default: %(default)s)')
-    parser.add_argument('-td', '--tool_dir',
-                        type = str,
-                        required = True,
-                        help = 'your tool directory')
-    parser.add_argument('-op', '--option', 
-                        type = str, 
-                        choices = ['datasets', 'dataset_class', 'datasets_rasonly'],
-                        help='dataset or dataset and class')
-    parser.add_argument('-ol', '--out_log', 
-                        help = "Output log")    
-    parser.add_argument('-ids', '--input_datas', 
-                        type = str,
-                        nargs = '+', 
-                        help = 'input datasets')
-    parser.add_argument('-id', '--input_data',
-                        type = str,
-                        help = 'input dataset')
-    parser.add_argument('-ic', '--input_class', 
-                        type = str, 
-                        help = 'sample group specification')
-    parser.add_argument('-cm', '--custom_map', 
-                        type = str, 
-                        help = 'custom map')
-    parser.add_argument('-yn', '--yes_no', 
-                        type = str,
-                        choices = ['yes', 'no'],
-                        help = 'if make or not custom map')
-    parser.add_argument('-gs', '--generate_svg',
-                        type = str,
-                        default = 'true',
-                        choices = ['true', 'false'], 
-                        help = 'generate svg map')
-    parser.add_argument('-gp', '--generate_pdf',
-                        type = str,
-                        default = 'true',
-                        choices = ['true', 'false'], 
-                        help = 'generate pdf map')
-    parser.add_argument('-gr', '--generate_ras',
-                        type = str,
-                        default = 'true',
-                        choices = ['true', 'false'],
-                        help = 'generate reaction activity score')
-    parser.add_argument('-sr', '--single_ras_file',  
-                         type = str,              
-                         help = 'file that will contain ras')
-    					
-    args = parser.parse_args()
-    return args
-
-########################### warning ###########################################
-
-def warning(s):
-    args = process_args(sys.argv)
-    with open(args.out_log, 'a') as log:
-            log.write(s)
-            
-############################ dataset input ####################################
-
-def read_dataset(data, name):
-    try:
-        dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
-    except pd.errors.EmptyDataError:
-        sys.exit('Execution aborted: wrong format of ' + name + '\n')
-    if len(dataset.columns) < 2:
-        sys.exit('Execution aborted: wrong format of ' + name + '\n')
-    return dataset
-
-############################ dataset name #####################################
-
-def name_dataset(name_data, count):
-    if str(name_data) == 'Dataset':
-        return str(name_data) + '_' + str(count)
-    else:
-        return str(name_data)
-    
-############################ load id e rules ##################################
-
-def load_id_rules(reactions):
-    ids, rules = [], []
-    for key, value in reactions.items():
-            ids.append(key)
-            rules.append(value)
-    return (ids, rules)
-
-############################ check_methods ####################################
-
-def gene_type(l, name):
-    if check_hgnc(l):
-        return 'hugo_id'
-    elif check_ensembl(l):
-        return 'ensembl_gene_id'
-    elif check_symbol(l):
-        return 'symbol'
-    elif check_entrez(l):
-        return 'entrez_id'
-    else:
-        sys.exit('Execution aborted:\n' +
-                 'gene ID type in ' + name + ' not supported. Supported ID'+
-                 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
-
-def check_hgnc(l):
-    if len(l) > 5:
-        if (l.upper()).startswith('HGNC:'):
-            return l[5:].isdigit()
-        else:
-            return False
-    else:
-        return False
-
-def check_ensembl(l): 
-    if len(l) == 15:
-        if (l.upper()).startswith('ENS'):
-            return l[4:].isdigit()
-        else:  
-            return False 
-    else: 
-        return False 
-
-def check_symbol(l):
-    if len(l) > 0:
-        if l[0].isalpha() and l[1:].isalnum():
-            return True
-        else:
-            return False
-    else:
-        return False
-
-def check_entrez(l): 
-    if len(l) > 0:
-        return l.isdigit()
-    else: 
-        return False
-
-def check_bool(b):
-    if b == 'true':
-        return True
-    elif b == 'false':
-        return False
-    
-############################ resolve_methods ##################################
-
-def replace_gene_value(l, d):
-    tmp = []
-    err = []
-    while l:
-        if isinstance(l[0], list):
-            tmp_rules, tmp_err = replace_gene_value(l[0], d)
-            tmp.append(tmp_rules)
-            err.extend(tmp_err)
-        else:
-            value = replace_gene(l[0], d)
-            tmp.append(value)
-            if value == None:
-                err.append(l[0])
-        l = l[1:]
-    return (tmp, err)
-
-def replace_gene(l, d):
-    if l =='and' or l == 'or':
-        return l
-    else:
-        value = d.get(l, None)
-        if not(value == None or isinstance(value, (int, float))):
-            sys.exit('Execution aborted: ' + value + ' value not valid\n')
-        return value
-
-def computes(val1, op, val2, cn):
-    if val1 != None and val2 != None:
-        if op == 'and':
-            return min(val1, val2)
-        else:
-            return val1 + val2
-    elif op == 'and':
-        if cn is True:
-            if val1 != None:
-                return val1
-            elif val2 != None:
-                return val2
-            else:
-                return None
-        else:
-            return None
-    else:
-        if val1 != None:
-            return val1
-        elif val2 != None:
-            return val2
-        else:
-            return None
-
-def control(ris, l, cn):
-    if len(l) == 1:
-        if isinstance(l[0], (float, int)) or l[0] == None:
-            return l[0]
-        elif isinstance(l[0], list):
-            return control(None, l[0], cn)
-        else:
-            return False
-    elif len(l) > 2:
-        return control_list(ris, l, cn)
-    else:
-        return False
-
-def control_list(ris, l, cn):
-    while l:
-        if len(l) == 1:
-            return False
-        elif (isinstance(l[0], (float, int)) or
-              l[0] == None) and l[1] in ['and', 'or']:
-            if isinstance(l[2], (float, int)) or l[2] == None:
-                ris = computes(l[0], l[1], l[2], cn)            
-            elif isinstance(l[2], list):
-                tmp = control(None, l[2], cn)
-                if tmp is False:
-                    return False
-                else:
-                    ris = computes(l[0], l[1], tmp, cn)
-            else:
-                return False
-            l = l[3:]
-        elif l[0] in ['and', 'or']:
-            if isinstance(l[1], (float, int)) or l[1] == None:
-                ris = computes(ris, l[0], l[1], cn)
-            elif isinstance(l[1], list):
-                tmp = control(None,l[1], cn)
-                if tmp is False:
-                    return False
-                else:
-                    ris = computes(ris, l[0], tmp, cn)
-            else:
-                return False
-            l = l[2:]
-        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
-            if isinstance(l[2], (float, int)) or l[2] == None:
-                tmp = control(None, l[0], cn)
-                if tmp is False:
-                    return False
-                else:
-                    ris = computes(tmp, l[1], l[2], cn)
-            elif isinstance(l[2], list):
-                tmp = control(None, l[0], cn)
-                tmp2 = control(None, l[2], cn)
-                if tmp is False or tmp2 is False:
-                    return False
-                else:
-                    ris = computes(tmp, l[1], tmp2, cn)
-            else:
-                return False
-            l = l[3:]
-        else:
-            return False
-    return ris
-
-############################ map_methods ######################################
-
-def fold_change(avg1, avg2):
-    if avg1 == 0 and avg2 == 0:
-        return 0
-    elif avg1 == 0:
-        return '-INF'
-    elif avg2 == 0:
-        return 'INF'
-    else:
-        return math.log(avg1 / avg2, 2)
-    
-def fix_style(l, col, width, dash):
-    tmp = l.split(';')
-    flag_col = False
-    flag_width = False
-    flag_dash = False
-    for i in range(len(tmp)):
-        if tmp[i].startswith('stroke:'):
-            tmp[i] = 'stroke:' + col
-            flag_col = True
-        if tmp[i].startswith('stroke-width:'):
-            tmp[i] = 'stroke-width:' + width
-            flag_width = True
-        if tmp[i].startswith('stroke-dasharray:'):
-            tmp[i] = 'stroke-dasharray:' + dash
-            flag_dash = True
-    if not flag_col:
-        tmp.append('stroke:' + col)
-    if not flag_width:
-        tmp.append('stroke-width:' + width)
-    if not flag_dash:
-        tmp.append('stroke-dasharray:' + dash)
-    return ';'.join(tmp)
-
-def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C):
-    maxT = 12
-    minT = 2
-    grey = '#BEBEBE'
-    blue = '#0000FF'
-    red = '#E41A1C'
-    for el in core_map.iter():
-        el_id = str(el.get('id'))
-        if el_id.startswith('R_'):
-            tmp = d.get(el_id[2:])
-            if tmp != None:
-                p_val = tmp[0]
-                f_c = tmp[1]
-                if p_val < threshold_P_V:
-                    if not isinstance(f_c, str):
-                        if abs(f_c) < math.log(threshold_F_C, 2):
-                            col = grey
-                            width = str(minT)
-                        else:
-                            if f_c < 0:
-                                col = blue
-                            elif f_c > 0:
-                                col = red
-                            width = str(max((abs(f_c) * maxT) / max_F_C, minT))
-                    else:
-                        if f_c == '-INF':
-                            col = blue
-                        elif f_c == 'INF':
-                            col = red
-                        width = str(maxT)
-                    dash = 'none'
-                else:
-                    dash = '5,5'
-                    col = grey
-                    width = str(minT)
-                el.set('style', fix_style(el.get('style'), col, width, dash))
-    return core_map
-
-############################ make recon #######################################
-
-def check_and_doWord(l):
-    tmp = []
-    tmp_genes = []
-    count = 0
-    while l:
-        if count >= 0:
-            if l[0] == '(':
-                count += 1
-                tmp.append(l[0])
-                l.pop(0)
-            elif l[0] == ')':
-                count -= 1
-                tmp.append(l[0])
-                l.pop(0)
-            elif l[0] == ' ':
-                l.pop(0)
-            else:
-                word = []
-                while l:
-                    if l[0] in [' ', '(', ')']:
-                        break
-                    else:
-                        word.append(l[0])
-                        l.pop(0)
-                word = ''.join(word)
-                tmp.append(word)
-                if not(word in ['or', 'and']):
-                    tmp_genes.append(word)
-        else:
-            return False
-    if count == 0:
-        return (tmp, tmp_genes)
-    else:
-        return False
-
-def brackets_to_list(l):
-    tmp = []
-    while l:
-        if l[0] == '(':
-            l.pop(0)
-            tmp.append(resolve_brackets(l))
-        else:
-            tmp.append(l[0])
-            l.pop(0)
-    return tmp
-
-def resolve_brackets(l):
-    tmp = []
-    while l[0] != ')':
-        if l[0] == '(':
-            l.pop(0)
-            tmp.append(resolve_brackets(l))
-        else:
-            tmp.append(l[0])
-            l.pop(0)
-    l.pop(0)
-    return tmp
-
-def priorityAND(l):
-    tmp = []
-    flag = True
-    while l:
-        if len(l) == 1:
-            if isinstance(l[0], list):
-                tmp.append(priorityAND(l[0]))
-            else:
-                tmp.append(l[0])
-            l = l[1:]
-        elif l[0] == 'or':
-            tmp.append(l[0])
-            flag = False
-            l = l[1:]
-        elif l[1] == 'or':
-            if isinstance(l[0], list): 
-                tmp.append(priorityAND(l[0]))
-            else:
-                tmp.append(l[0])
-            tmp.append(l[1])
-            flag = False
-            l = l[2:]
-        elif l[1] == 'and':
-            tmpAnd = []
-            if isinstance(l[0], list): 
-                tmpAnd.append(priorityAND(l[0]))
-            else:
-                tmpAnd.append(l[0])
-            tmpAnd.append(l[1])
-            if isinstance(l[2], list): 
-                tmpAnd.append(priorityAND(l[2]))
-            else:
-                tmpAnd.append(l[2])
-            l = l[3:]
-            while l:
-                if l[0] == 'and':
-                    tmpAnd.append(l[0])
-                    if isinstance(l[1], list): 
-                        tmpAnd.append(priorityAND(l[1]))
-                    else:
-                        tmpAnd.append(l[1])
-                    l = l[2:]
-                elif l[0] == 'or':
-                    flag = False
-                    break
-            if flag == True: #when there are only AND in list
-                tmp.extend(tmpAnd)
-            elif flag == False:
-                tmp.append(tmpAnd)
-    return tmp
-
-def checkRule(l):
-    if len(l) == 1:
-        if isinstance(l[0], list):
-            if checkRule(l[0]) is False:
-                return False
-    elif len(l) > 2:
-        if checkRule2(l) is False:
-            return False
-    else:
-        return False
-    return True
-
-def checkRule2(l):
-    while l:
-        if len(l) == 1:
-            return False
-        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
-            if checkRule(l[0]) is False:
-                return False
-            if isinstance(l[2], list):
-                if checkRule(l[2]) is False:
-                    return False
-            l = l[3:]
-        elif l[1] in ['and', 'or']:
-            if isinstance(l[2], list):
-                if checkRule(l[2]) is False:
-                    return False
-            l = l[3:]
-        elif l[0] in ['and', 'or']:
-            if isinstance(l[1], list):
-                if checkRule(l[1]) is False:
-                    return False
-            l = l[2:]
-        else:
-            return False
-    return True
-
-def do_rules(rules):
-    split_rules = []
-    err_rules = []
-    tmp_gene_in_rule = []
-    for i in range(len(rules)):
-        tmp = list(rules[i])
-        if tmp:
-            tmp, tmp_genes = check_and_doWord(tmp)
-            tmp_gene_in_rule.extend(tmp_genes)
-            if tmp is False:
-                split_rules.append([])
-                err_rules.append(rules[i])
-            else:
-                tmp = brackets_to_list(tmp)
-                if checkRule(tmp):
-                    split_rules.append(priorityAND(tmp))
-                else:
-                    split_rules.append([])
-                    err_rules.append(rules[i])
-        else:
-            split_rules.append([])
-    if err_rules:
-        warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
-    return (split_rules, list(set(tmp_gene_in_rule)))
-
-def make_recon(data):
-    try:
-        import cobra as cb
-        import warnings
-        with warnings.catch_warnings():
-            warnings.simplefilter('ignore')
-            recon = cb.io.read_sbml_model(data)
-        react = recon.reactions
-        rules = [react[i].gene_reaction_rule for i in range(len(react))]
-        ids = [react[i].id for i in range(len(react))]
-    except cb.io.sbml3.CobraSBMLError:
-        try:
-            data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
-            if len(data.columns) < 2:
-                sys.exit('Execution aborted: wrong format of '+
-                         'custom datarules\n')
-            if not len(data.columns) == 2:
-                warning('Warning: more than 2 columns in custom datarules.\n' +
-                        'Extra columns have been disregarded\n')
-            ids = list(data.iloc[:, 0])
-            rules = list(data.iloc[:, 1])
-        except pd.errors.EmptyDataError:
-            sys.exit('Execution aborted: wrong format of custom datarules\n')
-        except pd.errors.ParserError:
-            sys.exit('Execution aborted: wrong format of custom datarules\n')            
-    split_rules, tmp_genes = do_rules(rules)
-    gene_in_rule = {}
-    for i in tmp_genes:
-        gene_in_rule[i] = 'ok'
-    return (ids, split_rules, gene_in_rule)
-
-############################ gene #############################################
-
-def data_gene(gene, type_gene, name, gene_custom):
-    args = process_args(sys.argv)
-    for i in range(len(gene)):
-        tmp = gene.iloc[i, 0]
-        if tmp.startswith(' ') or tmp.endswith(' '):
-            gene.iloc[i, 0] = (tmp.lstrip()).rstrip()
-    gene_dup = [item for item, count in 
-               collections.Counter(gene[gene.columns[0]]).items() if count > 1]
-    pat_dup = [item for item, count in 
-               collections.Counter(list(gene.columns)).items() if count > 1]
-    if gene_dup:
-        if gene_custom == None:
-            if args.rules_selector == 'HMRcore':
-                gene_in_rule = pk.load(open(args.tool_dir +
-                                            '/local/HMRcore_genes.p', 'rb'))
-            elif args.rules_selector == 'Recon':
-                gene_in_rule = pk.load(open(args.tool_dir +
-                                            '/local/Recon_genes.p', 'rb'))
-            gene_in_rule = gene_in_rule.get(type_gene)
-        else:
-            gene_in_rule = gene_custom
-        tmp = []
-        for i in gene_dup:
-            if gene_in_rule.get(i) == 'ok':
-                tmp.append(i)
-        if tmp:
-            sys.exit('Execution aborted because gene ID '
-                     +str(tmp)+' in '+name+' is duplicated\n')
-    if pat_dup:
-        warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + 
-                '\n')
-    return (gene.set_index(gene.columns[0])).to_dict()
-
-############################ resolve ##########################################
-
-def resolve(genes, rules, ids, resolve_none, name):
-    resolve_rules = {}
-    not_found = []
-    flag = False
-    for key, value in genes.items():
-        tmp_resolve = []
-        for i in range(len(rules)):
-            tmp = rules[i]
-            if tmp:
-                tmp, err = replace_gene_value(tmp, value)
-                if err:
-                    not_found.extend(err)
-                ris = control(None, tmp, resolve_none)
-                if ris is False or ris == None:
-                    tmp_resolve.append(None)
-                else:
-                    tmp_resolve.append(ris)
-                    flag = True
-            else:
-                tmp_resolve.append(None)        
-        resolve_rules[key] = tmp_resolve
-    if flag is False:
-        warning('Warning: no computable score (due to missing gene values)' +
-                'for class ' + name + ', the class has been disregarded\n')
-        return (None, None)
-    return (resolve_rules, list(set(not_found)))
-
-############################ split class ######################################
-
-def split_class(classes, resolve_rules):
-    class_pat = {}
-    for i in range(len(classes)):
-        classe = classes.iloc[i, 1]
-        if not pd.isnull(classe):
-            l = []
-            for j in range(i, len(classes)):
-                if classes.iloc[j, 1] == classe:
-                    pat_id = classes.iloc[j, 0]
-                    tmp = resolve_rules.get(pat_id, None)
-                    if tmp != None:
-                        l.append(tmp)
-                    classes.iloc[j, 1] = None
-            if l:
-                class_pat[classe] = list(map(list, zip(*l)))
-            else:
-                warning('Warning: no sample found in class ' + classe +
-                        ', the class has been disregarded\n')
-    return class_pat
-
-############################ create_ras #######################################
-
-def create_ras (resolve_rules, dataset_name, single_ras):
-
-    if resolve_rules == None:
-        warning("Couldn't generate RAS for current dataset: " + dataset_name)
-
-    for geni in resolve_rules.values():
-        for i, valori in enumerate(geni):
-            if valori == None:
-                geni[i] = 'None'
-                
-    output_ras = pd.DataFrame.from_dict(resolve_rules)
-    output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
-    
-    if (single_ras):
-        args = process_args(sys.argv)
-        text_file = open(args.single_ras_file, "w")
-    else:
-        text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w")
-    
-    text_file.write(output_to_csv)
-    text_file.close()
-
-############################ map ##############################################
-
-def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):
-    args = process_args(sys.argv)
-    if (not class_pat) or (len(class_pat.keys()) < 2):
-        sys.exit('Execution aborted: classes provided for comparisons are ' +
-                 'less than two\n')
-    for i, j in it.combinations(class_pat.keys(), 2):
-        tmp = {}
-        count = 0
-        max_F_C = 0
-        for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
-            try:
-               stat_D, p_value = st.ks_2samp(l1, l2)
-               avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
-               if not isinstance(avg, str):
-                   if max_F_C < abs(avg):
-                       max_F_C = abs(avg)
-               tmp[ids[count]] = [float(p_value), avg]
-               count += 1
-            except (TypeError, ZeroDivisionError):
-               count += 1
-        tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
-        tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
-        tmp_csv = tmp_csv.reset_index()
-        header = ['ids', 'P_Value', 'Log2(fold change)']
-        tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
-        
-        if create_svg or create_pdf:
-            if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
-                                                    and args.yes_no == 'yes'):
-                fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
-                file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
-                with open(file_svg, 'wb') as new_map:
-                    new_map.write(ET.tostring(core_map))
-                    
-                
-                if create_pdf:
-                    file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
-                    renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
-                
-                if not create_svg:
-                    #Ho utilizzato il file svg per generare il pdf, 
-                    #ma l'utente non ne ha richiesto il ritorno, quindi
-                    #lo elimino
-                    os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
-                    
-    return None
-
-############################ MAIN #############################################
-
-def main():
-    args = process_args(sys.argv)
-    
-    create_svg = check_bool(args.generate_svg)
-    create_pdf = check_bool(args.generate_pdf)
-    generate_ras = check_bool(args.generate_ras)
-    
-    os.makedirs('result')
-
-    if generate_ras:
-        os.makedirs('ras')
-    
-    if args.rules_selector == 'HMRcore':        
-        recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
-    elif args.rules_selector == 'Recon':
-        recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
-    elif args.rules_selector == 'Custom':
-        ids, rules, gene_in_rule = make_recon(args.custom)
-        
-    resolve_none = check_bool(args.none)
-    
-    class_pat = {}
-    
-    if args.option == 'datasets_rasonly':
-        name = "RAS Dataset"
-        dataset = read_dataset(args.input_datas[0],"dataset")
-
-        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-
-        type_gene = gene_type(dataset.iloc[0, 0], name) 
-            
-        if args.rules_selector != 'Custom':
-            genes = data_gene(dataset, type_gene, name, None)
-            ids, rules = load_id_rules(recon.get(type_gene))
-        elif args.rules_selector == 'Custom':
-            genes = data_gene(dataset, type_gene, name, gene_in_rule)
-                
-        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
-
-        create_ras(resolve_rules, name, True)
-          
-        if err != None and err:
-            warning('Warning: gene\n' + str(err) + '\nnot found in class '
-                + name + ', the expression level for this gene ' +
-                'will be considered NaN\n')
-        
-        print('execution succeded')
-        return None
-    
-    
-    elif args.option == 'datasets':
-        num = 1
-        for i, j in zip(args.input_datas, args.names):
-
-            name = name_dataset(j, num)
-            dataset = read_dataset(i, name)
-
-            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-
-            type_gene = gene_type(dataset.iloc[0, 0], name) 
-            
-            if args.rules_selector != 'Custom':
-                genes = data_gene(dataset, type_gene, name, None)
-                ids, rules = load_id_rules(recon.get(type_gene))
-            elif args.rules_selector == 'Custom':
-                genes = data_gene(dataset, type_gene, name, gene_in_rule)
-                
-            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
-
-            if generate_ras:
-                create_ras(resolve_rules, name, False)
-            
-            if err != None and err:
-                warning('Warning: gene\n' + str(err) + '\nnot found in class '
-                    + name + ', the expression level for this gene ' +
-                    'will be considered NaN\n')
-            if resolve_rules != None:
-                class_pat[name] = list(map(list, zip(*resolve_rules.values())))
-            num += 1
-    elif args.option == 'dataset_class':
-        name = 'RNAseq'
-        dataset = read_dataset(args.input_data, name)
-        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
-        type_gene = gene_type(dataset.iloc[0, 0], name)
-        classes = read_dataset(args.input_class, 'class')
-        if not len(classes.columns) == 2:
-            warning('Warning: more than 2 columns in class file. Extra' +
-                    'columns have been disregarded\n')
-        classes = classes.astype(str)
-        if args.rules_selector != 'Custom':
-            genes = data_gene(dataset, type_gene, name, None)
-            ids, rules = load_id_rules(recon.get(type_gene))
-        elif args.rules_selector == 'Custom':
-            genes = data_gene(dataset, type_gene, name, gene_in_rule)
-        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
-        if err != None and err:
-            warning('Warning: gene\n'+str(err)+'\nnot found in class '
-                    + name + ', the expression level for this gene ' +
-                    'will be considered NaN\n')
-        if resolve_rules != None:
-            class_pat = split_class(classes, resolve_rules)
-    
-    	
-    if args.rules_selector == 'Custom':
-        if args.yes_no == 'yes':
-            try:
-                core_map = ET.parse(args.custom_map)
-            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
-                sys.exit('Execution aborted: custom map in wrong format')
-        elif args.yes_no == 'no':
-            core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
-    else:       
-        core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
-        
-    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)
-        
-    print('Execution succeded')
-
-    return None
-
-###############################################################################
-
-if __name__ == "__main__":
-    main()
--- a/Desktop/Marea/marea.xml	Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,274 +0,0 @@
-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">
-    <description></description>
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="0.23.0">pandas</requirement>
-        <requirement type="package" version="1.1.0">scipy</requirement>
-        <requirement type="package" version="0.10.1">cobra</requirement>
-        <requirement type="package" version="4.2.1">lxml</requirement>
-        <requirement type="package" version="0.8.1">svglib</requirement>
-        <requirement type="package" version="3.4.0">reportlab</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
-      	python $__tool_directory__/marea.py
-        --rules_selector $cond_rule.rules_selector
-        #if $cond_rule.rules_selector == 'Custom':
-            --custom ${cond_rule.Custom_rules}
-            --yes_no ${cond_rule.cond_map.yes_no}
-            #if $cond_rule.cond_map.yes_no == 'yes':
-                --custom_map $cond_rule.cond_map.Custom_map
-            #end if
-        #end if
-	
-      	--tool_dir $__tool_directory__
-      	--option $cond.type_selector
-        --out_log $log		
-	
-        #if $cond.type_selector == 'datasets':
-            --input_datas
-            #for $data in $cond.input_Datasets:
-                ${data.input}
-            #end for
-            --names
-            #for $data in $cond.input_Datasets:
-                ${data.input_name}
-            #end for
-            #if $cond.advanced.choice == 'true':
-      	    --none ${cond.advanced.None}
-      	    --pValue ${cond.advanced.pValue}
-      	    --fChange ${cond.advanced.fChange}
-	    	--generate_svg ${cond.advanced.generateSvg}
-	    	--generate_pdf ${cond.advanced.generatePdf}
-	    --generate_ras ${cond.advanced.generateRas}
-	#else 
-	    --none true
-	    --pValue 0.05
-	    --fChange 1.5
-	    --generate_svg false
-	    --generate_pdf true
-	    --generate_ras false
-	#end if
-        #elif $cond.type_selector == 'dataset_class':
-            --input_data ${input_data}
-            --input_class ${input_class}
-            #if $cond.advanced.choice == 'true':
-      	    --none ${cond.advanced.None}
-      	    --pValue ${cond.advanced.pValue}
-      	    --fChange ${cond.advanced.fChange}
-	    --generate_svg ${cond.advanced.generateSvg}
-	    --generate_pdf ${cond.advanced.generatePdf}
-	    --generate_ras ${cond.advanced.generateRas}
-	#else 
-	    --none true
-	    --pValue 0.05
-	    --fChange 1.5
-	    --generate_svg false
-	    --generate_pdf true
-	    --generate_ras false
-	#end if
-        #end if
-        #if $cond.type_selector == 'datasets_rasonly':
-            --input_datas ${input_Datasets}
-            --single_ras_file $ras_single
-            --none ${cond.advanced.None}
-        #end if
-        ]]>
-    </command>
-
-    <inputs>
-        <conditional name="cond_rule">
-            <expand macro="options"/>
-            <when value="HMRcore">
-            </when>
-            <when value="Recon">
-            </when>
-            <when value="Custom">
-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />
-                <conditional name="cond_map">
-                    <param name="yes_no" type="select" label="Custom map? (optional)">
-                        <option value="no" selected="true">no</option>
-                        <option value="yes">yes</option>
-                    </param>
-                    <when value="yes">
-                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>
-                    </when>
-                    <when value="no">
-                    </when>
-                </conditional>
-            </when>
-        </conditional>
-        <conditional name="cond">
-            <param name="type_selector" argument="--option" type="select" label="Input format:">
-                <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N</option>
-                <option value="dataset_class">RNAseq of all samples + sample group specification</option>
-                <option value="datasets_rasonly" selected="true">RNAseq dataset</option>
-            </param>
-            <when value="datasets">
-                <repeat name="input_Datasets" title="RNAseq" min="2">
-                    <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />	
-                    <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" />
-                </repeat>
-                <conditional name="advanced">
-					<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">
-		    			<option value="true" selected="true">No</option>
-		    			<option value="false">Yes</option>
-					</param>
-					<when value="false">
-					</when>
-					<when value="true">
-		    			<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
-		    			<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />
-		    			<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />
-		    			<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />
-		    			<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />	
-		    			<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />		
-					</when>
-    	</conditional>
-            </when>
-            <when value="datasets_rasonly">
-                <param name="input_Datasets" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />
-                <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" />
-                <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
-            </when>
-            <when value="dataset_class">
-                <param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
-                <param name="input_class" argument="--input_class" type="data" format="tabular, csv, tsv" label="Sample group specification" />
-                <conditional name="advanced">
-					<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">
-		    			<option value="true" selected="true">No</option>
-		    			<option value="false">Yes</option>
-					</param>
-					<when value="false">
-					</when>
-					<when value="true">
-		    			<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
-		    			<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />
-		    			<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />
-		    			<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />
-		    			<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />	
-		    			<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />		
-					</when>
-    	</conditional>
-            </when>
-        </conditional>
-       
-      
-       
-	
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="log" label="MaREA - Log" />
-        <data format="tabular" name="ras_single" label="MaREA - RAS - ${cond.input_name}">
-        	<filter>cond['type_selector'] == "datasets_rasonly"</filter>
-        </data>
-        <collection name="results" type="list" label="MaREA - Results">
-        <filter>cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class"</filter>
-            <discover_datasets pattern="__name_and_ext__" directory="result"/>
-        </collection>
-	<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">
-	    <filter>cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']</filter>
-    	    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>
-	</collection>
-	
-    </outputs>
-    <tests>
-        <test>
-            <param name="pValue" value="0.56"/>
-            <output name="log" file="log.txt"/>
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-
-What it does
--------------
-
-This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
-
-Accepted files are: 
-    - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");
-    - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.
-
-Optional files:
-    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
-
-	* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
-	* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).
-    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.
-
-The tool generates:
-    1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;
-    2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;
-    3) a log file (.txt).
-
-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
-
-Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).
-
-To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.
-
-Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".
-
-
-Example input
--------------
-
-**"Custom Rules"** option:
-
-Custom Rules Dastaset:
-
-@CUSTOM_RULES_EXEMPLE@
-
-**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:
-
-RNA-seq Dataset 1:						
-
-@DATASET_EXEMPLE1@
-
-RNA-seq Dataset 2:
-
-@DATASET_EXEMPLE2@
-
-**"RNAseq of all samples + sample group specification"** option:
-
-RNA-seq Dataset:
-
-@DATASET_EXEMPLE1@
-
-Class-file:
-
-+------------+------------+   
-| Patient_ID |    class   |   
-+============+============+   
-| TCGAAA3529 |     MSI    |   
-+------------+------------+    
-| TCGAA62671 |     MSS    |    
-+------------+------------+    
-| TCGAA62672 |     MSI    |   
-+------------+------------+
-
-|
-
-.. class:: infomark
-
-**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
-
-.. class:: infomark
-
-**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.
-
-@REFERENCE@
-
-.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
-.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj
-.. _MaREA cluster analysis: http://link del tool di cluster.org
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
-	
--- a/Desktop/Marea/marea_cluster.py	Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,401 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Jun 3 19:51:00 2019
-@author: Narger
-"""
-
-import sys
-import argparse
-import os
-from sklearn.datasets import make_blobs
-from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
-from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster
-import matplotlib
-matplotlib.use('agg')
-import matplotlib.pyplot as plt
-import scipy.cluster.hierarchy as shc   
-import matplotlib.cm as cm
-import numpy as np
-import pandas as pd
-
-################################# process args ###############################
-
-def process_args(args):
-    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
-                                     description = 'process some value\'s' +
-                                     ' genes to create class.')
-
-    parser.add_argument('-ol', '--out_log', 
-                        help = "Output log")
-    
-    parser.add_argument('-in', '--input',
-                        type = str,
-                        help = 'input dataset')
-    
-    parser.add_argument('-cy', '--cluster_type',
-                        type = str,
-                        choices = ['kmeans', 'meanshift', 'dbscan', 'hierarchy'],
-                        default = 'kmeans',
-                        help = 'choose clustering algorythm')
-    
-    parser.add_argument('-k1', '--k_min', 
-                        type = int,
-                        default = 2,
-                        help = 'choose minimun cluster number to be generated')
-    
-    parser.add_argument('-k2', '--k_max', 
-                        type = int,
-                        default = 7,
-                        help = 'choose maximum cluster number to be generated')
-    
-    parser.add_argument('-el', '--elbow', 
-                        type = str,
-                        default = 'false',
-                        choices = ['true', 'false'],
-                        help = 'choose if you want to generate an elbow plot for kmeans')
-    
-    parser.add_argument('-si', '--silhouette', 
-                        type = str,
-                        default = 'false',
-                        choices = ['true', 'false'],
-                        help = 'choose if you want silhouette plots')
-    
-    parser.add_argument('-db', '--davies', 
-                        type = str,
-                        default = 'false',
-                        choices = ['true', 'false'],
-                        help = 'choose if you want davies bouldin scores')
-    
-    parser.add_argument('-td', '--tool_dir',
-                        type = str,
-                        required = True,
-                        help = 'your tool directory')
-                        
-    parser.add_argument('-ms', '--min_samples',
-                        type = int,
-                        help = 'min samples for dbscan (optional)')
-                        
-    parser.add_argument('-ep', '--eps',
-                        type = int,
-                        help = 'eps for dbscan (optional)')
-                        
-    parser.add_argument('-bc', '--best_cluster',
-                        type = str,
-                        help = 'output of best cluster tsv')
-    				
-    
-    
-    args = parser.parse_args()
-    return args
-
-########################### warning ###########################################
-
-def warning(s):
-    args = process_args(sys.argv)
-    with open(args.out_log, 'a') as log:
-        log.write(s + "\n\n")
-    print(s)
-
-########################## read dataset ######################################
-    
-def read_dataset(dataset):
-    try:
-        dataset = pd.read_csv(dataset, sep = '\t', header = 0)
-    except pd.errors.EmptyDataError:
-        sys.exit('Execution aborted: wrong format of dataset\n')
-    if len(dataset.columns) < 2:
-        sys.exit('Execution aborted: wrong format of dataset\n')
-    return dataset
-
-############################ rewrite_input ###################################
-    
-def rewrite_input(dataset):
-    #Riscrivo il dataset come dizionario di liste, 
-    #non come dizionario di dizionari
-    
-    for key, val in dataset.items():
-        l = []
-        for i in val:
-            if i == 'None':
-                l.append(None)
-            else:
-                l.append(float(i))
-   
-        dataset[key] = l
-    
-    return dataset
-
-############################## write to csv ##################################
-    
-def write_to_csv (dataset, labels, name):
-    #labels = predict
-    predict = [x+1 for x in labels]
-  
-    classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
-
-    dest = name
-    classe.to_csv(dest, sep = '\t', index = False,
-                      header = ['Patient_ID', 'Class'])
-   
-########################### trova il massimo in lista ########################
-def max_index (lista):
-    best = -1
-    best_index = 0
-    for i in range(len(lista)):
-        if lista[i] > best:
-            best = lista [i]
-            best_index = i
-            
-    return best_index
-    
-################################ kmeans #####################################
-    
-def kmeans (k_min, k_max, dataset, elbow, silhouette, davies, best_cluster):
-    if not os.path.exists('clustering'):
-        os.makedirs('clustering')
-    
-        
-    if elbow == 'true':
-        elbow = True
-    else:
-        elbow = False
-        
-    if silhouette == 'true':
-        silhouette = True
-    else:
-        silhouette = False
-        
-    if davies == 'true':
-        davies = True
-    else:
-        davies = False
-        
-
-    range_n_clusters = [i for i in range(k_min, k_max+1)]
-    distortions = []
-    scores = []
-    all_labels = []
-    
-    clusterer = KMeans(n_clusters=1, random_state=10)
-    distortions.append(clusterer.fit(dataset).inertia_)
-    
-    
-    for n_clusters in range_n_clusters:
-        clusterer = KMeans(n_clusters=n_clusters, random_state=10)
-        cluster_labels = clusterer.fit_predict(dataset)
-        
-        all_labels.append(cluster_labels)
-        if n_clusters == 1:
-        	silhouette_avg = 0
-        else:
-            silhouette_avg = silhouette_score(dataset, cluster_labels)
-        scores.append(silhouette_avg)
-        distortions.append(clusterer.fit(dataset).inertia_)
-        
-    best = max_index(scores) + k_min
-        
-    for i in range(len(all_labels)):
-        prefix = ''
-        if (i + k_min == best):
-            prefix = '_BEST'
-            
-        write_to_csv(dataset, all_labels[i], 'clustering/kmeans_with_' + str(i + k_min) + prefix + '_clusters.tsv')
-        
-        
-        if (prefix == '_BEST'):
-            labels = all_labels[i]
-            predict = [x+1 for x in labels]
-            classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
-            classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
-            
-            
-        if davies:
-            with np.errstate(divide='ignore', invalid='ignore'):
-                davies_bouldin = davies_bouldin_score(dataset, all_labels[i])
-            warning("\nFor n_clusters = " + str(i + k_min) +
-                  " The average davies bouldin score is: " + str(davies_bouldin))
-        
-       
-        if silhouette:
-            silihouette_draw(dataset, all_labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
-        
-        
-    if elbow:
-        elbow_plot(distortions, k_min,k_max) 
-
-   
-    
-    
-
-############################## elbow_plot ####################################
-    
-def elbow_plot (distortions, k_min, k_max):
-    plt.figure(0)
-    x = list(range(k_min, k_max + 1))
-    x.insert(0, 1)
-    plt.plot(x, distortions, marker = 'o')
-    plt.xlabel('Number of clusters (k)')
-    plt.ylabel('Distortion')
-    s = 'clustering/elbow_plot.png'
-    fig = plt.gcf()
-    fig.set_size_inches(18.5, 10.5, forward = True)
-    fig.savefig(s, dpi=100)
-    
-    
-############################## silhouette plot ###############################
-def silihouette_draw(dataset, labels, n_clusters, path):
-    if n_clusters == 1:
-        return None
-        
-    silhouette_avg = silhouette_score(dataset, labels)
-    warning("For n_clusters = " + str(n_clusters) +
-          " The average silhouette_score is: " + str(silhouette_avg))
-           
-    plt.close('all')
-    # Create a subplot with 1 row and 2 columns
-    fig, (ax1) = plt.subplots(1, 1)
-    
-    fig.set_size_inches(18, 7)
-        
-    # The 1st subplot is the silhouette plot
-    # The silhouette coefficient can range from -1, 1 but in this example all
-    # lie within [-0.1, 1]
-    ax1.set_xlim([-1, 1])
-    # The (n_clusters+1)*10 is for inserting blank space between silhouette
-    # plots of individual clusters, to demarcate them clearly.
-    ax1.set_ylim([0, len(dataset) + (n_clusters + 1) * 10])
-    
-    # Compute the silhouette scores for each sample
-    sample_silhouette_values = silhouette_samples(dataset, labels)
-        
-    y_lower = 10
-    for i in range(n_clusters):
-        # Aggregate the silhouette scores for samples belonging to
-        # cluster i, and sort them
-        ith_cluster_silhouette_values = \
-        sample_silhouette_values[labels == i]
-        
-        ith_cluster_silhouette_values.sort()
-    
-        size_cluster_i = ith_cluster_silhouette_values.shape[0]
-        y_upper = y_lower + size_cluster_i
-    
-        color = cm.nipy_spectral(float(i) / n_clusters)
-        ax1.fill_betweenx(np.arange(y_lower, y_upper),
-                          0, ith_cluster_silhouette_values,
-                                     facecolor=color, edgecolor=color, alpha=0.7)
-        
-        # Label the silhouette plots with their cluster numbers at the middle
-        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
-        
-        # Compute the new y_lower for next plot
-        y_lower = y_upper + 10  # 10 for the 0 samples
-    
-        ax1.set_title("The silhouette plot for the various clusters.")
-        ax1.set_xlabel("The silhouette coefficient values")
-        ax1.set_ylabel("Cluster label")
-        
-        # The vertical line for average silhouette score of all the values
-        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
-    
-        ax1.set_yticks([])  # Clear the yaxis labels / ticks
-        ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
-        
-        
-        plt.suptitle(("Silhouette analysis for clustering on sample data "
-                          "with n_clusters = " + str(n_clusters) + "\nAverage silhouette_score = " + str(silhouette_avg)), fontsize=12, fontweight='bold')
-            
-            
-        plt.savefig(path, bbox_inches='tight')
-            
-######################## dbscan ##############################################
-    
-def dbscan(dataset, eps, min_samples):
-    if not os.path.exists('clustering'):
-        os.makedirs('clustering')
-        
-    if eps is not None:
-    	clusterer = DBSCAN(eps = eps, min_samples = min_samples)
-    else:
-    	clusterer = DBSCAN()
-    
-    clustering = clusterer.fit(dataset)
-    
-    core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
-    core_samples_mask[clustering.core_sample_indices_] = True
-    labels = clustering.labels_
-
-    # Number of clusters in labels, ignoring noise if present.
-    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
-    
-    
-    ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL
-    
-    
-    write_to_csv(dataset, labels, 'clustering/dbscan_results.tsv')
-    
-########################## hierachical #######################################
-    
-def hierachical_agglomerative(dataset, k_min, k_max):
-
-    if not os.path.exists('clustering'):
-        os.makedirs('clustering')
-    
-    plt.figure(figsize=(10, 7))  
-    plt.title("Customer Dendograms")  
-    shc.dendrogram(shc.linkage(dataset, method='ward'))  
-    fig = plt.gcf()
-    fig.savefig('clustering/dendogram.png', dpi=200)
-    
-    range_n_clusters = [i for i in range(k_min, k_max+1)]
-
-    for n_clusters in range_n_clusters:
-        
-        cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')  
-        cluster.fit_predict(dataset)  
-        cluster_labels = cluster.labels_
-        
-        silhouette_avg = silhouette_score(dataset, cluster_labels)
-        write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
-        #warning("For n_clusters =", n_clusters,
-              #"The average silhouette_score is :", silhouette_avg)
-        
-        
-       
-
-    
-############################# main ###########################################
-
-
-def main():
-    if not os.path.exists('clustering'):
-        os.makedirs('clustering')
-
-    args = process_args(sys.argv)
-    
-    #Data read
-    
-    X = read_dataset(args.input)
-    X = pd.DataFrame.to_dict(X, orient='list')
-    X = rewrite_input(X)
-    X = pd.DataFrame.from_dict(X, orient = 'index')
-    
-    for i in X.columns:
-        tmp = X[i][0]
-        if tmp == None:
-            X = X.drop(columns=[i])
-    
-    
-    if args.cluster_type == 'kmeans':
-        kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)
-    
-    if args.cluster_type == 'dbscan':
-        dbscan(X, args.eps, args.min_samples)
-        
-    if args.cluster_type == 'hierarchy':
-        hierachical_agglomerative(X, args.k_min, args.k_max)
-        
-##############################################################################
-
-if __name__ == "__main__":
-    main()
--- a/Desktop/Marea/marea_cluster.xml	Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6">
-    <description></description>
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="0.25.1">pandas</requirement>
-        <requirement type="package" version="1.1.0">scipy</requirement>
-        <requirement type="package" version="0.10.1">cobra</requirement>
-        <requirement type="package" version="0.21.3">scikit-learn</requirement>
-        <requirement type="package" version="2.2.2">matplotlib</requirement>
-	<requirement type="package" version="1.17">numpy</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
-      	python $__tool_directory__/marea_cluster.py
-        --input $input
-      	--tool_dir $__tool_directory__
-        --out_log $log
-        --best_cluster $best_cluster
-        --cluster_type ${data.clust_type}
-        #if $data.clust_type == 'kmeans':
-        	--k_min ${data.k_min}
-        	--k_max ${data.k_max}
-        	--elbow ${data.elbow}
-        	--silhouette ${data.silhouette}
-        #end if
-        #if $data.clust_type == 'dbscan':
-        	#if $data.dbscan_advanced.advanced == 'true'
-        		--eps ${data.dbscan_advanced.eps}
-        		--min_samples ${data.dbscan_advanced.min_samples}
-        	#end if
-        #end if
-        #if $data.clust_type == 'hierarchy':
-        	--k_min ${data.k_min}
-        	--k_max ${data.k_max}
-      	#end if
-        ]]>
-    </command>
-    <inputs>
-        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
-        
-        <conditional name="data">
-			<param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
-                	<option value="kmeans" selected="true">KMeans</option>
-                	<option value="dbscan">DBSCAN</option>
-                	<option value="hierarchy">Agglomerative Hierarchical</option>
-        	</param>
-        	<when value="kmeans">
-        		<param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
-        		<param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
-        		<param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
-        		<param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
-        	</when>
-        	<when value="dbscan">
-        		<conditional name="dbscan_advanced">
-        			<param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
-        				<option value="true">Yes</option>
-        				<option value="false">No</option>
-        			</param>
-        			<when value="false"></when>
-        			<when value="true">
-        				<param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
-        				<param name="min_samples" argument="min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
-        			
-        			</when>
-        		</conditional>   	
-        	</when>
-        	<when value="hierarchy">
-        		<param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
-        		<param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
-        	</when>
-		</conditional>
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="log" label="${tool.name} - Log" />
-        <data format="tabular" name="best_cluster" label="${tool.name} - Best cluster" />
-        <collection name="results" type="list" label="${tool.name} - Plots and results">
-            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
-        </collection>
-    </outputs>
-    <help>
-<![CDATA[
-
-What it does
--------------
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
-	
-	
--- a/Desktop/Marea/marea_macros.xml	Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-<macros>
-
-    <xml name="options">
-        <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:">
-            <option value="HMRcore" selected="true">HMRcore rules</option>
-            <option value="Recon">Recon 2.2 rules</option>
-            <option value="Custom">Custom rules</option>
-        </param>
-    </xml>
-
-   <token name="@CUSTOM_RULES_EXEMPLE@">
-
-+--------------------+-------------------------------+
-|         id         |     rule (with entrez-id)     |
-+====================+===============================+
-|        SHMT1       |        155060 or 10357        |
-+--------------------+-------------------------------+
-|        NIT2        |      155060 or 100134869      |
-+--------------------+-------------------------------+
-| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
-+--------------------+-------------------------------+
-
-|
-
-    </token>
-
-    <token name="@DATASET_EXEMPLE1@">
-
-+------------+------------+------------+------------+   
-|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   
-+============+============+============+============+   
-| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   
-+------------+------------+------------+------------+    
-| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    
-+------------+------------+------------+------------+    
-| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   
-+------------+------------+------------+------------+    
-| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   
-+------------+------------+------------+------------+    
-| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   
-+------------+------------+------------+------------+ 
-   
-|
-
-    </token>
-
-    <token name="@DATASET_EXEMPLE2@">
-
-+-------------+------------+------------+------------+
-| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
-+=============+============+============+============+
-|    A1BG     |  0.523167  |  0.371355  |  0.925661  |
-+-------------+------------+------------+------------+
-|    A1CF     |  0.568765  |  0.765567  |  0.456789  |
-+-------------+------------+------------+------------+
-|     A2M     |  0.876545  |  0.768933  |  0.987654  |
-+-------------+------------+------------+------------+
-|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |
-+-------------+------------+------------+------------+
-|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |
-+-------------+------------+------------+------------+
-
-|
-
-    </token>
-
-    <token name="@REFERENCE@">
-
-This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_.
-
-.. _BIMIB: http://sito di bio.org
-.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English
-.. _University of Milan - Bicocca: https://www.unimib.it/
-
-    </token>
-
-    <xml name="citations">
-        <citations> <!--esempio di citazione-->
-            <citation type="bibtex">
-@online{lh32017,
-  author = {Alex Graudenzi, Davide Maspero, Cluadio Isella, Marzia Di Filippo, Giancarlo Mauri, Enzo Medico, Marco Antoniotti, Chiara Damiani},
-  year = {2018},
-  title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq},
-  publisher = {bioRxiv},
-  journal = {bioRxiv},
-  url = {https://www.biorxiv.org/content/early/2018/01/16/248724},
-}
-            </citation>
-        </citations>
-    </xml>
-
-</macros>
--- a/Marea/marea.py	Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea.py	Tue Oct 15 12:22:43 2019 -0400
@@ -709,7 +709,7 @@
         tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
         tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
         tmp_csv = tmp_csv.reset_index()
-        header = ['ids', 'P_Value', 'Average']
+        header = ['ids', 'P_Value', 'Log2(fold change)']
         tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
         
         if create_svg or create_pdf:
--- a/Marea/marea.xml	Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea.xml	Tue Oct 15 12:22:43 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.2">
+<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">
     <description></description>
     <macros>
         <import>marea_macros.xml</import>
@@ -22,21 +22,7 @@
                 --custom_map $cond_rule.cond_map.Custom_map
             #end if
         #end if
-	#if $advanced.choice == 'true':
-      	    --none ${advanced.None}
-      	    --pValue ${advanced.pValue}
-      	    --fChange ${advanced.fChange}
-	    --generate_svg ${advanced.generateSvg}
-	    --generate_pdf ${advanced.generatePdf}
-	    --generate_ras ${advanced.generateRas}
-	#else 
-	    --none true
-	    --pValue 0.05
-	    --fChange 1.5
-	    --generate_svg false
-	    --generate_pdf true
-	    --generate_ras false
-	#end if
+	
       	--tool_dir $__tool_directory__
       	--option $cond.type_selector
         --out_log $log		
@@ -50,13 +36,44 @@
             #for $data in $cond.input_Datasets:
                 ${data.input_name}
             #end for
+            #if $cond.advanced.choice == 'true':
+      	    --none ${cond.advanced.None}
+      	    --pValue ${cond.advanced.pValue}
+      	    --fChange ${cond.advanced.fChange}
+	    	--generate_svg ${cond.advanced.generateSvg}
+	    	--generate_pdf ${cond.advanced.generatePdf}
+	    --generate_ras ${cond.advanced.generateRas}
+	#else 
+	    --none true
+	    --pValue 0.05
+	    --fChange 1.5
+	    --generate_svg false
+	    --generate_pdf true
+	    --generate_ras false
+	#end if
         #elif $cond.type_selector == 'dataset_class':
             --input_data ${input_data}
             --input_class ${input_class}
+            #if $cond.advanced.choice == 'true':
+      	    --none ${cond.advanced.None}
+      	    --pValue ${cond.advanced.pValue}
+      	    --fChange ${cond.advanced.fChange}
+	    --generate_svg ${cond.advanced.generateSvg}
+	    --generate_pdf ${cond.advanced.generatePdf}
+	    --generate_ras ${cond.advanced.generateRas}
+	#else 
+	    --none true
+	    --pValue 0.05
+	    --fChange 1.5
+	    --generate_svg false
+	    --generate_pdf true
+	    --generate_ras false
+	#end if
         #end if
         #if $cond.type_selector == 'datasets_rasonly':
             --input_datas ${input_Datasets}
             --single_ras_file $ras_single
+            --none ${cond.advanced.None}
         #end if
         ]]>
     </command>
@@ -94,48 +111,66 @@
                     <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />	
                     <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" />
                 </repeat>
+                <conditional name="advanced">
+					<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">
+		    			<option value="true" selected="true">No</option>
+		    			<option value="false">Yes</option>
+					</param>
+					<when value="false">
+					</when>
+					<when value="true">
+		    			<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
+		    			<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />
+		    			<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />
+		    			<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />
+		    			<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />	
+		    			<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />		
+					</when>
+    	</conditional>
             </when>
             <when value="datasets_rasonly">
                 <param name="input_Datasets" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />
+                <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" />
+                <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
             </when>
             <when value="dataset_class">
                 <param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
                 <param name="input_class" argument="--input_class" type="data" format="tabular, csv, tsv" label="Sample group specification" />
+                <conditional name="advanced">
+					<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">
+		    			<option value="true" selected="true">No</option>
+		    			<option value="false">Yes</option>
+					</param>
+					<when value="false">
+					</when>
+					<when value="true">
+		    			<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
+		    			<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />
+		    			<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />
+		    			<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />
+		    			<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />	
+		    			<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />		
+					</when>
+    	</conditional>
             </when>
         </conditional>
        
-       
-       <!--TODO: NASCONDERE ADVANCED SE RAS ONLY-->
+      
        
-	<conditional name="advanced">
-		<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">
-		    <option value="true" selected="true">No</option>
-		    <option value="false">Yes</option>
-		</param>
-		<when value="false">
-		</when>
-		<when value="true">
-		    <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> 
-		    <param name="pValue" argument="--pValue" type="float" size="20" value="0.05" max="1" min="0" label="P-value threshold:" help="min value 0" />
-		    <param name="fChange" argument="--fChange" type="float" size="20" value="1.5" min="1" label="Fold-Change threshold:" help="min value 1" />
-		    <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />
-		    <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />	
-		    <param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />		
-		</when>
-    	</conditional>
+	
     </inputs>
 
     <outputs>
-        <data format="txt" name="log" label="${tool.name} - Log" />
-        <data format="tabular" name="ras_single" label="${tool.name} - RAS">
+        <data format="txt" name="log" label="MaREA - Log" />
+        <data format="tabular" name="ras_single" label="MaREA - RAS - ${cond.input_name}">
         	<filter>cond['type_selector'] == "datasets_rasonly"</filter>
         </data>
-        <collection name="results" type="list" label="${tool.name} - Results">
+        <collection name="results" type="list" label="MaREA - Results">
         <filter>cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class"</filter>
             <discover_datasets pattern="__name_and_ext__" directory="result"/>
         </collection>
-	<collection name="ras" type="list" label="${tool.name} - RAS list" format_source="tabular">
-	    <filter>advanced['choice'] and advanced['generateRas']</filter>
+	<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">
+	    <filter>cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']</filter>
     	    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>
 	</collection>
 	
--- a/Marea/marea_cluster.py	Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea_cluster.py	Tue Oct 15 12:22:43 2019 -0400
@@ -176,6 +176,10 @@
     scores = []
     all_labels = []
     
+    clusterer = KMeans(n_clusters=1, random_state=10)
+    distortions.append(clusterer.fit(dataset).inertia_)
+    
+    
     for n_clusters in range_n_clusters:
         clusterer = KMeans(n_clusters=n_clusters, random_state=10)
         cluster_labels = clusterer.fit_predict(dataset)
@@ -227,8 +231,10 @@
     
 def elbow_plot (distortions, k_min, k_max):
     plt.figure(0)
-    plt.plot(range(k_min, k_max+1), distortions, marker = 'o')
-    plt.xlabel('Number of cluster')
+    x = list(range(k_min, k_max + 1))
+    x.insert(0, 1)
+    plt.plot(x, distortions, marker = 'o')
+    plt.xlabel('Number of clusters (k)')
     plt.ylabel('Distortion')
     s = 'clustering/elbow_plot.png'
     fig = plt.gcf()
--- a/Marea/marea_cluster.xml	Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea_cluster.xml	Tue Oct 15 12:22:43 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.5">
+<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6">
     <description></description>
     <macros>
         <import>marea_macros.xml</import>
@@ -38,7 +38,7 @@
         ]]>
     </command>
     <inputs>
-        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
+        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
         
         <conditional name="data">
 			<param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
@@ -47,8 +47,8 @@
                 	<option value="hierarchy">Agglomerative Hierarchical</option>
         	</param>
         	<when value="kmeans">
-        		<param name="k_min" argument="--k_min" type="integer" min="1" max="99" value="3" label="Min number of clusters (k) to be tested" />
-        		<param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+        		<param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
+        		<param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
         		<param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
         		<param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
         	</when>