# HG changeset patch # User bimib # Date 1582468874 18000 # Node ID e4235b5231e417a8e5b605b68551d2723f93a86f # Parent 3af9d394367c9fc83fa17541659510e32864ab12 Uploaded diff -r 3af9d394367c -r e4235b5231e4 Marea/marea.py --- a/Marea/marea.py Wed Feb 19 10:44:52 2020 -0500 +++ b/Marea/marea.py Sun Feb 23 09:41:14 2020 -0500 @@ -18,14 +18,17 @@ parser = argparse.ArgumentParser(usage = '%(prog)s [options]', description = 'process some value\'s'+ ' genes to create a comparison\'s map.') - parser.add_argument('-rs', '--rules_selector', + parser.add_argument('-cr', '--custom_rules', type = str, - default = 'HMRcore', - choices = ['HMRcore', 'Recon', 'Custom'], - help = 'chose which type of dataset you want use') - parser.add_argument('-cr', '--custom', + default = 'false', + choices = ['true', 'false'], + help = 'choose whether to use custom rules') + parser.add_argument('-cc', '--custom_rule', type = str, - help='your dataset if you want custom rules') + help='custom rules to use') + parser.add_argument('-cm', '--custom_map', + type = str, + help='custom map to use') parser.add_argument('-n', '--none', type = str, default = 'true', @@ -55,13 +58,6 @@ parser.add_argument('-ic', '--input_class', type = str, help = 'sample group specification') - parser.add_argument('-cm', '--custom_map', - type = str, - help = 'custom map') - parser.add_argument('-yn', '--yes_no', - type = str, - choices = ['yes', 'no'], - help = 'if make or not custom map') parser.add_argument('-gs', '--generate_svg', type = str, default = 'true', @@ -551,7 +547,7 @@ react = recon.reactions rules = [react[i].gene_reaction_rule for i in range(len(react))] ids = [react[i].id for i in range(len(react))] - except cb.io.sbml3.CobraSBMLError: + except cb.io.sbml.CobraSBMLError: try: data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') if len(data.columns) < 2: @@ -693,8 +689,8 @@ tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) if create_svg or create_pdf: - if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' - and args.yes_no == 'yes'): + if args.custom_rules == 'false' or (args.custom_rules == 'true' + and args.custom_map != ''): fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' with open(file_svg, 'wb') as new_map: @@ -760,13 +756,6 @@ elif comparison == "onevsmany": for i, j in it.combinations(class_pat.keys(), 2): - - if i != control and j != control: - print(str(control) + " " + str(i) + " " + str(j)) - #Se è un confronto fra elementi diversi dal controllo, skippo - continue - - print('vado') tmp = {} count = 0 max_F_C = 0 @@ -823,13 +812,9 @@ if os.path.isdir('result') == False: os.makedirs('result') - - if args.rules_selector == 'HMRcore': - recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) - elif args.rules_selector == 'Recon': - recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) - elif args.rules_selector == 'Custom': - ids, rules, gene_in_rule = make_recon(args.custom) + + if args.custom_rules == 'true': + ids, rules, gene_in_rule = make_recon(args.custom_rule) class_pat = {} @@ -885,15 +870,13 @@ if resolve_rules_float != None: class_pat = split_class(classes, resolve_rules_float) - if args.rules_selector == 'Custom': - if args.yes_no == 'yes': - try: - core_map = ET.parse(args.custom_map) - except (ET.XMLSyntaxError, ET.XMLSchemaParseError): - sys.exit('Execution aborted: custom map in wrong format') - elif args.yes_no == 'no': - core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') - else: + + if args.custom_rules == 'true': + try: + core_map = ET.parse(args.custom_map) + except (ET.XMLSyntaxError, ET.XMLSchemaParseError): + sys.exit('Execution aborted: custom map in wrong format') + else: core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control) diff -r 3af9d394367c -r e4235b5231e4 Marea/marea.xml --- a/Marea/marea.xml Wed Feb 19 10:44:52 2020 -0500 +++ b/Marea/marea.xml Sun Feb 23 09:41:14 2020 -0500 @@ -1,4 +1,4 @@ - + marea_macros.xml @@ -28,6 +28,11 @@ ${data.input_name} #end for --comparison ${cond.comparis.comparison} + #if $cond.advanced.cond_map == 'true': + --custom_rules true + --custom_rule ${cond.advanced.cond_map.custom_rule} + --custom_map ${cond.advanced.cond_map.custom_map} + #end if #if $cond.advanced.choice == 'true': --pValue ${cond.advanced.pValue} --fChange ${cond.advanced.fChange} @@ -47,6 +52,11 @@ #if $cond.comparis.comparison == 'onevsmany' --control ${cond.comparis.controlgroup} #end if + #if $cond.advanced.cond_map == 'true': + --custom_rules true + --custom_rule ${cond.advanced.cond_map.custom_rule} + --custom_map ${cond.advanced.cond_map.custom_map} + #end if #if $cond.advanced.choice == 'true': --pValue ${cond.advanced.pValue} --fChange ${cond.advanced.fChange} @@ -90,11 +100,12 @@ - + - + + @@ -131,6 +142,7 @@ + @@ -155,71 +167,108 @@ What it does ------------- -This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724. +This tool analyzes and visualizes differences in the Reaction Activity Scores (RASs) of groups of samples, as computed by the Expression2RAS tool, of groups of samples. Accepted files are: - - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*"); - - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to. + - option 1) two or more RAS datasets, each referring to samples in a given group. The user can specify a label for each group (as e.g. "classA" and "classB"); + - option 2) one RAS dataset and one group-file specifying the group each sample belongs to. + +RAS datasets format: tab-separated text files, reporting the RAS value of each reaction (row) for a given sample (column). + +Column header: sample ID. +Raw header: reaction ID. Optional files: - - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: - - * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; - * .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2). - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example. The tool generates: - 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes; - 2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes; - 3) a log file (.txt). + - 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes; + - 2) a metabolic map file (downloadable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes; + - 3) a log file (.txt). + +Output options: +To calculate P-Values and Fold-Changes and to enrich maps, comparisons are performed for each possible pair of groups (default option ‘One vs One’). -RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID. - -Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2). - -To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes. +Alternative options are: + - comparison of each group vs. the rest of samples (option ‘One vs Rest’) + - comparison of each group vs. a control group (option ‘One vs Control). If this option is selected the user must indicate the control group label. Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA". - Example input ------------- -**"Custom Rules"** option: +"RAS of group 1 + RAS of group 2 + ... + RAS of group N" option: -Custom Rules Dastaset: +RAS Dataset 1: -@CUSTOM_RULES_EXEMPLE@ - -**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option: ++------------+----------------+----------------+----------------+ +| Reaction ID| TCGAA62670 | TCGAA62671 | TCGAA62672 | ++============+================+================+================+ +| r1642 | 0.523167 | 0.371355 | 0.925661 | ++------------+----------------+----------------+----------------+ +| r1643 | 0.568765 | 0.765567 | 0.456789 | ++------------+----------------+----------------+----------------+ +| r1640 | 0.876545 | 0.768933 | 0.987654 | ++------------+----------------+----------------+----------------+ +| r1641 | 0.456788 | 0.876543 | 0.876542 | ++------------+----------------+----------------+----------------+ +| r1646 | 0.876543 | 0.786543 | 0.897654 | ++------------+----------------+----------------+----------------+ -RNA-seq Dataset 1: - -@DATASET_EXEMPLE1@ +RAS Dataset 2: -RNA-seq Dataset 2: ++------------+----------------+----------------+----------------+ +| Reaction ID| TCGAA62670 | TCGAA62671 | TCGAA62672 | ++============+================+================+================+ +| r1642 | 0.523167 | 0.371355 | 0.925661 | ++------------+----------------+----------------+----------------+ +| r1643 | 0.568765 | 0.765567 | 0.456789 | ++------------+----------------+----------------+----------------+ +| r1640 | 0.876545 | 0.768933 | 0.987654 | ++------------+----------------+----------------+----------------+ +| r1641 | 0.456788 | 0.876543 | 0.876542 | ++------------+----------------+----------------+----------------+ +| r1646 | 0.876543 | 0.786543 | 0.897654 | ++------------+----------------+----------------+----------------+ -@DATASET_EXEMPLE2@ - -**"RNAseq of all samples + sample group specification"** option: +"RAS of all samples + sample group specification" option: -RNA-seq Dataset: +RAS Dataset: -@DATASET_EXEMPLE1@ ++------------+----------------+----------------+----------------+ +| Reaction ID| TCGAA62670 | TCGAA62671 | TCGAA62672 | ++============+================+================+================+ +| r1642 | 0.523167 | 0.371355 | 0.925661 | ++------------+----------------+----------------+----------------+ +| r1643 | 0.568765 | 0.765567 | 0.456789 | ++------------+----------------+----------------+----------------+ +| r1640 | 0.876545 | 0.768933 | 0.987654 | ++------------+----------------+----------------+----------------+ +| r1641 | 0.456788 | 0.876543 | 0.876542 | ++------------+----------------+----------------+----------------+ +| r1646 | 0.876543 | 0.786543 | 0.897654 | ++------------+----------------+----------------+----------------+ -Class-file: +Group-file -+------------+------------+ -| Patient_ID | class | -+============+============+ -| TCGAAA3529 | MSI | -+------------+------------+ -| TCGAA62671 | MSS | -+------------+------------+ -| TCGAA62672 | MSI | -+------------+------------+ ++---------------+-----------+ +| Patient ID | Class | ++===============+===========+ +| TCGAAA3529 | MSI | ++---------------+-----------+ +| TCGAA62671 | MSS | ++---------------+-----------+ +| TCGAA62672 | MSI | ++---------------+-----------+ -| +Advanced options +---------------- + +P-Value threshold: the threshold used for significance Kolmogorov-Smirnov (KS) test, to verify whether the distributions of RASs over the samples in two sets are significantly different + +Fold-Change threshold: threshold of the fold-change between the average RAS of two groups. Among the reactions that pass the KS test, only fold-change values larger than the indicated threshold will be visualized on the output metabolic map; + .. class:: infomark diff -r 3af9d394367c -r e4235b5231e4 Marea/ras_generator.xml --- a/Marea/ras_generator.xml Wed Feb 19 10:44:52 2020 -0500 +++ b/Marea/ras_generator.xml Sun Feb 23 09:41:14 2020 -0500 @@ -1,4 +1,4 @@ - + - Reaction Activity Scores computation marea_macros.xml @@ -31,17 +31,6 @@ - - - - - - - - - - - @@ -59,6 +48,62 @@ What it does ------------- + +This tool computes Reaction Activity Scores from gene expression (RNA-seq) dataset(s), as described in Graudenzi et al. Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power. Journal of Biomedical Informatics, 2018, 87: 37-49. + +Accepted files: + - A gene expression dataset + +Format: +Tab-separated text file reporting the normalized expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). +Column header: sample ID. +Row header: gene ID. + + +Optional files: + - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: + + * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; + * .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2). + +Computation option ‘(A and NaN) solved as (A)’: +In case of missing expression value, referred to as NaN (Not a Number), for a gene joined with an AND operator in a given GPR rule, the rule ‘A and NaN’ + +If YES is selected: the GPR will be solved as A. + +If NO is selected: the GPR will be disregarded tout-court (i.e., treated as NaN). + +Example input +------------- + +Custom GPR rules: + ++------------+--------------------------------------+ +| id | rule (with entrez-id | ++============+======================================+ +| r1642 | 155060 or 10357 | ++------------+--------------------------------------+ +| r1643 | 155060 or 100134869 | ++------------+--------------------------------------+ +| r1640 | 155060 and 100134869 or 10357 | ++------------+--------------------------------------+ + +RNA-seq dataset: + ++------------+----------------+----------------+----------------+ +| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | ++============+================+================+================+ +| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | ++------------+----------------+----------------+----------------+ +| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | ++------------+----------------+----------------+----------------+ +| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | ++------------+----------------+----------------+----------------+ +| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | ++------------+----------------+----------------+----------------+ +| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | ++------------+----------------+----------------+----------------+ + ]]>