Mercurial > repos > vijay > pancancer_compare_within_models
changeset 0:84ac2a86bde1 draft default tip
"planemo upload for repository http://github.com/nvk747/papaa/galaxy/ commit 954b283ef7f82f59f55476a4b3a230d655187ac1"
author | vijay |
---|---|
date | Wed, 16 Dec 2020 23:29:26 +0000 |
parents | |
children | |
files | compare_within_models.xml macros.xml |
diffstat | 2 files changed, 349 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/compare_within_models.xml Wed Dec 16 23:29:26 2020 +0000
@@ -0,0 +1,133 @@
+<tool id="pancancer_compare_within_models" name="PAPAA: PanCancer compare within models" version="@VERSION@">
+    <description>compare within models</description>
+    <!-- Galaxy wrapper around papaa_compare_within_models.R. The command stages the selected
+         datasets by symlink under 'classifier' (and 'alt_classifier' when the alt gene option
+         is on) and passes those directory names to the script. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <version_command><![CDATA['papaa_compare_within_models.R' --version 2>&1 | grep PAPAA]]></version_command>
+    <command><![CDATA[
+        #import re
+        ## Dataset identifiers are chosen by the user and end up inside quoted shell paths, so
+        ## every character outside [A-Za-z0-9_.-] is replaced with '_' before it is used.
+        #set $unsafe_chars = '[^A-Za-z0-9_.-]'
+        ## Stage the pancancer model; 'classifier/figures' is where the PDF outputs are collected from.
+        mkdir -p 'classifier/figures' &&
+        ln -s '${pancan_classifier_summary}' 'classifier/classifier_summary.txt' &&
+        ln -s '${pancan_classifier_coefficients}' 'classifier/classifier_coefficients.tsv' &&
+        #for $within_summary in $pancan_within_classifier_summary:
+            #set $disease_dir = re.sub($unsafe_chars, '_', str($within_summary.element_identifier))
+            mkdir -p 'classifier/within_disease/${disease_dir}' &&
+            ln -s '${within_summary}' 'classifier/within_disease/${disease_dir}/classifier_summary.txt' &&
+        #end for
+        #for $within_coefficient in $pancan_within_classifier_coefficients:
+            #set $disease_dir = re.sub($unsafe_chars, '_', str($within_coefficient.element_identifier))
+            mkdir -p 'classifier/within_disease/${disease_dir}' &&
+            ln -s '${within_coefficient}' 'classifier/within_disease/${disease_dir}/classifier_coefficients.tsv' &&
+        #end for
+        #if $include_alt.include_alt_select == 'true':
+            ## Same layout again for the alt gene model, rooted at 'alt_classifier'.
+            mkdir 'alt_classifier' &&
+            ln -s '${include_alt.alt_pancan_classifier_summary}' 'alt_classifier/classifier_summary.txt' &&
+            ln -s '${include_alt.alt_pancan_classifier_coefficients}' 'alt_classifier/classifier_coefficients.tsv' &&
+            #for $within_summary in $include_alt.alt_pancan_within_classifier_summary:
+                #set $disease_dir = re.sub($unsafe_chars, '_', str($within_summary.element_identifier))
+                mkdir -p 'alt_classifier/within_disease/${disease_dir}' &&
+                ln -s '${within_summary}' 'alt_classifier/within_disease/${disease_dir}/classifier_summary.txt' &&
+            #end for
+            #for $within_coefficient in $include_alt.alt_pancan_within_classifier_coefficients:
+                #set $disease_dir = re.sub($unsafe_chars, '_', str($within_coefficient.element_identifier))
+                mkdir -p 'alt_classifier/within_disease/${disease_dir}' &&
+                ln -s '${within_coefficient}' 'alt_classifier/within_disease/${disease_dir}/classifier_coefficients.tsv' &&
+            #end for
+        #end if
+        ## Listing of the staged layout; it goes to the job's standard output, not to the log dataset.
+        ls -lahR &&
+        papaa_compare_within_models.R
+            --pancan_model 'classifier'
+        #if $include_alt.include_alt_select == 'true':
+            --alt_model 'alt_classifier'
+        #end if
+        > '${log}'
+    ]]></command>
+    <inputs>
+        <param argument="--pancan_model" label="pancancer classifier summary" name="pancan_classifier_summary" optional="false" type="data" format="txt" help="classifier_summary.txt"/>
+        <param label="pancancer classifier coefficients" name="pancan_classifier_coefficients" optional="false" type="data" format="tabular" help="classifier_coefficients.tsv"/>
+        <param label="pan_within classifier summary" name="pancan_within_classifier_summary" optional="false" type="data" format="txt" multiple="true" help="multiple classifier_summary.txt"/>
+        <param label="pan_within classifier coefficients" name="pancan_within_classifier_coefficients" optional="false" type="data" format="tabular" multiple="true" help="multiple classifier_coefficients.tsv"/>
+        <conditional name="include_alt">
+            <param name="include_alt_select" type="select" label="Would you want to compare given model with alt gene model?" help="output of pancancer classifier and pancancer within disease for alt gene">
+                <option value="false" selected="true">do not do alt gene</option>
+                <option value="true">do alt gene</option>
+            </param>
+            <when value="true">
+                <param argument="--alt_model" label="pancancer classifier summary" name="alt_pancan_classifier_summary" optional="false" type="data" format="txt" help="alt classifier_summary.txt"/>
+                <param label="pancancer classifier coefficients" name="alt_pancan_classifier_coefficients" optional="false" type="data" format="tabular" help="alt classifier_coefficients.tsv"/>
+                <param label="alt_pancan_within classifier summary" name="alt_pancan_within_classifier_summary" optional="true" type="data" format="txt" multiple="true" help="multiple alt classifier_summary.txt"/>
+                <param label="alt_pancan_within classifier coefficients" name="alt_pancan_within_classifier_coefficients" optional="true" type="data" format="tabular" multiple="true" help="multiple alt classifier_coefficients.tsv"/>
+            </when>
+            <when value="false"/>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="txt" name="log" label="${tool.name} on ${on_string} (Log)" />
+        <data format="pdf" name="aupr_comparison" label="${tool.name} on ${on_string} (aupr_comparison.pdf)" from_work_dir="classifier/figures/aupr_comparison.pdf"/>
+        <data format="pdf" name="auroc_comparison" label="${tool.name} on ${on_string} (auroc_comparison.pdf)" from_work_dir="classifier/figures/auroc_comparison.pdf"/>
+        <data format="pdf" name="alt_gene_aupr_comparison" label="${tool.name} on ${on_string} (alt_gene_aupr_comparison.pdf)" from_work_dir="classifier/figures/alt_gene_aupr_comparison.pdf" >
+            <filter>include_alt['include_alt_select'] == 'true' </filter>
+        </data>
+        <data format="pdf" name="alt_gene_auroc_comparison" label="${tool.name} on ${on_string} (alt_gene_auroc_comparison.pdf)" from_work_dir="classifier/figures/alt_gene_auroc_comparison.pdf" >
+            <filter>include_alt['include_alt_select'] == 'true' </filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="pancan_classifier_summary" value="classifier_summary.txt" ftype="txt"/>
+            <param name="pancan_classifier_coefficients" value="classifier_coefficients.tsv" ftype="tabular"/>
+            <param name="pancan_within_classifier_summary" value="classifier_summary/GBM" >
+                <!--
+                <collection type="list">
+                    <element name="GBM" value="GBM_100.txt" ftype="txt"/>
+                </collection>
+                -->
+            </param>
+            <param name="pancan_within_classifier_coefficients" value="classifier_coefficients/GBM">
+                <!--
+                <collection type="list">
+                    <element name="GBM" value="GBM_99.tabular" ftype="tabular"/>
+                </collection>
+                -->
+            </param>
+            <param name="include_alt_select" value="false"/>
+            <!--
+            <conditional name="include_alt">
+                <param name="include_alt_select" value="false"/>
+                <param name="alt_pancan_summary" value="alt_pancan_summary.txt" ftype="txt"/>
+                <param name="alt_pancan_classifier_coefficients" value="alt_pancan_classifier_coefficients.tsv" ftype="tabular"/>
+                <param name="alt_pan_within_classifier_summary" value="alt_pan_within_classifier_summary.txt" ftype="txt"/>
+                <param name="alt_pan_within_classifier_coefficients" value="alt_pan_within_classifier_coefficients.tsv" ftype="tabular"/>
+            </conditional>
+            -->
+            <output name="log" file="compare_within_models_Log.txt"/>
+            <output name="aupr_comparison" file="aupr_comparison.pdf" compare="sim_size" delta="50"/>
+            <output name="auroc_comparison" file="auroc_comparison.pdf" compare="sim_size" delta="50"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+        **Pancancer_Aberrant_Pathway_Activity_Analysis scripts/papaa_compare_within_models.R:**
+
+        **Inputs:**
+            --pancan_model String of the Directory: location of Pan classifier summary file
+            --alt_model String of the Directory: location of Alt gene classifier summary file
+
+        **Outputs:**
+            Comparison plots for Pan and Pan_within models ("auroc_comparison.pdf" and "aupr_comparison.pdf")
+
+            Comparison plots for altgene, alt_within, Pan_alt models ("alt_gene_auroc_comparison.pdf" and "alt_gene_aupr_comparison.pdf")
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Dec 16 23:29:26 2020 +0000
@@ -0,0 +1,226 @@
+<macros>
+    <!-- Single source of the wrapper version; also pins the papaa package requirement below. -->
+    <token name="@VERSION@">0.1.9</token>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">papaa</requirement>
+        </requirements>
+    </xml>
+
+    <!-- Treat every non-zero exit code as a fatal error. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code level="fatal" range="1:"/>
+        </stdio>
+    </xml>
+
+    <!-- Optional dataset inputs; the yield point lets the expanding tool add params after x_matrix. -->
+    <xml name="inputs_basic">
+        <param argument="--x_matrix" label="Filename of features to use in model" name="x_matrix" optional="true" type="data" format="tabular" help="data/pancan_rnaseq_freeze.tsv.gz"/>
+        <yield/>
+        <param argument="--filename_mut" label="Filename mutations" name="filename_mut" optional="true" type="data" format="tabular" help="data/pancan_mutation_freeze.tsv.gz"/>
+        <param argument="--filename_mut_burden" label="Filename of mutation burden" name="filename_mut_burden" optional="true" type="data" format="tabular" help="data/mutation_burden_freeze.tsv"/>
+        <param argument="--filename_sample" label="Filename of sample" name="filename_sample" optional="true" type="data" format="tabular" help="data/sample_freeze.tsv"/>
+    </xml>
+
+    <!-- Command fragment for inputs_basic: a flag is emitted only for datasets that are set. -->
+    <token name="@INPUTS_BASIC@"><![CDATA[
+        #if $x_matrix and $x_matrix is not None:
+            --x_matrix '$x_matrix'
+        #end if
+        #if $filename_mut and $filename_mut is not None:
+            --filename_mut '$filename_mut'
+        #end if
+        #if $filename_mut_burden and $filename_mut_burden is not None:
+            --filename_mut_burden '$filename_mut_burden'
+        #end if
+        #if $filename_sample and $filename_sample is not None:
+            --filename_sample '$filename_sample'
+        #end if
+    ]]></token>
+
+    <!-- Gene and disease selection, both entered as comma separated text. -->
+    <xml name="inputs_genes_diseases">
+        <param argument="--genes" label="Comma separated string of HUGO gene symbols" name="genes" optional="false" type="text" value="ERBB2,PIK3CA,KRAS,AKT1"/>
+        <param argument="--diseases" label="Comma sep string of TCGA disease acronyms. If no arguments are passed, filtering will default to options given in --filter_count and --filter_prop." name="diseases" optional="true" type="text" value="BLCA,BRCA,CESC,COAD,ESCA,LUAD,LUSC,OV,PRAD,READ,STAD,UCEC,UCS"/>
+    </xml>
+
+    <token name="@INPUTS_GENES_DISEASES@"><![CDATA[
+        #if $genes and $genes is not None:
+            --genes '$genes'
+        #end if
+        #if $diseases and str($diseases) != '':
+            --diseases '$diseases'
+        #end if
+    ]]></token>
+
+    <xml name="input_filename_raw_mut">
+        <param argument="--filename_raw_mut" label="Filename of raw mut MAF" name="filename_raw_mut" optional="true" type="data" format="tabular" help="data/raw/mc3.v0.2.8.PUBLIC.maf"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_RAW_MUT@"><![CDATA[
+        #if $filename_raw_mut and $filename_raw_mut is not None:
+            --filename_raw_mut '$filename_raw_mut'
+        #end if
+    ]]></token>
+
+    <xml name="input_filename_burden">
+        <param argument="--filename_burden" label="Burden file" name="filename_burden" optional="true" type="data" format="tabular" help="data/seg_based_scores.tsv"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_BURDEN@"><![CDATA[
+        #if $filename_burden and $filename_burden is not None:
+            --filename_burden '$filename_burden'
+        #end if
+    ]]></token>
+
+    <xml name="input_filename_snaptron_samples">
+        <param argument="--sample_file" label="SNAPTRON samples" name="sample_file" optional="true" type="data" format="tabular" help="scripts/snaptron/samples.tsv.gz"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_SNAPTRON_SAMPLES@"><![CDATA[
+        #if $sample_file and $sample_file is not None:
+            --sample_file '$sample_file'
+        #end if
+    ]]></token>
+
+    <xml name="input_filename_snaptron_junctions">
+        <param argument="--junction_file" label="SNAPTRON junctions" name="junction_file" optional="true" type="data" format="tabular" help="scripts/snaptron/tp53_junctions.txt.gz"/>
+    </xml>
+
+    <token name="@INPUT_FILENAME_SNAPTRON_JUNCTIONS@"><![CDATA[
+        #if $junction_file and $junction_file is not None:
+            --junction_file '$junction_file'
+        #end if
+    ]]></token>
+
+    <!-- Copy number inputs, offered either directly or nested inside copy_number_conditional. -->
+    <xml name="inputs_copy_number_file">
+        <param argument="--filename_copy_loss" label="File with Copy number loss" name="filename_copy_loss" optional="true" type="data" format="tabular" help="data/copy_number_loss_status.tsv.gz"/>
+        <param argument="--filename_copy_gain" label="File with Copy number gain" name="filename_copy_gain" optional="true" type="data" format="tabular" help="data/copy_number_gain_status.tsv.gz"/>
+    </xml>
+
+    <xml name="inputs_copy_number_class_file">
+        <expand macro="inputs_copy_number_file" />
+        <param argument="--filename_cancer_gene_classification" label="File with cancer gene classification table" name="filename_cancer_gene_classification" optional="true" type="data" format="tabular" help="data/cosmic_cancer_classification.tsv"/>
+    </xml>
+
+    <xml name="inputs_copy_number_file_conditional">
+        <conditional name="copy_number_conditional">
+            <param argument="--copy_number" checked="false" label="Supplement Y matrix with copy number events" name="copy_number" type="boolean" truevalue="--copy_number" falsevalue=""/>
+            <when value="--copy_number">
+                <expand macro="inputs_copy_number_file" />
+            </when>
+            <when value=""/>
+        </conditional>
+    </xml>
+
+    <xml name="inputs_copy_number_class_file_conditional">
+        <conditional name="copy_number_conditional">
+            <param argument="--copy_number" checked="false" label="Supplement Y matrix with copy number events" name="copy_number" type="boolean" truevalue="--copy_number" falsevalue=""/>
+            <when value="--copy_number">
+                <expand macro="inputs_copy_number_class_file" />
+            </when>
+            <when value=""/>
+        </conditional>
+    </xml>
+
+    <!-- Wraps the top-level filename_copy_loss / filename_copy_gain values in an ad-hoc object named
+         copy_number_conditional, the name the fragments below read from. Presumably meant for tools
+         that expand the inputs without the conditional; confirm against the callers. -->
+    <token name="@INPUTS_COPY_NUMBER_FILE_PREFIX@"><![CDATA[
+        #set $copy_number_conditional = type('',(object,),{'filename_copy_loss':$filename_copy_loss,'filename_copy_gain':$filename_copy_gain})()
+    ]]></token>
+
+    <token name="@INPUTS_COPY_NUMBER_FILE@"><![CDATA[
+        #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+            --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+        #end if
+        #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+            --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+        #end if
+    ]]></token>
+
+    <!-- Emits the copy_number flag and its files only when the boolean is checked. The file checks are
+         written out inline; the trailing "##" line keeps the equivalent token commented out. -->
+    <token name="@INPUTS_COPY_NUMBER_FILE_CONDITIONAL@"><![CDATA[
+        #if $copy_number_conditional.copy_number:
+            $copy_number_conditional.copy_number
+            #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+                --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+            #end if
+            #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+                --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+            #end if
+            ##@INPUTS_COPY_NUMBER_FILE@
+        #end if
+    ]]></token>
+
+    <token name="@INPUTS_COPY_NUMBER_CLASS_FILE_PREFIX@"><![CDATA[
+        #set $copy_number_conditional = type('',(object,),{'filename_copy_loss':$filename_copy_loss,'filename_copy_gain':$filename_copy_gain,'filename_cancer_gene_classification':$filename_cancer_gene_classification})()
+    ]]></token>
+
+    <token name="@INPUTS_COPY_NUMBER_CLASS_FILE@"><![CDATA[
+        #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+            --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+        #end if
+        #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+            --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+        #end if
+        #if $copy_number_conditional.filename_cancer_gene_classification and $copy_number_conditional.filename_cancer_gene_classification is not None:
+            --filename_cancer_gene_classification '$copy_number_conditional.filename_cancer_gene_classification'
+        #end if
+    ]]></token>
+
+    <token name="@INPUTS_COPY_NUMBER_CLASS_FILE_CONDITIONAL@"><![CDATA[
+        #if $copy_number_conditional.copy_number:
+            $copy_number_conditional.copy_number
+            ##
+            #if $copy_number_conditional.filename_copy_loss and $copy_number_conditional.filename_copy_loss is not None:
+                --filename_copy_loss '$copy_number_conditional.filename_copy_loss'
+            #end if
+            #if $copy_number_conditional.filename_copy_gain and $copy_number_conditional.filename_copy_gain is not None:
+                --filename_copy_gain '$copy_number_conditional.filename_copy_gain'
+            #end if
+            #if $copy_number_conditional.filename_cancer_gene_classification and $copy_number_conditional.filename_cancer_gene_classification is not None:
+                --filename_cancer_gene_classification '$copy_number_conditional.filename_cancer_gene_classification'
+            #end if
+            ##@INPUTS_COPY_NUMBER_CLASS_FILE@
+        #end if
+    ]]></token>
+
+    <!-- Boolean switch whose checked value is the flag itself, so the token is just the param value. -->
+    <xml name="input_remove_hyper">
+        <param argument="--remove_hyper" checked="false" label="Remove hypermutated samples" name="remove_hyper" type="boolean" truevalue="--remove_hyper" falsevalue=""/>
+    </xml>
+    <token name="@INPUT_REMOVE_HYPER@"><![CDATA[$remove_hyper]]></token>
+
+    <!-- Value grids for the parameter sweep, entered as comma separated text. -->
+    <xml name="input_alphas">
+        <param argument="--alphas" label="the alphas for parameter sweep" name="alphas" optional="true" type="text" value="0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7"/>
+    </xml>
+
+    <token name="@INPUTS_ALPHAS@"><![CDATA[
+        #if $alphas and $alphas is not None:
+            --alphas '$alphas'
+        #end if
+    ]]></token>
+
+    <xml name="input_l1_ratios">
+        <param argument="--l1_ratios" label="the l1 ratios for parameter sweep" name="l1_ratios" optional="true" type="text" value="0.1,0.125,0.15,0.2,0.25,0.3,0.35"/>
+    </xml>
+
+    <token name="@INPUTS_L1_RATIOS@"><![CDATA[
+        #if $l1_ratios and $l1_ratios is not None:
+            --l1_ratios '$l1_ratios'
+        #end if
+    ]]></token>
+
+    <!-- Citation wrapper; child elements of the expanding tag are inserted at the yield point. -->
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>