Mercurial > repos > mvdbeek > plot_correlation_matrix
changeset 0:0cee38fb62af draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
author | mvdbeek |
---|---|
date | Fri, 27 Apr 2018 09:48:24 -0400 |
parents | |
children | e9268619b503 |
files | plot_corr.py plot_corr.xml test-data/1.txt test-data/2.txt test-data/3.txt test-data/corr.pdf test-data/corr.png test-data/corr.tab |
diffstat | 8 files changed, 731 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""plot_corr.py: correlate one numeric column across several delimited files.

Each input file contributes one column of values.  The pairwise correlation
between the files is optionally written as a tab-separated matrix and/or
drawn as a clustered heatmap.
"""
import os

import click
import matplotlib
# The backend has to be selected before seaborn pulls in pyplot, hence the
# E402 suppressions on the imports below.
# NOTE(review): 'tkagg' is an interactive backend; on a machine without a
# display a file-only backend such as 'Agg' may be required -- confirm.
matplotlib.use('tkagg')

import pandas as pd  # noqa: E402
import seaborn as sns  # noqa: E402


def get_dataframe(files, labels, column, skiprows=1, sep='\t'):
    """Read one column from every file and collect them in a DataFrame.

    files    -- paths of the delimited text files to read
    labels   -- column names for the result, paired with ``files`` by position
    column   -- column index handed to ``pandas.read_csv(usecols=[...])``
    skiprows -- number of leading rows to skip in every file
    sep      -- field separator of the input files

    Returns a DataFrame with one column per (file, label) pair.
    """
    series_by_label = {}
    for path, label in zip(files, labels):
        frame = pd.read_csv(path, usecols=[column], sep=sep, skiprows=skiprows, header=None)
        # Only one column is requested, so take it as a Series.  This replaces
        # ``squeeze=True``, which newer pandas releases no longer accept.
        series_by_label[label] = frame.iloc[:, 0]
    return pd.DataFrame.from_dict(series_by_label)


def plot_correlation(df, plot_path=None, method='pearson', correlation_matrix_path=None, figsize=(12, 12)):
    """Compute the column-wise correlation of ``df`` and write the results.

    df                      -- DataFrame with one column per sample
    plot_path               -- if set, save a clustered heatmap to this path
    method                  -- correlation method passed to ``DataFrame.corr``
    correlation_matrix_path -- if set, write the matrix here, tab-separated
    figsize                 -- figure size passed to ``seaborn.clustermap``
    """
    corr = df.corr(method=method)
    if correlation_matrix_path:
        corr.to_csv(correlation_matrix_path, sep="\t")
    if plot_path:
        g = sns.clustermap(corr, annot=True, method="centroid", figsize=figsize, cbar_kws={'label': "%s correlation" % method})
        g.fig.suptitle("Cluster based on %s correlation for all samples" % method)
        g.savefig(plot_path, bbox_inches='tight')


def _read_labels(labels_path):
    """Return the non-blank, stripped lines of the labels file."""
    with open(labels_path) as handle:
        return [line.strip() for line in handle if line.strip()]


# click always passes every declared option to ``main``, so the defaults that
# matter are the ones declared on the options, not the ones in the signature.
# ``--method`` therefore needs an explicit default: without it click would
# pass ``None`` on to ``DataFrame.corr``.
@click.command()
@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True)
@click.option("-c", "--column", help="Use this numeric column to calculate correlation across files", default=1, required=True)
@click.option("--labels", help="File containing a list of labels, one label per line. Must match number of files", type=click.Path(exists=True), required=False)
@click.option("--plot_path", help="Write correlation plot to this path", type=click.Path(exists=False), required=False)
@click.option("--correlation_matrix_path", help="Write correlation matrix to this path", type=click.Path(exists=False), required=False)
@click.option("--method", help="Use this method for calculating the correlation", required=False, default="pearson", type=click.Choice(['pearson', 'spearman', 'kendall']))
@click.option("--skiprows", help="Skip this number of rows", required=False, default=0)
@click.option("--sep", help="Use this field separator when reading files", required=False, default="\t")
def main(files, column, labels=None, method="pearson", skiprows=1, plot_path=None, correlation_matrix_path=None, figsize=(12, 12), sep='\t'):
    """Plot heatmap of the correlation between files and/or write the matrix of correlation values."""
    if labels:
        labels = _read_labels(labels)
        if len(labels) != len(files):
            # Raised explicitly rather than asserted so the check also runs
            # under ``python -O`` and is reported as a usage error.
            raise click.UsageError(
                "Got %d files for plotting, but %d labels. Label and file length must be equal" % (len(files), len(labels))
            )
    else:
        # No labels file given: fall back to the file names.
        labels = [os.path.basename(f) for f in files]
    if column != -1:
        # Adjust for 0-based column selection
        # NOTE(review): -1 is passed through unchanged, presumably to mean
        # "last column" -- confirm that read_csv(usecols=[-1]) supports this.
        column -= 1
    df = get_dataframe(files, labels, column=column, skiprows=skiprows, sep=sep)
    plot_correlation(df, plot_path=plot_path, correlation_matrix_path=correlation_matrix_path, figsize=figsize, method=method)


if __name__ == '__main__':
    main()
<tool id="plot_correlation" name="Plot correlations" version="0.1.3">
    <!-- Galaxy wrapper for plot_corr.py: correlates one column across the
         selected datasets and emits a correlation table plus an optional
         clustered heatmap. -->
    <description>for multiple samples</description>
    <requirements>
        <requirement type="package" version="0.8.1">seaborn</requirement>
        <requirement type="package" version="6.7">click</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/plot_corr.py'
## every selected dataset becomes one single-quoted positional argument
#echo "".join(["'%s' " % f for f in $input_files])
-c $c
--labels '$labels'
#if $plot_type != "None"
    --plot_path output.$plot_type
#end if
--correlation_matrix_path '$tab_output'
--method $method
--skiprows $skiprows
#if $input_files[0].ext == 'csv'
--sep ","
#end if
## the plot is written with its file extension, then renamed to the
## name that the plot_output dataset collects from the working directory
#if $plot_type != "None"
    && mv output.$plot_type plot
#end if
    ]]></command>
    <!-- One label per input dataset (its element identifier), in input order. -->
    <configfiles>
        <configfile name="labels">#for f in $input_files
$f.element_identifier
#end for</configfile>
    </configfiles>
    <inputs>
        <param name="input_files" type="data" multiple="true" format="tabular,csv" label="Select the files for which to calculate the correlation"/>
        <param argument="-c" type="data_column" data_ref="input_files" value="1" label="Select the numeric column to use"/>
        <param argument="--skiprows" type="integer" min="0" value="0" label="Skip the first N rows" help="Make sure that all values are numeric"/>
        <param argument="--plot_type" type="select" label="Select the type of plot to produce">
            <option value="pdf">PDF</option>
            <option value="png">PNG</option>
            <option value="None">Don't output plot</option>
        </param>
        <param argument="--method" type="select" label="Select the correlation method">
            <option value="pearson">Pearson Correlation</option>
            <option value="spearman">Spearman Correlation</option>
            <option value="kendall">Kendall-Tau Correlation</option>
        </param>
    </inputs>
    <outputs>
        <!-- Only created when a plot type was chosen; format follows plot_type. -->
        <data name="plot_output" format="pdf" from_work_dir="plot" label="${tool.name} plot on ${on_string}">
            <filter>str(plot_type) != "None"</filter>
            <change_format>
                <when input="plot_type" value="png" format="png"/>
            </change_format>
        </data>
        <data name="tab_output" format="tabular" label="${tool.name} table on ${on_string}"/>
    </outputs>
    <tests>
        <test>
            <param name="input_files" value="1.txt,2.txt,3.txt" ftype="tabular"/>
            <output name="plot_output" value="corr.pdf" ftype="pdf"/>
            <output name="tab_output" value="corr.tab" ftype="tabular"/>
        </test>
        <test>
            <param name="input_files" value="1.txt,2.txt,3.txt" ftype="tabular"/>
            <param name="plot_type" value="png"/>
            <output name="plot_output" value="corr.png" ftype="png"/>
            <output name="tab_output" value="corr.tab" ftype="tabular"/>
        </test>
    </tests>
    <help><![CDATA[
What it does
------------

This tool calculates the correlation for a selected column across many datasets.

Example output:
---------------

.. image:: $PATH_TO_IMAGES/corr.png

    ]]></help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.txt Fri Apr 27 09:48:24 2018 -0400 @@ -0,0 +1,199 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0.340878569626861 +0.783114037679612 +0.985023999605682 +0.93854871150263 +0.723533523775476 +0.459379001980574 +0.45866193521824 +0.54595594654851 +-0.341555820598067 +-0.713125843105172 +-0.457284817064768 +-0.0397055874053696 +-0.464690568994232 +-1.08876081934037 +-0.311298850787743 +0.0294159530936283 +-0.401401782084653 +-0.401401782084653 +-0.173321824267136 +0 +0 +-0.0748881183635566 +-0.146079985637596 +-0.400183882725489 +-0.195444961520949 +0.0718968413352789 +0 +0 +0.00239427610664721 +0.0396732567708672 +-0.144984588482702 +-0.0748881183635566 +0.237081127101969 +1.47544582386528 +1.75356074837402 +1.27285838538621 +0.580299085618157 +1.75145187202761 +2.65512807488557 +1.22842280306334 +0.75217724057521 +1.68241392987815 +1.61127623856826 +1.81229285390482 +0.26543381745884 +-0.0560753433086623 +-0.594642661237038 +0.747855337690327 +0.402578158832136 +0 +0 +0 +-0.163144363969827 +-0.141726247263969 +1.07616290739433 +0.608702956731739 +-0.125443481839913 +-0.133394582942526 +-0.0748881183635566 +0.0618486977780613 +0.215285979214162 +-0.0748881183635566 +-0.146079985637596 +-0.099011194760994 +-0.468438726089802 +-0.441934619465136 +-0.267244941618625 +-0.362339056192987 +-0.671517353435421 +-0.616163353069114 +-0.352817974430892 +-0.146079985637596 +-0.0748881183635566 +-0.686080813179982 +-1.43559617507953 +-0.567560086760216 +-0.117049257132813 +-0.0266496935789866 +0.00173678667295641 +-0.146079985637596 +-0.616163353069114 +-0.050354814742703 +-0.00529137795437628 +-0.146079985637596 +-0.146079985637596 +-0.611139861231487 +-0.565123498933311 +-0.616163353069114 +-1.00885089419105 +-0.241998837850606 +-0.335707389274849 +-0.4447529692261 +-0.47921410412305 +-0.577422842153099 +-0.487599449923428 +-0.0296158372481541 
+-0.0790407982826758 +-0.0422892971579304 +-0.65064635069254 +-0.253151620830461 +-0.524636573410633 +-0.285378089555842 +0.155893141115969 +0.123858981710829 +0.0189774904485757 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0.0836550596576762 +0.438259494654197 +-0.224064958303134 +-0.146079985637596 +0 +0.140397860850605 +-0.136723672942153 +-0.340729461054075 +-0.287700441570664 +-1.11912088481286 +-1.4191961137097 +-0.38500268510384 +-0.608270021114394 +-1.24336645475494 +-0.167975497346332 +-0.213585194236977 +-0.463937540889709 +-0.400183882725489 +-0.498674642477199 +-0.213858799244627 +-0.227117442560717 +-0.110923140622157 +-0.400183882725489 +-0.037929920762671 +0.00073188131666976 +0.00239427610664721 +0.00239427610664721 +0.183548175980195 +-0.0737986541129008 +0.926400226550786 +1.73241630640993 +2.98322348831199 +1.91345286310638 +1.51238679064264 +1.44378679878595 +-0.778600165728515 +-0.889449873690397 +-0.713125843105172 +-0.180400439635403 +0 +0.18865478327894 +0.478062651498562 +0.402578158832136 +0.402578158832136
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.txt Fri Apr 27 09:48:24 2018 -0400 @@ -0,0 +1,199 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0.570680842540193 +1.09127163907536 +1.60025048328575 +1.50591112428121 +1.53090357285913 +1.12115660736468 +1.04165048293942 +0.392317485068746 +-0.333499309937955 +-0.595866635547588 +-0.376629478375865 +0.0842219048551826 +0.512293699990611 +0.0638111943294771 +0.0680047614501333 +0.10652071565967 +-0.000377644662777674 +0.0433889569444438 +0.0361857071486257 +0 +0 +-0.0602065451006225 +-0.11800088111536 +-0.0929922441678113 +0.101265807974036 +0.113464063990748 +0 +0 +0.238995048720927 +0.555270578315121 +0.188514310496358 +0.0443924005958465 +0.51923228145664 +2.07578742313746 +2.53880609638676 +2.18407050737506 +1.42991407877687 +2.30513023319817 +3.07724318220236 +1.64032305399604 +1.41815878149883 +2.41242761410088 +2.43691649428345 +2.28993041874246 +0.299064275777494 +0.021827789080669 +-0.221285484082794 +0.573603177239845 +0.300271538068134 +0 +-0.030417317182508 +-0.0602065451006225 +0.0597263502296259 +0.611928711962974 +1.61752991342197 +1.2700001019322 +0.485445549650901 +-0.188359560598637 +-0.146052400693485 +0.116300287436039 +0.0857241854722652 +-0.030417317182508 +-0.0602065451006225 +0.0191786870600221 +-0.0197101965744905 +-0.20516307801075 +-0.116508876497406 +-0.100942790105228 +-0.546281331969727 +-0.554603772852006 +-0.308761713619314 +-0.173568877983246 +-0.253102904654661 +-0.734288382653597 +-1.0143464714691 +-0.404049154810451 +-0.110054380199101 +0.321882277574593 +0.25632329440684 +-0.240148003651987 +-0.518271283456564 +-0.0797297060843935 +-0.0231337092364335 +-0.0602065451006225 +-0.0676992293759263 +-0.423249669024149 +-0.44596781811035 +-0.512125905341891 +-0.87262614807418 +-0.206825097408425 +-0.224055547216995 +0.0887091997943875 +-0.47089714203616 +-0.3926977882848 +0.38432942808505 +0.308369048461799 
+0.6791419925113 +0.429688296761981 +0.0436816186988902 +0.0339676336666199 +-0.100650041164653 +-0.225867086565635 +0.0491194838969109 +-0.000221976756635183 +-3.27839226078609e-05 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0.139512215256645 +0.715167605182794 +0.0128842916328488 +-0.278668852889821 +0 +0.0565376318136268 +-0.00024192108667959 +-0.227075717256281 +-0.159291642394799 +-0.676304780023023 +-0.728102194952582 +-0.166723669064738 +-0.711738503160839 +-0.63777343523279 +-0.101015632321306 +-0.0841183748684746 +-0.418774928833871 +-0.328480460616235 +-0.376629478375865 +-0.166814032631594 +0.115821691473219 +0.0146031726777319 +-0.227075717256281 +-0.0453888169621028 +0.0565376318136268 +0.183267800062878 +0.183267800062878 +0.57185292720238 +0.786127605052998 +1.64842522568533 +2.27032427314837 +3.78080281428422 +3.13134829674951 +2.3348012072879 +1.65307392175147 +0.108188185447197 +-0.000492854324334838 +-0.180842916120512 +-0.0649590802273921 +-0.0404155599717056 +-0.000221976756635183 +0.173368229007495 +0.0513711588769046 +0.0513711588769046
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.txt Fri Apr 27 09:48:24 2018 -0400 @@ -0,0 +1,199 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +-0.00516453559818209 +-0.0205481328793524 +-0.0307134820917063 +0.643067329643611 +1.55327354658444 +2.05395883600638 +1.8485408786659 +1.82516135186822 +1.6217760889074 +1.62897705755179 +0.971182460361824 +0.156660883535125 +0.127775374015235 +0.135010755726777 +0.542939010074629 +0.744330138490134 +0.597246050412643 +0.657133070688299 +0.346534408347958 +-0.0245163429088547 +-0.0139431460176159 +-0.0103487933749282 +-0.0103106492052688 +-0.0154384717774205 +-0.03829076379033 +-0.0607867125996615 +-0.0479737996899171 +0.0966985442338963 +0.0800373111167644 +-0.00773988693300774 +-0.00773988693300774 +0.390707120926344 +0.84557965619107 +0.68862282176212 +0.252380557759054 +0.578661190397868 +2.67747479840647 +3.06299611887981 +2.51938197834922 +1.9661531133494 +2.7005379234909 +3.57941962956699 +2.0372572607031 +1.74881150691715 +2.7673496630897 +2.64626125313048 +2.94407556915418 +0.519799593798434 +-0.12883780640624 +0.032334599017083 +0.569621956213534 +0.260212608552839 +0 +-0.0154384717774205 +-0.0307134820917063 +0.165837377212996 +0.908892576494339 +1.99701663216898 +1.56011053456701 +0.66338446079209 +0.0339859538062707 +-0.0151027085132735 +0.421833511505335 +0.118090946049965 +-0.0154384717774205 +-0.00399886774792812 +0.120535390215508 +-0.0119747604416604 +-0.118006428596147 +-0.102289217979718 +-0.109088998145182 +-0.411680278962258 +-0.35809957947193 +-0.253555441247913 +-0.135915307930659 +-0.287532814323564 +-0.728591834224069 +-1.11303503477544 +-0.619145980148498 +-0.259915666746712 +0.436406900825769 +0.349079400982403 +0.0405931055914946 +-0.242916799233507 +-0.0836047687464678 +0.0565601134633812 +0.0284212689385437 +-0.163380839539776 +-0.484596736082457 +-0.332896053948832 +-0.40354898270941 +-0.648090691311062 +-0.338823373781745 
+-0.287758240179105 +0.500525235802173 +-0.0729205133690652 +-0.401658865500543 +0.629195126721701 +0.758631057489393 +1.09143642596256 +0.565345036665179 +0.163201739198077 +-0.0598547120512539 +-0.258379203064883 +0.123968606604721 +0.203131608539123 +0.20277939185342 +0.0313742103972228 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0.141086272493922 +0.707959960298169 +0.173000900218851 +-0.105128197823929 +-0.00773988693300774 +0.103510624785032 +-0.148181584376348 +-0.293877189116431 +-0.193187017449263 +0.129553618242327 +-0.00983965633013711 +0.0278223912802781 +-0.686754347603887 +-0.388633393547178 +-0.0326841598036203 +0.0182494822016573 +-0.349675044282807 +-0.202442038801667 +-0.167197985214767 +0.029749358080811 +0.272032074411685 +0.0205281145027709 +-0.0724479907461126 +0.0195170611639227 +0.155199750844578 +0.432032883161112 +0.432032883161112 +0.678959692548691 +0.861347033282818 +1.65249778123284 +2.42202818938149 +4.15845547013568 +3.54858169138378 +2.77629728427456 +2.01251615020891 +0.426321746003957 +0.143329923099115 +-0.0360137424037682 +-0.0278351723545067 +0.00479719455674217 +0.195861339146108 +0.213625337630196 +-0.117951109200658 +-0.117951109200658