changeset 0:0cee38fb62af draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
author mvdbeek
date Fri, 27 Apr 2018 09:48:24 -0400
parents
children e9268619b503
files plot_corr.py plot_corr.xml test-data/1.txt test-data/2.txt test-data/3.txt test-data/corr.pdf test-data/corr.png test-data/corr.tab
diffstat 8 files changed, 731 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_corr.py	Fri Apr 27 09:48:24 2018 -0400
@@ -0,0 +1,51 @@
+import os
+import click
+import matplotlib
+matplotlib.use('Agg')  # non-interactive backend: figures are only saved to files, so no Tk/display is needed
+
+import pandas as pd  # noqa: E402
+import seaborn as sns   # noqa: E402
+
+
+def get_dataframe(files, labels, column, skiprows=1, sep='\t'):
+    """Return a DataFrame with one column per file (named by its label), read from 0-based `column` after skipping `skiprows` rows."""
+    read_options = dict(usecols=[column], sep=sep, skiprows=skiprows, header=None)
+    # `.iloc[:, 0]` replaces `squeeze=True`, which pandas deprecated in 1.4 and removed in 2.0
+    return pd.DataFrame.from_dict({label: pd.read_csv(path, **read_options).iloc[:, 0] for path, label in zip(files, labels)})
+
+
+def plot_correlation(df, plot_path=None, method='pearson', correlation_matrix_path=None, figsize=(12, 12)):  # correlate df's columns; write table and/or plot
+    matrix = df.corr(method=method)
+    if correlation_matrix_path:  # optional tab-separated dump of the correlation matrix
+        matrix.to_csv(correlation_matrix_path, sep="\t")
+    if plot_path:  # optional annotated, clustered heatmap of the same matrix
+        cluster_grid = sns.clustermap(matrix, annot=True, method="centroid", figsize=figsize, cbar_kws=dict(label="%s correlation" % method))
+        cluster_grid.fig.suptitle("Cluster based on %s correlation for all samples" % method)
+        cluster_grid.savefig(plot_path, bbox_inches='tight')
+
+
+@click.command()
+@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True)
+@click.option("-c", "--column", help="Use this numeric column to calculate correlation across files", default=1, required=True)
+@click.option("--labels", help="File containing a list of labels, one label per line. Must match number of files", type=click.Path(exists=True), required=False)
+@click.option("--plot_path", help="Write correlation plot to this path", type=click.Path(exists=False), required=False)
+@click.option("--correlation_matrix_path", help="Write correlation matrix to this path", type=click.Path(exists=False), required=False)
+@click.option("--method", help="Use this method for calculating the correlation", required=False, default="pearson", type=click.Choice(['pearson', 'spearman', 'kendall']))
+@click.option("--skiprows", help="Skip this number of rows", required=False, default=0)
+@click.option("--sep", help="Use this field separator when reading files", required=False, default="\t")
+def main(files, column, labels=None, method="pearson", skiprows=1, plot_path=None, correlation_matrix_path=None, figsize=(12, 12), sep='\t'):
+    """Plot a clustered heatmap of the chosen correlation and/or write the matrix of correlation values."""
+    if labels:
+        with open(labels) as label_file:  # close the label file as soon as it has been read
+            labels = [line.strip() for line in label_file if line.strip()]
+        if len(labels) != len(files):  # explicit check: an `assert` would be stripped under `python -O`
+            raise click.UsageError("Got %d files for plotting, but %d labels. Label and file length must be equal" % (len(files), len(labels)))
+    else:
+        labels = [os.path.basename(f) for f in files]
+    column = column if column == -1 else column - 1  # adjust for 0-based column selection; -1 is passed through unchanged
+    df = get_dataframe(files, labels, column=column, skiprows=skiprows, sep=sep)
+    plot_correlation(df, plot_path=plot_path, correlation_matrix_path=correlation_matrix_path, figsize=figsize, method=method)
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_corr.xml	Fri Apr 27 09:48:24 2018 -0400
@@ -0,0 +1,79 @@
+<tool id="plot_correlation" name="Plot correlations" version="0.1.3">
+    <description>for multiple samples</description>
+    <requirements>
+        <requirement type="package" version="0.8.1">seaborn</requirement>
+        <requirement type="package" version="6.7">click</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/plot_corr.py'
+#echo "".join(["'%s' " % f for f in $input_files])
+-c $c
+--labels '$labels'
+#if $plot_type != "None"
+    --plot_path output.$plot_type
+#end if
+--correlation_matrix_path '$tab_output'
+--method $method
+--skiprows $skiprows
+#if $input_files[0].ext == 'csv'
+--sep ","
+#end if
+#if $plot_type != "None"
+    && mv output.$plot_type plot
+#end if
+    ]]></command>
+<configfiles>
+    <configfile name="labels">#for f in $input_files
+$f.element_identifier
+#end for</configfile>
+</configfiles>
+    <inputs>
+        <param name="input_files" type="data" multiple="true" format="tabular,csv" label="Select the files for which to calculate the correlation"/>
+        <param argument="-c" type="data_column" data_ref="input_files" value="1" label="Select the numeric column to use"/>
+        <param argument="--skiprows" type="integer" min="0" value="0" label="Skip the first N rows" help="Make sure that all values are numeric"/>
+        <param argument="--plot_type" type="select" label="Select the type of plot to produce">
+            <option value="pdf">PDF</option>
+            <option value="png">PNG</option>
+            <option value="None">Don't output plot</option>
+        </param>
+        <param argument="--method" type="select" label="Select the correlation method">
+            <option value="pearson">Pearson Correlation</option>
+            <option value="spearman">Spearman Correlation</option>
+            <option value="kendall">Kendall-Tau Correlation</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="plot_output" format="pdf" from_work_dir="plot" label="${tool.name} plot on ${on_string}">
+            <filter>str(plot_type) != "None"</filter>
+            <change_format>
+                <when input="plot_type" value="png" format="png"/>
+            </change_format>
+        </data>
+        <data name="tab_output" format="tabular" label="${tool.name} table on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_files" value="1.txt,2.txt,3.txt" ftype="tabular"/>
+            <output name="plot_output" value="corr.pdf" ftype="pdf"/>
+            <output name="tab_output" value="corr.tab" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="input_files" value="1.txt,2.txt,3.txt" ftype="tabular"/>
+            <param name="plot_type" value="png"/>
+            <output name="plot_output" value="corr.png" ftype="png"/>
+            <output name="tab_output" value="corr.tab" ftype="tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+------------
+
+This tool calculates the correlation for a selected column across many datasets.
+
+Example output:
+---------------
+
+.. image:: $PATH_TO_IMAGES/corr.png
+
+        ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.txt	Fri Apr 27 09:48:24 2018 -0400
@@ -0,0 +1,199 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.340878569626861
+0.783114037679612
+0.985023999605682
+0.93854871150263
+0.723533523775476
+0.459379001980574
+0.45866193521824
+0.54595594654851
+-0.341555820598067
+-0.713125843105172
+-0.457284817064768
+-0.0397055874053696
+-0.464690568994232
+-1.08876081934037
+-0.311298850787743
+0.0294159530936283
+-0.401401782084653
+-0.401401782084653
+-0.173321824267136
+0
+0
+-0.0748881183635566
+-0.146079985637596
+-0.400183882725489
+-0.195444961520949
+0.0718968413352789
+0
+0
+0.00239427610664721
+0.0396732567708672
+-0.144984588482702
+-0.0748881183635566
+0.237081127101969
+1.47544582386528
+1.75356074837402
+1.27285838538621
+0.580299085618157
+1.75145187202761
+2.65512807488557
+1.22842280306334
+0.75217724057521
+1.68241392987815
+1.61127623856826
+1.81229285390482
+0.26543381745884
+-0.0560753433086623
+-0.594642661237038
+0.747855337690327
+0.402578158832136
+0
+0
+0
+-0.163144363969827
+-0.141726247263969
+1.07616290739433
+0.608702956731739
+-0.125443481839913
+-0.133394582942526
+-0.0748881183635566
+0.0618486977780613
+0.215285979214162
+-0.0748881183635566
+-0.146079985637596
+-0.099011194760994
+-0.468438726089802
+-0.441934619465136
+-0.267244941618625
+-0.362339056192987
+-0.671517353435421
+-0.616163353069114
+-0.352817974430892
+-0.146079985637596
+-0.0748881183635566
+-0.686080813179982
+-1.43559617507953
+-0.567560086760216
+-0.117049257132813
+-0.0266496935789866
+0.00173678667295641
+-0.146079985637596
+-0.616163353069114
+-0.050354814742703
+-0.00529137795437628
+-0.146079985637596
+-0.146079985637596
+-0.611139861231487
+-0.565123498933311
+-0.616163353069114
+-1.00885089419105
+-0.241998837850606
+-0.335707389274849
+-0.4447529692261
+-0.47921410412305
+-0.577422842153099
+-0.487599449923428
+-0.0296158372481541
+-0.0790407982826758
+-0.0422892971579304
+-0.65064635069254
+-0.253151620830461
+-0.524636573410633
+-0.285378089555842
+0.155893141115969
+0.123858981710829
+0.0189774904485757
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.0836550596576762
+0.438259494654197
+-0.224064958303134
+-0.146079985637596
+0
+0.140397860850605
+-0.136723672942153
+-0.340729461054075
+-0.287700441570664
+-1.11912088481286
+-1.4191961137097
+-0.38500268510384
+-0.608270021114394
+-1.24336645475494
+-0.167975497346332
+-0.213585194236977
+-0.463937540889709
+-0.400183882725489
+-0.498674642477199
+-0.213858799244627
+-0.227117442560717
+-0.110923140622157
+-0.400183882725489
+-0.037929920762671
+0.00073188131666976
+0.00239427610664721
+0.00239427610664721
+0.183548175980195
+-0.0737986541129008
+0.926400226550786
+1.73241630640993
+2.98322348831199
+1.91345286310638
+1.51238679064264
+1.44378679878595
+-0.778600165728515
+-0.889449873690397
+-0.713125843105172
+-0.180400439635403
+0
+0.18865478327894
+0.478062651498562
+0.402578158832136
+0.402578158832136
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.txt	Fri Apr 27 09:48:24 2018 -0400
@@ -0,0 +1,199 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.570680842540193
+1.09127163907536
+1.60025048328575
+1.50591112428121
+1.53090357285913
+1.12115660736468
+1.04165048293942
+0.392317485068746
+-0.333499309937955
+-0.595866635547588
+-0.376629478375865
+0.0842219048551826
+0.512293699990611
+0.0638111943294771
+0.0680047614501333
+0.10652071565967
+-0.000377644662777674
+0.0433889569444438
+0.0361857071486257
+0
+0
+-0.0602065451006225
+-0.11800088111536
+-0.0929922441678113
+0.101265807974036
+0.113464063990748
+0
+0
+0.238995048720927
+0.555270578315121
+0.188514310496358
+0.0443924005958465
+0.51923228145664
+2.07578742313746
+2.53880609638676
+2.18407050737506
+1.42991407877687
+2.30513023319817
+3.07724318220236
+1.64032305399604
+1.41815878149883
+2.41242761410088
+2.43691649428345
+2.28993041874246
+0.299064275777494
+0.021827789080669
+-0.221285484082794
+0.573603177239845
+0.300271538068134
+0
+-0.030417317182508
+-0.0602065451006225
+0.0597263502296259
+0.611928711962974
+1.61752991342197
+1.2700001019322
+0.485445549650901
+-0.188359560598637
+-0.146052400693485
+0.116300287436039
+0.0857241854722652
+-0.030417317182508
+-0.0602065451006225
+0.0191786870600221
+-0.0197101965744905
+-0.20516307801075
+-0.116508876497406
+-0.100942790105228
+-0.546281331969727
+-0.554603772852006
+-0.308761713619314
+-0.173568877983246
+-0.253102904654661
+-0.734288382653597
+-1.0143464714691
+-0.404049154810451
+-0.110054380199101
+0.321882277574593
+0.25632329440684
+-0.240148003651987
+-0.518271283456564
+-0.0797297060843935
+-0.0231337092364335
+-0.0602065451006225
+-0.0676992293759263
+-0.423249669024149
+-0.44596781811035
+-0.512125905341891
+-0.87262614807418
+-0.206825097408425
+-0.224055547216995
+0.0887091997943875
+-0.47089714203616
+-0.3926977882848
+0.38432942808505
+0.308369048461799
+0.6791419925113
+0.429688296761981
+0.0436816186988902
+0.0339676336666199
+-0.100650041164653
+-0.225867086565635
+0.0491194838969109
+-0.000221976756635183
+-3.27839226078609e-05
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.139512215256645
+0.715167605182794
+0.0128842916328488
+-0.278668852889821
+0
+0.0565376318136268
+-0.00024192108667959
+-0.227075717256281
+-0.159291642394799
+-0.676304780023023
+-0.728102194952582
+-0.166723669064738
+-0.711738503160839
+-0.63777343523279
+-0.101015632321306
+-0.0841183748684746
+-0.418774928833871
+-0.328480460616235
+-0.376629478375865
+-0.166814032631594
+0.115821691473219
+0.0146031726777319
+-0.227075717256281
+-0.0453888169621028
+0.0565376318136268
+0.183267800062878
+0.183267800062878
+0.57185292720238
+0.786127605052998
+1.64842522568533
+2.27032427314837
+3.78080281428422
+3.13134829674951
+2.3348012072879
+1.65307392175147
+0.108188185447197
+-0.000492854324334838
+-0.180842916120512
+-0.0649590802273921
+-0.0404155599717056
+-0.000221976756635183
+0.173368229007495
+0.0513711588769046
+0.0513711588769046
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3.txt	Fri Apr 27 09:48:24 2018 -0400
@@ -0,0 +1,199 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+-0.00516453559818209
+-0.0205481328793524
+-0.0307134820917063
+0.643067329643611
+1.55327354658444
+2.05395883600638
+1.8485408786659
+1.82516135186822
+1.6217760889074
+1.62897705755179
+0.971182460361824
+0.156660883535125
+0.127775374015235
+0.135010755726777
+0.542939010074629
+0.744330138490134
+0.597246050412643
+0.657133070688299
+0.346534408347958
+-0.0245163429088547
+-0.0139431460176159
+-0.0103487933749282
+-0.0103106492052688
+-0.0154384717774205
+-0.03829076379033
+-0.0607867125996615
+-0.0479737996899171
+0.0966985442338963
+0.0800373111167644
+-0.00773988693300774
+-0.00773988693300774
+0.390707120926344
+0.84557965619107
+0.68862282176212
+0.252380557759054
+0.578661190397868
+2.67747479840647
+3.06299611887981
+2.51938197834922
+1.9661531133494
+2.7005379234909
+3.57941962956699
+2.0372572607031
+1.74881150691715
+2.7673496630897
+2.64626125313048
+2.94407556915418
+0.519799593798434
+-0.12883780640624
+0.032334599017083
+0.569621956213534
+0.260212608552839
+0
+-0.0154384717774205
+-0.0307134820917063
+0.165837377212996
+0.908892576494339
+1.99701663216898
+1.56011053456701
+0.66338446079209
+0.0339859538062707
+-0.0151027085132735
+0.421833511505335
+0.118090946049965
+-0.0154384717774205
+-0.00399886774792812
+0.120535390215508
+-0.0119747604416604
+-0.118006428596147
+-0.102289217979718
+-0.109088998145182
+-0.411680278962258
+-0.35809957947193
+-0.253555441247913
+-0.135915307930659
+-0.287532814323564
+-0.728591834224069
+-1.11303503477544
+-0.619145980148498
+-0.259915666746712
+0.436406900825769
+0.349079400982403
+0.0405931055914946
+-0.242916799233507
+-0.0836047687464678
+0.0565601134633812
+0.0284212689385437
+-0.163380839539776
+-0.484596736082457
+-0.332896053948832
+-0.40354898270941
+-0.648090691311062
+-0.338823373781745
+-0.287758240179105
+0.500525235802173
+-0.0729205133690652
+-0.401658865500543
+0.629195126721701
+0.758631057489393
+1.09143642596256
+0.565345036665179
+0.163201739198077
+-0.0598547120512539
+-0.258379203064883
+0.123968606604721
+0.203131608539123
+0.20277939185342
+0.0313742103972228
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.141086272493922
+0.707959960298169
+0.173000900218851
+-0.105128197823929
+-0.00773988693300774
+0.103510624785032
+-0.148181584376348
+-0.293877189116431
+-0.193187017449263
+0.129553618242327
+-0.00983965633013711
+0.0278223912802781
+-0.686754347603887
+-0.388633393547178
+-0.0326841598036203
+0.0182494822016573
+-0.349675044282807
+-0.202442038801667
+-0.167197985214767
+0.029749358080811
+0.272032074411685
+0.0205281145027709
+-0.0724479907461126
+0.0195170611639227
+0.155199750844578
+0.432032883161112
+0.432032883161112
+0.678959692548691
+0.861347033282818
+1.65249778123284
+2.42202818938149
+4.15845547013568
+3.54858169138378
+2.77629728427456
+2.01251615020891
+0.426321746003957
+0.143329923099115
+-0.0360137424037682
+-0.0278351723545067
+0.00479719455674217
+0.195861339146108
+0.213625337630196
+-0.117951109200658
+-0.117951109200658
Binary file test-data/corr.pdf has changed
Binary file test-data/corr.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/corr.tab	Fri Apr 27 09:48:24 2018 -0400
@@ -0,0 +1,4 @@
+	1.txt	2.txt	3.txt
+1.txt	1.0	0.9240450294117696	0.8726802298055488
+2.txt	0.9240450294117696	1.0	0.9784345740033148
+3.txt	0.8726802298055488	0.9784345740033148	1.0