Mercurial > repos > mvdbeek > plot_correlation_matrix
annotate plot_corr.py @ 0:0cee38fb62af draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
author | mvdbeek |
---|---|
date | Fri, 27 Apr 2018 09:48:24 -0400 |
parents | |
children | e9268619b503 |
rev | line source |
---|---|
0
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
1 import os |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
2 import click |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
3 import matplotlib |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
4 matplotlib.use('tkagg') |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
5 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
6 import pandas as pd # noqa: E402 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
7 import seaborn as sns # noqa: E402 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
8 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
9 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
def get_dataframe(files, labels, column, skiprows=1, sep='\t'):
    """Collect one column from each file into a single DataFrame.

    Every file is parsed with ``pandas.read_csv`` without a header row, after
    skipping the first ``skiprows`` lines. The selected column becomes a
    column of the result, named after the label paired with that file.

    :param files: paths of the delimited text files to read
    :param labels: column names for the result, paired with ``files`` in order
    :param column: 0-based position of the column to extract; a negative
        value counts from the end (``-1`` is the last column)
    :param skiprows: number of leading lines to skip in every file
    :param sep: field separator used in the files
    :return: DataFrame holding one column per label
    """
    series_by_label = {}
    for path, label in zip(files, labels):
        if column < 0:
            # ``usecols`` is documented for positions counted from the start,
            # so read every column and index from the end instead.
            table = pd.read_csv(path, sep=sep, skiprows=skiprows, header=None)
            series_by_label[label] = table.iloc[:, column]
        else:
            # The ``squeeze`` argument of read_csv was removed in pandas 2.0.
            # Selecting the only parsed column explicitly works on every
            # pandas version and always yields a Series, even for one row.
            table = pd.read_csv(path, usecols=[column], sep=sep, skiprows=skiprows, header=None)
            series_by_label[label] = table.iloc[:, 0]
    return pd.DataFrame.from_dict(series_by_label)
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
15 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
16 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
def plot_correlation(df, plot_path=None, method='pearson', correlation_matrix_path=None, figsize=(12, 12)):
    """Compute the pairwise column correlation of ``df`` and export it.

    :param df: DataFrame whose columns are correlated against each other
    :param plot_path: if set, a seaborn clustermap of the matrix is saved here
    :param method: correlation method handed to ``DataFrame.corr``
    :param correlation_matrix_path: if set, the matrix is written here as a
        tab-separated table
    :param figsize: figure size passed to ``seaborn.clustermap``
    """
    correlation = df.corr(method=method)

    if correlation_matrix_path:
        correlation.to_csv(correlation_matrix_path, sep="\t")

    # Nothing left to do when no plot was requested.
    if not plot_path:
        return

    colorbar_options = {'label': "%s correlation" % method}
    grid = sns.clustermap(
        correlation,
        annot=True,
        method="centroid",
        figsize=figsize,
        cbar_kws=colorbar_options,
    )
    grid.fig.suptitle("Cluster based on %s correlation for all samples" % method)
    grid.savefig(plot_path, bbox_inches='tight')
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
25 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
26 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
@click.command()
@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True)
@click.option("-c", "--column", help="Use this numeric column to calculate correlation across files", default=1, required=True)
@click.option("--labels", help="File containing a list of labels, one label per line. Must match number of files", type=click.Path(exists=True), required=False)
@click.option("--plot_path", help="Write correlation plot to this path", type=click.Path(exists=False), required=False)
@click.option("--correlation_matrix_path", help="Write correlation matrix to this path", type=click.Path(exists=False), required=False)
@click.option("--method", help="Use this method for calculating the correlation", required=False, default='pearson', type=click.Choice(['pearson', 'spearman', 'kendall']))
@click.option("--skiprows", help="Skip this number of rows", required=False, default=0)
@click.option("--sep", help="Use this field separator when reading files", required=False, default="\t")
def main(files, column, labels=None, method="pearson", skiprows=1, plot_path=None, correlation_matrix_path=None, figsize=(12, 12), sep='\t'):
    """Plot a clustered heatmap of the correlation between files and/or write the correlation matrix."""
    # click passes a value for every declared option, so the option defaults
    # above win over the signature defaults. That is why --method needs an
    # explicit default: without one, method would arrive as None.
    if labels:
        # Read one label per line, ignoring blank lines, and close the file.
        with open(labels) as label_file:
            labels = [line.strip() for line in label_file if line.strip()]
        # Raise instead of assert so the check survives ``python -O`` and the
        # user gets a regular command-line usage error.
        if len(labels) != len(files):
            raise click.UsageError("Got %d files for plotting, but %d labels. Label and file length must be equal" % (len(files), len(labels)))
    if not labels:
        # No label file given: fall back to the file names.
        labels = [os.path.basename(f) for f in files]
    if column != -1:
        # Adjust for 0-based column selection; -1 is passed through unchanged.
        column -= 1
    df = get_dataframe(files, labels, column=column, skiprows=skiprows, sep=sep)
    plot_correlation(df, plot_path=plot_path, correlation_matrix_path=correlation_matrix_path, figsize=figsize, method=method)
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
48 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
49 |
0cee38fb62af
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/plot_correlation_matrix commit 8451e17775c0ffcd7fa63ef65081f442bef91496
mvdbeek
parents:
diff
changeset
|
# Run the click command only when the file is executed as a script.
if __name__ == "__main__":
    main()