# HG changeset patch
# User bimib
# Date 1571257556 14400
# Node ID abf0bfe01c784c09bc7ee7e0121c280fefdb0491
# Parent b795e3e163e0041ebbe949f7934197dd60e61e55
Uploaded
diff -r b795e3e163e0 -r abf0bfe01c78 Marea/marea.xml
--- a/Marea/marea.xml Wed Oct 16 07:12:37 2019 -0400
+++ b/Marea/marea.xml Wed Oct 16 16:25:56 2019 -0400
@@ -22,11 +22,11 @@
--custom_map $cond_rule.cond_map.Custom_map
#end if
#end if
-
+
--tool_dir $__tool_directory__
--option $cond.type_selector
- --out_log $log
-
+ --out_log $log
+
#if $cond.type_selector == 'datasets':
--input_datas
#for $data in $cond.input_Datasets:
@@ -43,7 +43,7 @@
--generate_svg ${cond.advanced.generateSvg}
--generate_pdf ${cond.advanced.generatePdf}
--generate_ras ${cond.advanced.generateRas}
- #else
+ #else
--none true
--pValue 0.05
--fChange 1.5
@@ -61,7 +61,7 @@
--generate_svg ${cond.advanced.generateSvg}
--generate_pdf ${cond.advanced.generatePdf}
--generate_ras ${cond.advanced.generateRas}
- #else
+ #else
--none true
--pValue 0.05
--fChange 1.5
@@ -73,7 +73,7 @@
#if $cond.type_selector == 'datasets_rasonly':
--input_datas ${input_Datasets}
--single_ras_file $ras_single
- --none ${None}
+ --none ${cond.None}
#end if
]]>
@@ -108,56 +108,56 @@
-
+
-
+
-
+
-
-
+
+
-
+
-
+
-
+
-
-
+
+
-
-
-
-
+
+
+
+
@@ -173,7 +173,7 @@
cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']
-
+
@@ -189,7 +189,7 @@
This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
-Accepted files are:
+Accepted files are:
- option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");
- option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.
@@ -225,7 +225,7 @@
**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:
-RNA-seq Dataset 1:
+RNA-seq Dataset 1:
@DATASET_EXEMPLE1@
@@ -241,14 +241,14 @@
Class-file:
-+------------+------------+
-| Patient_ID | class |
-+============+============+
-| TCGAAA3529 | MSI |
-+------------+------------+
-| TCGAA62671 | MSS |
-+------------+------------+
-| TCGAA62672 | MSI |
++------------+------------+
+| Patient_ID | class |
++============+============+
+| TCGAAA3529 | MSI |
++------------+------------+
+| TCGAA62671 | MSS |
++------------+------------+
+| TCGAA62672 | MSI |
+------------+------------+
|
@@ -271,3 +271,4 @@
+
diff -r b795e3e163e0 -r abf0bfe01c78 Marea/marea_cluster.py
--- a/Marea/marea_cluster.py Wed Oct 16 07:12:37 2019 -0400
+++ b/Marea/marea_cluster.py Wed Oct 16 16:25:56 2019 -0400
@@ -72,11 +72,11 @@
help = 'your tool directory')
parser.add_argument('-ms', '--min_samples',
- type = int,
+ type = float,
help = 'min samples for dbscan (optional)')
parser.add_argument('-ep', '--eps',
- type = int,
+ type = float,
help = 'eps for dbscan (optional)')
parser.add_argument('-bc', '--best_cluster',
@@ -310,7 +310,7 @@
######################## dbscan ##############################################
-def dbscan(dataset, eps, min_samples):
+def dbscan(dataset, eps, min_samples, best_cluster):
if not os.path.exists('clustering'):
os.makedirs('clustering')
@@ -331,12 +331,15 @@
##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL
-
- write_to_csv(dataset, labels, 'clustering/dbscan_results.tsv')
+ labels = labels
+ predict = [x+1 for x in labels]
+ classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
+ classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
+
########################## hierachical #######################################
-def hierachical_agglomerative(dataset, k_min, k_max):
+def hierachical_agglomerative(dataset, k_min, k_max, best_cluster):
if not os.path.exists('clustering'):
os.makedirs('clustering')
@@ -349,16 +352,28 @@
range_n_clusters = [i for i in range(k_min, k_max+1)]
- for n_clusters in range_n_clusters:
-
+ scores = []
+ labels = []
+ for n_clusters in range_n_clusters:
cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')
cluster.fit_predict(dataset)
cluster_labels = cluster.labels_
-
+ labels.append(cluster_labels)
silhouette_avg = silhouette_score(dataset, cluster_labels)
write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
+ scores.append(silhouette_avg)
#warning("For n_clusters =", n_clusters,
#"The average silhouette_score is :", silhouette_avg)
+
+ best = max_index(scores) + k_min
+
+ for i in range(len(labels)):
+ if (i + k_min == best):
+ labels = labels[i]
+ predict = [x+1 for x in labels]
+ classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
+ classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
+
@@ -390,10 +405,10 @@
kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)
if args.cluster_type == 'dbscan':
- dbscan(X, args.eps, args.min_samples)
+ dbscan(X, args.eps, args.min_samples, args.best_cluster)
if args.cluster_type == 'hierarchy':
- hierachical_agglomerative(X, args.k_min, args.k_max)
+ hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster)
##############################################################################
diff -r b795e3e163e0 -r abf0bfe01c78 Marea/marea_cluster.xml
--- a/Marea/marea_cluster.xml Wed Oct 16 07:12:37 2019 -0400
+++ b/Marea/marea_cluster.xml Wed Oct 16 16:25:56 2019 -0400
@@ -1,4 +1,4 @@
-
+
marea_macros.xml
@@ -75,9 +75,10 @@
-
+
+ data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"