Mercurial > repos > bgruening > numeric_clustering
diff Untitled.ipynb @ 0:a3fd214e7555 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/numeric_clustering commit bafd56379ff227fb81f8cd61d708ebc39814da54
author | bgruening |
---|---|
date | Fri, 01 Jan 2016 18:37:54 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Untitled.ipynb Fri Jan 01 18:37:54 2016 -0500 @@ -0,0 +1,744 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=8, n_init=10,\n", + " n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n", + " verbose=0)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "import json\n", + "import numpy as np\n", + "import sklearn.cluster\n", + "import pandas\n", + "\n", + "data = pandas.read_csv(\"/home/bag/projects/code/galaxytools/tools/numeric_clustering/test-data/numeric_values.tabular\", sep='\\t', header=0, index_col=None, parse_dates=True, encoding=None )\n", + "my_class = getattr(sklearn.cluster, \"KMeans\")\n", + "cluster_object = my_class()\n", + "\n", + "params = dict()\n", + "cluster_object.set_params(**params)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1\n", + " 2 2 2 2 2 2 2 2 2 2 2] 48 48\n", + " 0\n", + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + "5 0\n", + "6 0\n", + "7 0\n", + "8 0\n", + "9 0\n", + "10 0\n", + "11 0\n", + "12 3\n", + "13 3\n", + "14 3\n", + "15 3\n", + "16 3\n", + "17 3\n", + "18 3\n", + "19 3\n", + "20 3\n", + "21 3\n", + "22 3\n", + "23 3\n", + "24 3\n", + "25 1\n", + "26 1\n", + "27 1\n", + "28 1\n", + "29 1\n", + "30 1\n", + "31 1\n", + "32 1\n", + "33 1\n", + "34 1\n", + "35 1\n", + "36 1\n", + "37 2\n", + "38 2\n", + "39 2\n", + "40 2\n", + "41 2\n", + "42 2\n", + "43 2\n", + "44 2\n", + "45 2\n", + "46 2\n", + "47 2\n" + ] + } + ], + "source": [ + "\n", + "if 4 >= 4:\n", + " data_matrix = data.values[:, 1-1:1]\n", + " #print data_matrix\n", + "else:\n", + " data_matrix = data.values\n", + "\n", + "prediction = cluster_object.fit_predict( data_matrix )\n", + "print prediction, len(prediction), len(data_matrix)\n", + "\n", + "pred = pandas.DataFrame(prediction)\n", + "print pred\n", + "\n", + "#data[len(data.columns)] = prediction\n", + "#data.to_csv(path_or_buf = \"foo.tab\", sep=\"\\t\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>-67</th>\n", + " <th>0</th>\n", + " <th>56</th>\n", + " <th>58</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0 </th>\n", + " <td>NaN</td>\n", + " <td> -76</td>\n", + " <td> 0</td>\n", + " <td> 64</td>\n", + " <td> 44</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1 </th>\n", + " <td>NaN</td>\n", + " <td> -73</td>\n", + " <td> 0</td>\n", + " <td> 48</td>\n", + " <td> 51</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2 </th>\n", + " <td>NaN</td>\n", + " <td> -49</td>\n", + " <td> 0</td>\n", + " <td> 65</td>\n", + " <td> 58</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3 </th>\n", + " <td>NaN</td>\n", + " <td> -49</td>\n", + " <td> 0</td>\n", + " <td> 61</td>\n", + " <td> 43</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4 </th>\n", + " <td>NaN</td>\n", + " <td> -79</td>\n", + " <td> 0</td>\n", + " <td> 43</td>\n", + " <td> 45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5 </th>\n", + " <td>NaN</td>\n", + " <td> -98</td>\n", + " <td> 0</td>\n", + " <td> 60</td>\n", + " <td> 42</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6 </th>\n", + " <td>NaN</td>\n", + " <td> -59</td>\n", + " <td> 0</td>\n", + " <td> 55</td>\n", + " <td> 50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7 </th>\n", + " <td>NaN</td>\n", + " <td> -56</td>\n", + " <td> 0</td>\n", + " <td> 53</td>\n", + " <td> 53</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8 </th>\n", + " <td>NaN</td>\n", + " <td> -61</td>\n", + " <td> 0</td>\n", + " <td> 44</td>\n", + " <td> 45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9 </th>\n", + " <td>NaN</td>\n", + " <td> -84</td>\n", + " <td> 0</td>\n", + " <td> 65</td>\n", + " <td> 43</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>NaN</td>\n", + " <td> -75</td>\n", + " <td> 0</td>\n", + " <td> 52</td>\n", + " <td> 35</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>NaN</td>\n", + " <td> -70</td>\n", + " <td> 0</td>\n", + " <td> 56</td>\n", + " <td> 56</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>NaN</td>\n", + " <td> 43</td>\n", + " <td> 1</td>\n", + " <td> 86</td>\n", + " <td> -61</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>NaN</td>\n", + " <td> 15</td>\n", + " <td> 1</td>\n", + " <td> 93</td>\n", + " <td> -67</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>NaN</td>\n", + " <td> 36</td>\n", + " <td> 1</td>\n", + " <td> 94</td>\n", + " <td> -59</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>NaN</td>\n", + " <td> 62</td>\n", + " <td> 1</td>\n", + " <td> 92</td>\n", + " <td> -50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>NaN</td>\n", + " <td> 70</td>\n", + " <td> 1</td>\n", + " <td> 91</td>\n", + " <td> -78</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>NaN</td>\n", + " <td> 47</td>\n", + " <td> 1</td>\n", + " <td> 87</td>\n", + " <td> -35</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>NaN</td>\n", + " <td> 52</td>\n", + " <td> 1</td>\n", + " <td> 91</td>\n", + " <td> -56</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>NaN</td>\n", + " <td> 46</td>\n", + " <td> 1</td>\n", + " <td> 81</td>\n", + " <td> -61</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>NaN</td>\n", + " <td> 34</td>\n", + " <td> 1</td>\n", + " <td> 78</td>\n", + " <td> -83</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>NaN</td>\n", + " <td> 45</td>\n", + " <td> 1</td>\n", + " <td> 87</td>\n", + " <td> -50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>NaN</td>\n", + " <td> 50</td>\n", + " <td> 1</td>\n", + " <td> 73</td>\n", + " <td> -67</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>NaN</td>\n", + " <td> 45</td>\n", + " <td> 1</td>\n", + " <td> 97</td>\n", + " <td> -50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>NaN</td>\n", + " <td> 45</td>\n", + " <td> 1</td>\n", + " <td> 111</td>\n", + " <td> -61</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>NaN</td>\n", + " <td> -92</td>\n", + " <td> 2</td>\n", + " <td> 23</td>\n", + " <td>-109</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>NaN</td>\n", + " <td> -96</td>\n", + " <td> 2</td>\n", + " <td> 20</td>\n", + " <td> -94</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>NaN</td>\n", + " <td> -88</td>\n", + " <td> 2</td>\n", + " <td> 26</td>\n", + " <td> -85</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>NaN</td>\n", + " <td>-114</td>\n", + " <td> 2</td>\n", + " <td> 33</td>\n", + " <td> -90</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>NaN</td>\n", + " <td>-106</td>\n", + " <td> 2</td>\n", + " <td> 9</td>\n", + " <td> -63</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td> 3</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td> 1</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td> 2</td>\n", + " <td> NaN</td>\n", + " <td>NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>96 rows × 5 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 -67 0 56 58\n", + "0 NaN -76 0 64 44\n", + "1 NaN -73 0 48 51\n", + "2 NaN -49 0 65 58\n", + "3 NaN -49 0 61 43\n", + "4 NaN -79 0 43 45\n", + "5 NaN -98 0 60 42\n", + "6 NaN -59 0 55 50\n", + "7 NaN -56 0 53 53\n", + "8 NaN -61 0 44 45\n", + "9 NaN -84 0 65 43\n", + "10 NaN -75 0 52 35\n", + "11 NaN -70 0 56 56\n", + "12 NaN 43 1 86 -61\n", + "13 NaN 15 1 93 -67\n", + "14 NaN 36 1 94 -59\n", + "15 NaN 62 1 92 -50\n", + "16 NaN 70 1 91 -78\n", + "17 NaN 47 1 87 -35\n", + "18 NaN 52 1 91 -56\n", + "19 NaN 46 1 81 -61\n", + "20 NaN 34 1 78 -83\n", + "21 NaN 45 1 87 -50\n", + "22 NaN 50 1 73 -67\n", + "23 NaN 45 1 97 -50\n", + "24 NaN 45 1 111 -61\n", + "25 NaN -92 2 23 -109\n", + "26 NaN -96 2 20 -94\n", + "27 NaN -88 2 26 -85\n", + "28 NaN -114 2 33 -90\n", + "29 NaN -106 2 9 -63\n", + ".. .. ... .. ... ...\n", + "18 3 NaN NaN NaN NaN\n", + "19 3 NaN NaN NaN NaN\n", + "20 3 NaN NaN NaN NaN\n", + "21 3 NaN NaN NaN NaN\n", + "22 3 NaN NaN NaN NaN\n", + "23 3 NaN NaN NaN NaN\n", + "24 3 NaN NaN NaN NaN\n", + "25 1 NaN NaN NaN NaN\n", + "26 1 NaN NaN NaN NaN\n", + "27 1 NaN NaN NaN NaN\n", + "28 1 NaN NaN NaN NaN\n", + "29 1 NaN NaN NaN NaN\n", + "30 1 NaN NaN NaN NaN\n", + "31 1 NaN NaN NaN NaN\n", + "32 1 NaN NaN NaN NaN\n", + "33 1 NaN NaN NaN NaN\n", + "34 1 NaN NaN NaN NaN\n", + "35 1 NaN NaN NaN NaN\n", + "36 1 NaN NaN NaN NaN\n", + "37 2 NaN NaN NaN NaN\n", + "38 2 NaN NaN NaN NaN\n", + "39 2 NaN NaN NaN NaN\n", + "40 2 NaN NaN NaN NaN\n", + "41 2 NaN NaN NaN NaN\n", + "42 2 NaN NaN NaN NaN\n", + "43 2 NaN NaN NaN NaN\n", + "44 2 NaN NaN NaN NaN\n", + "45 2 NaN NaN NaN NaN\n", + "46 2 NaN NaN NaN NaN\n", + "47 2 NaN NaN NaN NaN\n", + "\n", + "[96 rows x 5 columns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pandas.concat([data, pred], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}