Mercurial > repos > bgruening > numeric_clustering
view Untitled.ipynb @ 0:a3fd214e7555 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/numeric_clustering commit bafd56379ff227fb81f8cd61d708ebc39814da54
author | bgruening |
---|---|
date | Fri, 01 Jan 2016 18:37:54 -0500 |
parents | |
children |
line wrap: on
line source
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Numeric clustering smoke test\n",
    "\n",
    "Loads the tabular test data, clusters the selected column range with a scikit-learn estimator, and appends the predicted cluster label to each input row.\n",
    "\n",
    "The notebook is meant to run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import json\n",
    "import numpy as np\n",
    "import sklearn.cluster\n",
    "import pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Relative to the tool directory; assumes the notebook is started from there.\n",
    "DATA_PATH = \"test-data/numeric_values.tabular\"\n",
    "\n",
    "# Row that pandas.read_csv uses for column labels (None = the file has no header row).\n",
    "# NOTE(review): previously saved output showed numeric-looking column labels, which\n",
    "# suggests the first data row may be consumed as a header -- confirm against the file.\n",
    "HEADER_ROW = 0\n",
    "\n",
    "# 1-based, inclusive range of columns to cluster on.\n",
    "# If END_COLUMN < START_COLUMN, every column is used.\n",
    "START_COLUMN = 1\n",
    "END_COLUMN = 1\n",
    "\n",
    "# Name of the estimator class looked up in sklearn.cluster.\n",
    "CLUSTER_ALGORITHM = \"KMeans\"\n",
    "\n",
    "# Fixed seed so that cluster labels are reproducible between runs.\n",
    "SEED = 0\n",
    "\n",
    "# Keyword parameters forwarded to the estimator via set_params().\n",
    "# Anything not listed here keeps the estimator default; drop random_state\n",
    "# if CLUSTER_ALGORITHM is changed to an estimator that does not accept it.\n",
    "CLUSTER_PARAMS = dict(random_state=SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pandas.read_csv(DATA_PATH, sep='\\t', header=HEADER_ROW, index_col=None, parse_dates=True, encoding=None)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "my_class = getattr(sklearn.cluster, CLUSTER_ALGORITHM)\n",
    "cluster_object = my_class()\n",
    "\n",
    "# set_params returns the estimator, so its repr is shown as the cell output.\n",
    "cluster_object.set_params(**CLUSTER_PARAMS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if END_COLUMN >= START_COLUMN:\n",
    "    # Convert the 1-based inclusive range into a 0-based half-open slice.\n",
    "    data_matrix = data.values[:, START_COLUMN - 1:END_COLUMN]\n",
    "else:\n",
    "    data_matrix = data.values\n",
    "\n",
    "prediction = cluster_object.fit_predict(data_matrix)\n",
    "assert len(prediction) == len(data_matrix), \"expected one label per input row\"\n",
    "\n",
    "# Reuse the input index so the labels line up row by row with `data`.\n",
    "pred = pandas.DataFrame(prediction, index=data.index, columns=[\"cluster\"])\n",
    "pred.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Result\n",
    "\n",
    "The predicted label is appended as a new `cluster` column. Concatenating along `axis=0` would instead stack the labels underneath the data and pad both halves with NaN."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "clustered = pandas.concat([data, pred], axis=1)\n",
    "assert len(clustered) == len(data), \"concat must not add rows\"\n",
    "assert clustered.shape[1] == data.shape[1] + 1, \"exactly one label column is added\"\n",
    "clustered.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}