{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Motivation\n",
    "## How to solve matrix-matrix multiplication"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Simple approach in C\n",
    "\n",
    "~~~\n",
    "   for (c = 0; c < m; c++) {\n",
    "      for (d = 0; d < q; d++) {\n",
    "        for (k = 0; k < p; k++) {\n",
    "          sum = sum + first[c][k]*second[k][d];\n",
    "        }\n",
    " \n",
    "        multiply[c][d] = sum;\n",
    "        sum = 0;\n",
    "      }\n",
    "    }\n",
    "~~~"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mymatrixmult(A,B):\n",
    "    y = np.zeros((A.shape[0], B.shape[1]))\n",
    "    for i in range(A.shape[0]):\n",
    "        for j in range(B.shape[1]):\n",
    "            for k in range(A.shape[0]):\n",
    "                 y[i][j] += A[i][k]*B[k][j]\n",
    "    return y\n",
    "                \n",
    "m = np.arange(40000).reshape(200,200)\n",
    "m1 = m/np.average(m)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Takes a while (200**3) = 8 MFLOPS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[6.61708085e-03, 1.65675784e-02, 2.65180759e-02, ...,\n",
       "        1.96686509e+00, 1.97681559e+00, 1.98676609e+00],\n",
       "       [1.65675784e-02, 4.65190759e-02, 7.64705735e-02, ...,\n",
       "        5.91701260e+00, 5.94696409e+00, 5.97691559e+00],\n",
       "       [2.65180759e-02, 7.64705735e-02, 1.26423071e-01, ...,\n",
       "        9.86716010e+00, 9.91711260e+00, 9.96706510e+00],\n",
       "       ...,\n",
       "       [1.96686509e+00, 5.91701260e+00, 9.86716010e+00, ...,\n",
       "        7.80145924e+02, 7.84096071e+02, 7.88046219e+02],\n",
       "       [1.97681559e+00, 5.94696409e+00, 9.91711260e+00, ...,\n",
       "        7.84096071e+02, 7.88066220e+02, 7.92036368e+02],\n",
       "       [1.98676609e+00, 5.97691559e+00, 9.96706510e+00, ...,\n",
       "        7.88046219e+02, 7.92036368e+02, 7.96026518e+02]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mymatrixmult(m1, m1.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[6.61708085e-03, 1.65675784e-02, 2.65180759e-02, ...,\n",
       "        1.96686509e+00, 1.97681559e+00, 1.98676609e+00],\n",
       "       [1.65675784e-02, 4.65190759e-02, 7.64705735e-02, ...,\n",
       "        5.91701260e+00, 5.94696409e+00, 5.97691559e+00],\n",
       "       [2.65180759e-02, 7.64705735e-02, 1.26423071e-01, ...,\n",
       "        9.86716010e+00, 9.91711260e+00, 9.96706510e+00],\n",
       "       ...,\n",
       "       [1.96686509e+00, 5.91701260e+00, 9.86716010e+00, ...,\n",
       "        7.80145924e+02, 7.84096071e+02, 7.88046219e+02],\n",
       "       [1.97681559e+00, 5.94696409e+00, 9.91711260e+00, ...,\n",
       "        7.84096071e+02, 7.88066220e+02, 7.92036368e+02],\n",
       "       [1.98676609e+00, 5.97691559e+00, 9.96706510e+00, ...,\n",
       "        7.88046219e+02, 7.92036368e+02, 7.96026518e+02]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m1.dot(m1.T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now numpy: (2000**3) = 8 GFLOPS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "M = np.arange(4000000).reshape(2000,2000)   \n",
    "M1 = M/np.average(M) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[6.66167083e-04, 1.66566758e-03, 2.66516808e-03, ...,\n",
       "        1.99666867e+00, 1.99766817e+00, 1.99866767e+00],\n",
       "       [1.66566758e-03, 4.66516908e-03, 7.66467058e-03, ...,\n",
       "        5.99167016e+00, 5.99466966e+00, 5.99766917e+00],\n",
       "       [2.66516808e-03, 7.66467058e-03, 1.26641731e-02, ...,\n",
       "        9.98667166e+00, 9.99167116e+00, 9.99667067e+00],\n",
       "       ...,\n",
       "       [1.99666867e+00, 5.99167016e+00, 9.98667166e+00, ...,\n",
       "        7.98001466e+03, 7.98400966e+03, 7.98800466e+03],\n",
       "       [1.99766817e+00, 5.99466966e+00, 9.99167116e+00, ...,\n",
       "        7.98400966e+03, 7.98800666e+03, 7.99200366e+03],\n",
       "       [1.99866767e+00, 5.99766917e+00, 9.99667067e+00, ...,\n",
       "        7.98800466e+03, 7.99200366e+03, 7.99600267e+03]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "M1.dot(M1.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11],\n",
       "       [ 12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23],\n",
       "       [ 24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35],\n",
       "       [ 36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47],\n",
       "       [ 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59],\n",
       "       [ 60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71],\n",
       "       [ 72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83],\n",
       "       [ 84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95],\n",
       "       [ 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107],\n",
       "       [108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119],\n",
       "       [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131],\n",
       "       [132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n = 12\n",
    "np.arange(n*n).reshape(n,n)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Splitting the matrix\n",
    "\n",
    "Two reasons:\n",
    "1. optimize cache usage\n",
    "2. using SIMD power\n",
    "\n",
    "<img src=\"pics/matrix.png\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ?\n",
    "\n",
    "No idea, about the following: $y = tanh(M)$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.tanh(M1.dot(M1.T))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1000, 1000)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16.0"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "((4*128)**3)*16/((128)**3*64)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# das wichtig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}