{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Motivation\n", "## How to solve matrix-matrix multiplication" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Simple approach in C\n", "\n", "~~~\n", " for (c = 0; c < m; c++) {\n", " for (d = 0; d < q; d++) {\n", " for (k = 0; k < p; k++) {\n", " sum = sum + first[c][k]*second[k][d];\n", " }\n", " \n", " multiply[c][d] = sum;\n", " sum = 0;\n", " }\n", " }\n", "~~~" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def mymatrixmult(A,B):\n", " y = np.zeros((A.shape[0], B.shape[1]))\n", " for i in range(A.shape[0]):\n", " for j in range(B.shape[1]):\n", " for k in range(A.shape[0]):\n", " y[i][j] += A[i][k]*B[k][j]\n", " return y\n", " \n", "m = np.arange(40000).reshape(200,200)\n", "m1 = m/np.average(m)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Takes a while (200**3) = 8 MFLOPS" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[6.61708085e-03, 1.65675784e-02, 2.65180759e-02, ...,\n", " 1.96686509e+00, 1.97681559e+00, 1.98676609e+00],\n", " [1.65675784e-02, 4.65190759e-02, 7.64705735e-02, ...,\n", " 5.91701260e+00, 5.94696409e+00, 5.97691559e+00],\n", " [2.65180759e-02, 7.64705735e-02, 1.26423071e-01, ...,\n", " 9.86716010e+00, 9.91711260e+00, 9.96706510e+00],\n", " ...,\n", " [1.96686509e+00, 5.91701260e+00, 9.86716010e+00, ...,\n", " 7.80145924e+02, 7.84096071e+02, 7.88046219e+02],\n", " [1.97681559e+00, 5.94696409e+00, 9.91711260e+00, ...,\n", " 7.84096071e+02, 7.88066220e+02, 7.92036368e+02],\n", " [1.98676609e+00, 5.97691559e+00, 9.96706510e+00, ...,\n", " 7.88046219e+02, 7.92036368e+02, 7.96026518e+02]])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mymatrixmult(m1, m1.T)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[6.61708085e-03, 1.65675784e-02, 2.65180759e-02, ...,\n", " 1.96686509e+00, 1.97681559e+00, 1.98676609e+00],\n", " [1.65675784e-02, 4.65190759e-02, 7.64705735e-02, ...,\n", " 5.91701260e+00, 5.94696409e+00, 5.97691559e+00],\n", " [2.65180759e-02, 7.64705735e-02, 1.26423071e-01, ...,\n", " 9.86716010e+00, 9.91711260e+00, 9.96706510e+00],\n", " ...,\n", " [1.96686509e+00, 5.91701260e+00, 9.86716010e+00, ...,\n", " 7.80145924e+02, 7.84096071e+02, 7.88046219e+02],\n", " [1.97681559e+00, 5.94696409e+00, 9.91711260e+00, ...,\n", " 7.84096071e+02, 7.88066220e+02, 7.92036368e+02],\n", " [1.98676609e+00, 5.97691559e+00, 9.96706510e+00, ...,\n", " 7.88046219e+02, 7.92036368e+02, 7.96026518e+02]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m1.dot(m1.T)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Now numpy: (2000**3) = 8 GFLOPS" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "M = np.arange(4000000).reshape(2000,2000) \n", "M1 = M/np.average(M) " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[6.66167083e-04, 1.66566758e-03, 2.66516808e-03, ...,\n", " 1.99666867e+00, 1.99766817e+00, 1.99866767e+00],\n", " [1.66566758e-03, 4.66516908e-03, 7.66467058e-03, ...,\n", " 5.99167016e+00, 5.99466966e+00, 5.99766917e+00],\n", " [2.66516808e-03, 7.66467058e-03, 1.26641731e-02, ...,\n", " 9.98667166e+00, 9.99167116e+00, 9.99667067e+00],\n", " ...,\n", " [1.99666867e+00, 5.99167016e+00, 9.98667166e+00, ...,\n", " 7.98001466e+03, 7.98400966e+03, 7.98800466e+03],\n", " [1.99766817e+00, 5.99466966e+00, 9.99167116e+00, ...,\n", " 7.98400966e+03, 7.98800666e+03, 7.99200366e+03],\n", " [1.99866767e+00, 5.99766917e+00, 9.99667067e+00, ...,\n", " 7.98800466e+03, 7.99200366e+03, 7.99600267e+03]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "M1.dot(M1.T)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],\n", " [ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],\n", " [ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],\n", " [ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],\n", " [ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59],\n", " [ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71],\n", " [ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83],\n", " [ 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95],\n", " [ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107],\n", " [108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119],\n", " [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131],\n", " [132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "n = 12\n", "np.arange(n*n).reshape(n,n)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Splitting the matrix\n", "\n", "Two reasons:\n", "1. optimize cache usage\n", "2. using SIMD power\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## ?\n", "\n", "No idea, about the following: $y = tanh(M)$" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "y = np.tanh(M1.dot(M1.T))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1000, 1000)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y.shape" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "16.0" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "((4*128)**3)*16/((128)**3*64)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# das wichtig" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }