stammtisch 06.05.2020
This commit is contained in:
parent
c55a1cab4b
commit
9e5eed71ff
5 changed files with 904 additions and 1 deletions
318
post/motivation_matrix_mult.ipynb
Normal file
318
post/motivation_matrix_mult.ipynb
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Motivation\n",
|
||||
"## How to solve matrix-matrix multiplication"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Simple approach in C\n",
|
||||
"\n",
|
||||
"~~~\n",
|
||||
" for (c = 0; c < m; c++) {\n",
|
||||
" for (d = 0; d < q; d++) {\n",
|
||||
" for (k = 0; k < p; k++) {\n",
|
||||
" sum = sum + first[c][k]*second[k][d];\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" multiply[c][d] = sum;\n",
|
||||
" sum = 0;\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"~~~"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def mymatrixmult(A,B):\n",
|
||||
"    \"\"\"Multiply two 2-D arrays with the naive triple loop.\n",
|
||||
"\n",
|
||||
"    A has shape (m, p) and B has shape (p, q); the result is a float\n",
|
||||
"    array of shape (m, q). Raises ValueError if the inner dimensions\n",
|
||||
"    of A and B differ.\n",
|
||||
"    \"\"\"\n",
|
||||
"    if A.shape[1] != B.shape[0]:\n",
|
||||
"        raise ValueError('inner dimensions of A and B must match')\n",
|
||||
"    y = np.zeros((A.shape[0], B.shape[1]))\n",
|
||||
"    for i in range(A.shape[0]):\n",
|
||||
"        for j in range(B.shape[1]):\n",
|
||||
"            # k runs over the shared inner dimension (columns of A),\n",
|
||||
"            # not over the rows of A; the two only coincide for square A.\n",
|
||||
"            for k in range(A.shape[1]):\n",
|
||||
"                y[i][j] += A[i][k]*B[k][j]\n",
|
||||
"    return y\n",
|
||||
" \n",
|
||||
"m = np.arange(40000).reshape(200,200)\n",
|
||||
"m1 = m/np.average(m)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Takes a while: 200**3 = 8 million multiply-adds in pure Python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[6.61708085e-03, 1.65675784e-02, 2.65180759e-02, ...,\n",
|
||||
" 1.96686509e+00, 1.97681559e+00, 1.98676609e+00],\n",
|
||||
" [1.65675784e-02, 4.65190759e-02, 7.64705735e-02, ...,\n",
|
||||
" 5.91701260e+00, 5.94696409e+00, 5.97691559e+00],\n",
|
||||
" [2.65180759e-02, 7.64705735e-02, 1.26423071e-01, ...,\n",
|
||||
" 9.86716010e+00, 9.91711260e+00, 9.96706510e+00],\n",
|
||||
" ...,\n",
|
||||
" [1.96686509e+00, 5.91701260e+00, 9.86716010e+00, ...,\n",
|
||||
" 7.80145924e+02, 7.84096071e+02, 7.88046219e+02],\n",
|
||||
" [1.97681559e+00, 5.94696409e+00, 9.91711260e+00, ...,\n",
|
||||
" 7.84096071e+02, 7.88066220e+02, 7.92036368e+02],\n",
|
||||
" [1.98676609e+00, 5.97691559e+00, 9.96706510e+00, ...,\n",
|
||||
" 7.88046219e+02, 7.92036368e+02, 7.96026518e+02]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mymatrixmult(m1, m1.T)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[6.61708085e-03, 1.65675784e-02, 2.65180759e-02, ...,\n",
|
||||
" 1.96686509e+00, 1.97681559e+00, 1.98676609e+00],\n",
|
||||
" [1.65675784e-02, 4.65190759e-02, 7.64705735e-02, ...,\n",
|
||||
" 5.91701260e+00, 5.94696409e+00, 5.97691559e+00],\n",
|
||||
" [2.65180759e-02, 7.64705735e-02, 1.26423071e-01, ...,\n",
|
||||
" 9.86716010e+00, 9.91711260e+00, 9.96706510e+00],\n",
|
||||
" ...,\n",
|
||||
" [1.96686509e+00, 5.91701260e+00, 9.86716010e+00, ...,\n",
|
||||
" 7.80145924e+02, 7.84096071e+02, 7.88046219e+02],\n",
|
||||
" [1.97681559e+00, 5.94696409e+00, 9.91711260e+00, ...,\n",
|
||||
" 7.84096071e+02, 7.88066220e+02, 7.92036368e+02],\n",
|
||||
" [1.98676609e+00, 5.97691559e+00, 9.96706510e+00, ...,\n",
|
||||
" 7.88046219e+02, 7.92036368e+02, 7.96026518e+02]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"m1.dot(m1.T)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Now numpy: 2000**3 = 8 billion multiply-adds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"M = np.arange(4000000).reshape(2000,2000) \n",
|
||||
"M1 = M/np.average(M) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[6.66167083e-04, 1.66566758e-03, 2.66516808e-03, ...,\n",
|
||||
" 1.99666867e+00, 1.99766817e+00, 1.99866767e+00],\n",
|
||||
" [1.66566758e-03, 4.66516908e-03, 7.66467058e-03, ...,\n",
|
||||
" 5.99167016e+00, 5.99466966e+00, 5.99766917e+00],\n",
|
||||
" [2.66516808e-03, 7.66467058e-03, 1.26641731e-02, ...,\n",
|
||||
" 9.98667166e+00, 9.99167116e+00, 9.99667067e+00],\n",
|
||||
" ...,\n",
|
||||
" [1.99666867e+00, 5.99167016e+00, 9.98667166e+00, ...,\n",
|
||||
" 7.98001466e+03, 7.98400966e+03, 7.98800466e+03],\n",
|
||||
" [1.99766817e+00, 5.99466966e+00, 9.99167116e+00, ...,\n",
|
||||
" 7.98400966e+03, 7.98800666e+03, 7.99200366e+03],\n",
|
||||
" [1.99866767e+00, 5.99766917e+00, 9.99667067e+00, ...,\n",
|
||||
" 7.98800466e+03, 7.99200366e+03, 7.99600267e+03]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"M1.dot(M1.T)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],\n",
|
||||
" [ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],\n",
|
||||
" [ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],\n",
|
||||
" [ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],\n",
|
||||
" [ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59],\n",
|
||||
" [ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71],\n",
|
||||
" [ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83],\n",
|
||||
" [ 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95],\n",
|
||||
" [ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107],\n",
|
||||
" [108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119],\n",
|
||||
" [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131],\n",
|
||||
" [132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"n = 12\n",
|
||||
"np.arange(n*n).reshape(n,n)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Splitting the matrix\n",
|
||||
"\n",
|
||||
"Two reasons:\n",
|
||||
"1. optimize cache usage\n",
|
||||
"2. use SIMD power\n",
|
||||
"\n",
|
||||
"<img src=\"pics/matrix.png\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## ?\n",
|
||||
"\n",
|
||||
"No idea about the following: $y = \\tanh(M)$"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y = np.tanh(M1.dot(M1.T))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(1000, 1000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"16.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"((4*128)**3)*16/((128)**3*64)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# das wichtig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue