{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n",
      "Done.\n",
      "Done.\n",
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%load_ext sql\n",
    "%sql sqlite:///complaint.db\n",
    "import time\n",
    "%sql drop index if exists state_index;\n",
    "%sql drop index if exists state_product_index;\n",
    "%sql drop index if exists helpful_index;\n",
    "%sql analyze"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Activity 8: Indexes\n",
    "===========\n",
    "\n",
    "Let's play with the [consumer complaint database](https://catalog.data.gov/dataset/consumer-complaint-database) from data.gov"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "
\n",
       "    \n",
       "        | count(*)\n",
       " | 
\n",
       "    \n",
       "        | 79468\n",
       " | 
\n",
       "
"
      ],
      "text/plain": [
       "[(79468,)]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%sql select count(*) from complaints;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    \n",
       "        | Date_received\n",
       " | Product\n",
       " | Subproduct\n",
       " | Issue\n",
       " | Subissue\n",
       " | Consumer_narrative\n",
       " | Company_public_response\n",
       " | Company\n",
       " | State\n",
       " | ZIP_code\n",
       " | Submitted_via\n",
       " | Date_sent_to_company\n",
       " | Company_response\n",
       " | Timely_response\n",
       " | Consumer_disputed\n",
       " | Complaint_ID\n",
       " | 
\n",
       "    \n",
       "        | 12/30/2015\n",
       " | Mortgage\n",
       " | Other mortgage\n",
       " | Loan servicing, payments, escrow account\n",
       " | \n",
       " | \n",
       " | Company chooses not to provide a public response\n",
       " | U.S. Bancorp\n",
       " | TN\n",
       " | 38138\n",
       " | \n",
       " | N/A\n",
       " | Referral\n",
       " | 01/05/2016\n",
       " | Closed with explanation\n",
       " | Yes\n",
       " | 
\n",
       "    \n",
       "        | 12/12/2015\n",
       " | Mortgage\n",
       " | Other mortgage\n",
       " | Loan modification,collection,foreclosure\n",
       " | \n",
       " | \n",
       " | Company chooses not to provide a public response\n",
       " | Citibank\n",
       " | NY\n",
       " | 13021\n",
       " | \n",
       " | N/A\n",
       " | Referral\n",
       " | 12/23/2015\n",
       " | Closed with explanation\n",
       " | Yes\n",
       " | 
\n",
       "    \n",
       "        | 12/02/2015\n",
       " | Mortgage\n",
       " | Other mortgage\n",
       " | Loan modification,collection,foreclosure\n",
       " | \n",
       " | \n",
       " | \n",
       " | Nationstar Mortgage\n",
       " | MI\n",
       " | 49102\n",
       " | \n",
       " | N/A\n",
       " | Referral\n",
       " | 12/17/2015\n",
       " | Closed with explanation\n",
       " | Yes\n",
       " | 
\n",
       "    \n",
       "        | 12/02/2015\n",
       " | Bank account or service\n",
       " | Other bank product/service\n",
       " | Account opening, closing, or management\n",
       " | \n",
       " | \n",
       " | Company chooses not to provide a public response\n",
       " | Wells Fargo & Company\n",
       " | \n",
       " | \n",
       " | \n",
       " | N/A\n",
       " | Referral\n",
       " | 12/07/2015\n",
       " | Closed with explanation\n",
       " | Yes\n",
       " | 
\n",
       "    \n",
       "        | 12/22/2015\n",
       " | Mortgage\n",
       " | Conventional fixed mortgage\n",
       " | Loan servicing, payments, escrow account\n",
       " | \n",
       " | \n",
       " | \n",
       " | Nationstar Mortgage\n",
       " | FL\n",
       " | 33484\n",
       " | Older American\n",
       " | Consent not provided\n",
       " | Web\n",
       " | 01/06/2016\n",
       " | Closed with explanation\n",
       " | Yes\n",
       " | 
\n",
       "
"
      ],
      "text/plain": [
       "[('12/30/2015', 'Mortgage', 'Other mortgage', 'Loan servicing, payments, escrow account', '', '', 'Company chooses not to provide a public response', 'U.S. Bancorp', 'TN', '38138', '', 'N/A', 'Referral', '01/05/2016', 'Closed with explanation', 'Yes'),\n",
       " ('12/12/2015', 'Mortgage', 'Other mortgage', 'Loan modification,collection,foreclosure', '', '', 'Company chooses not to provide a public response', 'Citibank', 'NY', '13021', '', 'N/A', 'Referral', '12/23/2015', 'Closed with explanation', 'Yes'),\n",
       " ('12/02/2015', 'Mortgage', 'Other mortgage', 'Loan modification,collection,foreclosure', '', '', '', 'Nationstar Mortgage', 'MI', '49102', '', 'N/A', 'Referral', '12/17/2015', 'Closed with explanation', 'Yes'),\n",
       " ('12/02/2015', 'Bank account or service', 'Other bank product/service', 'Account opening, closing, or management', '', '', 'Company chooses not to provide a public response', 'Wells Fargo & Company', '', '', '', 'N/A', 'Referral', '12/07/2015', 'Closed with explanation', 'Yes'),\n",
       " ('12/22/2015', 'Mortgage', 'Conventional fixed mortgage', 'Loan servicing, payments, escrow account', '', '', '', 'Nationstar Mortgage', 'FL', '33484', 'Older American', 'Consent not provided', 'Web', '01/06/2016', 'Closed with explanation', 'Yes')]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%sql select * from complaints limit 5;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 1: Query without an index\n",
    "\n",
    "First, let's start off by writing a query to find the **counts of the top 5 Product, State pairs** in the complaints database (return the product and state as well as the count).  Use the single-line syntax for simple timing so we can see how long the query takes:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n",
      "CPU times: user 101 ms, sys: 20.1 ms, total: 121 ms\n",
      "Wall time: 268 ms\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    \n",
       "        | Product\n",
       " | State\n",
       " | c\n",
       " | 
\n",
       "    \n",
       "        | None\n",
       " | None\n",
       " | 13451\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | CA\n",
       " | 3891\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | FL\n",
       " | 2343\n",
       " | 
\n",
       "    \n",
       "        | Debt collection\n",
       " | None\n",
       " | 1654\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | None\n",
       " | 1427\n",
       " | 
\n",
       "
"
      ],
      "text/plain": [
       "[(None, None, 13451),\n",
       " ('Mortgage', 'CA', 3891),\n",
       " ('Mortgage', 'FL', 2343),\n",
       " ('Debt collection', None, 1654),\n",
       " ('Mortgage', None, 1427)]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time %sql SELECT product, state, count(*) AS c FROM complaints GROUP BY product, state ORDER BY c DESC LIMIT 5;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exerise 2: Single search key index\n",
    "\n",
    "Now create a _single-key_ index such that the above query is faster!  The syntax to create an index in SQL is:\n",
    "> DROP INDEX IF EXISTS index_name;\n",
    "> CREATE INDEX index_name ON table(attributes);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n",
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "DROP INDEX IF EXISTS state_index;\n",
    "CREATE INDEX state_index ON complaints(state);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n",
      "CPU times: user 106 ms, sys: 16.6 ms, total: 122 ms\n",
      "Wall time: 250 ms\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    \n",
       "        | Product\n",
       " | State\n",
       " | c\n",
       " | 
\n",
       "    \n",
       "        | None\n",
       " | None\n",
       " | 13451\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | CA\n",
       " | 3891\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | FL\n",
       " | 2343\n",
       " | 
\n",
       "    \n",
       "        | Debt collection\n",
       " | None\n",
       " | 1654\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | None\n",
       " | 1427\n",
       " | 
\n",
       "
"
      ],
      "text/plain": [
       "[(None, None, 13451),\n",
       " ('Mortgage', 'CA', 3891),\n",
       " ('Mortgage', 'FL', 2343),\n",
       " ('Debt collection', None, 1654),\n",
       " ('Mortgage', None, 1427)]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time %sql SELECT product, state, count(*) AS c FROM complaints GROUP BY product, state ORDER BY c DESC LIMIT 5;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 3\n",
    "\n",
    "Now, create a _covering_ index for the query and then see how long it takes to run!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n",
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "DROP INDEX IF EXISTS state_product_index;\n",
    "CREATE INDEX state_product_index ON complaints(state, product);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n",
      "CPU times: user 14.1 ms, sys: 6.02 ms, total: 20.1 ms\n",
      "Wall time: 61.5 ms\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    \n",
       "        | Product\n",
       " | State\n",
       " | c\n",
       " | 
\n",
       "    \n",
       "        | None\n",
       " | None\n",
       " | 13451\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | CA\n",
       " | 3891\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | FL\n",
       " | 2343\n",
       " | 
\n",
       "    \n",
       "        | Debt collection\n",
       " | None\n",
       " | 1654\n",
       " | 
\n",
       "    \n",
       "        | Mortgage\n",
       " | None\n",
       " | 1427\n",
       " | 
\n",
       "
"
      ],
      "text/plain": [
       "[(None, None, 13451),\n",
       " ('Mortgage', 'CA', 3891),\n",
       " ('Mortgage', 'FL', 2343),\n",
       " ('Debt collection', None, 1654),\n",
       " ('Mortgage', None, 1427)]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time %sql SELECT product, state, count(*) AS c FROM complaints GROUP BY product, state ORDER BY c DESC LIMIT 5;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 3(b):\n",
    "\n",
    "Use EXPLAIN to see if sqlite used/recognized your covering index.  EXPLAIN is an operator that tells SQL to explain its query plan... we'll look into this in more depth later.  For now, the syntax is:\n",
    "> EXPLAIN QUERY PLAN your_query_here;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    \n",
       "        | selectid\n",
       " | order\n",
       " | from\n",
       " | detail\n",
       " | 
\n",
       "    \n",
       "        | 0\n",
       " | 0\n",
       " | 0\n",
       " | SCAN TABLE complaints USING COVERING INDEX state_product_index\n",
       " | 
\n",
       "    \n",
       "        | 0\n",
       " | 0\n",
       " | 0\n",
       " | USE TEMP B-TREE FOR ORDER BY\n",
       " | 
\n",
       "
"
      ],
      "text/plain": [
       "[(0, 0, 0, 'SCAN TABLE complaints USING COVERING INDEX state_product_index'),\n",
       " (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "EXPLAIN QUERY PLAN\n",
    "SELECT product, state, count(*) AS c\n",
    "FROM complaints\n",
    "GROUP BY product, state ORDER BY c DESC LIMIT 5;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}