YenLinWu · Mar 29, 2021
diff --git a/‎Homework/期末專題實作(三)/Udacity之AB Test分析.ipynb
+808 b/‎Homework/期末專題實作(三)/Udacity之AB Test分析.ipynb
+808
@@ -0,0 +1,808 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "RTnv32wkA_Ox"
+   },
+   "source": [
+    "# **作業說明**\n",
+    "# (這是Udacity關於A/B Test的期末專題)\n",
+    "\n",
+    "Udacity希望了解，在免費14天試學網頁上，除了要信用卡資訊外，還想了解學生願意花多少小時學。如果少於某門檻(5小時)，就建議學生不要註冊，免費聽聽影音就好，免得浪費資源，降低學習成功率。\n",
+    "\n",
+    "我們的題目是，增加這個頁面，是否對Gross Conversion(GC)和Net Conversion (NC)在統計學上(Alpha=0.05，Power=0.8)有幫助(d=0.01/0.0075)，亦即統計上的顯著(Significant)。\n",
+    "\n",
+    "CI = click 數目\n",
+    "\n",
+    "GC = 註冊數/CI (聽了建議仍然註冊的比例)\n",
+    "\n",
+    "NC = 繳費數/CI (14天之後繳費且繼續的比例)\n",
+    "\n",
+    "我們期待GC比原來下降，但NC不降，這表示省去資源但收入不降。\n",
+    "\n",
+    "檔名：ab-tests-with-python.ipynb\n",
+    "\n",
+    "**作業目標**\n",
+    "\n",
+    "1.   經由範例程式，學習A/B Test 的步驟\n",
+    "2.   最低樣本數的計算方法\n",
+    "3.   自行開發信賴區間計算函數\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "_cell_guid": "9a04b6ea-111a-4627-849f-53adf7efd40b",
+    "_uuid": "f4949401e2dab760c957525f17e90addf45fc9e8",
+    "id": "QfCMcrfTFyMx"
+   },
+   "outputs": [],
+   "source": [
+    "#載入程式庫\n",
+    "import math as mt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import norm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "_cell_guid": "fc93fbb9-b9f0-4b51-8dad-c4a8ee7b4320",
+    "_uuid": "a3b1eab0da4bf7262ff7d98a41bed840d835762a",
+    "id": "_iZnYjxIFyMy"
+   },
+   "outputs": [],
+   "source": [
+    "#將基礎數據放入字典\n",
+    "baseline = {\"Cookies\":40000,\"Clicks\":3200,\"Enrollments\":660,\"CTP\":0.08,\"GConversion\":0.20625,\n",
+    "           \"Retention\":0.53,\"NConversion\":0.109313}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "_cell_guid": "d99f7ee0-4a65-402b-a709-a949cd549bc3",
+    "_uuid": "019536837b5aa146997b693563cfb22af1fb85ee",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "rE-idI4vFyMy",
+    "outputId": "af885825-24d4-4e2b-c7ca-5a7e98ff6182"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'Cookies': 5000,\n",
+       " 'Clicks': 400.0,\n",
+       " 'Enrollments': 82.5,\n",
+       " 'CTP': 0.08,\n",
+       " 'GConversion': 0.20625,\n",
+       " 'Retention': 0.53,\n",
+       " 'NConversion': 0.109313}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#調整大小到以Cookie為基準\n",
+    "baseline[\"Cookies\"] = 5000\n",
+    "baseline[\"Clicks\"]=baseline[\"Clicks\"]*(5000/40000)\n",
+    "baseline[\"Enrollments\"]=baseline[\"Enrollments\"]*(5000/40000)\n",
+    "baseline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "_cell_guid": "4c47c350-99c8-4990-8456-cbbe014a972f",
+    "_uuid": "2c6930584c39024455900fce7e0e92e5812814b3",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "gNpNShHKFyMz",
+    "outputId": "6e1110d0-f002-45a1-9711-6c8c094cd641"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0202"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 算出 Gross Conversion (GC) 的 p 和 n\n",
+    "# 還有 Stansard Deviation(sd) rounded to 4 decimal digits.\n",
+    "GC={}\n",
+    "GC[\"d_min\"]=0.01\n",
+    "GC[\"p\"]=baseline[\"GConversion\"]\n",
+    "#p is given in this case - or we could calculate it from enrollments/clicks\n",
+    "GC[\"n\"]=baseline[\"Clicks\"]\n",
+    "GC[\"sd\"]=round(mt.sqrt((GC[\"p\"]*(1-GC[\"p\"]))/GC[\"n\"]),4)\n",
+    "GC[\"sd\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "_cell_guid": "654d08d4-6638-4f9f-a327-6f2f5a6a9365",
+    "_uuid": "334e65cddd5a800a396fa630b676ca12bec42114",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "w_WjAIKkFyMz",
+    "outputId": "9cf4445e-d65d-408f-9798-d78480730dcd"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0549"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Retention(R) \n",
+    "\n",
+    "R={}\n",
+    "R[\"d_min\"]=0.01\n",
+    "R[\"p\"]=baseline[\"Retention\"]\n",
+    "R[\"n\"]=baseline[\"Enrollments\"]\n",
+    "R[\"sd\"]=round(mt.sqrt((R[\"p\"]*(1-R[\"p\"]))/R[\"n\"]),4)\n",
+    "R[\"sd\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "_cell_guid": "162f3d90-467a-41e6-9489-f1ea01f08afa",
+    "_uuid": "be4b9c808cf098efde59ea1d17e1a35799873e5f",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "kwx3Of06FyMz",
+    "outputId": "efac36b1-ec20-4159-d113-fc43c81aa9f0"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0156"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Net Conversion (NC)\n",
+    "NC={}\n",
+    "NC[\"d_min\"]=0.0075\n",
+    "NC[\"p\"]=baseline[\"NConversion\"]\n",
+    "NC[\"n\"]=baseline[\"Clicks\"]\n",
+    "NC[\"sd\"]=round(mt.sqrt((NC[\"p\"]*(1-NC[\"p\"]))/NC[\"n\"]),4)\n",
+    "NC[\"sd\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "_cell_guid": "1cedf3ae-f318-4036-b966-a299350fb048",
+    "_uuid": "8956f99d8ea1b9bb6d8b294a6e333c86a51d88d7",
+    "id": "pBk7b5uMFyM0"
+   },
+   "outputs": [],
+   "source": [
+    "def get_sds(p,d):\n",
+    "    sd1=mt.sqrt(2*p*(1-p))\n",
+    "    sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n",
+    "    x=[sd1,sd2]\n",
+    "    return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "_cell_guid": "230d398f-5692-4f04-a8a9-316c9c661801",
+    "_uuid": "c1c7a715d55432c5d19fef049094527cf1f72343",
+    "id": "mB2im4rcFyM0"
+   },
+   "outputs": [],
+   "source": [
+    "#計算 Z-score\n",
+    "def get_z_score(alpha):\n",
+    "    return norm.ppf(alpha)\n",
+    "\n",
+    "# 得到兩個(A/B)標準差\n",
+    "def get_sds(p,d):\n",
+    "    sd1=mt.sqrt(2*p*(1-p))\n",
+    "    sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n",
+    "    sds=[sd1,sd2]\n",
+    "    return sds\n",
+    "\n",
+    "# 求Sample Size\n",
+    "def get_sampSize(sds,alpha,beta,d):\n",
+    "    n=pow((get_z_score(1-alpha/2)*sds[0]+get_z_score(1-beta)*sds[1]),2)/pow(d,2)\n",
+    "    return n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "_cell_guid": "e9b0d74a-35ba-461e-9b04-7527cc0147f9",
+    "_uuid": "f44e06b60b027dd83ec3734c52ed1347d2a80c0c",
+    "id": "uFh5tlyTFyM0"
+   },
+   "outputs": [],
+   "source": [
+    "GC[\"d\"]=0.01\n",
+    "R[\"d\"]=0.01\n",
+    "NC[\"d\"]=0.0075"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "_cell_guid": "28cf1802-c0ce-4189-b438-86504e53a721",
+    "_uuid": "b443cb437954e04ef889ef4b365d8875da2833d1",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fiOPWnzNFyM0",
+    "outputId": "f709237b-5edc-40e8-96e4-5002bd4dc503"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "25835"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Let's get an integer value for simplicity\n",
+    "GC[\"SampSize\"]=round(get_sampSize(get_sds(GC[\"p\"],GC[\"d\"]),0.05,0.2,GC[\"d\"]))\n",
+    "GC[\"SampSize\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "_cell_guid": "ba7702c9-b092-404b-a212-8713a517d9bc",
+    "_uuid": "530c3e906980568677ce0a7fc5c92f5f9a26302b",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "X7vCHRGDFyM0",
+    "outputId": "f7f50a7f-aa8d-4ed9-b90c-ba1d31415a6f"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "645875"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "GC[\"SampSize\"]=round(GC[\"SampSize\"]/0.08*2)\n",
+    "GC[\"SampSize\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "_cell_guid": "c2c65956-f449-4d28-94b5-c5300aeadb4b",
+    "_uuid": "1aa61cc37f8839a44f0773623d41c3684be1a9c7",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "z3jx0jgiFyM0",
+    "outputId": "d2dba5cc-bf64-4177-9a95-6b8ebdd44674"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "39087"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Getting a nice integer value\n",
+    "R[\"SampSize\"]=round(get_sampSize(get_sds(R[\"p\"],R[\"d\"]),0.05,0.2,R[\"d\"]))\n",
+    "R[\"SampSize\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "_cell_guid": "0281b321-7506-49ba-a33e-29eed30eefbe",
+    "_uuid": "16bf69e2a0f6214b91485b736676a1fe7aadfdca",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "Zc1NFMp3FyM0",
+    "outputId": "5ca978b8-4eac-4589-9481-b634e2c259d5"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4737818.181818182"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "R[\"SampSize\"]=R[\"SampSize\"]/0.08/0.20625*2\n",
+    "R[\"SampSize\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "_cell_guid": "56be284a-1c49-4c65-91d2-e007ae5b327d",
+    "_uuid": "42607629817a83c2be769165cff7a30d74067320",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "XqkTq_D-FyM0",
+    "outputId": "ce8f9a0b-eaf3-4ded-d383-a47df31c799d"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "27413"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Getting a nice integer value\n",
+    "NC[\"SampSize\"]=round(get_sampSize(get_sds(NC[\"p\"],NC[\"d\"]),0.05,0.2,NC[\"d\"]))\n",
+    "NC[\"SampSize\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "_cell_guid": "e4202dd9-7cb6-455c-9630-dd9277f84da6",
+    "_uuid": "38b580f63f9e25fa7e19958d7b12a14d6e2b8838",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "UGArSr5lFyM0",
+    "outputId": "0d53c8f8-e382-406e-9aeb-e2bdc9929a27"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "685325.0"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "NC[\"SampSize\"]=NC[\"SampSize\"]/0.08*2\n",
+    "NC[\"SampSize\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "_cell_guid": "58b7a728-3b45-4867-969b-48959085498c",
+    "_uuid": "4c29c3bce964e643ef30633cab8b95f268f76abc",
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 198
+    },
+    "id": "Oj_s62oaFyM1",
+    "outputId": "ba245606-e0ee-4721-f510-189477941b0d"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Date</th>\n",
+       "      <th>Pageviews</th>\n",
+       "      <th>Clicks</th>\n",
+       "      <th>Enrollments</th>\n",
+       "      <th>Payments</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Sat, Oct 11</td>\n",
+       "      <td>7723</td>\n",
+       "      <td>687</td>\n",
+       "      <td>134.0</td>\n",
+       "      <td>70.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Sun, Oct 12</td>\n",
+       "      <td>9102</td>\n",
+       "      <td>779</td>\n",
+       "      <td>147.0</td>\n",
+       "      <td>70.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Mon, Oct 13</td>\n",
+       "      <td>10511</td>\n",
+       "      <td>909</td>\n",
+       "      <td>167.0</td>\n",
+       "      <td>95.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Tue, Oct 14</td>\n",
+       "      <td>9871</td>\n",
+       "      <td>836</td>\n",
+       "      <td>156.0</td>\n",
+       "      <td>105.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Wed, Oct 15</td>\n",
+       "      <td>10014</td>\n",
+       "      <td>837</td>\n",
+       "      <td>163.0</td>\n",
+       "      <td>64.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          Date  Pageviews  Clicks  Enrollments  Payments\n",
+       "0  Sat, Oct 11       7723     687        134.0      70.0\n",
+       "1  Sun, Oct 12       9102     779        147.0      70.0\n",
+       "2  Mon, Oct 13      10511     909        167.0      95.0\n",
+       "3  Tue, Oct 14       9871     836        156.0     105.0\n",
+       "4  Wed, Oct 15      10014     837        163.0      64.0"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 載入數據\n",
+    "control=pd.read_csv( 'control_data.csv' )\n",
+    "experiment=pd.read_csv( 'experiment_data.csv' )\n",
+    "control.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "_cell_guid": "37ab672f-be1c-46bc-ae53-edf722bef4bc",
+    "_uuid": "15652351bc793b528ca853666ddb4b7defe4f4ff",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "c2AH9yHaFyM1",
+    "outputId": "c534d6e1-a872-4fab-ecf3-95cc10d411d4"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "number of pageviews in control: 345543\n",
+      "number of Pageviewsin experiment: 344660\n"
+     ]
+    }
+   ],
+   "source": [
+    "pageviews_cont=control['Pageviews'].sum()\n",
+    "pageviews_exp=experiment['Pageviews'].sum()\n",
+    "pageviews_total=pageviews_cont+pageviews_exp\n",
+    "print (\"number of pageviews in control:\", pageviews_cont)\n",
+    "print (\"number of Pageviewsin experiment:\" ,pageviews_exp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "_cell_guid": "8e13eba4-0daa-4220-a772-e1f371854d60",
+    "_uuid": "6d4d94df9e5afdfdf97703292b0ef9ac6c3e4251",
+    "id": "kxKPjYuWFyM1"
+   },
+   "outputs": [],
+   "source": [
+    "# Count the total clicks from complete records only\n",
+    "clicks_cont=control[\"Clicks\"].loc[control[\"Enrollments\"].notnull()].sum()\n",
+    "clicks_exp=experiment[\"Clicks\"].loc[experiment[\"Enrollments\"].notnull()].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "_cell_guid": "5615673e-529c-452a-ad48-9186cb88f74b",
+    "_uuid": "3f17a8086726ee8cc01f150808f88fe8693a95a2",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "I4gtEo6LFyM1",
+    "outputId": "6b952d56-de9a-472f-f6ce-bf2cc0ccd9ae"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The change due to the experiment is -2.06 %\n",
+      "Confidence Interval: [ -0.0292 , -0.012 ]\n",
+      "The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Gross Conversion - number of enrollments divided by number of clicks\n",
+    "alpha = 0.05\n",
+    "enrollments_cont=control[\"Enrollments\"].sum()\n",
+    "enrollments_exp=experiment[\"Enrollments\"].sum()\n",
+    "\n",
+    "GC_cont=enrollments_cont/clicks_cont\n",
+    "GC_exp=enrollments_exp/clicks_exp\n",
+    "GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp)\n",
+    "GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
+    "GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4)\n",
+    "GC_diff=round(GC_exp-GC_cont,4)\n",
+    "print(\"The change due to the experiment is\",GC_diff*100,\"%\")\n",
+    "print(\"Confidence Interval: [\",GC_diff-GC_ME,\",\",GC_diff+GC_ME,\"]\")\n",
+    "print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",-GC[\"d_min\"],\"is not in the CI as well.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "_cell_guid": "8cb352ad-fc92-4f21-b939-3874314ba8f4",
+    "_uuid": "83e99af5dea50f22629ccbd34faa196d4065172f",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "MQIg2XBsFyM1",
+    "outputId": "f8f7bb99-0194-4884-dc39-acf4fb962b40"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The change due to the experiment is -0.49 %\n",
+      "Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]\n",
+      "The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if 0.0075 is not in the CI as well.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Net Conversion - number of payments divided by number of clicks\n",
+    "payments_cont=control[\"Payments\"].sum()\n",
+    "payments_exp=experiment[\"Payments\"].sum()\n",
+    "\n",
+    "NC_cont=payments_cont/clicks_cont\n",
+    "NC_exp=payments_exp/clicks_exp\n",
+    "NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp)\n",
+    "NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
+    "NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4)\n",
+    "NC_diff=round(NC_exp-NC_cont,4)\n",
+    "print(\"The change due to the experiment is\",NC_diff*100,\"%\")\n",
+    "print(\"Confidence Interval: [\",NC_diff-NC_ME,\",\",NC_diff+NC_ME,\"]\")\n",
+    "print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",NC[\"d_min\"],\"is not in the CI as well.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "K2hnLtKrFS75"
+   },
+   "source": [
+    "# **作業**\n",
+    "# 經由範例程式碼，熟悉A/B Test的步驟\n",
+    "\n",
+    "請同學逐步跟隨程式了解A/B Test步驟"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-lO_8AYwuEDY"
+   },
+   "source": [
+    "# **作業 嘗試以函數算出樣本數**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-0.02494345647889673 25231\n"
+     ]
+    }
+   ],
+   "source": [
+    "import statsmodels.stats.api as sms\n",
+    "from math import ceil\n",
+    "\n",
+    "effect_size = sms.proportion_effectsize(GC[\"p\"]-1.0*GC[\"d_min\"], GC[\"p\"]+0.0*GC[\"d_min\"])\n",
+    "required_n = sms.NormalIndPower().solve_power(\n",
+    "    effect_size, \n",
+    "    power = 0.8, \n",
+    "    alpha = 0.05, \n",
+    "    ratio = 1\n",
+    "    ) \n",
+    "required_n = ceil(required_n) \n",
+    "print (effect_size,required_n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "MKB09_mjFwjN"
+   },
+   "source": [
+    "# **作業** 自行開發雙樣本比例的信賴區間函數\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(-0.020554874580361565, array([-0.02912016, -0.01198959]))"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import scipy.stats as stats\n",
+    "def two_proprotions_confint(success_a, size_a, success_b, size_b, significance = 0.05):\n",
+    "    \n",
+    "    prop_a = success_a / size_a\n",
+    "    prop_b = success_b / size_b\n",
+    "    var = prop_a * (1 - prop_a) / size_a + prop_b * (1 - prop_b) / size_b\n",
+    "    se = np.sqrt(var)\n",
+    "\n",
+    "    # z critical value\n",
+    "    confidence = 1 - significance\n",
+    "    z = stats.norm(loc = 0, scale = 1).ppf(confidence + significance / 2)\n",
+    "\n",
+    "    # standard formula for the confidence interval\n",
+    "    # point-estimtate +- z * standard-error\n",
+    "    prop_diff = prop_b - prop_a\n",
+    "    confint = prop_diff + np.array([-1, 1]) * z * se\n",
+    "    return prop_diff, confint\n",
+    "two_proprotions_confint(enrollments_cont, clicks_cont, enrollments_exp, clicks_exp, significance = 0.05)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "name": "ab-tests-with-python_作業.ipynb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}