|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": { |
| 6 | + "id": "RTnv32wkA_Ox" |
| 7 | + }, |
| 8 | + "source": [ |
| 9 | + "# **作業說明**\n", |
| 10 | + "# (這是Udacity關於A/B Test的期末專題)\n", |
| 11 | + "\n", |
| 12 | + "Udacity想測試:在免費14天試學的註冊流程中,除了要求信用卡資訊外,再增加一個頁面詢問學生願意花多少小時學習。如果少於某門檻(5小時),就建議學生不要註冊,免費聽聽影音就好,以免浪費資源,拉低學習成功率。\n", |
| 13 | + "\n", |
| 14 | + "我們的題目是,增加這個頁面,是否對Gross Conversion(GC)和Net Conversion (NC)在統計學上(Alpha=0.05,Power=0.8)有幫助(最小可偵測差異 d_min: GC=0.01, NC=0.0075),亦即統計上的顯著(Significant)。\n", |
| 15 | + "\n", |
| 16 | + "CI = click 數目 (注意: 這裡的CI是點擊數,不是後面程式輸出中的 Confidence Interval)\n", |
| 17 | + "\n", |
| 18 | + "GC = 註冊數/CI (聽了建議仍然註冊的比例)\n", |
| 19 | + "\n", |
| 20 | + "NC = 繳費數/CI (14天之後繳費且繼續的比例)\n", |
| 21 | + "\n", |
| 22 | + "我們期待GC比原來下降,但NC不降,這表示省去資源但收入不降。\n", |
| 23 | + "\n", |
| 24 | + "檔名:ab-tests-with-python.ipynb\n", |
| 25 | + "\n", |
| 26 | + "**作業目標**\n", |
| 27 | + "\n", |
| 28 | + "1. 經由範例程式,學習A/B Test 的步驟\n", |
| 29 | + "2. 最低樣本數的計算方法\n", |
| 30 | + "3. 自行開發信賴區間計算函數\n", |
| 31 | + "\n", |
| 32 | + "\n", |
| 33 | + "\n", |
| 34 | + "\n", |
| 35 | + "\n", |
| 36 | + "\n", |
| 37 | + "\n", |
| 38 | + "\n", |
| 39 | + "\n", |
| 40 | + "\n", |
| 41 | + "\n" |
| 42 | + ] |
| 43 | + }, |
| 44 | + { |
| 45 | + "cell_type": "code", |
| 46 | + "execution_count": 1, |
| 47 | + "metadata": { |
| 48 | + "_cell_guid": "9a04b6ea-111a-4627-849f-53adf7efd40b", |
| 49 | + "_uuid": "f4949401e2dab760c957525f17e90addf45fc9e8", |
| 50 | + "id": "QfCMcrfTFyMx" |
| 51 | + }, |
| 52 | + "outputs": [], |
| 53 | + "source": [ |
| 54 | + "#載入程式庫\n", |
| 55 | + "import math as mt\n", |
| 56 | + "import numpy as np\n", |
| 57 | + "import pandas as pd\n", |
| 58 | + "from scipy.stats import norm" |
| 59 | + ] |
| 60 | + }, |
| 61 | + { |
| 62 | + "cell_type": "code", |
| 63 | + "execution_count": 2, |
| 64 | + "metadata": { |
| 65 | + "_cell_guid": "fc93fbb9-b9f0-4b51-8dad-c4a8ee7b4320", |
| 66 | + "_uuid": "a3b1eab0da4bf7262ff7d98a41bed840d835762a", |
| 67 | + "id": "_iZnYjxIFyMy" |
| 68 | + }, |
| 69 | + "outputs": [], |
| 70 | + "source": [ |
| 71 | + "#將基礎數據放入字典\n", |
| 72 | + "baseline = {\"Cookies\":40000,\"Clicks\":3200,\"Enrollments\":660,\"CTP\":0.08,\"GConversion\":0.20625,\n", |
| 73 | + " \"Retention\":0.53,\"NConversion\":0.109313}" |
| 74 | + ] |
| 75 | + }, |
| 76 | + { |
| 77 | + "cell_type": "code", |
| 78 | + "execution_count": 3, |
| 79 | + "metadata": { |
| 80 | + "_cell_guid": "d99f7ee0-4a65-402b-a709-a949cd549bc3", |
| 81 | + "_uuid": "019536837b5aa146997b693563cfb22af1fb85ee", |
| 82 | + "colab": { |
| 83 | + "base_uri": "https://localhost:8080/" |
| 84 | + }, |
| 85 | + "id": "rE-idI4vFyMy", |
| 86 | + "outputId": "af885825-24d4-4e2b-c7ca-5a7e98ff6182" |
| 87 | + }, |
| 88 | + "outputs": [ |
| 89 | + { |
| 90 | + "data": { |
| 91 | + "text/plain": [ |
| 92 | + "{'Cookies': 5000,\n", |
| 93 | + " 'Clicks': 400.0,\n", |
| 94 | + " 'Enrollments': 82.5,\n", |
| 95 | + " 'CTP': 0.08,\n", |
| 96 | + " 'GConversion': 0.20625,\n", |
| 97 | + " 'Retention': 0.53,\n", |
| 98 | + " 'NConversion': 0.109313}" |
| 99 | + ] |
| 100 | + }, |
| 101 | + "execution_count": 3, |
| 102 | + "metadata": {}, |
| 103 | + "output_type": "execute_result" |
| 104 | + } |
| 105 | + ], |
| 106 | + "source": [ |
| 107 | + "# Scale the baseline counts down to a sample of 5000 cookies (factor 5000/40000)\n", |
| 108 | + "baseline[\"Cookies\"] = 5000\n", |
| 109 | + "baseline[\"Clicks\"]=baseline[\"Clicks\"]*(5000/40000)\n", |
| 110 | + "baseline[\"Enrollments\"]=baseline[\"Enrollments\"]*(5000/40000)\n", |
| 111 | + "baseline" |
| 112 | + ] |
| 113 | + }, |
| 114 | + { |
| 115 | + "cell_type": "code", |
| 116 | + "execution_count": 4, |
| 117 | + "metadata": { |
| 118 | + "_cell_guid": "4c47c350-99c8-4990-8456-cbbe014a972f", |
| 119 | + "_uuid": "2c6930584c39024455900fce7e0e92e5812814b3", |
| 120 | + "colab": { |
| 121 | + "base_uri": "https://localhost:8080/" |
| 122 | + }, |
| 123 | + "id": "gNpNShHKFyMz", |
| 124 | + "outputId": "6e1110d0-f002-45a1-9711-6c8c094cd641" |
| 125 | + }, |
| 126 | + "outputs": [ |
| 127 | + { |
| 128 | + "data": { |
| 129 | + "text/plain": [ |
| 130 | + "0.0202" |
| 131 | + ] |
| 132 | + }, |
| 133 | + "execution_count": 4, |
| 134 | + "metadata": {}, |
| 135 | + "output_type": "execute_result" |
| 136 | + } |
| 137 | + ], |
| 138 | + "source": [ |
| 139 | + "# 算出 Gross Conversion (GC) 的 p 和 n\n", |
| 140 | + "# 還有 Standard Deviation(sd) rounded to 4 decimal digits.\n", |
| 141 | + "GC={}\n", |
| 142 | + "GC[\"d_min\"]=0.01\n", |
| 143 | + "GC[\"p\"]=baseline[\"GConversion\"]\n", |
| 144 | + "#p is given in this case - or we could calculate it from enrollments/clicks\n", |
| 145 | + "GC[\"n\"]=baseline[\"Clicks\"]\n", |
| 146 | + "GC[\"sd\"]=round(mt.sqrt((GC[\"p\"]*(1-GC[\"p\"]))/GC[\"n\"]),4)\n", |
| 147 | + "GC[\"sd\"]" |
| 148 | + ] |
| 149 | + }, |
| 150 | + { |
| 151 | + "cell_type": "code", |
| 152 | + "execution_count": 5, |
| 153 | + "metadata": { |
| 154 | + "_cell_guid": "654d08d4-6638-4f9f-a327-6f2f5a6a9365", |
| 155 | + "_uuid": "334e65cddd5a800a396fa630b676ca12bec42114", |
| 156 | + "colab": { |
| 157 | + "base_uri": "https://localhost:8080/" |
| 158 | + }, |
| 159 | + "id": "w_WjAIKkFyMz", |
| 160 | + "outputId": "9cf4445e-d65d-408f-9798-d78480730dcd" |
| 161 | + }, |
| 162 | + "outputs": [ |
| 163 | + { |
| 164 | + "data": { |
| 165 | + "text/plain": [ |
| 166 | + "0.0549" |
| 167 | + ] |
| 168 | + }, |
| 169 | + "execution_count": 5, |
| 170 | + "metadata": {}, |
| 171 | + "output_type": "execute_result" |
| 172 | + } |
| 173 | + ], |
| 174 | + "source": [ |
| 175 | + "# Retention(R) \n", |
| 176 | + "\n", |
| 177 | + "R={}\n", |
| 178 | + "R[\"d_min\"]=0.01\n", |
| 179 | + "R[\"p\"]=baseline[\"Retention\"]\n", |
| 180 | + "R[\"n\"]=baseline[\"Enrollments\"]\n", |
| 181 | + "R[\"sd\"]=round(mt.sqrt((R[\"p\"]*(1-R[\"p\"]))/R[\"n\"]),4)\n", |
| 182 | + "R[\"sd\"]" |
| 183 | + ] |
| 184 | + }, |
| 185 | + { |
| 186 | + "cell_type": "code", |
| 187 | + "execution_count": 6, |
| 188 | + "metadata": { |
| 189 | + "_cell_guid": "162f3d90-467a-41e6-9489-f1ea01f08afa", |
| 190 | + "_uuid": "be4b9c808cf098efde59ea1d17e1a35799873e5f", |
| 191 | + "colab": { |
| 192 | + "base_uri": "https://localhost:8080/" |
| 193 | + }, |
| 194 | + "id": "kwx3Of06FyMz", |
| 195 | + "outputId": "efac36b1-ec20-4159-d113-fc43c81aa9f0" |
| 196 | + }, |
| 197 | + "outputs": [ |
| 198 | + { |
| 199 | + "data": { |
| 200 | + "text/plain": [ |
| 201 | + "0.0156" |
| 202 | + ] |
| 203 | + }, |
| 204 | + "execution_count": 6, |
| 205 | + "metadata": {}, |
| 206 | + "output_type": "execute_result" |
| 207 | + } |
| 208 | + ], |
| 209 | + "source": [ |
| 210 | + "# Net Conversion (NC)\n", |
| 211 | + "NC={}\n", |
| 212 | + "NC[\"d_min\"]=0.0075\n", |
| 213 | + "NC[\"p\"]=baseline[\"NConversion\"]\n", |
| 214 | + "NC[\"n\"]=baseline[\"Clicks\"]\n", |
| 215 | + "NC[\"sd\"]=round(mt.sqrt((NC[\"p\"]*(1-NC[\"p\"]))/NC[\"n\"]),4)\n", |
| 216 | + "NC[\"sd\"]" |
| 217 | + ] |
| 218 | + }, |
| 219 | + { |
| 220 | + "cell_type": "code", |
| 221 | + "execution_count": 7, |
| 222 | + "metadata": { |
| 223 | + "_cell_guid": "1cedf3ae-f318-4036-b966-a299350fb048", |
| 224 | + "_uuid": "8956f99d8ea1b9bb6d8b294a6e333c86a51d88d7", |
| 225 | + "id": "pBk7b5uMFyM0" |
| 226 | + }, |
| 227 | + "outputs": [], |
| 228 | + "source": [ |
| 229 | + "def get_sds(p,d):\n", |
| 230 | + " sd1=mt.sqrt(2*p*(1-p))\n", |
| 231 | + " sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n", |
| 232 | + " x=[sd1,sd2]\n", |
| 233 | + " return x" |
| 234 | + ] |
| 235 | + }, |
| 236 | + { |
| 237 | + "cell_type": "code", |
| 238 | + "execution_count": 8, |
| 239 | + "metadata": { |
| 240 | + "_cell_guid": "230d398f-5692-4f04-a8a9-316c9c661801", |
| 241 | + "_uuid": "c1c7a715d55432c5d19fef049094527cf1f72343", |
| 242 | + "id": "mB2im4rcFyM0" |
| 243 | + }, |
| 244 | + "outputs": [], |
| 245 | + "source": [ |
| 246 | + "#計算 Z-score\n", |
| 247 | + "def get_z_score(alpha):\n", |
| 248 | + " return norm.ppf(alpha)\n", |
| 249 | + "\n", |
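| 250 | + "# Two standard deviations of the difference (not yet divided by n): sd1 with both groups at p, sd2 with one group at p and the other at p+d\n", |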
| 251 | + "def get_sds(p,d):\n", |
| 252 | + " sd1=mt.sqrt(2*p*(1-p))\n", |
| 253 | + " sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n", |
| 254 | + " sds=[sd1,sd2]\n", |
| 255 | + " return sds\n", |
| 256 | + "\n", |
| 257 | + "# 求Sample Size\n", |
| 258 | + "def get_sampSize(sds,alpha,beta,d):\n", |
| 259 | + " n=pow((get_z_score(1-alpha/2)*sds[0]+get_z_score(1-beta)*sds[1]),2)/pow(d,2)\n", |
| 260 | + " return n" |
| 261 | + ] |
| 262 | + }, |
| 263 | + { |
| 264 | + "cell_type": "code", |
| 265 | + "execution_count": 9, |
| 266 | + "metadata": { |
| 267 | + "_cell_guid": "e9b0d74a-35ba-461e-9b04-7527cc0147f9", |
| 268 | + "_uuid": "f44e06b60b027dd83ec3734c52ed1347d2a80c0c", |
| 269 | + "id": "uFh5tlyTFyM0" |
| 270 | + }, |
| 271 | + "outputs": [], |
| 272 | + "source": [ |
| 273 | + "GC[\"d\"]=0.01\n", |
| 274 | + "R[\"d\"]=0.01\n", |
| 275 | + "NC[\"d\"]=0.0075" |
| 276 | + ] |
| 277 | + }, |
| 278 | + { |
| 279 | + "cell_type": "code", |
| 280 | + "execution_count": 10, |
| 281 | + "metadata": { |
| 282 | + "_cell_guid": "28cf1802-c0ce-4189-b438-86504e53a721", |
| 283 | + "_uuid": "b443cb437954e04ef889ef4b365d8875da2833d1", |
| 284 | + "colab": { |
| 285 | + "base_uri": "https://localhost:8080/" |
| 286 | + }, |
| 287 | + "id": "fiOPWnzNFyM0", |
| 288 | + "outputId": "f709237b-5edc-40e8-96e4-5002bd4dc503" |
| 289 | + }, |
| 290 | + "outputs": [ |
| 291 | + { |
| 292 | + "data": { |
| 293 | + "text/plain": [ |
| 294 | + "25835" |
| 295 | + ] |
| 296 | + }, |
| 297 | + "execution_count": 10, |
| 298 | + "metadata": {}, |
| 299 | + "output_type": "execute_result" |
| 300 | + } |
| 301 | + ], |
| 302 | + "source": [ |
| 303 | + "# Let's get an integer value for simplicity\n", |
| 304 | + "GC[\"SampSize\"]=round(get_sampSize(get_sds(GC[\"p\"],GC[\"d\"]),0.05,0.2,GC[\"d\"]))\n", |
| 305 | + "GC[\"SampSize\"]" |
| 306 | + ] |
| 307 | + }, |
| 308 | + { |
| 309 | + "cell_type": "code", |
| 310 | + "execution_count": 11, |
| 311 | + "metadata": { |
| 312 | + "_cell_guid": "ba7702c9-b092-404b-a212-8713a517d9bc", |
| 313 | + "_uuid": "530c3e906980568677ce0a7fc5c92f5f9a26302b", |
| 314 | + "colab": { |
| 315 | + "base_uri": "https://localhost:8080/" |
| 316 | + }, |
| 317 | + "id": "X7vCHRGDFyM0", |
| 318 | + "outputId": "f7f50a7f-aa8d-4ed9-b90c-ba1d31415a6f" |
| 319 | + }, |
| 320 | + "outputs": [ |
| 321 | + { |
| 322 | + "data": { |
| 323 | + "text/plain": [ |
| 324 | + "645875" |
| 325 | + ] |
| 326 | + }, |
| 327 | + "execution_count": 11, |
| 328 | + "metadata": {}, |
| 329 | + "output_type": "execute_result" |
| 330 | + } |
| 331 | + ], |
| 332 | + "source": [ |
| 333 | + "GC[\"SampSize\"]=round(GC[\"SampSize\"]/0.08*2)\n", |
| 334 | + "GC[\"SampSize\"]" |
| 335 | + ] |
| 336 | + }, |
| 337 | + { |
| 338 | + "cell_type": "code", |
| 339 | + "execution_count": 12, |
| 340 | + "metadata": { |
| 341 | + "_cell_guid": "c2c65956-f449-4d28-94b5-c5300aeadb4b", |
| 342 | + "_uuid": "1aa61cc37f8839a44f0773623d41c3684be1a9c7", |
| 343 | + "colab": { |
| 344 | + "base_uri": "https://localhost:8080/" |
| 345 | + }, |
| 346 | + "id": "z3jx0jgiFyM0", |
| 347 | + "outputId": "d2dba5cc-bf64-4177-9a95-6b8ebdd44674" |
| 348 | + }, |
| 349 | + "outputs": [ |
| 350 | + { |
| 351 | + "data": { |
| 352 | + "text/plain": [ |
| 353 | + "39087" |
| 354 | + ] |
| 355 | + }, |
| 356 | + "execution_count": 12, |
| 357 | + "metadata": {}, |
| 358 | + "output_type": "execute_result" |
| 359 | + } |
| 360 | + ], |
| 361 | + "source": [ |
| 362 | + "# Getting a nice integer value\n", |
| 363 | + "R[\"SampSize\"]=round(get_sampSize(get_sds(R[\"p\"],R[\"d\"]),0.05,0.2,R[\"d\"]))\n", |
| 364 | + "R[\"SampSize\"]" |
| 365 | + ] |
| 366 | + }, |
| 367 | + { |
| 368 | + "cell_type": "code", |
| 369 | + "execution_count": 13, |
| 370 | + "metadata": { |
| 371 | + "_cell_guid": "0281b321-7506-49ba-a33e-29eed30eefbe", |
| 372 | + "_uuid": "16bf69e2a0f6214b91485b736676a1fe7aadfdca", |
| 373 | + "colab": { |
| 374 | + "base_uri": "https://localhost:8080/" |
| 375 | + }, |
| 376 | + "id": "Zc1NFMp3FyM0", |
| 377 | + "outputId": "5ca978b8-4eac-4589-9481-b634e2c259d5" |
| 378 | + }, |
| 379 | + "outputs": [ |
| 380 | + { |
| 381 | + "data": { |
| 382 | + "text/plain": [ |
| 383 | + "4737818.181818182" |
| 384 | + ] |
| 385 | + }, |
| 386 | + "execution_count": 13, |
| 387 | + "metadata": {}, |
| 388 | + "output_type": "execute_result" |
| 389 | + } |
| 390 | + ], |
| 391 | + "source": [ |
| 392 | + "R[\"SampSize\"]=R[\"SampSize\"]/0.08/0.20625*2\n", |
| 393 | + "R[\"SampSize\"]" |
| 394 | + ] |
| 395 | + }, |
| 396 | + { |
| 397 | + "cell_type": "code", |
| 398 | + "execution_count": 14, |
| 399 | + "metadata": { |
| 400 | + "_cell_guid": "56be284a-1c49-4c65-91d2-e007ae5b327d", |
| 401 | + "_uuid": "42607629817a83c2be769165cff7a30d74067320", |
| 402 | + "colab": { |
| 403 | + "base_uri": "https://localhost:8080/" |
| 404 | + }, |
| 405 | + "id": "XqkTq_D-FyM0", |
| 406 | + "outputId": "ce8f9a0b-eaf3-4ded-d383-a47df31c799d" |
| 407 | + }, |
| 408 | + "outputs": [ |
| 409 | + { |
| 410 | + "data": { |
| 411 | + "text/plain": [ |
| 412 | + "27413" |
| 413 | + ] |
| 414 | + }, |
| 415 | + "execution_count": 14, |
| 416 | + "metadata": {}, |
| 417 | + "output_type": "execute_result" |
| 418 | + } |
| 419 | + ], |
| 420 | + "source": [ |
| 421 | + "# Getting a nice integer value\n", |
| 422 | + "NC[\"SampSize\"]=round(get_sampSize(get_sds(NC[\"p\"],NC[\"d\"]),0.05,0.2,NC[\"d\"]))\n", |
| 423 | + "NC[\"SampSize\"]" |
| 424 | + ] |
| 425 | + }, |
| 426 | + { |
| 427 | + "cell_type": "code", |
| 428 | + "execution_count": 15, |
| 429 | + "metadata": { |
| 430 | + "_cell_guid": "e4202dd9-7cb6-455c-9630-dd9277f84da6", |
| 431 | + "_uuid": "38b580f63f9e25fa7e19958d7b12a14d6e2b8838", |
| 432 | + "colab": { |
| 433 | + "base_uri": "https://localhost:8080/" |
| 434 | + }, |
| 435 | + "id": "UGArSr5lFyM0", |
| 436 | + "outputId": "0d53c8f8-e382-406e-9aeb-e2bdc9929a27" |
| 437 | + }, |
| 438 | + "outputs": [ |
| 439 | + { |
| 440 | + "data": { |
| 441 | + "text/plain": [ |
| 442 | + "685325.0" |
| 443 | + ] |
| 444 | + }, |
| 445 | + "execution_count": 15, |
| 446 | + "metadata": {}, |
| 447 | + "output_type": "execute_result" |
| 448 | + } |
| 449 | + ], |
| 450 | + "source": [ |
| 451 | + "NC[\"SampSize\"]=NC[\"SampSize\"]/0.08*2\n", |
| 452 | + "NC[\"SampSize\"]" |
| 453 | + ] |
| 454 | + }, |
| 455 | + { |
| 456 | + "cell_type": "code", |
| 457 | + "execution_count": 16, |
| 458 | + "metadata": { |
| 459 | + "_cell_guid": "58b7a728-3b45-4867-969b-48959085498c", |
| 460 | + "_uuid": "4c29c3bce964e643ef30633cab8b95f268f76abc", |
| 461 | + "colab": { |
| 462 | + "base_uri": "https://localhost:8080/", |
| 463 | + "height": 198 |
| 464 | + }, |
| 465 | + "id": "Oj_s62oaFyM1", |
| 466 | + "outputId": "ba245606-e0ee-4721-f510-189477941b0d" |
| 467 | + }, |
| 468 | + "outputs": [ |
| 469 | + { |
| 470 | + "data": { |
| 471 | + "text/html": [ |
| 472 | + "<div>\n", |
| 473 | + "<style scoped>\n", |
| 474 | + " .dataframe tbody tr th:only-of-type {\n", |
| 475 | + " vertical-align: middle;\n", |
| 476 | + " }\n", |
| 477 | + "\n", |
| 478 | + " .dataframe tbody tr th {\n", |
| 479 | + " vertical-align: top;\n", |
| 480 | + " }\n", |
| 481 | + "\n", |
| 482 | + " .dataframe thead th {\n", |
| 483 | + " text-align: right;\n", |
| 484 | + " }\n", |
| 485 | + "</style>\n", |
| 486 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 487 | + " <thead>\n", |
| 488 | + " <tr style=\"text-align: right;\">\n", |
| 489 | + " <th></th>\n", |
| 490 | + " <th>Date</th>\n", |
| 491 | + " <th>Pageviews</th>\n", |
| 492 | + " <th>Clicks</th>\n", |
| 493 | + " <th>Enrollments</th>\n", |
| 494 | + " <th>Payments</th>\n", |
| 495 | + " </tr>\n", |
| 496 | + " </thead>\n", |
| 497 | + " <tbody>\n", |
| 498 | + " <tr>\n", |
| 499 | + " <th>0</th>\n", |
| 500 | + " <td>Sat, Oct 11</td>\n", |
| 501 | + " <td>7723</td>\n", |
| 502 | + " <td>687</td>\n", |
| 503 | + " <td>134.0</td>\n", |
| 504 | + " <td>70.0</td>\n", |
| 505 | + " </tr>\n", |
| 506 | + " <tr>\n", |
| 507 | + " <th>1</th>\n", |
| 508 | + " <td>Sun, Oct 12</td>\n", |
| 509 | + " <td>9102</td>\n", |
| 510 | + " <td>779</td>\n", |
| 511 | + " <td>147.0</td>\n", |
| 512 | + " <td>70.0</td>\n", |
| 513 | + " </tr>\n", |
| 514 | + " <tr>\n", |
| 515 | + " <th>2</th>\n", |
| 516 | + " <td>Mon, Oct 13</td>\n", |
| 517 | + " <td>10511</td>\n", |
| 518 | + " <td>909</td>\n", |
| 519 | + " <td>167.0</td>\n", |
| 520 | + " <td>95.0</td>\n", |
| 521 | + " </tr>\n", |
| 522 | + " <tr>\n", |
| 523 | + " <th>3</th>\n", |
| 524 | + " <td>Tue, Oct 14</td>\n", |
| 525 | + " <td>9871</td>\n", |
| 526 | + " <td>836</td>\n", |
| 527 | + " <td>156.0</td>\n", |
| 528 | + " <td>105.0</td>\n", |
| 529 | + " </tr>\n", |
| 530 | + " <tr>\n", |
| 531 | + " <th>4</th>\n", |
| 532 | + " <td>Wed, Oct 15</td>\n", |
| 533 | + " <td>10014</td>\n", |
| 534 | + " <td>837</td>\n", |
| 535 | + " <td>163.0</td>\n", |
| 536 | + " <td>64.0</td>\n", |
| 537 | + " </tr>\n", |
| 538 | + " </tbody>\n", |
| 539 | + "</table>\n", |
| 540 | + "</div>" |
| 541 | + ], |
| 542 | + "text/plain": [ |
| 543 | + " Date Pageviews Clicks Enrollments Payments\n", |
| 544 | + "0 Sat, Oct 11 7723 687 134.0 70.0\n", |
| 545 | + "1 Sun, Oct 12 9102 779 147.0 70.0\n", |
| 546 | + "2 Mon, Oct 13 10511 909 167.0 95.0\n", |
| 547 | + "3 Tue, Oct 14 9871 836 156.0 105.0\n", |
| 548 | + "4 Wed, Oct 15 10014 837 163.0 64.0" |
| 549 | + ] |
| 550 | + }, |
| 551 | + "execution_count": 16, |
| 552 | + "metadata": {}, |
| 553 | + "output_type": "execute_result" |
| 554 | + } |
| 555 | + ], |
| 556 | + "source": [ |
| 557 | + "# 載入數據\n", |
| 558 | + "control=pd.read_csv( 'control_data.csv' )\n", |
| 559 | + "experiment=pd.read_csv( 'experiment_data.csv' )\n", |
| 560 | + "control.head()" |
| 561 | + ] |
| 562 | + }, |
| 563 | + { |
| 564 | + "cell_type": "code", |
| 565 | + "execution_count": 17, |
| 566 | + "metadata": { |
| 567 | + "_cell_guid": "37ab672f-be1c-46bc-ae53-edf722bef4bc", |
| 568 | + "_uuid": "15652351bc793b528ca853666ddb4b7defe4f4ff", |
| 569 | + "colab": { |
| 570 | + "base_uri": "https://localhost:8080/" |
| 571 | + }, |
| 572 | + "id": "c2AH9yHaFyM1", |
| 573 | + "outputId": "c534d6e1-a872-4fab-ecf3-95cc10d411d4" |
| 574 | + }, |
| 575 | + "outputs": [ |
| 576 | + { |
| 577 | + "name": "stdout", |
| 578 | + "output_type": "stream", |
| 579 | + "text": [ |
| 580 | + "number of pageviews in control: 345543\n", |
| 581 | + "number of Pageviewsin experiment: 344660\n" |
| 582 | + ] |
| 583 | + } |
| 584 | + ], |
| 585 | + "source": [ |
| 586 | + "pageviews_cont=control['Pageviews'].sum()\n", |
| 587 | + "pageviews_exp=experiment['Pageviews'].sum()\n", |
| 588 | + "pageviews_total=pageviews_cont+pageviews_exp\n", |
| 589 | + "print (\"number of pageviews in control:\", pageviews_cont)\n", |
| 590 | + "print (\"number of Pageviewsin experiment:\" ,pageviews_exp)" |
| 591 | + ] |
| 592 | + }, |
| 593 | + { |
| 594 | + "cell_type": "code", |
| 595 | + "execution_count": 18, |
| 596 | + "metadata": { |
| 597 | + "_cell_guid": "8e13eba4-0daa-4220-a772-e1f371854d60", |
| 598 | + "_uuid": "6d4d94df9e5afdfdf97703292b0ef9ac6c3e4251", |
| 599 | + "id": "kxKPjYuWFyM1" |
| 600 | + }, |
| 601 | + "outputs": [], |
| 602 | + "source": [ |
| 603 | + "# Count the total clicks from complete records only\n", |
| 604 | + "clicks_cont=control[\"Clicks\"].loc[control[\"Enrollments\"].notnull()].sum()\n", |
| 605 | + "clicks_exp=experiment[\"Clicks\"].loc[experiment[\"Enrollments\"].notnull()].sum()" |
| 606 | + ] |
| 607 | + }, |
| 608 | + { |
| 609 | + "cell_type": "code", |
| 610 | + "execution_count": 19, |
| 611 | + "metadata": { |
| 612 | + "_cell_guid": "5615673e-529c-452a-ad48-9186cb88f74b", |
| 613 | + "_uuid": "3f17a8086726ee8cc01f150808f88fe8693a95a2", |
| 614 | + "colab": { |
| 615 | + "base_uri": "https://localhost:8080/" |
| 616 | + }, |
| 617 | + "id": "I4gtEo6LFyM1", |
| 618 | + "outputId": "6b952d56-de9a-472f-f6ce-bf2cc0ccd9ae" |
| 619 | + }, |
| 620 | + "outputs": [ |
| 621 | + { |
| 622 | + "name": "stdout", |
| 623 | + "output_type": "stream", |
| 624 | + "text": [ |
| 625 | + "The change due to the experiment is -2.06 %\n", |
| 626 | + "Confidence Interval: [ -0.0292 , -0.012 ]\n", |
| 627 | + "The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.\n" |
| 628 | + ] |
| 629 | + } |
| 630 | + ], |
| 631 | + "source": [ |
| 632 | + "# Gross Conversion - number of enrollments divided by number of clicks\n", |
| 633 | + "alpha = 0.05\n", |
| 634 | + "enrollments_cont=control[\"Enrollments\"].sum()\n", |
| 635 | + "enrollments_exp=experiment[\"Enrollments\"].sum()\n", |
| 636 | + "\n", |
| 637 | + "GC_cont=enrollments_cont/clicks_cont\n", |
| 638 | + "GC_exp=enrollments_exp/clicks_exp\n", |
| 639 | + "GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp)\n", |
| 640 | + "GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp))\n", |
| 641 | + "GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4)\n", |
| 642 | + "GC_diff=round(GC_exp-GC_cont,4)\n", |
| 643 | + "print(\"The change due to the experiment is\",GC_diff*100,\"%\")\n", |
| 644 | + "print(\"Confidence Interval: [\",GC_diff-GC_ME,\",\",GC_diff+GC_ME,\"]\")\n", |
| 645 | + "print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",-GC[\"d_min\"],\"is not in the CI as well.\")" |
| 646 | + ] |
| 647 | + }, |
| 648 | + { |
| 649 | + "cell_type": "code", |
| 650 | + "execution_count": 20, |
| 651 | + "metadata": { |
| 652 | + "_cell_guid": "8cb352ad-fc92-4f21-b939-3874314ba8f4", |
| 653 | + "_uuid": "83e99af5dea50f22629ccbd34faa196d4065172f", |
| 654 | + "colab": { |
| 655 | + "base_uri": "https://localhost:8080/" |
| 656 | + }, |
| 657 | + "id": "MQIg2XBsFyM1", |
| 658 | + "outputId": "f8f7bb99-0194-4884-dc39-acf4fb962b40" |
| 659 | + }, |
| 660 | + "outputs": [ |
| 661 | + { |
| 662 | + "name": "stdout", |
| 663 | + "output_type": "stream", |
| 664 | + "text": [ |
| 665 | + "The change due to the experiment is -0.49 %\n", |
| 666 | + "Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]\n", |
| 667 | + "The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if 0.0075 is not in the CI as well.\n" |
| 668 | + ] |
| 669 | + } |
| 670 | + ], |
| 671 | + "source": [ |
| 672 | + "# Net Conversion - number of payments divided by number of clicks\n", |
| 673 | + "payments_cont=control[\"Payments\"].sum()\n", |
| 674 | + "payments_exp=experiment[\"Payments\"].sum()\n", |
| 675 | + "\n", |
| 676 | + "NC_cont=payments_cont/clicks_cont\n", |
| 677 | + "NC_exp=payments_exp/clicks_exp\n", |
| 678 | + "NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp)\n", |
| 679 | + "NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp))\n", |
| 680 | + "NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4)\n", |
| 681 | + "NC_diff=round(NC_exp-NC_cont,4)\n", |
| 682 | + "print(\"The change due to the experiment is\",NC_diff*100,\"%\")\n", |
| 683 | + "print(\"Confidence Interval: [\",NC_diff-NC_ME,\",\",NC_diff+NC_ME,\"]\")\n", |
| 684 | + "print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",NC[\"d_min\"],\"is not in the CI as well.\")" |
| 685 | + ] |
| 686 | + }, |
| 687 | + { |
| 688 | + "cell_type": "markdown", |
| 689 | + "metadata": { |
| 690 | + "id": "K2hnLtKrFS75" |
| 691 | + }, |
| 692 | + "source": [ |
| 693 | + "# **作業**\n", |
| 694 | + "# 經由範例程式碼,熟悉A/B Test的步驟\n", |
| 695 | + "\n", |
| 696 | + "請同學逐步跟隨程式了解A/B Test步驟" |
| 697 | + ] |
| 698 | + }, |
| 699 | + { |
| 700 | + "cell_type": "markdown", |
| 701 | + "metadata": { |
| 702 | + "id": "-lO_8AYwuEDY" |
| 703 | + }, |
| 704 | + "source": [ |
| 705 | + "# **作業 嘗試以函數算出樣本數**" |
| 706 | + ] |
| 707 | + }, |
| 708 | + { |
| 709 | + "cell_type": "code", |
| 710 | + "execution_count": 21, |
| 711 | + "metadata": {}, |
| 712 | + "outputs": [ |
| 713 | + { |
| 714 | + "name": "stdout", |
| 715 | + "output_type": "stream", |
| 716 | + "text": [ |
| 717 | + "-0.02494345647889673 25231\n" |
| 718 | + ] |
| 719 | + } |
| 720 | + ], |
| 721 | + "source": [ |
| 722 | + "import statsmodels.stats.api as sms\n", |
| 723 | + "from math import ceil\n", |
| 724 | + "\n", |
| 725 | + "effect_size = sms.proportion_effectsize(GC[\"p\"]-1.0*GC[\"d_min\"], GC[\"p\"]+0.0*GC[\"d_min\"])\n", |
| 726 | + "required_n = sms.NormalIndPower().solve_power(\n", |
| 727 | + " effect_size, \n", |
| 728 | + " power = 0.8, \n", |
| 729 | + " alpha = 0.05, \n", |
| 730 | + " ratio = 1\n", |
| 731 | + " ) \n", |
| 732 | + "required_n = ceil(required_n) \n", |
| 733 | + "print (effect_size,required_n)" |
| 734 | + ] |
| 735 | + }, |
| 736 | + { |
| 737 | + "cell_type": "markdown", |
| 738 | + "metadata": { |
| 739 | + "id": "MKB09_mjFwjN" |
| 740 | + }, |
| 741 | + "source": [ |
| 742 | + "# **作業** 自行開發雙樣本比例的信賴區間函數\n" |
| 743 | + ] |
| 744 | + }, |
| 745 | + { |
| 746 | + "cell_type": "code", |
| 747 | + "execution_count": 22, |
| 748 | + "metadata": {}, |
| 749 | + "outputs": [ |
| 750 | + { |
| 751 | + "data": { |
| 752 | + "text/plain": [ |
| 753 | + "(-0.020554874580361565, array([-0.02912016, -0.01198959]))" |
| 754 | + ] |
| 755 | + }, |
| 756 | + "execution_count": 22, |
| 757 | + "metadata": {}, |
| 758 | + "output_type": "execute_result" |
| 759 | + } |
| 760 | + ], |
| 761 | + "source": [ |
| 762 | + "import scipy.stats as stats\n", |
| 763 | + "def two_proprotions_confint(success_a, size_a, success_b, size_b, significance = 0.05):\n", |
| 764 | + " \n", |
| 765 | + " prop_a = success_a / size_a\n", |
| 766 | + " prop_b = success_b / size_b\n", |
| 767 | + " var = prop_a * (1 - prop_a) / size_a + prop_b * (1 - prop_b) / size_b\n", |
| 768 | + " se = np.sqrt(var)\n", |
| 769 | + "\n", |
| 770 | + " # z critical value\n", |
| 771 | + " confidence = 1 - significance\n", |
| 772 | + " z = stats.norm(loc = 0, scale = 1).ppf(confidence + significance / 2)\n", |
| 773 | + "\n", |
| 774 | + " # standard formula for the confidence interval\n", |
| 775 | + " # point-estimate +- z * standard-error\n", |
| 776 | + " prop_diff = prop_b - prop_a\n", |
| 777 | + " confint = prop_diff + np.array([-1, 1]) * z * se\n", |
| 778 | + " return prop_diff, confint\n", |
| 779 | + "two_proprotions_confint(enrollments_cont, clicks_cont, enrollments_exp, clicks_exp, significance = 0.05)" |
| 780 | + ] |
| 781 | + } |
| 782 | + ], |
| 783 | + "metadata": { |
| 784 | + "colab": { |
| 785 | + "name": "ab-tests-with-python_作業.ipynb", |
| 786 | + "provenance": [] |
| 787 | + }, |
| 788 | + "kernelspec": { |
| 789 | + "display_name": "Python 3", |
| 790 | + "language": "python", |
| 791 | + "name": "python3" |
| 792 | + }, |
| 793 | + "language_info": { |
| 794 | + "codemirror_mode": { |
| 795 | + "name": "ipython", |
| 796 | + "version": 3 |
| 797 | + }, |
| 798 | + "file_extension": ".py", |
| 799 | + "mimetype": "text/x-python", |
| 800 | + "name": "python", |
| 801 | + "nbconvert_exporter": "python", |
| 802 | + "pygments_lexer": "ipython3", |
| 803 | + "version": "3.7.7" |
| 804 | + } |
| 805 | + }, |
| 806 | + "nbformat": 4, |
| 807 | + "nbformat_minor": 1 |
| 808 | +} |
0 commit comments