YenLinWu · Mar 29, 2021
diff --git a/‎Solution/期末專題實作(三)/Udacity 教學網站註冊效果之 AB Test 分析_Solution.ipynb
+789 b/‎Solution/期末專題實作(三)/Udacity 教學網站註冊效果之 AB Test 分析_Solution.ipynb
+789
@@ -0,0 +1,789 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "ab-tests-with-python_作業解答.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "RTnv32wkA_Ox"
+      },
+      "source": [
+        "# **作業說明**\n",
+        "# (這是Udacity關於A/B Test的期末專題)\n",
+        "\n",
+        "Udacity希望了解，在免費14天試學網頁上，除了要信用卡資訊外，還想了解學生願意花多少小時學。如果少於某門檻(5小時)，就建議學生不要註冊，免費聽聽影音就好，免得浪費資源，降低學習成功率。\n",
+        "\n",
+        "我們的題目是：增加這個頁面後，Gross Conversion (GC) 和 Net Conversion (NC) 的變化在統計上 (Alpha=0.05，Power=0.8) 是否顯著 (Significant)；最小可偵測差異 d 分別為 GC 的 0.01 與 NC 的 0.0075。\n",
+        "\n",
+        "CI = click 數目\n",
+        "\n",
+        "GC = 註冊數/CI (聽了建議仍然註冊的比例)\n",
+        "\n",
+        "NC = 繳費數/CI (14天之後繳費且繼續的比例)\n",
+        "\n",
+        "我們期待GC比原來下降，但NC不降，這表示省去資源但收入不降。\n",
+        "\n",
+        "檔名：ab-tests-with-python.ipynb\n",
+        "\n",
+        "**作業目標**\n",
+        "\n",
+        "1.   經由範例程式，學習A/B Test 的步驟\n",
+        "2.   最低樣本數的計算方法\n",
+        "3.   自行開發信賴區間計算函數\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "QfCMcrfTFyMx"
+      },
+      "source": [
+        "#載入程式庫\n",
+        "import math as mt\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "from scipy.stats import norm"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "_iZnYjxIFyMy"
+      },
+      "source": [
+        "# Store the baseline figures in a dictionary.\n",
+        "# The ratios agree with the counts: CTP = 3200/40000, GConversion = 660/3200,\n",
+        "# and NConversion is approximately GConversion * Retention.\n",
+        "baseline = dict(\n",
+        "    Cookies=40000,\n",
+        "    Clicks=3200,\n",
+        "    Enrollments=660,\n",
+        "    CTP=0.08,\n",
+        "    GConversion=0.20625,\n",
+        "    Retention=0.53,\n",
+        "    NConversion=0.109313,\n",
+        ")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "rE-idI4vFyMy"
+      },
+      "source": [
+        "# Rescale the count metrics to a sample of 5000 cookies.\n",
+        "# The factor is derived from the current \"Cookies\" value, so re-running this\n",
+        "# cell is a no-op (factor 1.0) instead of shrinking Clicks/Enrollments again.\n",
+        "target_cookies = 5000\n",
+        "scale = target_cookies/baseline[\"Cookies\"]\n",
+        "baseline[\"Clicks\"]=baseline[\"Clicks\"]*scale\n",
+        "baseline[\"Enrollments\"]=baseline[\"Enrollments\"]*scale\n",
+        "baseline[\"Cookies\"] = target_cookies\n",
+        "baseline"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "gNpNShHKFyMz",
+        "outputId": "1c00edec-27d3-4729-88e8-ce58eeef0c99"
+      },
+      "source": [
+        "# Gross Conversion (GC): probability p, sample size n and the\n",
+        "# standard deviation (sd) rounded to 4 decimal digits.\n",
+        "# p is taken from the baseline; it could also be computed as enrollments/clicks.\n",
+        "GC={\"d_min\":0.01,\"p\":baseline[\"GConversion\"],\"n\":baseline[\"Clicks\"]}\n",
+        "GC[\"sd\"]=round(mt.sqrt(GC[\"p\"]*(1-GC[\"p\"])/GC[\"n\"]),4)\n",
+        "GC[\"sd\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0.0202"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 9
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "w_WjAIKkFyMz",
+        "outputId": "c924bda0-918c-4073-8e55-7cc3d67ad4e9"
+      },
+      "source": [
+        "# Retention (R): probability p, sample size n (enrollments) and the\n",
+        "# standard deviation (sd) rounded to 4 decimal digits.\n",
+        "R={\"d_min\":0.01,\"p\":baseline[\"Retention\"],\"n\":baseline[\"Enrollments\"]}\n",
+        "R[\"sd\"]=round(mt.sqrt(R[\"p\"]*(1-R[\"p\"])/R[\"n\"]),4)\n",
+        "R[\"sd\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0.0549"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 4
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "kwx3Of06FyMz",
+        "outputId": "efc60cde-6e3b-4bed-e5f6-a3b697b3d393"
+      },
+      "source": [
+        "# Net Conversion (NC): probability p, sample size n (clicks) and the\n",
+        "# standard deviation (sd) rounded to 4 decimal digits.\n",
+        "NC={\"d_min\":0.0075,\"p\":baseline[\"NConversion\"],\"n\":baseline[\"Clicks\"]}\n",
+        "NC[\"sd\"]=round(mt.sqrt(NC[\"p\"]*(1-NC[\"p\"])/NC[\"n\"]),4)\n",
+        "NC[\"sd\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0.0156"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 5
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pBk7b5uMFyM0"
+      },
+      "source": [
+        "# get_sds(p, d) is defined once, in the next cell, together with get_z_score and get_sampSize; the duplicate definition that used to live here was removed."
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "mB2im4rcFyM0"
+      },
+      "source": [
+        "def get_z_score(alpha):\n",
+        "    \"\"\"Return the standard-normal quantile (inverse CDF) at probability `alpha`.\n",
+        "\n",
+        "    Despite its name, `alpha` is the cumulative probability handed to norm.ppf;\n",
+        "    callers in this notebook pass values such as 1-alpha/2 or 1-beta.\n",
+        "    \"\"\"\n",
+        "    z_score=norm.ppf(alpha)\n",
+        "    return z_score\n",
+        "\n",
+        "def get_sds(p,d):\n",
+        "    \"\"\"Return the two standard deviations [sd1, sd2] used for the sample size.\n",
+        "\n",
+        "    sd1 = sqrt(2*p*(1-p)) assumes both groups share the rate p;\n",
+        "    sd2 = sqrt(p*(1-p) + (p+d)*(1-(p+d))) assumes the second group has rate p+d.\n",
+        "    \"\"\"\n",
+        "    p_shifted=p+d\n",
+        "    sd_same=mt.sqrt(2*p*(1-p))\n",
+        "    sd_shifted=mt.sqrt(p*(1-p)+p_shifted*(1-p_shifted))\n",
+        "    return [sd_same,sd_shifted]\n",
+        "\n",
+        "def get_sampSize(sds,alpha,beta,d):\n",
+        "    \"\"\"Return the sample size for a two-sided test.\n",
+        "\n",
+        "    sds   : the pair [sd1, sd2] produced by get_sds\n",
+        "    alpha : significance level (the two-sided quantile 1-alpha/2 is used)\n",
+        "    beta  : type II error rate (the quantile 1-beta is used)\n",
+        "    d     : difference to detect\n",
+        "    \"\"\"\n",
+        "    z_alpha=get_z_score(1-alpha/2)\n",
+        "    z_beta=get_z_score(1-beta)\n",
+        "    numerator=(z_alpha*sds[0]+z_beta*sds[1])**2\n",
+        "    return numerator/d**2"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uFh5tlyTFyM0"
+      },
+      "source": [
+        "# Difference d used in the sample-size calculation of each metric.\n",
+        "GC.update(d=0.01)\n",
+        "R.update(d=0.01)\n",
+        "NC.update(d=0.0075)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fiOPWnzNFyM0",
+        "outputId": "f1b76240-2fe5-44bd-ddcf-225afa73b54a"
+      },
+      "source": [
+        "# Sample size for GC with alpha=0.05 and beta=0.2, rounded to a whole number for simplicity.\n",
+        "GC_sds=get_sds(GC[\"p\"],GC[\"d\"])\n",
+        "GC[\"SampSize\"]=round(get_sampSize(GC_sds,0.05,0.2,GC[\"d\"]))\n",
+        "GC[\"SampSize\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "25835.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 11
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "X7vCHRGDFyM0",
+        "outputId": "647b76b3-d56e-4dba-e45b-005e919fd854"
+      },
+      "source": [
+        "# Scale the GC sample size up to pageviews: divide by the click-through\n",
+        "# probability and double it (presumably one share per group: control + experiment).\n",
+        "# The click-level size is recomputed from its inputs instead of being read back from\n",
+        "# GC[\"SampSize\"], so re-running this cell no longer inflates the result.\n",
+        "GC_clicks=round(get_sampSize(get_sds(GC[\"p\"],GC[\"d\"]),0.05,0.2,GC[\"d\"]))\n",
+        "GC[\"SampSize\"]=round(GC_clicks/baseline[\"CTP\"]*2)\n",
+        "GC[\"SampSize\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "645875.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 12
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "z3jx0jgiFyM0",
+        "outputId": "1464feca-9b43-43c6-cb6d-aae739d2eb5a"
+      },
+      "source": [
+        "# Sample size for Retention with alpha=0.05 and beta=0.2, rounded to a whole number.\n",
+        "R_sds=get_sds(R[\"p\"],R[\"d\"])\n",
+        "R[\"SampSize\"]=round(get_sampSize(R_sds,0.05,0.2,R[\"d\"]))\n",
+        "R[\"SampSize\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "39087.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 13
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "Zc1NFMp3FyM0",
+        "outputId": "ce917636-2197-45af-a7a5-d75eccc51398"
+      },
+      "source": [
+        "# Scale the Retention sample size up to pageviews: divide by the click-through\n",
+        "# probability and by the gross conversion, then double it (presumably for the two groups).\n",
+        "# The enrollment-level size is recomputed from its inputs instead of being read back from\n",
+        "# R[\"SampSize\"], so re-running this cell no longer inflates the result.\n",
+        "R_enrollments=round(get_sampSize(get_sds(R[\"p\"],R[\"d\"]),0.05,0.2,R[\"d\"]))\n",
+        "R[\"SampSize\"]=R_enrollments/baseline[\"CTP\"]/baseline[\"GConversion\"]*2\n",
+        "R[\"SampSize\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "4737818.181818182"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 14
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "XqkTq_D-FyM0",
+        "outputId": "2b1a96db-4bd1-49c9-ac49-02f65f95ee0b"
+      },
+      "source": [
+        "# Sample size for NC with alpha=0.05 and beta=0.2, rounded to a whole number.\n",
+        "NC_sds=get_sds(NC[\"p\"],NC[\"d\"])\n",
+        "NC[\"SampSize\"]=round(get_sampSize(NC_sds,0.05,0.2,NC[\"d\"]))\n",
+        "NC[\"SampSize\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "27413.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 15
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "UGArSr5lFyM0",
+        "outputId": "db5dcecd-d9ff-4b29-8ef4-41364a2591bf"
+      },
+      "source": [
+        "# Scale the NC sample size up to pageviews: divide by the click-through\n",
+        "# probability and double it (presumably one share per group: control + experiment).\n",
+        "# The click-level size is recomputed from its inputs instead of being read back from\n",
+        "# NC[\"SampSize\"], so re-running this cell no longer inflates the result.\n",
+        "NC_clicks=round(get_sampSize(get_sds(NC[\"p\"],NC[\"d\"]),0.05,0.2,NC[\"d\"]))\n",
+        "NC[\"SampSize\"]=NC_clicks/baseline[\"CTP\"]*2\n",
+        "NC[\"SampSize\"]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "685325.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 16
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 198
+        },
+        "id": "Oj_s62oaFyM1",
+        "outputId": "b9b4f542-7483-4d29-b598-c27e595f2e2e"
+      },
+      "source": [
+        "# Load the control and experiment data sets and preview the control group.\n",
+        "control,experiment=(\n",
+        "    pd.read_csv(csv_path)\n",
+        "    for csv_path in (\"./sample_data/control_data.csv\",\"./sample_data/experiment_data.csv\")\n",
+        ")\n",
+        "control.head()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Date</th>\n",
+              "      <th>Pageviews</th>\n",
+              "      <th>Clicks</th>\n",
+              "      <th>Enrollments</th>\n",
+              "      <th>Payments</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Sat, Oct 11</td>\n",
+              "      <td>7723</td>\n",
+              "      <td>687</td>\n",
+              "      <td>134.0</td>\n",
+              "      <td>70.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Sun, Oct 12</td>\n",
+              "      <td>9102</td>\n",
+              "      <td>779</td>\n",
+              "      <td>147.0</td>\n",
+              "      <td>70.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Mon, Oct 13</td>\n",
+              "      <td>10511</td>\n",
+              "      <td>909</td>\n",
+              "      <td>167.0</td>\n",
+              "      <td>95.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Tue, Oct 14</td>\n",
+              "      <td>9871</td>\n",
+              "      <td>836</td>\n",
+              "      <td>156.0</td>\n",
+              "      <td>105.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Wed, Oct 15</td>\n",
+              "      <td>10014</td>\n",
+              "      <td>837</td>\n",
+              "      <td>163.0</td>\n",
+              "      <td>64.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "          Date  Pageviews  Clicks  Enrollments  Payments\n",
+              "0  Sat, Oct 11       7723     687        134.0      70.0\n",
+              "1  Sun, Oct 12       9102     779        147.0      70.0\n",
+              "2  Mon, Oct 13      10511     909        167.0      95.0\n",
+              "3  Tue, Oct 14       9871     836        156.0     105.0\n",
+              "4  Wed, Oct 15      10014     837        163.0      64.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 21
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "c2AH9yHaFyM1",
+        "outputId": "5828ee0c-f831-4862-e918-14b3dcf12f28"
+      },
+      "source": [
+        "# Total pageviews per group and overall.\n",
+        "pageviews_cont,pageviews_exp=(frame['Pageviews'].sum() for frame in (control,experiment))\n",
+        "pageviews_total=pageviews_cont+pageviews_exp\n",
+        "print (\"number of pageviews in control:\", pageviews_cont)\n",
+        "print (\"number of Pageviewsin experiment:\" ,pageviews_exp)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "number of pageviews in control: 345543\n",
+            "number of Pageviewsin experiment: 344660\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kxKPjYuWFyM1"
+      },
+      "source": [
+        "# Sum the clicks over complete records only, i.e. rows whose Enrollments value is present.\n",
+        "clicks_cont=control.loc[control[\"Enrollments\"].notnull(),\"Clicks\"].sum()\n",
+        "clicks_exp=experiment.loc[experiment[\"Enrollments\"].notnull(),\"Clicks\"].sum()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "I4gtEo6LFyM1",
+        "outputId": "cb83ee22-d69a-4feb-c6bb-52592a08b077"
+      },
+      "source": [
+        "# Gross Conversion - number of enrollments divided by number of clicks.\n",
+        "# Significance level of the confidence interval. It was not defined anywhere in\n",
+        "# the notebook before, so this cell raised NameError on a fresh kernel.\n",
+        "alpha=0.05\n",
+        "\n",
+        "enrollments_cont=control[\"Enrollments\"].sum()\n",
+        "enrollments_exp=experiment[\"Enrollments\"].sum()\n",
+        "\n",
+        "GC_cont=enrollments_cont/clicks_cont\n",
+        "GC_exp=enrollments_exp/clicks_exp\n",
+        "# Pooled rate and pooled standard error of the difference between the two groups\n",
+        "GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp)\n",
+        "GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
+        "# Margin of error for a two-sided interval\n",
+        "GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4)\n",
+        "GC_diff=round(GC_exp-GC_cont,4)\n",
+        "print(\"The change due to the experiment is\",GC_diff*100,\"%\")\n",
+        "print(\"Confidence Interval: [\",GC_diff-GC_ME,\",\",GC_diff+GC_ME,\"]\")\n",
+        "print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",-GC[\"d_min\"],\"is not in the CI as well.\")"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "The change due to the experiment is -2.06 %\n",
+            "Confidence Interval: [ -0.0292 , -0.012 ]\n",
+            "The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "MQIg2XBsFyM1",
+        "outputId": "74843a74-15e2-4f60-c0a7-469d7a11acc2"
+      },
+      "source": [
+        "# Net Conversion - number of payments divided by number of clicks.\n",
+        "# Significance level of the confidence interval. It was not defined anywhere in\n",
+        "# the notebook before, so this cell raised NameError on a fresh kernel.\n",
+        "alpha=0.05\n",
+        "\n",
+        "payments_cont=control[\"Payments\"].sum()\n",
+        "payments_exp=experiment[\"Payments\"].sum()\n",
+        "\n",
+        "NC_cont=payments_cont/clicks_cont\n",
+        "NC_exp=payments_exp/clicks_exp\n",
+        "# Pooled rate and pooled standard error of the difference between the two groups\n",
+        "NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp)\n",
+        "NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
+        "# Margin of error for a two-sided interval\n",
+        "NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4)\n",
+        "NC_diff=round(NC_exp-NC_cont,4)\n",
+        "print(\"The change due to the experiment is\",NC_diff*100,\"%\")\n",
+        "print(\"Confidence Interval: [\",NC_diff-NC_ME,\",\",NC_diff+NC_ME,\"]\")\n",
+        "print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",NC[\"d_min\"],\"is not in the CI as well.\")"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "The change due to the experiment is -0.49 %\n",
+            "Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]\n",
+            "The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if 0.0075 is not in the CI as well.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "K2hnLtKrFS75"
+      },
+      "source": [
+        "# **作業**\n",
+        "# 經由範例程式碼，熟悉A/B Test的步驟\n",
+        "\n",
+        "請同學逐步跟隨程式了解A/B Test步驟"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-lO_8AYwuEDY"
+      },
+      "source": [
+        "# **作業 嘗試以函數算出樣本數**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oRSbrNaRuRJA"
+      },
+      "source": [
+        "# Exercise: sample size computed with statsmodels\n",
+        "import statsmodels.stats.api as sms\n",
+        "from math import ceil\n",
+        "\n",
+        "# Effect size between the reduced rate (p - d_min) and the baseline rate p\n",
+        "p_reduced = GC[\"p\"]-GC[\"d_min\"]\n",
+        "effect_size = sms.proportion_effectsize(p_reduced, GC[\"p\"])\n",
+        "power_analysis = sms.NormalIndPower()\n",
+        "# Solve for the number of observations, then round up to a whole number\n",
+        "required_n = ceil(power_analysis.solve_power(effect_size, power=0.8, alpha=0.05, ratio=1))\n",
+        "print (effect_size,required_n)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MKB09_mjFwjN"
+      },
+      "source": [
+        "# **作業** 自行開發雙樣本比例的信賴區間函數\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "yT5goD1jHKpl"
+      },
+      "source": [
+        "# Exercise solution\n",
+        "import scipy.stats as stats\n",
+        "def two_proprotions_confint(success_a, size_a, success_b, size_b, significance = 0.05):\n",
+        "    \"\"\"\n",
+        "    Confidence interval for the difference of two proportions (A/B test).\n",
+        "\n",
+        "    Given the number of successes and the trial size of groups A and B,\n",
+        "    compute the difference prop_b - prop_a and its normal-approximation\n",
+        "    confidence interval based on the unpooled standard error.\n",
+        "    NOTE(review): the previous docstring stated that the interval matches\n",
+        "    R's prop.test; presumably without continuity correction - confirm.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    success_a, success_b : int\n",
+        "        Number of successes in each group\n",
+        "\n",
+        "    size_a, size_b : int\n",
+        "        Size, or number of observations in each group; must be positive\n",
+        "\n",
+        "    significance : float, default 0.05\n",
+        "        Often denoted as alpha. Governs the chance of a false positive.\n",
+        "        A significance level of 0.05 means that there is a 5% chance of\n",
+        "        a false positive. In other words, our confidence level is\n",
+        "        1 - 0.05 = 0.95\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    prop_diff : float\n",
+        "        Difference between the two proportions (group B minus group A)\n",
+        "\n",
+        "    confint : 1d ndarray\n",
+        "        Lower and upper bound of the confidence interval\n",
+        "\n",
+        "    Raises\n",
+        "    ------\n",
+        "    ValueError\n",
+        "        If a group size is not positive or significance is not in (0, 1)\n",
+        "    \"\"\"\n",
+        "    # With numpy integers a zero size would only emit a RuntimeWarning and\n",
+        "    # return nan/inf, so reject invalid input explicitly.\n",
+        "    if size_a <= 0 or size_b <= 0:\n",
+        "        raise ValueError(\"size_a and size_b must be positive\")\n",
+        "    if not 0 < significance < 1:\n",
+        "        raise ValueError(\"significance must be strictly between 0 and 1\")\n",
+        "\n",
+        "    prop_a = success_a / size_a\n",
+        "    prop_b = success_b / size_b\n",
+        "    # unpooled variance of the difference of the two sample proportions\n",
+        "    var = prop_a * (1 - prop_a) / size_a + prop_b * (1 - prop_b) / size_b\n",
+        "    se = np.sqrt(var)\n",
+        "\n",
+        "    # z critical value of the two-sided interval (quantile 1 - significance / 2)\n",
+        "    confidence = 1 - significance\n",
+        "    z = stats.norm(loc = 0, scale = 1).ppf(confidence + significance / 2)\n",
+        "\n",
+        "    # standard formula for the confidence interval\n",
+        "    # point-estimate +- z * standard-error\n",
+        "    prop_diff = prop_b - prop_a\n",
+        "    confint = prop_diff + np.array([-1, 1]) * z * se\n",
+        "    return prop_diff, confint\n",
+        "two_proprotions_confint(enrollments_cont, clicks_cont, enrollments_exp, clicks_exp, significance = 0.05)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}