Skip to content

Commit 0c4958c

Browse files
committedMar 29, 2021
期末實作專題(三) - Udacity 教學網站註冊效果之 AB Test 分析
1 parent 03923a0 commit 0c4958c

File tree

1 file changed

+789
-0
lines changed

1 file changed

+789
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,789 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "ab-tests-with-python_作業解答.ipynb",
7+
"provenance": []
8+
},
9+
"kernelspec": {
10+
"display_name": "Python 3",
11+
"language": "python",
12+
"name": "python3"
13+
},
14+
"language_info": {
15+
"codemirror_mode": {
16+
"name": "ipython",
17+
"version": 3
18+
},
19+
"file_extension": ".py",
20+
"mimetype": "text/x-python",
21+
"name": "python",
22+
"nbconvert_exporter": "python",
23+
"pygments_lexer": "ipython3",
24+
"version": "3.6.3"
25+
}
26+
},
27+
"cells": [
28+
{
29+
"cell_type": "markdown",
30+
"metadata": {
31+
"id": "RTnv32wkA_Ox"
32+
},
33+
"source": [
34+
"# **作業說明**\n",
35+
"# (這是Udacity關於A/B Test的期末專題)\n",
36+
"\n",
37+
"Udacity希望了解,在免費14天試學網頁上,除了要信用卡資訊外,還想了解學生願意花多少小時學。如果少於某門檻(5小時),就建議學生不要註冊,免費聽聽影音就好,免得浪費資源,降低學習成功率。\n",
38+
"\n",
39+
"我們的題目是,增加這個頁面,是否對Gross Conversion(GC)和Net Conversion(NC)在統計學上(Alpha=0.05,Power=0.8)有幫助(最小可偵測差異 d_min:GC 為 0.01,NC 為 0.0075),亦即統計上的顯著(Significant)。\n",
40+
"\n",
41+
"CI = click 數目\n",
42+
"\n",
43+
"GC = 註冊數/CI (聽了建議仍然註冊的比例)\n",
44+
"\n",
45+
"NC = 繳費數/CI (14天之後繳費且繼續的比例)\n",
46+
"\n",
47+
"我們期待GC比原來下降,但NC不降,這表示省去資源但收入不降。\n",
48+
"\n",
49+
"檔名:ab-tests-with-python.ipynb\n",
50+
"\n",
51+
"**作業目標**\n",
52+
"\n",
53+
"1. 經由範例程式,學習A/B Test 的步驟\n",
54+
"2. 最低樣本數的計算方法\n",
55+
"3. 自行開發信賴區間計算函數\n",
56+
"\n",
57+
"\n",
58+
"\n",
59+
"\n",
60+
"\n",
61+
"\n",
62+
"\n",
63+
"\n",
64+
"\n",
65+
"\n",
66+
"\n"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"metadata": {
72+
"colab": {
73+
"background_save": true
74+
},
75+
"id": "QfCMcrfTFyMx"
76+
},
77+
"source": [
78+
"#載入程式庫\n",
79+
"import math as mt\n",
80+
"import numpy as np\n",
81+
"import pandas as pd\n",
82+
"from scipy.stats import norm"
83+
],
84+
"execution_count": null,
85+
"outputs": []
86+
},
87+
{
88+
"cell_type": "code",
89+
"metadata": {
90+
"id": "_iZnYjxIFyMy"
91+
},
92+
"source": [
93+
"#將基礎數據放入字典\n",
94+
"baseline = {\"Cookies\":40000,\"Clicks\":3200,\"Enrollments\":660,\"CTP\":0.08,\"GConversion\":0.20625,\n",
95+
" \"Retention\":0.53,\"NConversion\":0.109313}"
96+
],
97+
"execution_count": null,
98+
"outputs": []
99+
},
100+
{
101+
"cell_type": "code",
102+
"metadata": {
103+
"id": "rE-idI4vFyMy"
104+
},
105+
"source": [
106+
"#調整大小到以Cookie為基準\n",
107+
"baseline[\"Cookies\"] = 5000\n",
108+
"baseline[\"Clicks\"]=baseline[\"Clicks\"]*(5000/40000)\n",
109+
"baseline[\"Enrollments\"]=baseline[\"Enrollments\"]*(5000/40000)\n",
110+
"baseline"
111+
],
112+
"execution_count": null,
113+
"outputs": []
114+
},
115+
{
116+
"cell_type": "code",
117+
"metadata": {
118+
"colab": {
119+
"base_uri": "https://localhost:8080/"
120+
},
121+
"id": "gNpNShHKFyMz",
122+
"outputId": "1c00edec-27d3-4729-88e8-ce58eeef0c99"
123+
},
124+
"source": [
125+
"# 算出 Gross Conversion (GC) 的 p 和 n\n",
126+
"# 還有 Standard Deviation(sd) rounded to 4 decimal digits.\n",
127+
"GC={}\n",
128+
"GC[\"d_min\"]=0.01\n",
129+
"GC[\"p\"]=baseline[\"GConversion\"]\n",
130+
"#p is given in this case - or we could calculate it from enrollments/clicks\n",
131+
"GC[\"n\"]=baseline[\"Clicks\"]\n",
132+
"GC[\"sd\"]=round(mt.sqrt((GC[\"p\"]*(1-GC[\"p\"]))/GC[\"n\"]),4)\n",
133+
"GC[\"sd\"]"
134+
],
135+
"execution_count": null,
136+
"outputs": [
137+
{
138+
"output_type": "execute_result",
139+
"data": {
140+
"text/plain": [
141+
"0.0202"
142+
]
143+
},
144+
"metadata": {
145+
"tags": []
146+
},
147+
"execution_count": 9
148+
}
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"metadata": {
154+
"colab": {
155+
"base_uri": "https://localhost:8080/"
156+
},
157+
"id": "w_WjAIKkFyMz",
158+
"outputId": "c924bda0-918c-4073-8e55-7cc3d67ad4e9"
159+
},
160+
"source": [
161+
"# Retention(R) \n",
162+
"\n",
163+
"R={}\n",
164+
"R[\"d_min\"]=0.01\n",
165+
"R[\"p\"]=baseline[\"Retention\"]\n",
166+
"R[\"n\"]=baseline[\"Enrollments\"]\n",
167+
"R[\"sd\"]=round(mt.sqrt((R[\"p\"]*(1-R[\"p\"]))/R[\"n\"]),4)\n",
168+
"R[\"sd\"]"
169+
],
170+
"execution_count": null,
171+
"outputs": [
172+
{
173+
"output_type": "execute_result",
174+
"data": {
175+
"text/plain": [
176+
"0.0549"
177+
]
178+
},
179+
"metadata": {
180+
"tags": []
181+
},
182+
"execution_count": 4
183+
}
184+
]
185+
},
186+
{
187+
"cell_type": "code",
188+
"metadata": {
189+
"colab": {
190+
"base_uri": "https://localhost:8080/"
191+
},
192+
"id": "kwx3Of06FyMz",
193+
"outputId": "efc60cde-6e3b-4bed-e5f6-a3b697b3d393"
194+
},
195+
"source": [
196+
"# Net Conversion (NC)\n",
197+
"NC={}\n",
198+
"NC[\"d_min\"]=0.0075\n",
199+
"NC[\"p\"]=baseline[\"NConversion\"]\n",
200+
"NC[\"n\"]=baseline[\"Clicks\"]\n",
201+
"NC[\"sd\"]=round(mt.sqrt((NC[\"p\"]*(1-NC[\"p\"]))/NC[\"n\"]),4)\n",
202+
"NC[\"sd\"]"
203+
],
204+
"execution_count": null,
205+
"outputs": [
206+
{
207+
"output_type": "execute_result",
208+
"data": {
209+
"text/plain": [
210+
"0.0156"
211+
]
212+
},
213+
"metadata": {
214+
"tags": []
215+
},
216+
"execution_count": 5
217+
}
218+
]
219+
},
220+
{
221+
"cell_type": "code",
222+
"metadata": {
223+
"id": "pBk7b5uMFyM0"
224+
},
225+
"source": [
226+
"def get_sds(p,d):\n",
227+
" sd1=mt.sqrt(2*p*(1-p))\n",
228+
" sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n",
229+
" x=[sd1,sd2]\n",
230+
" return x"
231+
],
232+
"execution_count": null,
233+
"outputs": []
234+
},
235+
{
236+
"cell_type": "code",
237+
"metadata": {
238+
"id": "mB2im4rcFyM0"
239+
},
240+
"source": [
241+
"#計算 Z-score\n",
242+
"def get_z_score(alpha):\n",
243+
" return norm.ppf(alpha)\n",
244+
"\n",
245+
"# 得到兩個(A/B)標準差\n",
246+
"def get_sds(p,d):\n",
247+
" sd1=mt.sqrt(2*p*(1-p))\n",
248+
" sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n",
249+
" sds=[sd1,sd2]\n",
250+
" return sds\n",
251+
"\n",
252+
"# 求Sample Size\n",
253+
"def get_sampSize(sds,alpha,beta,d):\n",
254+
" n=pow((get_z_score(1-alpha/2)*sds[0]+get_z_score(1-beta)*sds[1]),2)/pow(d,2)\n",
255+
" return n"
256+
],
257+
"execution_count": null,
258+
"outputs": []
259+
},
260+
{
261+
"cell_type": "code",
262+
"metadata": {
263+
"id": "uFh5tlyTFyM0"
264+
},
265+
"source": [
266+
"GC[\"d\"]=0.01\n",
267+
"R[\"d\"]=0.01\n",
268+
"NC[\"d\"]=0.0075"
269+
],
270+
"execution_count": null,
271+
"outputs": []
272+
},
273+
{
274+
"cell_type": "code",
275+
"metadata": {
276+
"colab": {
277+
"base_uri": "https://localhost:8080/"
278+
},
279+
"id": "fiOPWnzNFyM0",
280+
"outputId": "f1b76240-2fe5-44bd-ddcf-225afa73b54a"
281+
},
282+
"source": [
283+
"# Let's get an integer value for simplicity\n",
284+
"GC[\"SampSize\"]=round(get_sampSize(get_sds(GC[\"p\"],GC[\"d\"]),0.05,0.2,GC[\"d\"]))\n",
285+
"GC[\"SampSize\"]"
286+
],
287+
"execution_count": null,
288+
"outputs": [
289+
{
290+
"output_type": "execute_result",
291+
"data": {
292+
"text/plain": [
293+
"25835.0"
294+
]
295+
},
296+
"metadata": {
297+
"tags": []
298+
},
299+
"execution_count": 11
300+
}
301+
]
302+
},
303+
{
304+
"cell_type": "code",
305+
"metadata": {
306+
"colab": {
307+
"base_uri": "https://localhost:8080/"
308+
},
309+
"id": "X7vCHRGDFyM0",
310+
"outputId": "647b76b3-d56e-4dba-e45b-005e919fd854"
311+
},
312+
"source": [
313+
"GC[\"SampSize\"]=round(GC[\"SampSize\"]/0.08*2)\n",
314+
"GC[\"SampSize\"]"
315+
],
316+
"execution_count": null,
317+
"outputs": [
318+
{
319+
"output_type": "execute_result",
320+
"data": {
321+
"text/plain": [
322+
"645875.0"
323+
]
324+
},
325+
"metadata": {
326+
"tags": []
327+
},
328+
"execution_count": 12
329+
}
330+
]
331+
},
332+
{
333+
"cell_type": "code",
334+
"metadata": {
335+
"colab": {
336+
"base_uri": "https://localhost:8080/"
337+
},
338+
"collapsed": true,
339+
"id": "z3jx0jgiFyM0",
340+
"outputId": "1464feca-9b43-43c6-cb6d-aae739d2eb5a"
341+
},
342+
"source": [
343+
"# Getting a nice integer value\n",
344+
"R[\"SampSize\"]=round(get_sampSize(get_sds(R[\"p\"],R[\"d\"]),0.05,0.2,R[\"d\"]))\n",
345+
"R[\"SampSize\"]"
346+
],
347+
"execution_count": null,
348+
"outputs": [
349+
{
350+
"output_type": "execute_result",
351+
"data": {
352+
"text/plain": [
353+
"39087.0"
354+
]
355+
},
356+
"metadata": {
357+
"tags": []
358+
},
359+
"execution_count": 13
360+
}
361+
]
362+
},
363+
{
364+
"cell_type": "code",
365+
"metadata": {
366+
"colab": {
367+
"base_uri": "https://localhost:8080/"
368+
},
369+
"collapsed": true,
370+
"id": "Zc1NFMp3FyM0",
371+
"outputId": "ce917636-2197-45af-a7a5-d75eccc51398"
372+
},
373+
"source": [
374+
"R[\"SampSize\"]=R[\"SampSize\"]/0.08/0.20625*2\n",
375+
"R[\"SampSize\"]"
376+
],
377+
"execution_count": null,
378+
"outputs": [
379+
{
380+
"output_type": "execute_result",
381+
"data": {
382+
"text/plain": [
383+
"4737818.181818182"
384+
]
385+
},
386+
"metadata": {
387+
"tags": []
388+
},
389+
"execution_count": 14
390+
}
391+
]
392+
},
393+
{
394+
"cell_type": "code",
395+
"metadata": {
396+
"colab": {
397+
"base_uri": "https://localhost:8080/"
398+
},
399+
"collapsed": true,
400+
"id": "XqkTq_D-FyM0",
401+
"outputId": "2b1a96db-4bd1-49c9-ac49-02f65f95ee0b"
402+
},
403+
"source": [
404+
"# Getting a nice integer value\n",
405+
"NC[\"SampSize\"]=round(get_sampSize(get_sds(NC[\"p\"],NC[\"d\"]),0.05,0.2,NC[\"d\"]))\n",
406+
"NC[\"SampSize\"]"
407+
],
408+
"execution_count": null,
409+
"outputs": [
410+
{
411+
"output_type": "execute_result",
412+
"data": {
413+
"text/plain": [
414+
"27413.0"
415+
]
416+
},
417+
"metadata": {
418+
"tags": []
419+
},
420+
"execution_count": 15
421+
}
422+
]
423+
},
424+
{
425+
"cell_type": "code",
426+
"metadata": {
427+
"colab": {
428+
"base_uri": "https://localhost:8080/"
429+
},
430+
"collapsed": true,
431+
"id": "UGArSr5lFyM0",
432+
"outputId": "db5dcecd-d9ff-4b29-8ef4-41364a2591bf"
433+
},
434+
"source": [
435+
"NC[\"SampSize\"]=NC[\"SampSize\"]/0.08*2\n",
436+
"NC[\"SampSize\"]"
437+
],
438+
"execution_count": null,
439+
"outputs": [
440+
{
441+
"output_type": "execute_result",
442+
"data": {
443+
"text/plain": [
444+
"685325.0"
445+
]
446+
},
447+
"metadata": {
448+
"tags": []
449+
},
450+
"execution_count": 16
451+
}
452+
]
453+
},
454+
{
455+
"cell_type": "code",
456+
"metadata": {
457+
"colab": {
458+
"base_uri": "https://localhost:8080/",
459+
"height": 198
460+
},
461+
"id": "Oj_s62oaFyM1",
462+
"outputId": "b9b4f542-7483-4d29-b598-c27e595f2e2e"
463+
},
464+
"source": [
465+
"# 載入數據\n",
466+
"control=pd.read_csv(\"./sample_data/control_data.csv\")\n",
467+
"experiment=pd.read_csv(\"./sample_data/experiment_data.csv\")\n",
468+
"control.head()"
469+
],
470+
"execution_count": null,
471+
"outputs": [
472+
{
473+
"output_type": "execute_result",
474+
"data": {
475+
"text/html": [
476+
"<div>\n",
477+
"<style scoped>\n",
478+
" .dataframe tbody tr th:only-of-type {\n",
479+
" vertical-align: middle;\n",
480+
" }\n",
481+
"\n",
482+
" .dataframe tbody tr th {\n",
483+
" vertical-align: top;\n",
484+
" }\n",
485+
"\n",
486+
" .dataframe thead th {\n",
487+
" text-align: right;\n",
488+
" }\n",
489+
"</style>\n",
490+
"<table border=\"1\" class=\"dataframe\">\n",
491+
" <thead>\n",
492+
" <tr style=\"text-align: right;\">\n",
493+
" <th></th>\n",
494+
" <th>Date</th>\n",
495+
" <th>Pageviews</th>\n",
496+
" <th>Clicks</th>\n",
497+
" <th>Enrollments</th>\n",
498+
" <th>Payments</th>\n",
499+
" </tr>\n",
500+
" </thead>\n",
501+
" <tbody>\n",
502+
" <tr>\n",
503+
" <th>0</th>\n",
504+
" <td>Sat, Oct 11</td>\n",
505+
" <td>7723</td>\n",
506+
" <td>687</td>\n",
507+
" <td>134.0</td>\n",
508+
" <td>70.0</td>\n",
509+
" </tr>\n",
510+
" <tr>\n",
511+
" <th>1</th>\n",
512+
" <td>Sun, Oct 12</td>\n",
513+
" <td>9102</td>\n",
514+
" <td>779</td>\n",
515+
" <td>147.0</td>\n",
516+
" <td>70.0</td>\n",
517+
" </tr>\n",
518+
" <tr>\n",
519+
" <th>2</th>\n",
520+
" <td>Mon, Oct 13</td>\n",
521+
" <td>10511</td>\n",
522+
" <td>909</td>\n",
523+
" <td>167.0</td>\n",
524+
" <td>95.0</td>\n",
525+
" </tr>\n",
526+
" <tr>\n",
527+
" <th>3</th>\n",
528+
" <td>Tue, Oct 14</td>\n",
529+
" <td>9871</td>\n",
530+
" <td>836</td>\n",
531+
" <td>156.0</td>\n",
532+
" <td>105.0</td>\n",
533+
" </tr>\n",
534+
" <tr>\n",
535+
" <th>4</th>\n",
536+
" <td>Wed, Oct 15</td>\n",
537+
" <td>10014</td>\n",
538+
" <td>837</td>\n",
539+
" <td>163.0</td>\n",
540+
" <td>64.0</td>\n",
541+
" </tr>\n",
542+
" </tbody>\n",
543+
"</table>\n",
544+
"</div>"
545+
],
546+
"text/plain": [
547+
" Date Pageviews Clicks Enrollments Payments\n",
548+
"0 Sat, Oct 11 7723 687 134.0 70.0\n",
549+
"1 Sun, Oct 12 9102 779 147.0 70.0\n",
550+
"2 Mon, Oct 13 10511 909 167.0 95.0\n",
551+
"3 Tue, Oct 14 9871 836 156.0 105.0\n",
552+
"4 Wed, Oct 15 10014 837 163.0 64.0"
553+
]
554+
},
555+
"metadata": {
556+
"tags": []
557+
},
558+
"execution_count": 21
559+
}
560+
]
561+
},
562+
{
563+
"cell_type": "code",
564+
"metadata": {
565+
"colab": {
566+
"base_uri": "https://localhost:8080/"
567+
},
568+
"collapsed": true,
569+
"id": "c2AH9yHaFyM1",
570+
"outputId": "5828ee0c-f831-4862-e918-14b3dcf12f28"
571+
},
572+
"source": [
573+
"pageviews_cont=control['Pageviews'].sum()\n",
574+
"pageviews_exp=experiment['Pageviews'].sum()\n",
575+
"pageviews_total=pageviews_cont+pageviews_exp\n",
576+
"print (\"number of pageviews in control:\", pageviews_cont)\n",
577+
"print (\"number of Pageviewsin experiment:\" ,pageviews_exp)"
578+
],
579+
"execution_count": null,
580+
"outputs": [
581+
{
582+
"output_type": "stream",
583+
"text": [
584+
"number of pageviews in control: 345543\n",
585+
"number of Pageviewsin experiment: 344660\n"
586+
],
587+
"name": "stdout"
588+
}
589+
]
590+
},
591+
{
592+
"cell_type": "code",
593+
"metadata": {
594+
"id": "kxKPjYuWFyM1"
595+
},
596+
"source": [
597+
"# Count the total clicks from complete records only\n",
598+
"clicks_cont=control[\"Clicks\"].loc[control[\"Enrollments\"].notnull()].sum()\n",
599+
"clicks_exp=experiment[\"Clicks\"].loc[experiment[\"Enrollments\"].notnull()].sum()"
600+
],
601+
"execution_count": null,
602+
"outputs": []
603+
},
604+
{
605+
"cell_type": "code",
606+
"metadata": {
607+
"colab": {
608+
"base_uri": "https://localhost:8080/"
609+
},
610+
"id": "I4gtEo6LFyM1",
611+
"outputId": "cb83ee22-d69a-4feb-c6bb-52592a08b077"
612+
},
613+
"source": [
614+
"#Gross Conversion - number of enrollments divided by number of clicks\n",
615+
"enrollments_cont=control[\"Enrollments\"].sum()\n",
616+
"enrollments_exp=experiment[\"Enrollments\"].sum()\n",
617+
"alpha=0.05 # significance level (95% CI); defined here because no earlier cell assigns alpha, so a fresh kernel would raise NameError\n",
618+
"GC_cont=enrollments_cont/clicks_cont\n",
619+
"GC_exp=enrollments_exp/clicks_exp\n",
620+
"GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp)\n",
621+
"GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
622+
"GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4)\n",
623+
"GC_diff=round(GC_exp-GC_cont,4)\n",
624+
"print(\"The change due to the experiment is\",GC_diff*100,\"%\")\n",
625+
"print(\"Confidence Interval: [\",GC_diff-GC_ME,\",\",GC_diff+GC_ME,\"]\")\n",
626+
"print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",-GC[\"d_min\"],\"is not in the CI as well.\")"
627+
],
628+
"execution_count": null,
629+
"outputs": [
630+
{
631+
"output_type": "stream",
632+
"text": [
633+
"The change due to the experiment is -2.06 %\n",
634+
"Confidence Interval: [ -0.0292 , -0.012 ]\n",
635+
"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.\n"
636+
],
637+
"name": "stdout"
638+
}
639+
]
640+
},
641+
{
642+
"cell_type": "code",
643+
"metadata": {
644+
"colab": {
645+
"base_uri": "https://localhost:8080/"
646+
},
647+
"collapsed": true,
648+
"id": "MQIg2XBsFyM1",
649+
"outputId": "74843a74-15e2-4f60-c0a7-469d7a11acc2"
650+
},
651+
"source": [
652+
"#Net Conversion - number of payments divided by number of clicks\n",
653+
"payments_cont=control[\"Payments\"].sum()\n",
654+
"payments_exp=experiment[\"Payments\"].sum()\n",
655+
"alpha=0.05 # significance level (95% CI); defined here because no earlier cell assigns alpha, so a fresh kernel would raise NameError\n",
656+
"NC_cont=payments_cont/clicks_cont\n",
657+
"NC_exp=payments_exp/clicks_exp\n",
658+
"NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp)\n",
659+
"NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
660+
"NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4)\n",
661+
"NC_diff=round(NC_exp-NC_cont,4)\n",
662+
"print(\"The change due to the experiment is\",NC_diff*100,\"%\")\n",
663+
"print(\"Confidence Interval: [\",NC_diff-NC_ME,\",\",NC_diff+NC_ME,\"]\")\n",
664+
"print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",NC[\"d_min\"],\"is not in the CI as well.\")"
665+
],
666+
"execution_count": null,
667+
"outputs": [
668+
{
669+
"output_type": "stream",
670+
"text": [
671+
"The change due to the experiment is -0.49 %\n",
672+
"Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]\n",
673+
"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if 0.0075 is not in the CI as well.\n"
674+
],
675+
"name": "stdout"
676+
}
677+
]
678+
},
679+
{
680+
"cell_type": "markdown",
681+
"metadata": {
682+
"id": "K2hnLtKrFS75"
683+
},
684+
"source": [
685+
"# **作業**\n",
686+
"# 經由範例程式碼,熟悉A/B Test的步驟\n",
687+
"\n",
688+
"請同學逐步跟隨程式了解A/B Test步驟"
689+
]
690+
},
691+
{
692+
"cell_type": "markdown",
693+
"metadata": {
694+
"id": "-lO_8AYwuEDY"
695+
},
696+
"source": [
697+
"# **作業 嘗試以函數算出樣本數**"
698+
]
699+
},
700+
{
701+
"cell_type": "code",
702+
"metadata": {
703+
"id": "oRSbrNaRuRJA"
704+
},
705+
"source": [
706+
"#作業 Sample Size\n",
707+
"import statsmodels.stats.api as sms\n",
708+
"from math import ceil\n",
709+
"\n",
710+
"effect_size = sms.proportion_effectsize(GC[\"p\"]-1.0*GC[\"d_min\"], GC[\"p\"]+0.0*GC[\"d_min\"])\n",
711+
"required_n = sms.NormalIndPower().solve_power(\n",
712+
" effect_size, \n",
713+
" power=0.8, \n",
714+
" alpha=0.05, \n",
715+
" ratio=1\n",
716+
" ) \n",
717+
"required_n = ceil(required_n) \n",
718+
"print (effect_size,required_n) "
719+
],
720+
"execution_count": null,
721+
"outputs": []
722+
},
723+
{
724+
"cell_type": "markdown",
725+
"metadata": {
726+
"id": "MKB09_mjFwjN"
727+
},
728+
"source": [
729+
"# **作業** 自行開發雙樣本比例的信賴區間函數\n"
730+
]
731+
},
732+
{
733+
"cell_type": "code",
734+
"metadata": {
735+
"id": "yT5goD1jHKpl"
736+
},
737+
"source": [
738+
"#作業解答\n",
739+
"import scipy.stats as stats\n",
740+
"def two_proprotions_confint(success_a, size_a, success_b, size_b, significance = 0.05):\n",
741+
" \"\"\"\n",
742+
" A/B test for two proportions;\n",
743+
" given the success count and trial size of groups A and B, compute\n",
744+
" the confidence interval for the difference in proportions;\n",
745+
" the resulting interval matches R's prop.test(..., correct = FALSE)\n",
746+
"\n",
747+
" Parameters\n",
748+
" ----------\n",
749+
" success_a, success_b : int\n",
750+
" Number of successes in each group\n",
751+
"\n",
752+
" size_a, size_b : int\n",
753+
" Size, or number of observations in each group\n",
754+
"\n",
755+
" significance : float, default 0.05\n",
756+
" Often denoted as alpha. Governs the chance of a false positive.\n",
757+
" A significance level of 0.05 means that there is a 5% chance of\n",
758+
" a false positive. In other words, our confidence level is\n",
759+
" 1 - 0.05 = 0.95\n",
760+
"\n",
761+
" Returns\n",
762+
" -------\n",
763+
" prop_diff : float\n",
764+
" Difference between the two proportions (prop_b - prop_a)\n",
765+
"\n",
766+
" confint : 1d ndarray\n",
767+
" Confidence interval for the difference between the two proportions\n",
768+
" \"\"\"\n",
769+
" prop_a = success_a / size_a\n",
770+
" prop_b = success_b / size_b\n",
771+
" var = prop_a * (1 - prop_a) / size_a + prop_b * (1 - prop_b) / size_b\n",
772+
" se = np.sqrt(var)\n",
773+
"\n",
774+
" # z critical value\n",
775+
" confidence = 1 - significance\n",
776+
" z = stats.norm(loc = 0, scale = 1).ppf(confidence + significance / 2)\n",
777+
"\n",
778+
" # standard formula for the confidence interval\n",
779+
" # point-estimate +- z * standard-error\n",
780+
" prop_diff = prop_b - prop_a\n",
781+
" confint = prop_diff + np.array([-1, 1]) * z * se\n",
782+
" return prop_diff, confint\n",
783+
"two_proprotions_confint(enrollments_cont, clicks_cont, enrollments_exp, clicks_exp, significance = 0.05)"
784+
],
785+
"execution_count": null,
786+
"outputs": []
787+
}
788+
]
789+
}

0 commit comments

Comments
 (0)
Please sign in to comment.