Skip to content

Commit 03923a0

Browse files
committedMar 29, 2021
期末實作專題(三) - Udacity 教學網站註冊效果之 A/B Test 分析
1 parent fe39c50 commit 03923a0

File tree

1 file changed

+808
-0
lines changed

1 file changed

+808
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,808 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {
6+
"id": "RTnv32wkA_Ox"
7+
},
8+
"source": [
9+
"# **作業說明**\n",
10+
"# (這是Udacity關於A/B Test的期末專題)\n",
11+
"\n",
12+
"Udacity在免費14天試學的註冊頁面上,除了要求信用卡資訊外,還想詢問學生願意花多少小時學習。如果少於某門檻(5小時),就建議學生不要註冊,改為免費觀看影音教材即可,以免浪費資源並降低學習成功率。\n",
13+
"\n",
14+
"我們的題目是:增加這個頁面後,Gross Conversion(GC)和Net Conversion(NC)的變化在統計上是否顯著(Significant;Alpha=0.05,Power=0.8,最小可偵測差異 d:GC=0.01、NC=0.0075)。\n",
15+
"\n",
16+
"CI = click 數目 (此處 CI 指點擊數,並非後文輸出中的 Confidence Interval)\n",
17+
"\n",
18+
"GC = 註冊數/CI (聽了建議仍然註冊的比例)\n",
19+
"\n",
20+
"NC = 繳費數/CI (14天之後繳費且繼續的比例)\n",
21+
"\n",
22+
"我們期待GC比原來下降,但NC不降,這表示省去資源但收入不降。\n",
23+
"\n",
24+
"檔名:ab-tests-with-python.ipynb\n",
25+
"\n",
26+
"**作業目標**\n",
27+
"\n",
28+
"1. 經由範例程式,學習A/B Test 的步驟\n",
29+
"2. 最低樣本數的計算方法\n",
30+
"3. 自行開發信賴區間計算函數\n",
31+
"\n",
32+
"\n",
33+
"\n",
34+
"\n",
35+
"\n",
36+
"\n",
37+
"\n",
38+
"\n",
39+
"\n",
40+
"\n",
41+
"\n"
42+
]
43+
},
44+
{
45+
"cell_type": "code",
46+
"execution_count": 1,
47+
"metadata": {
48+
"_cell_guid": "9a04b6ea-111a-4627-849f-53adf7efd40b",
49+
"_uuid": "f4949401e2dab760c957525f17e90addf45fc9e8",
50+
"id": "QfCMcrfTFyMx"
51+
},
52+
"outputs": [],
53+
"source": [
54+
"#載入程式庫\n",
55+
"import math as mt\n",
56+
"import numpy as np\n",
57+
"import pandas as pd\n",
58+
"from scipy.stats import norm"
59+
]
60+
},
61+
{
62+
"cell_type": "code",
63+
"execution_count": 2,
64+
"metadata": {
65+
"_cell_guid": "fc93fbb9-b9f0-4b51-8dad-c4a8ee7b4320",
66+
"_uuid": "a3b1eab0da4bf7262ff7d98a41bed840d835762a",
67+
"id": "_iZnYjxIFyMy"
68+
},
69+
"outputs": [],
70+
"source": [
71+
"#將基礎數據放入字典\n",
72+
"baseline = {\"Cookies\":40000,\"Clicks\":3200,\"Enrollments\":660,\"CTP\":0.08,\"GConversion\":0.20625,\n",
73+
" \"Retention\":0.53,\"NConversion\":0.109313}"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": 3,
79+
"metadata": {
80+
"_cell_guid": "d99f7ee0-4a65-402b-a709-a949cd549bc3",
81+
"_uuid": "019536837b5aa146997b693563cfb22af1fb85ee",
82+
"colab": {
83+
"base_uri": "https://localhost:8080/"
84+
},
85+
"id": "rE-idI4vFyMy",
86+
"outputId": "af885825-24d4-4e2b-c7ca-5a7e98ff6182"
87+
},
88+
"outputs": [
89+
{
90+
"data": {
91+
"text/plain": [
92+
"{'Cookies': 5000,\n",
93+
" 'Clicks': 400.0,\n",
94+
" 'Enrollments': 82.5,\n",
95+
" 'CTP': 0.08,\n",
96+
" 'GConversion': 0.20625,\n",
97+
" 'Retention': 0.53,\n",
98+
" 'NConversion': 0.109313}"
99+
]
100+
},
101+
"execution_count": 3,
102+
"metadata": {},
103+
"output_type": "execute_result"
104+
}
105+
],
106+
"source": [
107+
"#將基準數據等比例縮放到 5000 個 Cookie 的樣本規模\n",
108+
"baseline[\"Cookies\"] = 5000\n",
109+
"baseline[\"Clicks\"]=baseline[\"Clicks\"]*(5000/40000)\n",
110+
"baseline[\"Enrollments\"]=baseline[\"Enrollments\"]*(5000/40000)\n",
111+
"baseline"
112+
]
113+
},
114+
{
115+
"cell_type": "code",
116+
"execution_count": 4,
117+
"metadata": {
118+
"_cell_guid": "4c47c350-99c8-4990-8456-cbbe014a972f",
119+
"_uuid": "2c6930584c39024455900fce7e0e92e5812814b3",
120+
"colab": {
121+
"base_uri": "https://localhost:8080/"
122+
},
123+
"id": "gNpNShHKFyMz",
124+
"outputId": "6e1110d0-f002-45a1-9711-6c8c094cd641"
125+
},
126+
"outputs": [
127+
{
128+
"data": {
129+
"text/plain": [
130+
"0.0202"
131+
]
132+
},
133+
"execution_count": 4,
134+
"metadata": {},
135+
"output_type": "execute_result"
136+
}
137+
],
138+
"source": [
139+
"# 算出 Gross Conversion (GC) 的 p 和 n\n",
140+
"# 還有 Standard Deviation (sd) rounded to 4 decimal digits.\n",
141+
"GC={}\n",
142+
"GC[\"d_min\"]=0.01\n",
143+
"GC[\"p\"]=baseline[\"GConversion\"]\n",
144+
"#p is given in this case - or we could calculate it from enrollments/clicks\n",
145+
"GC[\"n\"]=baseline[\"Clicks\"]\n",
146+
"GC[\"sd\"]=round(mt.sqrt((GC[\"p\"]*(1-GC[\"p\"]))/GC[\"n\"]),4)\n",
147+
"GC[\"sd\"]"
148+
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": 5,
153+
"metadata": {
154+
"_cell_guid": "654d08d4-6638-4f9f-a327-6f2f5a6a9365",
155+
"_uuid": "334e65cddd5a800a396fa630b676ca12bec42114",
156+
"colab": {
157+
"base_uri": "https://localhost:8080/"
158+
},
159+
"id": "w_WjAIKkFyMz",
160+
"outputId": "9cf4445e-d65d-408f-9798-d78480730dcd"
161+
},
162+
"outputs": [
163+
{
164+
"data": {
165+
"text/plain": [
166+
"0.0549"
167+
]
168+
},
169+
"execution_count": 5,
170+
"metadata": {},
171+
"output_type": "execute_result"
172+
}
173+
],
174+
"source": [
175+
"# Retention(R) \n",
176+
"\n",
177+
"R={}\n",
178+
"R[\"d_min\"]=0.01\n",
179+
"R[\"p\"]=baseline[\"Retention\"]\n",
180+
"R[\"n\"]=baseline[\"Enrollments\"]\n",
181+
"R[\"sd\"]=round(mt.sqrt((R[\"p\"]*(1-R[\"p\"]))/R[\"n\"]),4)\n",
182+
"R[\"sd\"]"
183+
]
184+
},
185+
{
186+
"cell_type": "code",
187+
"execution_count": 6,
188+
"metadata": {
189+
"_cell_guid": "162f3d90-467a-41e6-9489-f1ea01f08afa",
190+
"_uuid": "be4b9c808cf098efde59ea1d17e1a35799873e5f",
191+
"colab": {
192+
"base_uri": "https://localhost:8080/"
193+
},
194+
"id": "kwx3Of06FyMz",
195+
"outputId": "efac36b1-ec20-4159-d113-fc43c81aa9f0"
196+
},
197+
"outputs": [
198+
{
199+
"data": {
200+
"text/plain": [
201+
"0.0156"
202+
]
203+
},
204+
"execution_count": 6,
205+
"metadata": {},
206+
"output_type": "execute_result"
207+
}
208+
],
209+
"source": [
210+
"# Net Conversion (NC)\n",
211+
"NC={}\n",
212+
"NC[\"d_min\"]=0.0075\n",
213+
"NC[\"p\"]=baseline[\"NConversion\"]\n",
214+
"NC[\"n\"]=baseline[\"Clicks\"]\n",
215+
"NC[\"sd\"]=round(mt.sqrt((NC[\"p\"]*(1-NC[\"p\"]))/NC[\"n\"]),4)\n",
216+
"NC[\"sd\"]"
217+
]
218+
},
219+
{
220+
"cell_type": "code",
221+
"execution_count": 7,
222+
"metadata": {
223+
"_cell_guid": "1cedf3ae-f318-4036-b966-a299350fb048",
224+
"_uuid": "8956f99d8ea1b9bb6d8b294a6e333c86a51d88d7",
225+
"id": "pBk7b5uMFyM0"
226+
},
227+
"outputs": [],
228+
"source": [
229+
"def get_sds(p,d):\n",
230+
" sd1=mt.sqrt(2*p*(1-p))\n",
231+
" sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n",
232+
" x=[sd1,sd2]\n",
233+
" return x"
234+
]
235+
},
236+
{
237+
"cell_type": "code",
238+
"execution_count": 8,
239+
"metadata": {
240+
"_cell_guid": "230d398f-5692-4f04-a8a9-316c9c661801",
241+
"_uuid": "c1c7a715d55432c5d19fef049094527cf1f72343",
242+
"id": "mB2im4rcFyM0"
243+
},
244+
"outputs": [],
245+
"source": [
246+
"#計算 Z-score\n",
247+
"def get_z_score(alpha):\n",
248+
" return norm.ppf(alpha)\n",
249+
"\n",
250+
"# 得到兩個標準差:sd1 為虛無假設下(兩組比例皆為 p),sd2 為對立假設下(比例為 p 與 p+d)\n",
251+
"def get_sds(p,d):\n",
252+
" sd1=mt.sqrt(2*p*(1-p))\n",
253+
" sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n",
254+
" sds=[sd1,sd2]\n",
255+
" return sds\n",
256+
"\n",
257+
"# 求Sample Size\n",
258+
"def get_sampSize(sds,alpha,beta,d):\n",
259+
" n=pow((get_z_score(1-alpha/2)*sds[0]+get_z_score(1-beta)*sds[1]),2)/pow(d,2)\n",
260+
" return n"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": 9,
266+
"metadata": {
267+
"_cell_guid": "e9b0d74a-35ba-461e-9b04-7527cc0147f9",
268+
"_uuid": "f44e06b60b027dd83ec3734c52ed1347d2a80c0c",
269+
"id": "uFh5tlyTFyM0"
270+
},
271+
"outputs": [],
272+
"source": [
273+
"GC[\"d\"]=0.01\n",
274+
"R[\"d\"]=0.01\n",
275+
"NC[\"d\"]=0.0075"
276+
]
277+
},
278+
{
279+
"cell_type": "code",
280+
"execution_count": 10,
281+
"metadata": {
282+
"_cell_guid": "28cf1802-c0ce-4189-b438-86504e53a721",
283+
"_uuid": "b443cb437954e04ef889ef4b365d8875da2833d1",
284+
"colab": {
285+
"base_uri": "https://localhost:8080/"
286+
},
287+
"id": "fiOPWnzNFyM0",
288+
"outputId": "f709237b-5edc-40e8-96e4-5002bd4dc503"
289+
},
290+
"outputs": [
291+
{
292+
"data": {
293+
"text/plain": [
294+
"25835"
295+
]
296+
},
297+
"execution_count": 10,
298+
"metadata": {},
299+
"output_type": "execute_result"
300+
}
301+
],
302+
"source": [
303+
"# Let's get an integer value for simplicity\n",
304+
"GC[\"SampSize\"]=round(get_sampSize(get_sds(GC[\"p\"],GC[\"d\"]),0.05,0.2,GC[\"d\"]))\n",
305+
"GC[\"SampSize\"]"
306+
]
307+
},
308+
{
309+
"cell_type": "code",
310+
"execution_count": 11,
311+
"metadata": {
312+
"_cell_guid": "ba7702c9-b092-404b-a212-8713a517d9bc",
313+
"_uuid": "530c3e906980568677ce0a7fc5c92f5f9a26302b",
314+
"colab": {
315+
"base_uri": "https://localhost:8080/"
316+
},
317+
"id": "X7vCHRGDFyM0",
318+
"outputId": "f7f50a7f-aa8d-4ed9-b90c-ba1d31415a6f"
319+
},
320+
"outputs": [
321+
{
322+
"data": {
323+
"text/plain": [
324+
"645875"
325+
]
326+
},
327+
"execution_count": 11,
328+
"metadata": {},
329+
"output_type": "execute_result"
330+
}
331+
],
332+
"source": [
333+
"GC[\"SampSize\"]=round(GC[\"SampSize\"]/0.08*2)\n",
334+
"GC[\"SampSize\"]"
335+
]
336+
},
337+
{
338+
"cell_type": "code",
339+
"execution_count": 12,
340+
"metadata": {
341+
"_cell_guid": "c2c65956-f449-4d28-94b5-c5300aeadb4b",
342+
"_uuid": "1aa61cc37f8839a44f0773623d41c3684be1a9c7",
343+
"colab": {
344+
"base_uri": "https://localhost:8080/"
345+
},
346+
"id": "z3jx0jgiFyM0",
347+
"outputId": "d2dba5cc-bf64-4177-9a95-6b8ebdd44674"
348+
},
349+
"outputs": [
350+
{
351+
"data": {
352+
"text/plain": [
353+
"39087"
354+
]
355+
},
356+
"execution_count": 12,
357+
"metadata": {},
358+
"output_type": "execute_result"
359+
}
360+
],
361+
"source": [
362+
"# Getting a nice integer value\n",
363+
"R[\"SampSize\"]=round(get_sampSize(get_sds(R[\"p\"],R[\"d\"]),0.05,0.2,R[\"d\"]))\n",
364+
"R[\"SampSize\"]"
365+
]
366+
},
367+
{
368+
"cell_type": "code",
369+
"execution_count": 13,
370+
"metadata": {
371+
"_cell_guid": "0281b321-7506-49ba-a33e-29eed30eefbe",
372+
"_uuid": "16bf69e2a0f6214b91485b736676a1fe7aadfdca",
373+
"colab": {
374+
"base_uri": "https://localhost:8080/"
375+
},
376+
"id": "Zc1NFMp3FyM0",
377+
"outputId": "5ca978b8-4eac-4589-9481-b634e2c259d5"
378+
},
379+
"outputs": [
380+
{
381+
"data": {
382+
"text/plain": [
383+
"4737818.181818182"
384+
]
385+
},
386+
"execution_count": 13,
387+
"metadata": {},
388+
"output_type": "execute_result"
389+
}
390+
],
391+
"source": [
392+
"R[\"SampSize\"]=R[\"SampSize\"]/0.08/0.20625*2\n",
393+
"R[\"SampSize\"]"
394+
]
395+
},
396+
{
397+
"cell_type": "code",
398+
"execution_count": 14,
399+
"metadata": {
400+
"_cell_guid": "56be284a-1c49-4c65-91d2-e007ae5b327d",
401+
"_uuid": "42607629817a83c2be769165cff7a30d74067320",
402+
"colab": {
403+
"base_uri": "https://localhost:8080/"
404+
},
405+
"id": "XqkTq_D-FyM0",
406+
"outputId": "ce8f9a0b-eaf3-4ded-d383-a47df31c799d"
407+
},
408+
"outputs": [
409+
{
410+
"data": {
411+
"text/plain": [
412+
"27413"
413+
]
414+
},
415+
"execution_count": 14,
416+
"metadata": {},
417+
"output_type": "execute_result"
418+
}
419+
],
420+
"source": [
421+
"# Getting a nice integer value\n",
422+
"NC[\"SampSize\"]=round(get_sampSize(get_sds(NC[\"p\"],NC[\"d\"]),0.05,0.2,NC[\"d\"]))\n",
423+
"NC[\"SampSize\"]"
424+
]
425+
},
426+
{
427+
"cell_type": "code",
428+
"execution_count": 15,
429+
"metadata": {
430+
"_cell_guid": "e4202dd9-7cb6-455c-9630-dd9277f84da6",
431+
"_uuid": "38b580f63f9e25fa7e19958d7b12a14d6e2b8838",
432+
"colab": {
433+
"base_uri": "https://localhost:8080/"
434+
},
435+
"id": "UGArSr5lFyM0",
436+
"outputId": "0d53c8f8-e382-406e-9aeb-e2bdc9929a27"
437+
},
438+
"outputs": [
439+
{
440+
"data": {
441+
"text/plain": [
442+
"685325.0"
443+
]
444+
},
445+
"execution_count": 15,
446+
"metadata": {},
447+
"output_type": "execute_result"
448+
}
449+
],
450+
"source": [
451+
"NC[\"SampSize\"]=NC[\"SampSize\"]/0.08*2\n",
452+
"NC[\"SampSize\"]"
453+
]
454+
},
455+
{
456+
"cell_type": "code",
457+
"execution_count": 16,
458+
"metadata": {
459+
"_cell_guid": "58b7a728-3b45-4867-969b-48959085498c",
460+
"_uuid": "4c29c3bce964e643ef30633cab8b95f268f76abc",
461+
"colab": {
462+
"base_uri": "https://localhost:8080/",
463+
"height": 198
464+
},
465+
"id": "Oj_s62oaFyM1",
466+
"outputId": "ba245606-e0ee-4721-f510-189477941b0d"
467+
},
468+
"outputs": [
469+
{
470+
"data": {
471+
"text/html": [
472+
"<div>\n",
473+
"<style scoped>\n",
474+
" .dataframe tbody tr th:only-of-type {\n",
475+
" vertical-align: middle;\n",
476+
" }\n",
477+
"\n",
478+
" .dataframe tbody tr th {\n",
479+
" vertical-align: top;\n",
480+
" }\n",
481+
"\n",
482+
" .dataframe thead th {\n",
483+
" text-align: right;\n",
484+
" }\n",
485+
"</style>\n",
486+
"<table border=\"1\" class=\"dataframe\">\n",
487+
" <thead>\n",
488+
" <tr style=\"text-align: right;\">\n",
489+
" <th></th>\n",
490+
" <th>Date</th>\n",
491+
" <th>Pageviews</th>\n",
492+
" <th>Clicks</th>\n",
493+
" <th>Enrollments</th>\n",
494+
" <th>Payments</th>\n",
495+
" </tr>\n",
496+
" </thead>\n",
497+
" <tbody>\n",
498+
" <tr>\n",
499+
" <th>0</th>\n",
500+
" <td>Sat, Oct 11</td>\n",
501+
" <td>7723</td>\n",
502+
" <td>687</td>\n",
503+
" <td>134.0</td>\n",
504+
" <td>70.0</td>\n",
505+
" </tr>\n",
506+
" <tr>\n",
507+
" <th>1</th>\n",
508+
" <td>Sun, Oct 12</td>\n",
509+
" <td>9102</td>\n",
510+
" <td>779</td>\n",
511+
" <td>147.0</td>\n",
512+
" <td>70.0</td>\n",
513+
" </tr>\n",
514+
" <tr>\n",
515+
" <th>2</th>\n",
516+
" <td>Mon, Oct 13</td>\n",
517+
" <td>10511</td>\n",
518+
" <td>909</td>\n",
519+
" <td>167.0</td>\n",
520+
" <td>95.0</td>\n",
521+
" </tr>\n",
522+
" <tr>\n",
523+
" <th>3</th>\n",
524+
" <td>Tue, Oct 14</td>\n",
525+
" <td>9871</td>\n",
526+
" <td>836</td>\n",
527+
" <td>156.0</td>\n",
528+
" <td>105.0</td>\n",
529+
" </tr>\n",
530+
" <tr>\n",
531+
" <th>4</th>\n",
532+
" <td>Wed, Oct 15</td>\n",
533+
" <td>10014</td>\n",
534+
" <td>837</td>\n",
535+
" <td>163.0</td>\n",
536+
" <td>64.0</td>\n",
537+
" </tr>\n",
538+
" </tbody>\n",
539+
"</table>\n",
540+
"</div>"
541+
],
542+
"text/plain": [
543+
" Date Pageviews Clicks Enrollments Payments\n",
544+
"0 Sat, Oct 11 7723 687 134.0 70.0\n",
545+
"1 Sun, Oct 12 9102 779 147.0 70.0\n",
546+
"2 Mon, Oct 13 10511 909 167.0 95.0\n",
547+
"3 Tue, Oct 14 9871 836 156.0 105.0\n",
548+
"4 Wed, Oct 15 10014 837 163.0 64.0"
549+
]
550+
},
551+
"execution_count": 16,
552+
"metadata": {},
553+
"output_type": "execute_result"
554+
}
555+
],
556+
"source": [
557+
"# 載入數據\n",
558+
"control=pd.read_csv( 'control_data.csv' )\n",
559+
"experiment=pd.read_csv( 'experiment_data.csv' )\n",
560+
"control.head()"
561+
]
562+
},
563+
{
564+
"cell_type": "code",
565+
"execution_count": 17,
566+
"metadata": {
567+
"_cell_guid": "37ab672f-be1c-46bc-ae53-edf722bef4bc",
568+
"_uuid": "15652351bc793b528ca853666ddb4b7defe4f4ff",
569+
"colab": {
570+
"base_uri": "https://localhost:8080/"
571+
},
572+
"id": "c2AH9yHaFyM1",
573+
"outputId": "c534d6e1-a872-4fab-ecf3-95cc10d411d4"
574+
},
575+
"outputs": [
576+
{
577+
"name": "stdout",
578+
"output_type": "stream",
579+
"text": [
580+
"number of pageviews in control: 345543\n",
581+
"number of Pageviewsin experiment: 344660\n"
582+
]
583+
}
584+
],
585+
"source": [
586+
"pageviews_cont=control['Pageviews'].sum()\n",
587+
"pageviews_exp=experiment['Pageviews'].sum()\n",
588+
"pageviews_total=pageviews_cont+pageviews_exp\n",
589+
"print (\"number of pageviews in control:\", pageviews_cont)\n",
590+
"print (\"number of Pageviewsin experiment:\" ,pageviews_exp)"
591+
]
592+
},
593+
{
594+
"cell_type": "code",
595+
"execution_count": 18,
596+
"metadata": {
597+
"_cell_guid": "8e13eba4-0daa-4220-a772-e1f371854d60",
598+
"_uuid": "6d4d94df9e5afdfdf97703292b0ef9ac6c3e4251",
599+
"id": "kxKPjYuWFyM1"
600+
},
601+
"outputs": [],
602+
"source": [
603+
"# Count the total clicks from complete records only\n",
604+
"clicks_cont=control[\"Clicks\"].loc[control[\"Enrollments\"].notnull()].sum()\n",
605+
"clicks_exp=experiment[\"Clicks\"].loc[experiment[\"Enrollments\"].notnull()].sum()"
606+
]
607+
},
608+
{
609+
"cell_type": "code",
610+
"execution_count": 19,
611+
"metadata": {
612+
"_cell_guid": "5615673e-529c-452a-ad48-9186cb88f74b",
613+
"_uuid": "3f17a8086726ee8cc01f150808f88fe8693a95a2",
614+
"colab": {
615+
"base_uri": "https://localhost:8080/"
616+
},
617+
"id": "I4gtEo6LFyM1",
618+
"outputId": "6b952d56-de9a-472f-f6ce-bf2cc0ccd9ae"
619+
},
620+
"outputs": [
621+
{
622+
"name": "stdout",
623+
"output_type": "stream",
624+
"text": [
625+
"The change due to the experiment is -2.06 %\n",
626+
"Confidence Interval: [ -0.0292 , -0.012 ]\n",
627+
"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.\n"
628+
]
629+
}
630+
],
631+
"source": [
632+
"# Gross Conversion - number of enrollments divided by number of clicks\n",
633+
"alpha = 0.05\n",
634+
"enrollments_cont=control[\"Enrollments\"].sum()\n",
635+
"enrollments_exp=experiment[\"Enrollments\"].sum()\n",
636+
"\n",
637+
"GC_cont=enrollments_cont/clicks_cont\n",
638+
"GC_exp=enrollments_exp/clicks_exp\n",
639+
"GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp)\n",
640+
"GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
641+
"GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4)\n",
642+
"GC_diff=round(GC_exp-GC_cont,4)\n",
643+
"print(\"The change due to the experiment is\",GC_diff*100,\"%\")\n",
644+
"print(\"Confidence Interval: [\",GC_diff-GC_ME,\",\",GC_diff+GC_ME,\"]\")\n",
645+
"print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",-GC[\"d_min\"],\"is not in the CI as well.\")"
646+
]
647+
},
648+
{
649+
"cell_type": "code",
650+
"execution_count": 20,
651+
"metadata": {
652+
"_cell_guid": "8cb352ad-fc92-4f21-b939-3874314ba8f4",
653+
"_uuid": "83e99af5dea50f22629ccbd34faa196d4065172f",
654+
"colab": {
655+
"base_uri": "https://localhost:8080/"
656+
},
657+
"id": "MQIg2XBsFyM1",
658+
"outputId": "f8f7bb99-0194-4884-dc39-acf4fb962b40"
659+
},
660+
"outputs": [
661+
{
662+
"name": "stdout",
663+
"output_type": "stream",
664+
"text": [
665+
"The change due to the experiment is -0.49 %\n",
666+
"Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]\n",
667+
"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if 0.0075 is not in the CI as well.\n"
668+
]
669+
}
670+
],
671+
"source": [
672+
"# Net Conversion - number of payments divided by number of clicks\n",
673+
"payments_cont=control[\"Payments\"].sum()\n",
674+
"payments_exp=experiment[\"Payments\"].sum()\n",
675+
"\n",
676+
"NC_cont=payments_cont/clicks_cont\n",
677+
"NC_exp=payments_exp/clicks_exp\n",
678+
"NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp)\n",
679+
"NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp))\n",
680+
"NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4)\n",
681+
"NC_diff=round(NC_exp-NC_cont,4)\n",
682+
"print(\"The change due to the experiment is\",NC_diff*100,\"%\")\n",
683+
"print(\"Confidence Interval: [\",NC_diff-NC_ME,\",\",NC_diff+NC_ME,\"]\")\n",
684+
"print (\"The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\",NC[\"d_min\"],\"is not in the CI as well.\")"
685+
]
686+
},
687+
{
688+
"cell_type": "markdown",
689+
"metadata": {
690+
"id": "K2hnLtKrFS75"
691+
},
692+
"source": [
693+
"# **作業**\n",
694+
"# 經由範例程式碼,熟悉A/B Test的步驟\n",
695+
"\n",
696+
"請同學逐步跟隨程式了解A/B Test步驟"
697+
]
698+
},
699+
{
700+
"cell_type": "markdown",
701+
"metadata": {
702+
"id": "-lO_8AYwuEDY"
703+
},
704+
"source": [
705+
"# **作業 嘗試以函數算出樣本數**"
706+
]
707+
},
708+
{
709+
"cell_type": "code",
710+
"execution_count": 21,
711+
"metadata": {},
712+
"outputs": [
713+
{
714+
"name": "stdout",
715+
"output_type": "stream",
716+
"text": [
717+
"-0.02494345647889673 25231\n"
718+
]
719+
}
720+
],
721+
"source": [
722+
"import statsmodels.stats.api as sms\n",
723+
"from math import ceil\n",
724+
"\n",
725+
"effect_size = sms.proportion_effectsize(GC[\"p\"]-1.0*GC[\"d_min\"], GC[\"p\"]+0.0*GC[\"d_min\"])\n",
726+
"required_n = sms.NormalIndPower().solve_power(\n",
727+
" effect_size, \n",
728+
" power = 0.8, \n",
729+
" alpha = 0.05, \n",
730+
" ratio = 1\n",
731+
" ) \n",
732+
"required_n = ceil(required_n) \n",
733+
"print (effect_size,required_n)"
734+
]
735+
},
736+
{
737+
"cell_type": "markdown",
738+
"metadata": {
739+
"id": "MKB09_mjFwjN"
740+
},
741+
"source": [
742+
"# **作業** 自行開發雙樣本比例的信賴區間函數\n"
743+
]
744+
},
745+
{
746+
"cell_type": "code",
747+
"execution_count": 22,
748+
"metadata": {},
749+
"outputs": [
750+
{
751+
"data": {
752+
"text/plain": [
753+
"(-0.020554874580361565, array([-0.02912016, -0.01198959]))"
754+
]
755+
},
756+
"execution_count": 22,
757+
"metadata": {},
758+
"output_type": "execute_result"
759+
}
760+
],
761+
"source": [
762+
"import scipy.stats as stats\n",
763+
"def two_proprotions_confint(success_a, size_a, success_b, size_b, significance = 0.05):\n",
764+
" \n",
765+
" prop_a = success_a / size_a\n",
766+
" prop_b = success_b / size_b\n",
767+
" var = prop_a * (1 - prop_a) / size_a + prop_b * (1 - prop_b) / size_b\n",
768+
" se = np.sqrt(var)\n",
769+
"\n",
770+
" # z critical value\n",
771+
" confidence = 1 - significance\n",
772+
" z = stats.norm(loc = 0, scale = 1).ppf(confidence + significance / 2)\n",
773+
"\n",
774+
" # standard formula for the confidence interval\n",
775+
" # point-estimate +- z * standard-error\n",
776+
" prop_diff = prop_b - prop_a\n",
777+
" confint = prop_diff + np.array([-1, 1]) * z * se\n",
778+
" return prop_diff, confint\n",
779+
"two_proprotions_confint(enrollments_cont, clicks_cont, enrollments_exp, clicks_exp, significance = 0.05)"
780+
]
781+
}
782+
],
783+
"metadata": {
784+
"colab": {
785+
"name": "ab-tests-with-python_作業.ipynb",
786+
"provenance": []
787+
},
788+
"kernelspec": {
789+
"display_name": "Python 3",
790+
"language": "python",
791+
"name": "python3"
792+
},
793+
"language_info": {
794+
"codemirror_mode": {
795+
"name": "ipython",
796+
"version": 3
797+
},
798+
"file_extension": ".py",
799+
"mimetype": "text/x-python",
800+
"name": "python",
801+
"nbconvert_exporter": "python",
802+
"pygments_lexer": "ipython3",
803+
"version": "3.7.7"
804+
}
805+
},
806+
"nbformat": 4,
807+
"nbformat_minor": 1
808+
}

0 commit comments

Comments
 (0)
Please sign in to comment.