1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " ab-tests-with-python_作業解答.ipynb" ,
7
+ "provenance" : []
8
+ },
9
+ "kernelspec" : {
10
+ "display_name" : " Python 3" ,
11
+ "language" : " python" ,
12
+ "name" : " python3"
13
+ },
14
+ "language_info" : {
15
+ "codemirror_mode" : {
16
+ "name" : " ipython" ,
17
+ "version" : 3
18
+ },
19
+ "file_extension" : " .py" ,
20
+ "mimetype" : " text/x-python" ,
21
+ "name" : " python" ,
22
+ "nbconvert_exporter" : " python" ,
23
+ "pygments_lexer" : " ipython3" ,
24
+ "version" : " 3.6.3"
25
+ }
26
+ },
27
+ "cells" : [
28
+ {
29
+ "cell_type" : " markdown" ,
30
+ "metadata" : {
31
+ "id" : " RTnv32wkA_Ox"
32
+ },
33
+ "source" : [
34
+ " # **作業說明**\n " ,
35
+ " # (這是Udacity關於A/B Test的期末專題)\n " ,
36
+ " \n " ,
37
+ " Udacity希望了解,在免費14天試學網頁上,除了要信用卡資訊外,還想了解學生願意花多少小時學。如果少於某門檻(5小時),就建議學生不要註冊,免費聽聽影音就好,免得浪費資源,降低學習成功率。\n " ,
38
+ " \n " ,
39
+ " 我們的題目是,增加這個頁面,是否對Gross Conversion(GC)和Net Conversion (NC)造成統計上顯著(Significant)的改變(Alpha=0.05,Power=0.8;最小可偵測差異 d:GC為0.01、NC為0.0075)。\n " ,
40
+ " \n " ,
41
+ " CI = click 數目\n " ,
42
+ " \n " ,
43
+ " GC = 註冊數/CI (聽了建議仍然註冊的比例)\n " ,
44
+ " \n " ,
45
+ " NC = 繳費數/CI (14天之後繳費且繼續的比例)\n " ,
46
+ " \n " ,
47
+ " 我們期待GC比原來下降,但NC不降,這表示省去資源但收入不降。\n " ,
48
+ " \n " ,
49
+ " 檔名:ab-tests-with-python.ipynb\n " ,
50
+ " \n " ,
51
+ " **作業目標**\n " ,
52
+ " \n " ,
53
+ " 1. 經由範例程式,學習A/B Test 的步驟\n " ,
54
+ " 2. 最低樣本數的計算方法\n " ,
55
+ " 3. 自行開發信賴區間計算函數\n " ,
56
+ " \n " ,
57
+ " \n " ,
58
+ " \n " ,
59
+ " \n " ,
60
+ " \n " ,
61
+ " \n " ,
62
+ " \n " ,
63
+ " \n " ,
64
+ " \n " ,
65
+ " \n " ,
66
+ " \n "
67
+ ]
68
+ },
69
+ {
70
+ "cell_type" : " code" ,
71
+ "metadata" : {
72
+ "colab" : {
73
+ "background_save" : true
74
+ },
75
+ "id" : " QfCMcrfTFyMx"
76
+ },
77
+ "source" : [
78
+ " #載入程式庫\n " ,
79
+ " import math as mt\n " ,
80
+ " import numpy as np\n " ,
81
+ " import pandas as pd\n " ,
82
+ " from scipy.stats import norm"
83
+ ],
84
+ "execution_count" : null ,
85
+ "outputs" : []
86
+ },
87
+ {
88
+ "cell_type" : " code" ,
89
+ "metadata" : {
90
+ "id" : " _iZnYjxIFyMy"
91
+ },
92
+ "source" : [
93
+ " #將基礎數據放入字典\n " ,
94
+ " baseline = {\" Cookies\" :40000,\" Clicks\" :3200,\" Enrollments\" :660,\" CTP\" :0.08,\" GConversion\" :0.20625,\n " ,
95
+ " \" Retention\" :0.53,\" NConversion\" :0.109313}"
96
+ ],
97
+ "execution_count" : null ,
98
+ "outputs" : []
99
+ },
100
+ {
101
+ "cell_type" : " code" ,
102
+ "metadata" : {
103
+ "id" : " rE-idI4vFyMy"
104
+ },
105
+ "source" : [
106
+ " #調整大小到以Cookie為基準\n " ,
107
+ " baseline[\" Cookies\" ] = 5000\n " ,
108
+ " baseline[\" Clicks\" ]=baseline[\" Clicks\" ]*(5000/40000)\n " ,
109
+ " baseline[\" Enrollments\" ]=baseline[\" Enrollments\" ]*(5000/40000)\n " ,
110
+ " baseline"
111
+ ],
112
+ "execution_count" : null ,
113
+ "outputs" : []
114
+ },
115
+ {
116
+ "cell_type" : " code" ,
117
+ "metadata" : {
118
+ "colab" : {
119
+ "base_uri" : " https://localhost:8080/"
120
+ },
121
+ "id" : " gNpNShHKFyMz" ,
122
+ "outputId" : " 1c00edec-27d3-4729-88e8-ce58eeef0c99"
123
+ },
124
+ "source" : [
125
+ " # 算出 Gross Conversion (GC) 的 p 和 n\n " ,
126
+ " # 還有 Standard Deviation(sd) rounded to 4 decimal digits.\n " ,
127
+ " GC={}\n " ,
128
+ " GC[\" d_min\" ]=0.01\n " ,
129
+ " GC[\" p\" ]=baseline[\" GConversion\" ]\n " ,
130
+ " #p is given in this case - or we could calculate it from enrollments/clicks\n " ,
131
+ " GC[\" n\" ]=baseline[\" Clicks\" ]\n " ,
132
+ " GC[\" sd\" ]=round(mt.sqrt((GC[\" p\" ]*(1-GC[\" p\" ]))/GC[\" n\" ]),4)\n " ,
133
+ " GC[\" sd\" ]"
134
+ ],
135
+ "execution_count" : null ,
136
+ "outputs" : [
137
+ {
138
+ "output_type" : " execute_result" ,
139
+ "data" : {
140
+ "text/plain" : [
141
+ " 0.0202"
142
+ ]
143
+ },
144
+ "metadata" : {
145
+ "tags" : []
146
+ },
147
+ "execution_count" : 9
148
+ }
149
+ ]
150
+ },
151
+ {
152
+ "cell_type" : " code" ,
153
+ "metadata" : {
154
+ "colab" : {
155
+ "base_uri" : " https://localhost:8080/"
156
+ },
157
+ "id" : " w_WjAIKkFyMz" ,
158
+ "outputId" : " c924bda0-918c-4073-8e55-7cc3d67ad4e9"
159
+ },
160
+ "source" : [
161
+ " # Retention(R) \n " ,
162
+ " \n " ,
163
+ " R={}\n " ,
164
+ " R[\" d_min\" ]=0.01\n " ,
165
+ " R[\" p\" ]=baseline[\" Retention\" ]\n " ,
166
+ " R[\" n\" ]=baseline[\" Enrollments\" ]\n " ,
167
+ " R[\" sd\" ]=round(mt.sqrt((R[\" p\" ]*(1-R[\" p\" ]))/R[\" n\" ]),4)\n " ,
168
+ " R[\" sd\" ]"
169
+ ],
170
+ "execution_count" : null ,
171
+ "outputs" : [
172
+ {
173
+ "output_type" : " execute_result" ,
174
+ "data" : {
175
+ "text/plain" : [
176
+ " 0.0549"
177
+ ]
178
+ },
179
+ "metadata" : {
180
+ "tags" : []
181
+ },
182
+ "execution_count" : 4
183
+ }
184
+ ]
185
+ },
186
+ {
187
+ "cell_type" : " code" ,
188
+ "metadata" : {
189
+ "colab" : {
190
+ "base_uri" : " https://localhost:8080/"
191
+ },
192
+ "id" : " kwx3Of06FyMz" ,
193
+ "outputId" : " efc60cde-6e3b-4bed-e5f6-a3b697b3d393"
194
+ },
195
+ "source" : [
196
+ " # Net Conversion (NC)\n " ,
197
+ " NC={}\n " ,
198
+ " NC[\" d_min\" ]=0.0075\n " ,
199
+ " NC[\" p\" ]=baseline[\" NConversion\" ]\n " ,
200
+ " NC[\" n\" ]=baseline[\" Clicks\" ]\n " ,
201
+ " NC[\" sd\" ]=round(mt.sqrt((NC[\" p\" ]*(1-NC[\" p\" ]))/NC[\" n\" ]),4)\n " ,
202
+ " NC[\" sd\" ]"
203
+ ],
204
+ "execution_count" : null ,
205
+ "outputs" : [
206
+ {
207
+ "output_type" : " execute_result" ,
208
+ "data" : {
209
+ "text/plain" : [
210
+ " 0.0156"
211
+ ]
212
+ },
213
+ "metadata" : {
214
+ "tags" : []
215
+ },
216
+ "execution_count" : 5
217
+ }
218
+ ]
219
+ },
220
+ {
221
+ "cell_type" : " code" ,
222
+ "metadata" : {
223
+ "id" : " pBk7b5uMFyM0"
224
+ },
225
+ "source" : [
226
+ " def get_sds(p,d):\n " ,
227
+ " sd1=mt.sqrt(2*p*(1-p))\n " ,
228
+ " sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n " ,
229
+ " x=[sd1,sd2]\n " ,
230
+ " return x"
231
+ ],
232
+ "execution_count" : null ,
233
+ "outputs" : []
234
+ },
235
+ {
236
+ "cell_type" : " code" ,
237
+ "metadata" : {
238
+ "id" : " mB2im4rcFyM0"
239
+ },
240
+ "source" : [
241
+ " #計算 Z-score\n " ,
242
+ " def get_z_score(alpha):\n " ,
243
+ " return norm.ppf(alpha)\n " ,
244
+ " \n " ,
245
+ " # 得到兩個(A/B)標準差\n " ,
246
+ " def get_sds(p,d):\n " ,
247
+ " sd1=mt.sqrt(2*p*(1-p))\n " ,
248
+ " sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))\n " ,
249
+ " sds=[sd1,sd2]\n " ,
250
+ " return sds\n " ,
251
+ " \n " ,
252
+ " # 求Sample Size\n " ,
253
+ " def get_sampSize(sds,alpha,beta,d):\n " ,
254
+ " n=pow((get_z_score(1-alpha/2)*sds[0]+get_z_score(1-beta)*sds[1]),2)/pow(d,2)\n " ,
255
+ " return n"
256
+ ],
257
+ "execution_count" : null ,
258
+ "outputs" : []
259
+ },
260
+ {
261
+ "cell_type" : " code" ,
262
+ "metadata" : {
263
+ "id" : " uFh5tlyTFyM0"
264
+ },
265
+ "source" : [
266
+ " GC[\" d\" ]=0.01\n " ,
267
+ " R[\" d\" ]=0.01\n " ,
268
+ " NC[\" d\" ]=0.0075"
269
+ ],
270
+ "execution_count" : null ,
271
+ "outputs" : []
272
+ },
273
+ {
274
+ "cell_type" : " code" ,
275
+ "metadata" : {
276
+ "colab" : {
277
+ "base_uri" : " https://localhost:8080/"
278
+ },
279
+ "id" : " fiOPWnzNFyM0" ,
280
+ "outputId" : " f1b76240-2fe5-44bd-ddcf-225afa73b54a"
281
+ },
282
+ "source" : [
283
+ " # Let's get an integer value for simplicity\n " ,
284
+ " GC[\" SampSize\" ]=round(get_sampSize(get_sds(GC[\" p\" ],GC[\" d\" ]),0.05,0.2,GC[\" d\" ]))\n " ,
285
+ " GC[\" SampSize\" ]"
286
+ ],
287
+ "execution_count" : null ,
288
+ "outputs" : [
289
+ {
290
+ "output_type" : " execute_result" ,
291
+ "data" : {
292
+ "text/plain" : [
293
+ " 25835.0"
294
+ ]
295
+ },
296
+ "metadata" : {
297
+ "tags" : []
298
+ },
299
+ "execution_count" : 11
300
+ }
301
+ ]
302
+ },
303
+ {
304
+ "cell_type" : " code" ,
305
+ "metadata" : {
306
+ "colab" : {
307
+ "base_uri" : " https://localhost:8080/"
308
+ },
309
+ "id" : " X7vCHRGDFyM0" ,
310
+ "outputId" : " 647b76b3-d56e-4dba-e45b-005e919fd854"
311
+ },
312
+ "source" : [
313
+ " GC[\" SampSize\" ]=round(GC[\" SampSize\" ]/0.08*2)\n " ,
314
+ " GC[\" SampSize\" ]"
315
+ ],
316
+ "execution_count" : null ,
317
+ "outputs" : [
318
+ {
319
+ "output_type" : " execute_result" ,
320
+ "data" : {
321
+ "text/plain" : [
322
+ " 645875.0"
323
+ ]
324
+ },
325
+ "metadata" : {
326
+ "tags" : []
327
+ },
328
+ "execution_count" : 12
329
+ }
330
+ ]
331
+ },
332
+ {
333
+ "cell_type" : " code" ,
334
+ "metadata" : {
335
+ "colab" : {
336
+ "base_uri" : " https://localhost:8080/"
337
+ },
338
+ "collapsed" : true ,
339
+ "id" : " z3jx0jgiFyM0" ,
340
+ "outputId" : " 1464feca-9b43-43c6-cb6d-aae739d2eb5a"
341
+ },
342
+ "source" : [
343
+ " # Getting a nice integer value\n " ,
344
+ " R[\" SampSize\" ]=round(get_sampSize(get_sds(R[\" p\" ],R[\" d\" ]),0.05,0.2,R[\" d\" ]))\n " ,
345
+ " R[\" SampSize\" ]"
346
+ ],
347
+ "execution_count" : null ,
348
+ "outputs" : [
349
+ {
350
+ "output_type" : " execute_result" ,
351
+ "data" : {
352
+ "text/plain" : [
353
+ " 39087.0"
354
+ ]
355
+ },
356
+ "metadata" : {
357
+ "tags" : []
358
+ },
359
+ "execution_count" : 13
360
+ }
361
+ ]
362
+ },
363
+ {
364
+ "cell_type" : " code" ,
365
+ "metadata" : {
366
+ "colab" : {
367
+ "base_uri" : " https://localhost:8080/"
368
+ },
369
+ "collapsed" : true ,
370
+ "id" : " Zc1NFMp3FyM0" ,
371
+ "outputId" : " ce917636-2197-45af-a7a5-d75eccc51398"
372
+ },
373
+ "source" : [
374
+ " R[\" SampSize\" ]=R[\" SampSize\" ]/0.08/0.20625*2\n " ,
375
+ " R[\" SampSize\" ]"
376
+ ],
377
+ "execution_count" : null ,
378
+ "outputs" : [
379
+ {
380
+ "output_type" : " execute_result" ,
381
+ "data" : {
382
+ "text/plain" : [
383
+ " 4737818.181818182"
384
+ ]
385
+ },
386
+ "metadata" : {
387
+ "tags" : []
388
+ },
389
+ "execution_count" : 14
390
+ }
391
+ ]
392
+ },
393
+ {
394
+ "cell_type" : " code" ,
395
+ "metadata" : {
396
+ "colab" : {
397
+ "base_uri" : " https://localhost:8080/"
398
+ },
399
+ "collapsed" : true ,
400
+ "id" : " XqkTq_D-FyM0" ,
401
+ "outputId" : " 2b1a96db-4bd1-49c9-ac49-02f65f95ee0b"
402
+ },
403
+ "source" : [
404
+ " # Getting a nice integer value\n " ,
405
+ " NC[\" SampSize\" ]=round(get_sampSize(get_sds(NC[\" p\" ],NC[\" d\" ]),0.05,0.2,NC[\" d\" ]))\n " ,
406
+ " NC[\" SampSize\" ]"
407
+ ],
408
+ "execution_count" : null ,
409
+ "outputs" : [
410
+ {
411
+ "output_type" : " execute_result" ,
412
+ "data" : {
413
+ "text/plain" : [
414
+ " 27413.0"
415
+ ]
416
+ },
417
+ "metadata" : {
418
+ "tags" : []
419
+ },
420
+ "execution_count" : 15
421
+ }
422
+ ]
423
+ },
424
+ {
425
+ "cell_type" : " code" ,
426
+ "metadata" : {
427
+ "colab" : {
428
+ "base_uri" : " https://localhost:8080/"
429
+ },
430
+ "collapsed" : true ,
431
+ "id" : " UGArSr5lFyM0" ,
432
+ "outputId" : " db5dcecd-d9ff-4b29-8ef4-41364a2591bf"
433
+ },
434
+ "source" : [
435
+ " NC[\" SampSize\" ]=NC[\" SampSize\" ]/0.08*2\n " ,
436
+ " NC[\" SampSize\" ]"
437
+ ],
438
+ "execution_count" : null ,
439
+ "outputs" : [
440
+ {
441
+ "output_type" : " execute_result" ,
442
+ "data" : {
443
+ "text/plain" : [
444
+ " 685325.0"
445
+ ]
446
+ },
447
+ "metadata" : {
448
+ "tags" : []
449
+ },
450
+ "execution_count" : 16
451
+ }
452
+ ]
453
+ },
454
+ {
455
+ "cell_type" : " code" ,
456
+ "metadata" : {
457
+ "colab" : {
458
+ "base_uri" : " https://localhost:8080/" ,
459
+ "height" : 198
460
+ },
461
+ "id" : " Oj_s62oaFyM1" ,
462
+ "outputId" : " b9b4f542-7483-4d29-b598-c27e595f2e2e"
463
+ },
464
+ "source" : [
465
+ " # 載入數據\n " ,
466
+ " control=pd.read_csv(\" ./sample_data/control_data.csv\" )\n " ,
467
+ " experiment=pd.read_csv(\" ./sample_data/experiment_data.csv\" )\n " ,
468
+ " control.head()"
469
+ ],
470
+ "execution_count" : null ,
471
+ "outputs" : [
472
+ {
473
+ "output_type" : " execute_result" ,
474
+ "data" : {
475
+ "text/html" : [
476
+ " <div>\n " ,
477
+ " <style scoped>\n " ,
478
+ " .dataframe tbody tr th:only-of-type {\n " ,
479
+ " vertical-align: middle;\n " ,
480
+ " }\n " ,
481
+ " \n " ,
482
+ " .dataframe tbody tr th {\n " ,
483
+ " vertical-align: top;\n " ,
484
+ " }\n " ,
485
+ " \n " ,
486
+ " .dataframe thead th {\n " ,
487
+ " text-align: right;\n " ,
488
+ " }\n " ,
489
+ " </style>\n " ,
490
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
491
+ " <thead>\n " ,
492
+ " <tr style=\" text-align: right;\" >\n " ,
493
+ " <th></th>\n " ,
494
+ " <th>Date</th>\n " ,
495
+ " <th>Pageviews</th>\n " ,
496
+ " <th>Clicks</th>\n " ,
497
+ " <th>Enrollments</th>\n " ,
498
+ " <th>Payments</th>\n " ,
499
+ " </tr>\n " ,
500
+ " </thead>\n " ,
501
+ " <tbody>\n " ,
502
+ " <tr>\n " ,
503
+ " <th>0</th>\n " ,
504
+ " <td>Sat, Oct 11</td>\n " ,
505
+ " <td>7723</td>\n " ,
506
+ " <td>687</td>\n " ,
507
+ " <td>134.0</td>\n " ,
508
+ " <td>70.0</td>\n " ,
509
+ " </tr>\n " ,
510
+ " <tr>\n " ,
511
+ " <th>1</th>\n " ,
512
+ " <td>Sun, Oct 12</td>\n " ,
513
+ " <td>9102</td>\n " ,
514
+ " <td>779</td>\n " ,
515
+ " <td>147.0</td>\n " ,
516
+ " <td>70.0</td>\n " ,
517
+ " </tr>\n " ,
518
+ " <tr>\n " ,
519
+ " <th>2</th>\n " ,
520
+ " <td>Mon, Oct 13</td>\n " ,
521
+ " <td>10511</td>\n " ,
522
+ " <td>909</td>\n " ,
523
+ " <td>167.0</td>\n " ,
524
+ " <td>95.0</td>\n " ,
525
+ " </tr>\n " ,
526
+ " <tr>\n " ,
527
+ " <th>3</th>\n " ,
528
+ " <td>Tue, Oct 14</td>\n " ,
529
+ " <td>9871</td>\n " ,
530
+ " <td>836</td>\n " ,
531
+ " <td>156.0</td>\n " ,
532
+ " <td>105.0</td>\n " ,
533
+ " </tr>\n " ,
534
+ " <tr>\n " ,
535
+ " <th>4</th>\n " ,
536
+ " <td>Wed, Oct 15</td>\n " ,
537
+ " <td>10014</td>\n " ,
538
+ " <td>837</td>\n " ,
539
+ " <td>163.0</td>\n " ,
540
+ " <td>64.0</td>\n " ,
541
+ " </tr>\n " ,
542
+ " </tbody>\n " ,
543
+ " </table>\n " ,
544
+ " </div>"
545
+ ],
546
+ "text/plain" : [
547
+ " Date Pageviews Clicks Enrollments Payments\n " ,
548
+ " 0 Sat, Oct 11 7723 687 134.0 70.0\n " ,
549
+ " 1 Sun, Oct 12 9102 779 147.0 70.0\n " ,
550
+ " 2 Mon, Oct 13 10511 909 167.0 95.0\n " ,
551
+ " 3 Tue, Oct 14 9871 836 156.0 105.0\n " ,
552
+ " 4 Wed, Oct 15 10014 837 163.0 64.0"
553
+ ]
554
+ },
555
+ "metadata" : {
556
+ "tags" : []
557
+ },
558
+ "execution_count" : 21
559
+ }
560
+ ]
561
+ },
562
+ {
563
+ "cell_type" : " code" ,
564
+ "metadata" : {
565
+ "colab" : {
566
+ "base_uri" : " https://localhost:8080/"
567
+ },
568
+ "collapsed" : true ,
569
+ "id" : " c2AH9yHaFyM1" ,
570
+ "outputId" : " 5828ee0c-f831-4862-e918-14b3dcf12f28"
571
+ },
572
+ "source" : [
573
+ " pageviews_cont=control['Pageviews'].sum()\n " ,
574
+ " pageviews_exp=experiment['Pageviews'].sum()\n " ,
575
+ " pageviews_total=pageviews_cont+pageviews_exp\n " ,
576
+ " print (\" number of pageviews in control:\" , pageviews_cont)\n " ,
577
+ " print (\" number of Pageviewsin experiment:\" ,pageviews_exp)"
578
+ ],
579
+ "execution_count" : null ,
580
+ "outputs" : [
581
+ {
582
+ "output_type" : " stream" ,
583
+ "text" : [
584
+ " number of pageviews in control: 345543\n " ,
585
+ " number of Pageviewsin experiment: 344660\n "
586
+ ],
587
+ "name" : " stdout"
588
+ }
589
+ ]
590
+ },
591
+ {
592
+ "cell_type" : " code" ,
593
+ "metadata" : {
594
+ "id" : " kxKPjYuWFyM1"
595
+ },
596
+ "source" : [
597
+ " # Count the total clicks from complete records only\n " ,
598
+ " clicks_cont=control[\" Clicks\" ].loc[control[\" Enrollments\" ].notnull()].sum()\n " ,
599
+ " clicks_exp=experiment[\" Clicks\" ].loc[experiment[\" Enrollments\" ].notnull()].sum()"
600
+ ],
601
+ "execution_count" : null ,
602
+ "outputs" : []
603
+ },
604
+ {
605
+ "cell_type" : " code" ,
606
+ "metadata" : {
607
+ "colab" : {
608
+ "base_uri" : " https://localhost:8080/"
609
+ },
610
+ "id" : " I4gtEo6LFyM1" ,
611
+ "outputId" : " cb83ee22-d69a-4feb-c6bb-52592a08b077"
612
+ },
613
+ "source" : [
614
+ " #Gross Conversion - number of enrollments divided by number of clicks\n " ,
614
+ " enrollments_cont=control[\" Enrollments\" ].sum()\n " ,
615
+ " enrollments_exp=experiment[\" Enrollments\" ].sum()\n " ,
616
+ " alpha=0.05 # significance level for the 95% CI; defined here so the cell runs on a fresh kernel\n " ,
617
+ " GC_cont=enrollments_cont/clicks_cont\n " ,
618
+ " GC_exp=enrollments_exp/clicks_exp\n " ,
619
+ " GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp) # pooled rate over both groups\n " ,
620
+ " GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp)) # pooled standard error of the difference\n " ,
621
+ " GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4) # margin of error = two-sided z critical value * pooled SE\n " ,
622
+ " GC_diff=round(GC_exp-GC_cont,4)\n " ,
623
+ " print(\" The change due to the experiment is\" ,GC_diff*100,\" %\" )\n " ,
624
+ " print(\" Confidence Interval: [\" ,GC_diff-GC_ME,\" ,\" ,GC_diff+GC_ME,\" ]\" )\n " ,
625
+ " print (\" The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\" ,-GC[\" d_min\" ],\" is not in the CI as well.\" )"
627
+ ],
628
+ "execution_count" : null ,
629
+ "outputs" : [
630
+ {
631
+ "output_type" : " stream" ,
632
+ "text" : [
633
+ " The change due to the experiment is -2.06 %\n " ,
634
+ " Confidence Interval: [ -0.0292 , -0.012 ]\n " ,
635
+ " The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if -0.01 is not in the CI as well.\n "
636
+ ],
637
+ "name" : " stdout"
638
+ }
639
+ ]
640
+ },
641
+ {
642
+ "cell_type" : " code" ,
643
+ "metadata" : {
644
+ "colab" : {
645
+ "base_uri" : " https://localhost:8080/"
646
+ },
647
+ "collapsed" : true ,
648
+ "id" : " MQIg2XBsFyM1" ,
649
+ "outputId" : " 74843a74-15e2-4f60-c0a7-469d7a11acc2"
650
+ },
651
+ "source" : [
652
+ " #Net Conversion - number of payments divided by number of clicks\n " ,
652
+ " payments_cont=control[\" Payments\" ].sum()\n " ,
653
+ " payments_exp=experiment[\" Payments\" ].sum()\n " ,
654
+ " alpha=0.05 # significance level for the 95% CI; defined here so the cell runs on a fresh kernel\n " ,
655
+ " NC_cont=payments_cont/clicks_cont\n " ,
656
+ " NC_exp=payments_exp/clicks_exp\n " ,
657
+ " NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp) # pooled rate over both groups\n " ,
658
+ " NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp)) # pooled standard error of the difference\n " ,
659
+ " NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4) # margin of error = two-sided z critical value * pooled SE\n " ,
660
+ " NC_diff=round(NC_exp-NC_cont,4)\n " ,
661
+ " print(\" The change due to the experiment is\" ,NC_diff*100,\" %\" )\n " ,
662
+ " print(\" Confidence Interval: [\" ,NC_diff-NC_ME,\" ,\" ,NC_diff+NC_ME,\" ]\" )\n " ,
663
+ " print (\" The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if\" ,NC[\" d_min\" ],\" is not in the CI as well.\" )"
665
+ ],
666
+ "execution_count" : null ,
667
+ "outputs" : [
668
+ {
669
+ "output_type" : " stream" ,
670
+ "text" : [
671
+ " The change due to the experiment is -0.49 %\n " ,
672
+ " Confidence Interval: [ -0.0116 , 0.0018000000000000004 ]\n " ,
673
+ " The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if 0.0075 is not in the CI as well.\n "
674
+ ],
675
+ "name" : " stdout"
676
+ }
677
+ ]
678
+ },
679
+ {
680
+ "cell_type" : " markdown" ,
681
+ "metadata" : {
682
+ "id" : " K2hnLtKrFS75"
683
+ },
684
+ "source" : [
685
+ " # **作業**\n " ,
686
+ " # 經由範例程式碼,熟悉A/B Test的步驟\n " ,
687
+ " \n " ,
688
+ " 請同學逐步跟隨程式了解A/B Test步驟"
689
+ ]
690
+ },
691
+ {
692
+ "cell_type" : " markdown" ,
693
+ "metadata" : {
694
+ "id" : " -lO_8AYwuEDY"
695
+ },
696
+ "source" : [
697
+ " # **作業 嘗試以函數算出樣本數**"
698
+ ]
699
+ },
700
+ {
701
+ "cell_type" : " code" ,
702
+ "metadata" : {
703
+ "id" : " oRSbrNaRuRJA"
704
+ },
705
+ "source" : [
706
+ " #作業 Sample Size\n " ,
707
+ " import statsmodels.stats.api as sms\n " ,
708
+ " from math import ceil\n " ,
709
+ " \n " ,
710
+ " effect_size = sms.proportion_effectsize(GC[\" p\" ]-1.0*GC[\" d_min\" ], GC[\" p\" ]+0.0*GC[\" d_min\" ])\n " ,
711
+ " required_n = sms.NormalIndPower().solve_power(\n " ,
712
+ " effect_size, \n " ,
713
+ " power=0.8, \n " ,
714
+ " alpha=0.05, \n " ,
715
+ " ratio=1\n " ,
716
+ " ) \n " ,
717
+ " required_n = ceil(required_n) \n " ,
718
+ " print (effect_size,required_n) "
719
+ ],
720
+ "execution_count" : null ,
721
+ "outputs" : []
722
+ },
723
+ {
724
+ "cell_type" : " markdown" ,
725
+ "metadata" : {
726
+ "id" : " MKB09_mjFwjN"
727
+ },
728
+ "source" : [
729
+ " # **作業** 自行開發雙樣本比例的信賴區間函數\n "
730
+ ]
731
+ },
732
+ {
733
+ "cell_type" : " code" ,
734
+ "metadata" : {
735
+ "id" : " yT5goD1jHKpl"
736
+ },
737
+ "source" : [
738
+ " #作業解答\n " ,
739
+ " import scipy.stats as stats\n " ,
740
+ " def two_proprotions_confint(success_a, size_a, success_b, size_b, significance = 0.05):\n " ,
741
+ " \"\"\"\n " ,
742
+ " A/B test for two proportions;\n " ,
743
+ " given the success count and trial size of groups A and B, compute\n " ,
744
+ " its confidence interval;\n " ,
745
+ " resulting confidence interval matches R's prop.test function\n " ,
746
+ " \n " ,
747
+ " Parameters\n " ,
748
+ " ----------\n " ,
749
+ " success_a, success_b : int\n " ,
750
+ " Number of successes in each group\n " ,
751
+ " \n " ,
752
+ " size_a, size_b : int\n " ,
753
+ " Size, or number of observations in each group\n " ,
754
+ " \n " ,
755
+ " significance : float, default 0.05\n " ,
756
+ " Often denoted as alpha. Governs the chance of a false positive.\n " ,
757
+ " A significance level of 0.05 means that there is a 5% chance of\n " ,
758
+ " a false positive. In other words, our confidence level is\n " ,
759
+ " 1 - 0.05 = 0.95\n " ,
760
+ " \n " ,
761
+ " Returns\n " ,
762
+ " -------\n " ,
763
+ " prop_diff : float\n " ,
764
+ " Difference between the two proportions (prop_b - prop_a)\n " ,
765
+ " \n " ,
766
+ " confint : 1d ndarray\n " ,
767
+ " Confidence interval for the difference between the two proportions\n " ,
768
+ " \"\"\"\n " ,
769
+ " prop_a = success_a / size_a\n " ,
770
+ " prop_b = success_b / size_b\n " ,
771
+ " var = prop_a * (1 - prop_a) / size_a + prop_b * (1 - prop_b) / size_b\n " ,
772
+ " se = np.sqrt(var)\n " ,
773
+ " \n " ,
774
+ " # z critical value\n " ,
775
+ " confidence = 1 - significance\n " ,
776
+ " z = stats.norm(loc = 0, scale = 1).ppf(confidence + significance / 2)\n " ,
777
+ " \n " ,
778
+ " # standard formula for the confidence interval\n " ,
779
+ " # point-estimate +- z * standard-error\n " ,
780
+ " prop_diff = prop_b - prop_a\n " ,
781
+ " confint = prop_diff + np.array([-1, 1]) * z * se\n " ,
782
+ " return prop_diff, confint\n " ,
783
+ " two_proprotions_confint(enrollments_cont, clicks_cont, enrollments_exp, clicks_exp, significance = 0.05)"
784
+ ],
785
+ "execution_count" : null ,
786
+ "outputs" : []
787
+ }
788
+ ]
789
+ }
0 commit comments