Skip to content

Commit 4323f1a

Browse files
committed
Merge pull request JuliaLang#74 from JuliaStats/dh/sample3
Improved & new sampling algorithms
2 parents 2152080 + c960307 commit 4323f1a

File tree

5 files changed

+407
-193
lines changed

5 files changed

+407
-193
lines changed

perf/sampling.jl

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Benchmark on non-weighted sampling
2+
3+
# require the BenchmarkLite package
4+
using BenchmarkLite
5+
6+
import StatsBase: direct_sample!, xmultinom_sample!
7+
import StatsBase: knuths_sample!, fisher_yates_sample!, self_avoid_sample!
8+
import StatsBase: seqsample_a!, seqsample_c!
9+
10+
### generic sampling benchmarking
11+
12+
# Benchmark procedure parameterized by an algorithm tag type `Alg`.
# It carries no fields; the tag type alone selects which sampler is run.
type SampleProc{Alg} <: Proc
end
13+
14+
# Supertype of the tags for the algorithms benchmarked "with replacement"
# (`Direct`, `Xmultinom`); configs for these only need n >= 1 and k >= 1.
abstract WithRep
15+
# Supertype of the tags for the algorithms benchmarked "without replacement"
# (`Knuths`, `Fisher_Yates`, `Self_Avoid`, `Seq_A`, `Seq_C`); configs for
# these must satisfy n >= k >= 1.
abstract NoRep
16+
17+
# Tag type selecting `direct_sample!`; grouped under `WithRep`.
type Direct <: WithRep
end

# Forward `a` and `x` unchanged to `direct_sample!` and return its result.
function tsample!(s::Direct, a, x)
    return direct_sample!(a, x)
end
19+
20+
# Tag type selecting `xmultinom_sample!`; grouped under `WithRep`.
type Xmultinom <: WithRep
end

# Forward `a` and `x` unchanged to `xmultinom_sample!` and return its result.
function tsample!(s::Xmultinom, a, x)
    return xmultinom_sample!(a, x)
end
22+
23+
# Tag type selecting `knuths_sample!`; grouped under `NoRep`.
type Knuths <: NoRep
end

# Forward `a` and `x` unchanged to `knuths_sample!` and return its result.
function tsample!(s::Knuths, a, x)
    return knuths_sample!(a, x)
end
25+
26+
# Tag type selecting `fisher_yates_sample!`; grouped under `NoRep`.
type Fisher_Yates <: NoRep
end

# Forward `a` and `x` unchanged to `fisher_yates_sample!` and return its result.
function tsample!(s::Fisher_Yates, a, x)
    return fisher_yates_sample!(a, x)
end
28+
29+
# Tag type selecting `self_avoid_sample!`; grouped under `NoRep`.
type Self_Avoid <: NoRep
end

# Forward `a` and `x` unchanged to `self_avoid_sample!` and return its result.
function tsample!(s::Self_Avoid, a, x)
    return self_avoid_sample!(a, x)
end
31+
32+
# Tag type selecting `seqsample_a!`; grouped under `NoRep`.
type Seq_A <: NoRep
end

# Forward `a` and `x` unchanged to `seqsample_a!` and return its result.
function tsample!(s::Seq_A, a, x)
    return seqsample_a!(a, x)
end
34+
35+
# Tag type selecting `seqsample_c!`; grouped under `NoRep`.
type Seq_C <: NoRep
end

# Forward `a` and `x` unchanged to `seqsample_c!` and return its result.
function tsample!(s::Seq_C, a, x)
    return seqsample_c!(a, x)
end
37+
38+
39+
# config is in the form of (n, k)
40+
41+
# Display name of a procedure: the algorithm tag's type name, lowercased.
function Base.string{Alg}(p::SampleProc{Alg})
    algname = string(Alg)
    return lowercase(algname)
end
42+
43+
# Return the second entry of `cfg`, i.e. k in this file's (n, k) convention.
function Base.length(p::SampleProc, cfg::(Int, Int))
    return cfg[2]
end
44+
# A config (n, k) for a `WithRep` algorithm is valid when both entries are >= 1.
function Base.isvalid{Alg<:WithRep}(p::SampleProc{Alg}, cfg::(Int, Int))
    n, k = cfg
    return n >= 1 && k >= 1
end
45+
# A config (n, k) for a `NoRep` algorithm is valid when 1 <= k <= n.
function Base.isvalid{Alg<:NoRep}(p::SampleProc{Alg}, cfg::(Int, Int))
    n, k = cfg
    return n >= k >= 1
end
46+
47+
# Per-config setup: allocate an Int vector of length k (`cfg[2]`).
# The returned vector is what `Base.run` receives as its `s` argument
# (presumably handed over by the benchmark driver -- confirm against BenchmarkLite).
function Base.start(p::SampleProc, cfg::(Int, Int))
    k = cfg[2]
    return Array(Int, k)
end
48+
# One benchmark run: call the sampler selected by `Alg` with the range
# 1:n (n = `cfg[1]`) as the first argument and the vector `s` as the second.
function Base.run{Alg}(p::SampleProc{Alg}, cfg::(Int, Int), s::Vector{Int})
    population = 1:cfg[1]
    return tsample!(Alg(), population, s)
end
49+
# Teardown hook: there is nothing to release, so it just returns `nothing`.
function Base.done(p::SampleProc, cfg, s)
    return nothing
end
50+
51+
52+
### benchmarking
53+
54+
# Values of n (first config entry) used for the with-replacement configs:
# 5 * 2^i for i = 0..9.
const ns = 5 * (2 .^ [0:9])
55+
# Values of k (second config entry) used for the with-replacement configs:
# 2^j for j = 1..16.
const ks = 2 .^ [1:16]
56+
57+
## with replacement
58+
59+
# Procedures compared in the with-replacement benchmark.
const procs1 = Proc[SampleProc{Direct}(), SampleProc{Xmultinom}()]

# Every (n, k) combination of `ns` and `ks`, flattened into a vector;
# k varies fastest because it is the first comprehension index.
const cfgs1 = vec([(n, k) for k in ks, n in ns])

# Run every procedure on every config; `duration` is passed through to `run`.
rtable1 = run(procs1, cfgs1; duration=0.2)
println()
66+
67+
## without replacement
68+
69+
# Procedures compared in the without-replacement benchmark.
const procs2 = Proc[SampleProc{Knuths}(), SampleProc{Fisher_Yates}(),
                    SampleProc{Self_Avoid}(), SampleProc{Seq_A}(),
                    SampleProc{Seq_C}()]

# Configs (n, k) with n = 5 * 2^i (i = 0..11) and k = 2^j (j = 1..16),
# keeping only the pairs where k is strictly smaller than n.
# n is the outer loop, so configs are grouped by n with k ascending.
const cfgs2 = (Int, Int)[]
for n in 5 * (2 .^ [0:11])
    for k in 2 .^ [1:16]
        k < n && push!(cfgs2, (n, k))
    end
end

# Run every procedure on every retained config; `duration` is passed through to `run`.
rtable2 = run(procs2, cfgs2; duration=0.2)
println()
84+
85+
## show results
86+
87+
# Print each result table under its title and a rule line, followed by a
# blank line; the with-replacement table comes first.
for (title, rtable) in (("Sampling With Replacement", rtable1),
                        ("Sampling Without Replacement", rtable2))
    println(title)
    println("===================================")
    show(rtable; unit=:mps, cfghead="(n, k)")
    println()
end
96+

perf/sampling1.jl

-45
This file was deleted.

src/StatsBase.jl

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ module StatsBase
88

99
export
1010

11+
# reexport from ArrayViews
12+
view,
13+
1114
## mathfuns
1215
xlogx, # x * log(x)
1316
xlogy, # x * log(y)

0 commit comments

Comments
 (0)