Skip to content

Commit

Permalink
Merge pull request #30 from jimbrig/feature/simulate_function
Browse files Browse the repository at this point in the history
Feature: Function to Support Simulation of Transactional Claims Data
  • Loading branch information
jimbrig authored Jan 13, 2022
2 parents 5152881 + ab61b18 commit 890c3e6
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 0 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ Imports:
magrittr,
pool,
purrr,
randomNames,
readr,
rlang (>= 0.4.11),
stats,
RPostgres,
shiny,
stringr,
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ importFrom(dbx,dbxExecute)
importFrom(dbx,dbxInsert)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,coalesce)
importFrom(dplyr,collect)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
importFrom(dplyr,lag)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
Expand All @@ -62,13 +64,15 @@ importFrom(glue,glue_collapse)
importFrom(here,here)
importFrom(lubridate,as_date)
importFrom(lubridate,ceiling_date)
importFrom(lubridate,days)
importFrom(lubridate,is.Date)
importFrom(lubridate,mdy)
importFrom(lubridate,year)
importFrom(lubridate,ymd)
importFrom(magrittr,"%>%")
importFrom(pool,dbPool)
importFrom(purrr,map_dfc)
importFrom(randomNames,randomNames)
importFrom(readr,read_csv)
importFrom(readr,read_file)
importFrom(rlang,":=")
Expand All @@ -79,6 +83,11 @@ importFrom(rlang,as_name)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,inform)
importFrom(stats,rbinom)
importFrom(stats,rlnorm)
importFrom(stats,rnbinom)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(shiny,HTML)
importFrom(stringr,str_extract)
importFrom(stringr,str_extract_all)
Expand Down
153 changes: 153 additions & 0 deletions R/actuary-simulate_claims.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#' simulate_claims
#'
#' @description A function to simulate *transactional* actuarial claims/loss
#' data for Property Casualty Insurance.
#'
#' @param n_claims Numeric - Number of claims to be simulated.
#' @param start_date,end_date Character/Date - Start and End dates for simulation to create claims within (experience_period).
#' @param seed Numeric - the seed is used to isolate randomness during statistical simulations.
#' @param loss_distribution Character - must be one of the distributions mentioned in the details below. Defaults to lognormal.
#' @param probability_open Numeric - must be within `0 < x < 1` and represents probability a claim is open when running binomial simulations for claims' status.
#'
#' @details
#'
#' Severity/Loss Distributions:
#' - Normal: `norm`
#' - Lognormal: `lnorm`
#' - Gamma: `gamma`
#' - LogGamma: `lgamma`
#' - Pareto: `pareto`
#' - Weibull: `weibull`
#' - Generalized Beta: `genbeta`
#'
#' @return The return value, if any, from executing the function.
#'
#' @importFrom dplyr mutate arrange bind_rows group_by ungroup filter left_join select
#' @importFrom lubridate days
#' @importFrom randomNames randomNames
#' @importFrom stats rlnorm rnbinom rbinom runif rnorm
#' @importFrom tibble tibble
simulate_claims <- function(n_claims = 1000,
start_date = "2015-01-01",
end_date = Sys.Date(),
seed = 12345,
loss_distribution = "lnorm",
params = list(mean_log = 7.5, sd_log = 1.5),
status_prob_open = 0.96,
cache = FALSE,
...) {

# loss_distribution <- match.arg("loss_distribution")

stopifnot(
is.numeric(n_claims) && n_claims > 0,
class(as.Date(start_date)) == "Date",
class(as.Date(end_date)) == "Date" &&
as.Date(end_date) > as.Date(start_date),
is.numeric(seed),
loss_distribution %in% c(
"lnorm",
"lognormal",
"normal",
"gamma",
"lgamma",
"pareto",
"weibull",
"genbeta"
),
is.numeric(status_prob_open),
status_prob_open > 0 && status_prob_open < 1
)

beg_date <- as.Date(start_date)
end_date <- as.Date(end_date)
accident_range <- as.numeric(end_date - beg_date)
set.seed(seed)
accident_date <- sample(0:accident_range, size = n_claims, replace = TRUE)

# mean_log <- 7.5
# sd_log <- 1.5

payment_fun <- function(n) stats::rlnorm(n, params$mean_log, params$sd_log)

claims <- tibble::tibble(
claim_num = paste0("claim-", 1:n_claims),
accident_date = beg_date + lubridate::days(accident_date),
state = sample(c("TX", "CA", "GA", "FL"), size = n_claims, replace = TRUE),
claimant = randomNames::randomNames(n_claims),
report_lag = stats::rnbinom(n_claims, 5, .25), # 0 if claim closed when reported
status = stats::rbinom(n_claims, 1, 0.96), # initial payment amount
payment = payment_fun(n_claims)
) %>%
dplyr::mutate(
report_date = accident_date + report_lag,
payment = ifelse(status == 0, 0, payment),
case = payment + stats::runif(n_claims, 0.25, 8.0),
transaction_date = report_date
) %>%
dplyr::arrange(accident_date)

n_trans <- stats::rnbinom(n_claims, 3, 0.25)
trans_lag <- lapply(n_trans, function(x) stats::rnbinom(x, 7, 0.1)) %>%
lapply(function(x) { if (length(x) == 0) 0 else x })

for (i in seq_len(n_claims)) {
trans_lag[[i]] <- tibble::tibble(
"trans_lag" = trans_lag[[i]],
"claim_num" = paste0("claim-", i)
)
}

trans_tbl <- dplyr::bind_rows(trans_lag) %>%
dplyr::group_by(.data$claim_num) %>%
dplyr::mutate(trans_lag = cumsum(trans_lag)) %>%
dplyr::ungroup()

# separate all zero claims from the claims that have payments
zero_claims <- dplyr::filter(claims, status == 0)
first_trans <- dplyr::filter(claims, status == 1)

subsequent_trans <- dplyr::left_join(trans_tbl, first_trans, by = "claim_num") %>%
dplyr::filter(!is.na(.data$accident_date))

n_trans <- nrow(subsequent_trans)

subsequent_trans <- subsequent_trans %>%
dplyr::mutate(payment = payment_fun(n_trans),
case = pmax(.data$case * stats::rnorm(n_trans, 1.5, 0.1) - .data$payment, 500),
transaction_date = .data$report_date + .data$trans_lag) %>%
dplyr::select(-.data$trans_lag)

trans <- dplyr::bind_rows(zero_claims, first_trans, subsequent_trans) %>%
dplyr::arrange(.data$transaction_date)

# add in a transaction number
trans$trans_num <- 1:nrow(trans)

# set final trans status to closed and case to 0
trans <- trans %>%
dplyr::arrange(.data$trans_num) %>%
dplyr::group_by(.data$claim_num) %>%
dplyr::mutate(final_trans = ifelse(.data$trans_num == max(.data$trans_num), TRUE, FALSE),
status = ifelse(.data$final_trans, 0, 1),
case = ifelse(.data$final_trans, 0, .data$case),
status = ifelse(.data$status == 0, "Closed", "Open"),
paid = round(cumsum(.data$payment), 0),
case = round(.data$case, 0),
payment = round(.data$payment, 0)) %>%
dplyr::select(-.data$final_trans) %>%
dplyr::arrange(.data$accident_date) %>%
dplyr::ungroup() %>%
dplyr::arrange(.data$claim_num, dplyr::desc(.data$transaction_date))

if (cache) { saveRDS(trans, file = "trans.RDS") }

trans

}

# t <- simulate_claims(n = 10)

get_claim_transactions <- function(claim_num, data = lossrx::claims_transactional) {

}
2 changes: 2 additions & 0 deletions inst/scripts/pkgdevt.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ c(
) |>
purrr::walk(usethis::use_r)

usethis::use_r("simulate_claims")

# Tests -------------------------------------------------------------------

usethis::use_testthat()
Expand Down
48 changes: 48 additions & 0 deletions man/simulate_claims.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 890c3e6

Please sign in to comment.