Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Function to Support Simulation of Transactional Claims Data #30

Merged
merged 3 commits into from
Jan 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ Imports:
magrittr,
pool,
purrr,
randomNames,
readr,
rlang (>= 0.4.11),
stats,
RPostgres,
shiny,
stringr,
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ importFrom(dbx,dbxExecute)
importFrom(dbx,dbxInsert)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,coalesce)
importFrom(dplyr,collect)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
importFrom(dplyr,lag)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
Expand All @@ -62,13 +64,15 @@ importFrom(glue,glue_collapse)
importFrom(here,here)
importFrom(lubridate,as_date)
importFrom(lubridate,ceiling_date)
importFrom(lubridate,days)
importFrom(lubridate,is.Date)
importFrom(lubridate,mdy)
importFrom(lubridate,year)
importFrom(lubridate,ymd)
importFrom(magrittr,"%>%")
importFrom(pool,dbPool)
importFrom(purrr,map_dfc)
importFrom(randomNames,randomNames)
importFrom(readr,read_csv)
importFrom(readr,read_file)
importFrom(rlang,":=")
Expand All @@ -79,6 +83,11 @@ importFrom(rlang,as_name)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,inform)
importFrom(stats,rbinom)
importFrom(stats,rlnorm)
importFrom(stats,rnbinom)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(shiny,HTML)
importFrom(stringr,str_extract)
importFrom(stringr,str_extract_all)
Expand Down
153 changes: 153 additions & 0 deletions R/actuary-simulate_claims.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#' simulate_claims
#'
#' @description A function to simulate *transactional* actuarial claims/loss
#' data for Property Casualty Insurance.
#'
#' @param n_claims Numeric - Number of claims to be simulated.
#' @param start_date,end_date Character/Date - Start and End dates for simulation to create claims within (experience_period).
#' @param seed Numeric - the seed is used to isolate randomness during statistical simulations.
#' @param loss_distribution Character - must be one of the distributions mentioned in the details below. Defaults to lognormal.
#' @param probability_open Numeric - must be within `0 < x < 1` and represents probability a claim is open when running binomial simulations for claims' status.
#'
#' @details
#'
#' Severity/Loss Distributions:
#' - Normal: `norm`
#' - Lognormal: `lnorm`
#' - Gamma: `gamma`
#' - LogGamma: `lgamma`
#' - Pareto: `pareto`
#' - Weibull: `weibull`
#' - Generalized Beta: `genbeta`
#'
#' @return The return value, if any, from executing the function.
#'
#' @importFrom dplyr mutate arrange bind_rows group_by ungroup filter left_join select
#' @importFrom lubridate days
#' @importFrom randomNames randomNames
#' @importFrom stats rlnorm rnbinom rbinom runif rnorm
#' @importFrom tibble tibble
simulate_claims <- function(n_claims = 1000,
start_date = "2015-01-01",
end_date = Sys.Date(),
seed = 12345,
loss_distribution = "lnorm",
params = list(mean_log = 7.5, sd_log = 1.5),
status_prob_open = 0.96,
cache = FALSE,
...) {

# loss_distribution <- match.arg("loss_distribution")

stopifnot(
is.numeric(n_claims) && n_claims > 0,
class(as.Date(start_date)) == "Date",
class(as.Date(end_date)) == "Date" &&
as.Date(end_date) > as.Date(start_date),
is.numeric(seed),
loss_distribution %in% c(
"lnorm",
"lognormal",
"normal",
"gamma",
"lgamma",
"pareto",
"weibull",
"genbeta"
),
is.numeric(status_prob_open),
status_prob_open > 0 && status_prob_open < 1
)

beg_date <- as.Date(start_date)
end_date <- as.Date(end_date)
accident_range <- as.numeric(end_date - beg_date)
set.seed(seed)
accident_date <- sample(0:accident_range, size = n_claims, replace = TRUE)

# mean_log <- 7.5
# sd_log <- 1.5

payment_fun <- function(n) stats::rlnorm(n, params$mean_log, params$sd_log)

claims <- tibble::tibble(
claim_num = paste0("claim-", 1:n_claims),
accident_date = beg_date + lubridate::days(accident_date),
state = sample(c("TX", "CA", "GA", "FL"), size = n_claims, replace = TRUE),
claimant = randomNames::randomNames(n_claims),
report_lag = stats::rnbinom(n_claims, 5, .25), # 0 if claim closed when reported
status = stats::rbinom(n_claims, 1, 0.96), # initial payment amount
payment = payment_fun(n_claims)
) %>%
dplyr::mutate(
report_date = accident_date + report_lag,
payment = ifelse(status == 0, 0, payment),
case = payment + stats::runif(n_claims, 0.25, 8.0),
transaction_date = report_date
) %>%
dplyr::arrange(accident_date)

n_trans <- stats::rnbinom(n_claims, 3, 0.25)
trans_lag <- lapply(n_trans, function(x) stats::rnbinom(x, 7, 0.1)) %>%
lapply(function(x) { if (length(x) == 0) 0 else x })

for (i in seq_len(n_claims)) {
trans_lag[[i]] <- tibble::tibble(
"trans_lag" = trans_lag[[i]],
"claim_num" = paste0("claim-", i)
)
}

trans_tbl <- dplyr::bind_rows(trans_lag) %>%
dplyr::group_by(.data$claim_num) %>%
dplyr::mutate(trans_lag = cumsum(trans_lag)) %>%
dplyr::ungroup()

# separate all zero claims from the claims that have payments
zero_claims <- dplyr::filter(claims, status == 0)
first_trans <- dplyr::filter(claims, status == 1)

subsequent_trans <- dplyr::left_join(trans_tbl, first_trans, by = "claim_num") %>%
dplyr::filter(!is.na(.data$accident_date))

n_trans <- nrow(subsequent_trans)

subsequent_trans <- subsequent_trans %>%
dplyr::mutate(payment = payment_fun(n_trans),
case = pmax(.data$case * stats::rnorm(n_trans, 1.5, 0.1) - .data$payment, 500),
transaction_date = .data$report_date + .data$trans_lag) %>%
dplyr::select(-.data$trans_lag)

trans <- dplyr::bind_rows(zero_claims, first_trans, subsequent_trans) %>%
dplyr::arrange(.data$transaction_date)

# add in a transaction number
trans$trans_num <- 1:nrow(trans)

# set final trans status to closed and case to 0
trans <- trans %>%
dplyr::arrange(.data$trans_num) %>%
dplyr::group_by(.data$claim_num) %>%
dplyr::mutate(final_trans = ifelse(.data$trans_num == max(.data$trans_num), TRUE, FALSE),
status = ifelse(.data$final_trans, 0, 1),
case = ifelse(.data$final_trans, 0, .data$case),
status = ifelse(.data$status == 0, "Closed", "Open"),
paid = round(cumsum(.data$payment), 0),
case = round(.data$case, 0),
payment = round(.data$payment, 0)) %>%
dplyr::select(-.data$final_trans) %>%
dplyr::arrange(.data$accident_date) %>%
dplyr::ungroup() %>%
dplyr::arrange(.data$claim_num, dplyr::desc(.data$transaction_date))

if (cache) { saveRDS(trans, file = "trans.RDS") }

trans

}

# t <- simulate_claims(n = 10)

get_claim_transactions <- function(claim_num, data = lossrx::claims_transactional) {

}
2 changes: 2 additions & 0 deletions inst/scripts/pkgdevt.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ c(
) |>
purrr::walk(usethis::use_r)

usethis::use_r("simulate_claims")

# Tests -------------------------------------------------------------------

usethis::use_testthat()
Expand Down
48 changes: 48 additions & 0 deletions man/simulate_claims.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.