IncidencePrevalence

Estimate Incidence Rates and Prevalence in OMOP CDM

2025-06-29

Overview

Concepts
Interface
More information

Concepts

Denominator population
Incidence rates
Prevalence
- Point prevalence
- Period prevalence

Denominator population

Observation periods

Denominator population

Observation periods + study period

Denominator population

Observation periods + study period + prior history requirement

Denominator population

Observation periods + study period + prior history requirement + age (and sex) restriction

Incidence rates

Washout all history, no repetitive events

Incidence rates

No washout, no repetitive events

Incidence rates

Some washout, no repetitive events

Incidence rates

Some washout, repetitive events

Prevalence

Point prevalence

Prevalence

Period prevalence

Overview

Concepts
Interface
More information

Required packages

install.packages("IncidencePrevalence")

library(CDMConnector)
library(IncidencePrevalence)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gt)

generateDenominatorCohortSet()

# Build a mock CDM reference from a sample of 50,000 people, then create the
# default denominator cohort and store it in the CDM under the name "dpop".
cdm <- mockIncidencePrevalence(sampleSize = 50000)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "dpop")

# Preview the columns of the new cohort table
glimpse(cdm$dpop)

Rows: ??
Columns: 4
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ subject_id           <int> 1, 3, 7, 8, 11, 12, 14, 21, 22, 29, 32, 33, 36, 37, 41, 42, 43, 45, 46, 47, 49, 51, 52, 5…
$ cohort_start_date    <date> 1949-10-29, 1977-03-02, 1994-07-10, 1960-05-06, 1945-08-21, 1950-04-30, 1981-06-10, 1973…
$ cohort_end_date      <date> 1957-03-20, 1982-06-05, 1996-11-16, 1963-04-12, 1951-02-11, 1954-05-09, 1982-05-31, 1975…

generateDenominatorCohortSet()

# Re-create "dpop", this time limited to the 2008-01-01 to 2012-01-01
# study period.
cdm <- generateDenominatorCohortSet(
  cdm = cdm,
  name = "dpop",
  cohortDateRange = as.Date(c("2008-01-01", "2012-01-01"))
)

glimpse(cdm$dpop)

Rows: ??
Columns: 4
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ subject_id           <int> 33, 41, 52, 59, 69, 76, 79, 89, 107, 122, 126, 136, 141, 151, 156, 159, 164, 174, 175, 17…
$ cohort_start_date    <date> 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008…
$ cohort_end_date      <date> 2008-08-04, 2012-01-01, 2011-10-24, 2012-01-01, 2012-01-01, 2009-03-21, 2012-01-01, 2009…

generateDenominatorCohortSet()

# Number of records and subjects per denominator cohort
cdm$dpop %>%
  cohortCount()

# A tibble: 1 × 3
  cohort_definition_id number_records number_subjects
                 <int>          <int>           <int>
1                    1           3575            3575

# Settings used to define each denominator cohort
cdm$dpop %>%
  settings()

# A tibble: 1 × 11
  cohort_definition_id cohort_name    age_group sex   days_prior_observation start_date end_date   requirements_at_entry
                 <int> <chr>          <chr>     <chr>                  <dbl> <date>     <date>     <chr>                
1                    1 denominator_c… 0 to 150  Both                       0 2008-01-01 2012-01-01 FALSE                
# ℹ 3 more variables: target_cohort_definition_id <int>, target_cohort_name <chr>, time_at_risk <chr>

generateDenominatorCohortSet()

# Step-by-step exclusions applied while building the denominator cohort
cdm$dpop %>%
  attrition()

# A tibble: 8 × 7
  cohort_definition_id number_records number_subjects reason_id reason                excluded_records excluded_subjects
                 <int>          <int>           <int>     <int> <chr>                            <int>             <int>
1                    1          50000           50000         1 Starting population                 NA                NA
2                    1          50000           50000         2 Missing year of birth                0                 0
3                    1          50000           50000         3 Missing sex                          0                 0
4                    1          50000           50000         4 Cannot satisfy age c…                0                 0
5                    1           3575            3575         5 No observation time …            46425             46425
6                    1           3575            3575         6 Doesn't satisfy age …                0                 0
7                    1           3575            3575         7 Prior history requir…                0                 0
8                    1           3575            3575        10 No observation time …                0                 0

generateDenominatorCohortSet()

# Denominator cohorts for every combination of the two age groups and the
# two sexes within the study period.
cdm <- generateDenominatorCohortSet(
  cdm = cdm,
  name = "dpop",
  cohortDateRange = as.Date(c("2008-01-01", "2012-01-01")),
  ageGroup = list(c(0, 49), c(50, 100)),
  sex = c("Male", "Female")
)

glimpse(cdm$dpop)

Rows: ??
Columns: 4
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ subject_id           <int> 59, 89, 136, 200, 208, 396, 501, 579, 581, 615, 642, 665, 740, 744, 773, 834, 910, 923, 1…
$ cohort_start_date    <date> 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2009-08-30, 2008-01-01, 2008…
$ cohort_end_date      <date> 2012-01-01, 2009-08-06, 2010-02-18, 2010-12-23, 2012-01-01, 2012-01-01, 2008-12-05, 2011…

generateDenominatorCohortSet()

# As before, but additionally crossing the cohorts with two prior
# observation requirements (0 and 180 days).
cdm <- generateDenominatorCohortSet(
  cdm = cdm,
  name = "dpop",
  cohortDateRange = as.Date(c("2008-01-01", "2012-01-01")),
  ageGroup = list(c(0, 49), c(50, 100)),
  sex = c("Male", "Female"),
  daysPriorObservation = c(0, 180)
)

glimpse(cdm$dpop)

Rows: ??
Columns: 4
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ subject_id           <int> 59, 89, 136, 200, 208, 396, 501, 579, 581, 615, 642, 665, 740, 744, 773, 834, 910, 923, 1…
$ cohort_start_date    <date> 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2009-08-30, 2008-01-01, 2008…
$ cohort_end_date      <date> 2012-01-01, 2009-08-06, 2010-02-18, 2010-12-23, 2012-01-01, 2012-01-01, 2008-12-05, 2011…

generateDenominatorCohortSet()

# One settings row per age group / sex / prior observation combination
cdm$dpop %>%
  settings()

# A tibble: 8 × 11
  cohort_definition_id cohort_name    age_group sex   days_prior_observation start_date end_date   requirements_at_entry
                 <int> <chr>          <chr>     <chr>                  <dbl> <date>     <date>     <chr>                
1                    1 denominator_c… 0 to 49   Male                       0 2008-01-01 2012-01-01 FALSE                
2                    2 denominator_c… 0 to 49   Male                     180 2008-01-01 2012-01-01 FALSE                
3                    3 denominator_c… 0 to 49   Fema…                      0 2008-01-01 2012-01-01 FALSE                
4                    4 denominator_c… 0 to 49   Fema…                    180 2008-01-01 2012-01-01 FALSE                
5                    5 denominator_c… 50 to 100 Male                       0 2008-01-01 2012-01-01 FALSE                
6                    6 denominator_c… 50 to 100 Male                     180 2008-01-01 2012-01-01 FALSE                
7                    7 denominator_c… 50 to 100 Fema…                      0 2008-01-01 2012-01-01 FALSE                
8                    8 denominator_c… 50 to 100 Fema…                    180 2008-01-01 2012-01-01 FALSE                
# ℹ 3 more variables: target_cohort_definition_id <int>, target_cohort_name <chr>, time_at_risk <chr>

generateDenominatorCohortSet()

# Records and subjects in each of the denominator cohorts
cdm$dpop %>%
  cohortCount()

# A tibble: 8 × 3
  cohort_definition_id number_records number_subjects
                 <int>          <int>           <int>
1                    1            973             973
2                    2            959             959
3                    3            902             902
4                    4            890             890
5                    5            933             933
6                    6            925             925
7                    7            922             922
8                    8            900             900

generateDenominatorCohortSet()

# Attrition for the first denominator cohort only
cdm$dpop %>%
  attrition() %>%
  filter(cohort_definition_id == 1)

# A tibble: 9 × 7
  cohort_definition_id number_records number_subjects reason_id reason                excluded_records excluded_subjects
                 <int>          <int>           <int>     <int> <chr>                            <int>             <int>
1                    1          50000           50000         1 Starting population                 NA                NA
2                    1          50000           50000         2 Missing year of birth                0                 0
3                    1          50000           50000         3 Missing sex                          0                 0
4                    1          50000           50000         4 Cannot satisfy age c…                0                 0
5                    1           3575            3575         5 No observation time …            46425             46425
6                    1           3575            3575         6 Doesn't satisfy age …                0                 0
7                    1           3575            3575         7 Prior history requir…                0                 0
8                    1           1822            1822         8 Not Male                          1753              1753
9                    1            973             973        10 No observation time …              849               849

generateDenominatorCohortSet()

# Pick four subjects with the most rows in cdm$dpop, i.e. people who
# contribute to more than one denominator cohort.
ids <- cdm$dpop %>%
  group_by(subject_id) %>%
  tally() %>%
  collect() %>%
  arrange(desc(n)) %>%
  slice_head(n = 4) %>%
  pull(subject_id)

generateDenominatorCohortSet()

# Bring the cohort table into R and attach each cohort's settings; the
# cohort id becomes a string so it can be used as a discrete colour.
dpop <- left_join(
  collect(cdm$dpop),
  settings(cdm$dpop),
  by = "cohort_definition_id"
) %>%
  mutate(cohort_definition_id = as.character(cohort_definition_id))

# For the selected subjects, draw each cohort entry as a segment running from
# cohort start to cohort end, one panel per sex / prior observation setting.
# NOTE(review): per the ggplot2 docs `space` only takes effect when the
# matching scales are free; confirm whether `scales = "free_y"` was intended.
plot <- dpop %>%
  filter(subject_id %in% ids) %>%
  pivot_longer(cols = c("cohort_start_date", "cohort_end_date")) %>%
  ggplot(aes(
    x = as.character(subject_id),
    y = value,
    colour = cohort_definition_id,
    group = subject_id
  )) +
  facet_grid(sex + days_prior_observation ~ ., space = "free_y") +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_line(position = position_dodge(width = 0.5)) +
  theme_bw() +
  theme(legend.position = "top") +
  ylab("Year") +
  coord_flip()

generateDenominatorCohortSet()

# Render the figure built above
print(plot)

Adding (time-invariant) variables for stratification

If we later want to estimate incidence or prevalence stratified by a time-invariant characteristic, we will need to add a variable to our denominator cohort table.

# Add a stratification column: subjects with an id below 20 are labelled
# "first", everyone else "second".
cdm$dpop <- cdm$dpop %>%
  mutate(
    group = if_else(as.numeric(subject_id) < 20, "first", "second")
  )

glimpse(cdm$dpop)

Rows: ??
Columns: 5
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ subject_id           <int> 59, 89, 136, 200, 208, 396, 501, 579, 581, 615, 642, 665, 740, 744, 773, 834, 910, 923, 1…
$ cohort_start_date    <date> 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2008-01-01, 2009-08-30, 2008-01-01, 2008…
$ cohort_end_date      <date> 2012-01-01, 2009-08-06, 2010-02-18, 2010-12-23, 2012-01-01, 2012-01-01, 2008-12-05, 2011…
$ group                <chr> "second", "second", "second", "second", "second", "second", "second", "second", "second",…

generateTargetDenominatorCohortSet()

When we want to stratify on a time-varying characteristic, we will do this by first creating a cohort for it. Once we have the cohort, we will use it when creating our denominator cohort.

generateTargetDenominatorCohortSet()

# Hand-built person table: five people, the first two with gender concept
# 8507 and the remaining three with 8532, all born on 2000-06-01.
personTable <- tibble(
  person_id = as.character(1:5),
  gender_concept_id = rep(c("8507", "8532"), times = c(2, 3)),
  year_of_birth = 2000,
  month_of_birth = 6,
  day_of_birth = 1
)

# One observation period per person
observationPeriodTable <- tibble(
  observation_period_id = "1",
  person_id = as.character(1:5),
  observation_period_start_date = as.Date(c(
    "2010-12-19", "2005-04-01", "2009-04-10", "2010-08-20", "2010-01-01"
  )),
  observation_period_end_date = as.Date(c(
    "2011-06-19", "2005-11-29", "2016-01-02", "2011-12-11", "2015-06-01"
  ))
)

# Target cohort: five entries spread over subjects 3, 5 and 2
acute_asthma <- tibble(
  cohort_definition_id = rep("1", times = 5),
  subject_id = c("3", "3", "5", "5", "2"),
  cohort_start_date = as.Date(c(
    "2012-01-01", "2015-06-01", "2014-10-01", "2010-06-01", "2005-08-20"
  )),
  cohort_end_date = as.Date(c(
    "2013-01-01", "2015-12-31", "2015-04-01", "2010-06-01", "2005-09-20"
  ))
)

# Mock database assembled from the three tables above
cdm <- mockIncidencePrevalence(
  personTable = personTable,
  observationPeriodTable = observationPeriodTable,
  targetCohortTable = acute_asthma
)

generateTargetDenominatorCohortSet()

# Denominator cohort built from the entries of the "target" cohort table
cdm <- generateTargetDenominatorCohortSet(
  cdm = cdm,
  targetCohortTable = "target",
  name = "denominator_acute_asthma"
)

dplyr::glimpse(cdm$denominator_acute_asthma)

Rows: ??
Columns: 4
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1, 1, 1
$ subject_id           <chr> "2", "3", "5", "3", "5"
$ cohort_start_date    <date> 2005-08-20, 2015-06-01, 2010-06-01, 2012-01-01, 2014-10-01
$ cohort_end_date      <date> 2005-09-20, 2015-12-31, 2010-06-01, 2013-01-01, 2015-04-01

generateTargetDenominatorCohortSet()

We can add demographic requirements like before. But it is important to note that these are applied at the cohort start date of the target cohort.

# Same target cohort, now restricted to females aged 11 to 15 with no
# prior observation requirement.
cdm <- generateTargetDenominatorCohortSet(
  cdm = cdm,
  targetCohortTable = "target",
  name = "denominator_acute_asthma_2",
  ageGroup = list(c(11, 15)),
  sex = "Female",
  daysPriorObservation = 0
)

dplyr::glimpse(cdm$denominator_acute_asthma_2)

Rows: ??
Columns: 4
Database: DuckDB v1.3.1 [unknown@Linux 6.11.0-1015-azure:R 4.5.1/:memory:]
$ cohort_definition_id <int> 1, 1, 1
$ subject_id           <chr> "3", "3", "5"
$ cohort_start_date    <date> 2015-06-01, 2012-01-01, 2014-10-01
$ cohort_end_date      <date> 2015-12-31, 2013-01-01, 2015-04-01

estimateIncidence()

# Fresh mock CDM for the incidence examples
cdm <- mockIncidencePrevalence(sampleSize = 50000, outPre = 0.5)

# Denominator split into four age groups over the study period
cdm <- generateDenominatorCohortSet(
  cdm = cdm,
  name = "denominator",
  cohortDateRange = as.Date(c("2008-01-01", "2012-01-01")),
  ageGroup = list(c(0, 30), c(31, 50), c(51, 70), c(71, 100))
)

# Yearly incidence: washout over all history, no repeated events
inc <- estimateIncidence(
  cdm = cdm,
  denominatorTable = "denominator",
  outcomeTable = "outcome",
  interval = "years",
  repeatedEvents = FALSE,
  outcomeWashout = Inf
)

estimateIncidence()

# Inspect the structure of the incidence results
glimpse(inc)

Rows: 288
Columns: 13
$ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, …
$ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock…
$ group_name       <chr> "denominator_cohort_name &&& outcome_cohort_name", "denominator_cohort_name &&& outcome_cohor…
$ group_level      <chr> "denominator_cohort_1 &&& cohort_1", "denominator_cohort_1 &&& cohort_1", "denominator_cohort…
$ strata_name      <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ strata_level     <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ variable_name    <chr> "Denominator", "Outcome", "Denominator", "Denominator", "Outcome", "Outcome", "Outcome", "Den…
$ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ estimate_name    <chr> "denominator_count", "outcome_count", "person_days", "person_years", "incidence_100000_pys", …
$ estimate_type    <chr> "integer", "integer", "numeric", "numeric", "numeric", "numeric", "numeric", "integer", "inte…
$ estimate_value   <chr> "662", "63", "189075", "517.659", "12170.174", "9351.896", "15570.939", "624", "64", "180488"…
$ additional_name  <chr> "incidence_start_date &&& incidence_end_date &&& analysis_interval", "incidence_start_date &&…
$ additional_level <chr> "2008-01-01 &&& 2008-12-31 &&& years", "2008-01-01 &&& 2008-12-31 &&& years", "2008-01-01 &&&…

estimateIncidence()

# Quarterly incidence: 365-day washout, repeated events allowed
inc <- estimateIncidence(
  cdm = cdm,
  denominatorTable = "denominator",
  outcomeTable = "outcome",
  interval = "quarters",
  repeatedEvents = TRUE,
  outcomeWashout = 365
)

glimpse(inc)

Rows: 624
Columns: 13
$ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock…
$ group_name       <chr> "denominator_cohort_name &&& outcome_cohort_name", "denominator_cohort_name &&& outcome_cohor…
$ group_level      <chr> "denominator_cohort_1 &&& cohort_1", "denominator_cohort_1 &&& cohort_1", "denominator_cohort…
$ strata_name      <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ strata_level     <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ variable_name    <chr> "Denominator", "Outcome", "Denominator", "Denominator", "Outcome", "Outcome", "Outcome", "Den…
$ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ estimate_name    <chr> "denominator_count", "outcome_count", "person_days", "person_years", "incidence_100000_pys", …
$ estimate_type    <chr> "integer", "integer", "numeric", "numeric", "numeric", "numeric", "numeric", "integer", "inte…
$ estimate_value   <chr> "732", "13", "62234", "170.387", "7629.69", "4062.489", "13047.002", "718", "17", "61093", "1…
$ additional_name  <chr> "incidence_start_date &&& incidence_end_date &&& analysis_interval", "incidence_start_date &&…
$ additional_level <chr> "2008-01-01 &&& 2008-03-31 &&& quarters", "2008-01-01 &&& 2008-03-31 &&& quarters", "2008-01-…

plotIncidence()

# Incidence estimates, one panel per denominator age group
inc %>%
  plotIncidence(facet = "denominator_age_group")

estimateIncidence()

# Formatted table of the incidence estimates
inc %>%
  tableIncidence()

Incidence start date	Incidence end date	Denominator age group	Denominator sex	Estimate name
Incidence start date	Incidence end date	Denominator age group	Denominator sex	Denominator (N)	Person-years	Outcome (N)	Incidence 100,000 person-years [95% CI]
mock; cohort_1
2008-01-01	2008-03-31	0 to 30	Both	732	170.39	13	7,629.69 (4,062.49 - 13,047.00)
2008-04-01	2008-06-30	0 to 30	Both	718	167.26	17	10,163.57 (5,920.66 - 16,272.87)
2008-07-01	2008-09-30	0 to 30	Both	708	167.39	18	10,753.52 (6,373.22 - 16,995.20)
2008-10-01	2008-12-31	0 to 30	Both	715	165.77	15	9,048.63 (5,064.45 - 14,924.33)
2009-01-01	2009-03-31	0 to 30	Both	695	160.15	15	9,366.51 (5,242.36 - 15,448.64)
2009-04-01	2009-06-30	0 to 30	Both	695	159.13	22	13,825.43 (8,664.33 - 20,931.88)
2009-07-01	2009-09-30	0 to 30	Both	670	157.35	15	9,533.07 (5,335.59 - 15,723.35)
2009-10-01	2009-12-31	0 to 30	Both	656	156.67	12	7,659.22 (3,957.63 - 13,379.11)
2010-01-01	2010-03-31	0 to 30	Both	626	147.78	14	9,473.28 (5,179.13 - 15,894.56)
2010-04-01	2010-06-30	0 to 30	Both	600	142.42	10	7,021.34 (3,367.00 - 12,912.49)
2010-07-01	2010-09-30	0 to 30	Both	570	136.14	16	11,752.61 (6,717.63 - 19,085.50)
2010-10-01	2010-12-31	0 to 30	Both	530	126.99	9	7,087.17 (3,240.71 - 13,453.66)
2011-01-01	2011-03-31	0 to 30	Both	497	117.62	8	6,801.33 (2,936.33 - 13,401.34)
2011-04-01	2011-06-30	0 to 30	Both	470	112.61	4	3,551.99 (967.80 - 9,094.50)
2011-07-01	2011-09-30	0 to 30	Both	447	106.78	6	5,619.08 (2,062.10 - 12,230.38)
2011-10-01	2011-12-31	0 to 30	Both	418	99.00	8	8,080.97 (3,488.79 - 15,922.74)
2008-01-01	2008-03-31	31 to 50	Both	694	162.12	13	8,018.95 (4,269.75 - 13,712.65)
2008-04-01	2008-06-30	31 to 50	Both	702	165.28	10	6,050.19 (2,901.30 - 11,126.52)
2008-07-01	2008-09-30	31 to 50	Both	712	167.91	14	8,337.70 (4,558.30 - 13,989.25)
2008-10-01	2008-12-31	31 to 50	Both	695	163.34	13	7,959.10 (4,237.89 - 13,610.31)
2009-01-01	2009-03-31	31 to 50	Both	682	158.32	10	6,316.24 (3,028.88 - 11,615.79)
2009-04-01	2009-06-30	31 to 50	Both	689	160.18	12	7,491.67 (3,871.05 - 13,086.43)
2009-07-01	2009-09-30	31 to 50	Both	699	164.70	18	10,929.09 (6,477.27 - 17,272.68)
2009-10-01	2009-12-31	31 to 50	Both	697	161.48	16	9,908.29 (5,663.44 - 16,090.44)
2010-01-01	2010-03-31	31 to 50	Both	655	153.79	19	12,354.27 (7,438.08 - 19,292.72)
2010-04-01	2010-06-30	31 to 50	Both	621	146.93	6	4,083.58 (1,498.60 - 8,888.23)
2010-07-01	2010-09-30	31 to 50	Both	589	142.36	14	9,834.02 (5,376.35 - 16,499.81)
2010-10-01	2010-12-31	31 to 50	Both	568	135.00	9	6,666.76 (3,048.47 - 12,655.60)
2011-01-01	2011-03-31	31 to 50	Both	535	124.54	7	5,620.46 (2,259.72 - 11,580.29)
2011-04-01	2011-06-30	31 to 50	Both	504	121.08	9	7,432.86 (3,398.78 - 14,109.88)
2011-07-01	2011-09-30	31 to 50	Both	486	114.72	11	9,588.90 (4,786.74 - 17,157.19)
2011-10-01	2011-12-31	31 to 50	Both	455	108.55	6	5,527.51 (2,028.50 - 12,031.06)
2008-01-01	2008-03-31	51 to 70	Both	662	153.18	12	7,834.18 (4,048.03 - 13,684.73)
2008-04-01	2008-06-30	51 to 70	Both	655	151.24	18	11,901.69 (7,053.70 - 18,809.80)
2008-07-01	2008-09-30	51 to 70	Both	656	154.21	15	9,727.06 (5,444.16 - 16,043.30)
2008-10-01	2008-12-31	51 to 70	Both	646	154.67	11	7,112.05 (3,550.31 - 12,725.43)
2009-01-01	2009-03-31	51 to 70	Both	658	152.59	15	9,830.52 (5,502.07 - 16,213.95)
2009-04-01	2009-06-30	51 to 70	Both	666	153.96	16	10,392.44 (5,940.18 - 16,876.68)
2009-07-01	2009-09-30	51 to 70	Both	667	155.18	22	14,176.99 (8,884.65 - 21,464.14)
2009-10-01	2009-12-31	51 to 70	Both	660	155.92	11	7,054.76 (3,521.71 - 12,622.92)
2010-01-01	2010-03-31	51 to 70	Both	633	149.84	10	6,673.92 (3,200.40 - 12,273.58)
2010-04-01	2010-06-30	51 to 70	Both	619	147.54	12	8,133.39 (4,202.64 - 14,207.39)
2010-07-01	2010-09-30	51 to 70	Both	598	142.71	8	5,605.77 (2,420.18 - 11,045.61)
2010-10-01	2010-12-31	51 to 70	Both	574	137.31	8	5,826.27 (2,515.37 - 11,480.08)
2011-01-01	2011-03-31	51 to 70	Both	547	127.41	15	11,773.39 (6,589.48 - 19,418.41)
2011-04-01	2011-06-30	51 to 70	Both	525	122.76	10	8,145.98 (3,906.31 - 14,980.74)
2011-07-01	2011-09-30	51 to 70	Both	498	119.00	8	6,722.97 (2,902.50 - 13,246.93)
2011-10-01	2011-12-31	51 to 70	Both	482	114.96	9	7,829.01 (3,579.92 - 14,861.91)
2008-01-01	2008-03-31	71 to 100	Both	553	129.06	12	9,298.00 (4,804.41 - 16,241.74)
2008-04-01	2008-06-30	71 to 100	Both	568	130.07	22	16,913.58 (10,599.65 - 25,607.36)
2008-07-01	2008-09-30	71 to 100	Both	565	133.99	13	9,702.00 (5,165.91 - 16,590.71)
2008-10-01	2008-12-31	71 to 100	Both	572	135.16	17	12,577.22 (7,326.69 - 20,137.35)
2009-01-01	2009-03-31	71 to 100	Both	581	132.86	15	11,289.91 (6,318.88 - 18,620.99)
2009-04-01	2009-06-30	71 to 100	Both	585	137.14	12	8,750.25 (4,521.38 - 15,284.92)
2009-07-01	2009-09-30	71 to 100	Both	586	139.15	16	11,498.71 (6,572.51 - 18,673.19)
2009-10-01	2009-12-31	71 to 100	Both	610	141.44	6	4,242.23 (1,556.82 - 9,233.55)
2010-01-01	2010-03-31	71 to 100	Both	586	138.06	16	11,589.25 (6,624.26 - 18,820.21)
2010-04-01	2010-06-30	71 to 100	Both	563	134.34	12	8,932.29 (4,615.45 - 15,602.92)
2010-07-01	2010-09-30	71 to 100	Both	553	132.16	13	9,836.64 (5,237.59 - 16,820.95)
2010-10-01	2010-12-31	71 to 100	Both	522	127.57	12	9,406.38 (4,860.41 - 16,431.05)
2011-01-01	2011-03-31	71 to 100	Both	512	119.66	15	12,535.73 (7,016.15 - 20,675.78)
2011-04-01	2011-06-30	71 to 100	Both	493	118.44	8	6,754.19 (2,915.98 - 13,308.45)
2011-07-01	2011-09-30	71 to 100	Both	483	115.92	11	9,489.06 (4,736.90 - 16,978.54)
2011-10-01	2011-12-31	71 to 100	Both	468	111.96	6	5,358.86 (1,966.61 - 11,663.99)

estimatePointPrevalence() and estimatePeriodPrevalence()

# Fresh mock CDM and a denominator split into four age groups over the
# 2008-01-01 to 2012-01-01 study period.
cdm <- mockIncidencePrevalence(
  sampleSize = 50000,
  outPre = 0.5
)

cdm <- generateDenominatorCohortSet(
  cdm = cdm, name = "denominator",
  cohortDateRange = as.Date(c("2008-01-01", "2012-01-01")),
  ageGroup = list(
    c(0, 30),
    c(31, 50),
    c(51, 70),
    c(71, 100)
  )
)

# Point prevalence per yearly interval. The interval is spelled in lower
# case so it matches the value reported in the results ("years") and the
# spelling used by the other estimate*() calls in this document.
prev <- estimatePointPrevalence(
  cdm = cdm,
  denominatorTable = "denominator",
  outcomeTable = "outcome",
  interval = "years"
)

estimatePointPrevalence() and estimatePeriodPrevalence()

# Inspect the structure of the point prevalence results
glimpse(prev)

Rows: 276
Columns: 13
$ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, …
$ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock…
$ group_name       <chr> "denominator_cohort_name &&& outcome_cohort_name", "denominator_cohort_name &&& outcome_cohor…
$ group_level      <chr> "denominator_cohort_1 &&& cohort_1", "denominator_cohort_1 &&& cohort_1", "denominator_cohort…
$ strata_name      <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ strata_level     <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ variable_name    <chr> "Denominator", "Outcome", "Outcome", "Outcome", "Outcome", "Denominator", "Outcome", "Outcome…
$ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ estimate_name    <chr> "denominator_count", "outcome_count", "prevalence", "prevalence_95CI_lower", "prevalence_95CI…
$ estimate_type    <chr> "integer", "integer", "numeric", "numeric", "numeric", "integer", "integer", "numeric", "nume…
$ estimate_value   <chr> "755", "1", "0.00132", "0.00023", "0.00746", "708", "1", "0.00141", "0.00025", "0.00796", "67…
$ additional_name  <chr> "prevalence_start_date &&& prevalence_end_date &&& analysis_interval", "prevalence_start_date…
$ additional_level <chr> "2008-01-01 &&& 2008-01-01 &&& years", "2008-01-01 &&& 2008-01-01 &&& years", "2008-01-01 &&&…

estimatePointPrevalence() and estimatePeriodPrevalence()

# Period prevalence estimated for each quarter
prev <- estimatePeriodPrevalence(
  cdm = cdm,
  outcomeTable = "outcome",
  denominatorTable = "denominator",
  interval = "quarters"
)

glimpse(prev)

Rows: 496
Columns: 13
$ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock", "mock…
$ group_name       <chr> "denominator_cohort_name &&& outcome_cohort_name", "denominator_cohort_name &&& outcome_cohor…
$ group_level      <chr> "denominator_cohort_1 &&& cohort_1", "denominator_cohort_1 &&& cohort_1", "denominator_cohort…
$ strata_name      <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ strata_level     <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ variable_name    <chr> "Denominator", "Outcome", "Outcome", "Outcome", "Outcome", "Denominator", "Outcome", "Outcome…
$ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ estimate_name    <chr> "denominator_count", "outcome_count", "prevalence", "prevalence_95CI_lower", "prevalence_95CI…
$ estimate_type    <chr> "integer", "integer", "numeric", "numeric", "numeric", "integer", "integer", "numeric", "nume…
$ estimate_value   <chr> "790", "14", "0.01772", "0.01059", "0.02953", "770", "18", "0.02338", "0.01484", "0.03665", "…
$ additional_name  <chr> "prevalence_start_date &&& prevalence_end_date &&& analysis_interval", "prevalence_start_date…
$ additional_level <chr> "2008-01-01 &&& 2008-03-31 &&& quarters", "2008-01-01 &&& 2008-03-31 &&& quarters", "2008-01-…

plotPrevalence()

# Prevalence estimates, one panel per denominator age group
prev %>%
  plotPrevalence(facet = "denominator_age_group")

plotPrevalence()

# Formatted table of the prevalence estimates
prev %>%
  tablePrevalence()

Prevalence start date	Prevalence end date	Denominator age group	Denominator sex	Estimate name
Prevalence start date	Prevalence end date	Denominator age group	Denominator sex	Denominator (N)	Outcome (N)	Prevalence [95% CI]
mock; cohort_1
2008-01-01	2008-03-31	0 to 30	Both	790	14	0.02 (0.01 - 0.03)
2008-04-01	2008-06-30	0 to 30	Both	770	18	0.02 (0.01 - 0.04)
2008-07-01	2008-09-30	0 to 30	Both	757	18	0.02 (0.02 - 0.04)
2008-10-01	2008-12-31	0 to 30	Both	759	16	0.02 (0.01 - 0.03)
2009-01-01	2009-03-31	0 to 30	Both	734	16	0.02 (0.01 - 0.04)
2009-04-01	2009-06-30	0 to 30	Both	735	22	0.03 (0.02 - 0.04)
2009-07-01	2009-09-30	0 to 30	Both	716	15	0.02 (0.01 - 0.03)
2009-10-01	2009-12-31	0 to 30	Both	705	12	0.02 (0.01 - 0.03)
2010-01-01	2010-03-31	0 to 30	Both	670	14	0.02 (0.01 - 0.03)
2010-04-01	2010-06-30	0 to 30	Both	636	11	0.02 (0.01 - 0.03)
2010-07-01	2010-09-30	0 to 30	Both	602	16	0.03 (0.02 - 0.04)
2010-10-01	2010-12-31	0 to 30	Both	566	12	0.02 (0.01 - 0.04)
2011-01-01	2011-03-31	0 to 30	Both	530	9	0.02 (0.01 - 0.03)
2011-04-01	2011-06-30	0 to 30	Both	501	4	0.01 (0.00 - 0.02)
2011-07-01	2011-09-30	0 to 30	Both	469	6	0.01 (0.01 - 0.03)
2011-10-01	2011-12-31	0 to 30	Both	435	8	0.02 (0.01 - 0.04)
2008-01-01	2008-03-31	31 to 50	Both	735	13	0.02 (0.01 - 0.03)
2008-04-01	2008-06-30	31 to 50	Both	742	11	0.01 (0.01 - 0.03)
2008-07-01	2008-09-30	31 to 50	Both	748	15	0.02 (0.01 - 0.03)
2008-10-01	2008-12-31	31 to 50	Both	730	15	0.02 (0.01 - 0.03)
2009-01-01	2009-03-31	31 to 50	Both	717	11	0.02 (0.01 - 0.03)
2009-04-01	2009-06-30	31 to 50	Both	716	13	0.02 (0.01 - 0.03)
2009-07-01	2009-09-30	31 to 50	Both	727	18	0.02 (0.02 - 0.04)
2009-10-01	2009-12-31	31 to 50	Both	732	16	0.02 (0.01 - 0.04)
2010-01-01	2010-03-31	31 to 50	Both	694	20	0.03 (0.02 - 0.04)
2010-04-01	2010-06-30	31 to 50	Both	665	6	0.01 (0.00 - 0.02)
2010-07-01	2010-09-30	31 to 50	Both	623	14	0.02 (0.01 - 0.04)
2010-10-01	2010-12-31	31 to 50	Both	603	9	0.01 (0.01 - 0.03)
2011-01-01	2011-03-31	31 to 50	Both	564	7	0.01 (0.01 - 0.03)
2011-04-01	2011-06-30	31 to 50	Both	535	9	0.02 (0.01 - 0.03)
2011-07-01	2011-09-30	31 to 50	Both	513	12	0.02 (0.01 - 0.04)
2011-10-01	2011-12-31	31 to 50	Both	484	7	0.01 (0.01 - 0.03)
2008-01-01	2008-03-31	51 to 70	Both	694	13	0.02 (0.01 - 0.03)
2008-04-01	2008-06-30	51 to 70	Both	682	18	0.03 (0.02 - 0.04)
2008-07-01	2008-09-30	51 to 70	Both	691	18	0.03 (0.02 - 0.04)
2008-10-01	2008-12-31	51 to 70	Both	684	12	0.02 (0.01 - 0.03)
2009-01-01	2009-03-31	51 to 70	Both	691	16	0.02 (0.01 - 0.04)
2009-04-01	2009-06-30	51 to 70	Both	702	18	0.03 (0.02 - 0.04)
2009-07-01	2009-09-30	51 to 70	Both	707	23	0.03 (0.02 - 0.05)
2009-10-01	2009-12-31	51 to 70	Both	708	14	0.02 (0.01 - 0.03)
2010-01-01	2010-03-31	51 to 70	Both	678	10	0.01 (0.01 - 0.03)
2010-04-01	2010-06-30	51 to 70	Both	658	13	0.02 (0.01 - 0.03)
2010-07-01	2010-09-30	51 to 70	Both	630	8	0.01 (0.01 - 0.02)
2010-10-01	2010-12-31	51 to 70	Both	601	8	0.01 (0.01 - 0.03)
2011-01-01	2011-03-31	51 to 70	Both	572	15	0.03 (0.02 - 0.04)
2011-04-01	2011-06-30	51 to 70	Both	555	10	0.02 (0.01 - 0.03)
2011-07-01	2011-09-30	51 to 70	Both	527	9	0.02 (0.01 - 0.03)
2011-10-01	2011-12-31	51 to 70	Both	513	10	0.02 (0.01 - 0.04)
2008-01-01	2008-03-31	71 to 100	Both	583	14	0.02 (0.01 - 0.04)
2008-04-01	2008-06-30	71 to 100	Both	597	24	0.04 (0.03 - 0.06)
2008-07-01	2008-09-30	71 to 100	Both	605	14	0.02 (0.01 - 0.04)
2008-10-01	2008-12-31	71 to 100	Both	615	19	0.03 (0.02 - 0.05)
2009-01-01	2009-03-31	71 to 100	Both	628	15	0.02 (0.01 - 0.04)
2009-04-01	2009-06-30	71 to 100	Both	629	12	0.02 (0.01 - 0.03)
2009-07-01	2009-09-30	71 to 100	Both	628	17	0.03 (0.02 - 0.04)
2009-10-01	2009-12-31	71 to 100	Both	651	6	0.01 (0.00 - 0.02)
2010-01-01	2010-03-31	71 to 100	Both	621	18	0.03 (0.02 - 0.05)
2010-04-01	2010-06-30	71 to 100	Both	598	12	0.02 (0.01 - 0.03)
2010-07-01	2010-09-30	71 to 100	Both	583	13	0.02 (0.01 - 0.04)
2010-10-01	2010-12-31	71 to 100	Both	562	12	0.02 (0.01 - 0.04)
2011-01-01	2011-03-31	71 to 100	Both	549	16	0.03 (0.02 - 0.05)
2011-04-01	2011-06-30	71 to 100	Both	534	10	0.02 (0.01 - 0.03)
2011-07-01	2011-09-30	71 to 100	Both	516	14	0.03 (0.02 - 0.05)
2011-10-01	2011-12-31	71 to 100	Both	501	7	0.01 (0.01 - 0.03)

Overview

Concepts
Interface
More information

Package paper

https://onlinelibrary.wiley.com/doi/10.1002/pds.5717

IncidencePrevalence

👉 Packages website
👉 CRAN link
👉 Manual

📧 edward.burn@ndorms.ox.ac.uk