result <- summariseXXX(...)
Characterise cohorts
2025-07-06
This package aims to standardise and provide the tools to conduct characterisation studies as described in the Darwin-EU Catalogue of Standard Analytics.
We have three types of functions:
summarise: these functions produce a standardised output to summarise a cohort. This standard output is called summarised_result.
plot: these functions produce plots (currently, only ggplot, but working to implement plotly) from a summarised_result object.
table: these functions produce tables (gt and flextable) from a summarised_result object.
# Generic three-step workflow (XXX is a placeholder for the analysis name):
# 1. summarise: produce a summarised_result object.
result <- summariseXXX(...)
# 2. table: render the summarised_result as a table.
tableXXX(result)
# 3. plot: render the summarised_result as a plot.
plotXXX(result)
flowchart LR
  A[summarise function] --> B[Plot function]
  A --> C[Table function]
library(duckdb)
library(CDMConnector)
library(dplyr)
library(here)
library(CodelistGenerator)
library(CohortConstructor)
library(CohortCharacteristics)
library(PatientProfiles)
library(gt)
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(visOmopResults)
library(plotly)
# GiBleed cdm_reference object ----
# Name of the Eunomia mock dataset used throughout this tutorial.
datasetName <- "GiBleed"
# Make sure the dataset is available locally before connecting
# (presumably downloads it when missing -- see CDMConnector docs).
requireEunomia(datasetName = datasetName)
# Open a DuckDB connection to the dataset's database file.
con <- dbConnect(drv = duckdb(dbdir = eunomiaDir(datasetName = datasetName)))
Creating CDM database /home/runner/work/darwinTutorial2025/darwinTutorial2025/mock_datasets//GiBleed_5.3.zip
# Build the CDM reference from the open connection; the OMOP tables are read
# from, and new tables written to, the "main" schema.
cdm <- con |>
  cdmFromCon(
    cdmSchema = "main",
    writeSchema = "main",
    cdmName = datasetName
  )
# Print a summary of the reference.
cdm
── # OMOP CDM reference (duckdb) of GiBleed ────────────────────────────────────────────────────────────────────────────
• omop tables: person, observation_period, visit_occurrence, visit_detail, condition_occurrence, drug_exposure,
procedure_occurrence, device_exposure, measurement, observation, death, note, note_nlp, specimen, fact_relationship,
location, care_site, provider, payer_plan_period, cost, drug_era, dose_era, condition_era, metadata, cdm_source,
concept, vocabulary, domain, concept_class, concept_relationship, relationship, concept_synonym, concept_ancestor,
source_to_concept_map, drug_strength
• cohort tables: -
• achilles tables: -
• other tables: -
acetaminophen cohort
Let’s create a cohort that is all records of acetaminophen
with the following inclusion criteria:
# acetaminophen cohort ----
# Codelist for the ingredient acetaminophen; entries are named after the
# concept name, so the resulting cohort is called "acetaminophen".
codelist <- getDrugIngredientCodes(
  cdm = cdm,
  name = "acetaminophen",
  nameStyle = "{concept_name}"
)

# Build the cohort from the codelist, then apply the inclusion criteria in
# order (each step is recorded in the cohort attrition):
#   1. collapse records separated by a gap of up to 30 days,
#   2. no acetaminophen record in the 180 days before cohort start,
#   3. at least 180 days of prior observation,
#   4. cohort start date between 1990-01-01 and 2019-12-31.
cdm$acetaminophen <- conceptCohort(
  cdm = cdm,
  conceptSet = codelist,
  name = "acetaminophen",
  exit = "event_end_date"
) |>
  collapseCohorts(gap = 30) |>
  requireCohortIntersect(
    targetCohortTable = "acetaminophen",
    window = c(-180, -1),
    intersections = 0
  ) |>
  requirePriorObservation(minPriorObservation = 180) |>
  requireInDateRange(dateRange = as.Date(c("1990-01-01", "2019-12-31")))
summariseCohortAttrition
extracts the data from attrition(cdm$acetaminophen)
in a standard format:
# Summarise the attrition of the acetaminophen cohort as a summarised_result
# and inspect its columns.
result <- cdm$acetaminophen |>
  summariseCohortAttrition()
result |>
  glimpse()
Rows: 44
Columns: 13
$ result_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ cdm_name <chr> "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBl…
$ group_name <chr> "cohort_name", "cohort_name", "cohort_name", "cohort_name", "cohort_name", "cohort_name", "co…
$ group_level <chr> "acetaminophen", "acetaminophen", "acetaminophen", "acetaminophen", "acetaminophen", "acetami…
$ strata_name <chr> "reason", "reason", "reason", "reason", "reason", "reason", "reason", "reason", "reason", "re…
$ strata_level <chr> "Initial qualifying events", "Initial qualifying events", "Initial qualifying events", "Initi…
$ variable_name <chr> "number_records", "number_subjects", "excluded_records", "excluded_subjects", "number_records…
$ variable_level <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ estimate_name <chr> "count", "count", "count", "count", "count", "count", "count", "count", "count", "count", "co…
$ estimate_type <chr> "integer", "integer", "integer", "integer", "integer", "integer", "integer", "integer", "inte…
$ estimate_value <chr> "14205", "2679", "0", "0", "14205", "2679", "0", "0", "14205", "2679", "0", "0", "14205", "26…
$ additional_name <chr> "reason_id", "reason_id", "reason_id", "reason_id", "reason_id", "reason_id", "reason_id", "r…
$ additional_level <chr> "1", "1", "1", "1", "2", "2", "2", "2", "3", "3", "3", "3", "4", "4", "4", "4", "5", "5", "5"…
Rows: 1
Columns: 13
$ result_id <int> 1
$ result_type <chr> "summarise_cohort_attrition"
$ package_name <chr> "CohortCharacteristics"
$ package_version <chr> "1.0.0"
$ group <chr> "cohort_name"
$ strata <chr> "reason"
$ additional <chr> "reason_id"
$ min_cell_count <chr> "0"
$ cdm_version <chr> "5.3"
$ cohort_definition_id <chr> "1"
$ min_prior_observation <chr> "180"
$ table_name <chr> "acetaminophen"
$ vocabulary_version <chr> "v5.0 18-JAN-19"
bind Join multiple results objects.
settings Read the settings of the result object.
suppress Suppress the counts in the result that are below a certain number.
exportSummarisedResult Export the result object to a csv file.
importSummarisedResult Import a result object from a csv file.
# Render the attrition result as a table (default output type).
result |>
  tableCohortAttrition()
Reason |
Variable name
|
|||
---|---|---|---|---|
number_records | number_subjects | excluded_records | excluded_subjects | |
GiBleed; acetaminophen | ||||
Initial qualifying events | 14,205 | 2,679 | 0 | 0 |
Record start <= record end | 14,205 | 2,679 | 0 | 0 |
Record in observation | 14,205 | 2,679 | 0 | 0 |
Non-missing sex | 14,205 | 2,679 | 0 | 0 |
Non-missing year of birth | 14,205 | 2,679 | 0 | 0 |
Merge overlapping records | 13,908 | 2,679 | 297 | 0 |
Collapse cohort with a gap of 30 days. | 13,860 | 2,679 | 48 | 0 |
Not in cohort acetaminophen between -180 & -1 days relative to cohort_start_date | 13,506 | 2,679 | 354 | 0 |
Prior observation requirement: 180 days | 13,361 | 2,677 | 145 | 2 |
cohort_start_date after 1990-01-01 | 5,401 | 2,340 | 7,960 | 337 |
cohort_start_date before 2019-12-31 | 5,401 | 2,340 | 0 | 0 |
gt tables can easily be exported:
# Keep the attrition table in `tab` so it can be written to disk.
tab <- result |>
  tableCohortAttrition()
# pdf/html/png also allowed
tab |>
  gtsave(filename = here("attrition.docx"))
# The same table rendered as a flextable instead.
result |>
  tableCohortAttrition(type = "flextable")
Reason |
Variable name |
|||
---|---|---|---|---|
number_records |
number_subjects |
excluded_records |
excluded_subjects |
|
GiBleed; acetaminophen | ||||
Initial qualifying events |
14,205 |
2,679 |
0 |
0 |
Record start <= record end |
14,205 |
2,679 |
0 |
0 |
Record in observation |
14,205 |
2,679 |
0 |
0 |
Non-missing sex |
14,205 |
2,679 |
0 |
0 |
Non-missing year of birth |
14,205 |
2,679 |
0 |
0 |
Merge overlapping records |
13,908 |
2,679 |
297 |
0 |
Collapse cohort with a gap of 30 days. |
13,860 |
2,679 |
48 |
0 |
Not in cohort acetaminophen between -180 & -1 days relative to cohort_start_date |
13,506 |
2,679 |
354 |
0 |
Prior observation requirement: 180 days |
13,361 |
2,677 |
145 |
2 |
cohort_start_date after 1990-01-01 |
5,401 |
2,340 |
7,960 |
337 |
cohort_start_date before 2019-12-31 |
5,401 |
2,340 |
0 |
0 |
# Draw the attrition result as a diagram.
result |>
  plotCohortAttrition()
Can you visualise the attrition of a simvastatin
cohort?
simvastatin
Can you suppress counts under 10?
# Codelist for the ingredient simvastatin (default naming style).
codelist <- getDrugIngredientCodes(cdm = cdm, name = "simvastatin")

# Cohort of simvastatin records: collapse gaps of up to 90 days, keep only the
# first entry per person, require 365 days of prior observation and restrict
# to 1990-01-01 .. 2019-12-31.
cdm$simvastatin_cohort <- cdm |>
  conceptCohort(
    conceptSet = codelist,
    name = "simvastatin_cohort",
    exit = "event_end_date"
  ) |>
  collapseCohorts(gap = 90) |>
  requireIsFirstEntry() |>
  requirePriorObservation(minPriorObservation = 365) |>
  requireInDateRange(dateRange = as.Date(c("1990-01-01", "2019-12-31")))

# Attrition summary with counts below 10 suppressed.
result <- suppress(
  summariseCohortAttrition(cdm$simvastatin_cohort),
  minCellCount = 10
)

# Attrition diagram.
result |>
  plotCohortAttrition()
summariseCharacteristics() summarises the demographics of the cohort by default, but you can use the intersect arguments to evaluate:
You need to identify the medications and/or conditions of interest as a codelist or cohort beforehand.
# Medications of interest: one codelist per ingredient, named after the
# concept name.
ingredients <- c(
  "warfarin", "morphine", "verapamil", "atorvastatin", "nitroglycerin"
)
medications <- getDrugIngredientCodes(
  cdm = cdm,
  name = ingredients,
  nameStyle = "{concept_name}"
)

# Conditions of interest: for each entry, the concept ids returned by a
# keyword search. Names of the vector become the names of the list.
conditionKeywords <- c(
  "fever" = "fever",
  "cough" = "cough",
  "cardiac_arrest" = "cardiac arrest",
  "myocardial_infarction" = "myocardial infarction",
  "headache" = "headache"
)
conditions <- lapply(
  as.list(conditionKeywords),
  \(keyword) getCandidateCodes(cdm = cdm, keywords = keyword)$concept_id
)
# Characterise the acetaminophen cohort overall and stratified by sex:
# demographics with five age bands, number of visits in the prior year, and
# flags for the conditions (any time prior) and medications (prior year)
# defined in the codelists.
result <- summariseCharacteristics(
  addSex(cdm$acetaminophen),
  strata = list("sex"),
  demographics = TRUE,
  ageGroup = list(c(0, 19), c(20, 39), c(40, 59), c(60, 79), c(80, Inf)),
  tableIntersectCount = list(
    "Number visits prior year" = list(
      tableName = "visit_occurrence",
      window = c(-365, -1)
    )
  ),
  conceptIntersectFlag = list(
    "Conditions any time prior" = list(
      conceptSet = conditions,
      window = c(-Inf, -1)
    ),
    "Medications prior year" = list(
      conceptSet = medications,
      window = c(-365, -1)
    )
  )
)
# Inspect the columns of the summarised_result.
glimpse(result)
Rows: 239
Columns: 13
$ result_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ cdm_name <chr> "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBleed", "GiBl…
$ group_name <chr> "cohort_name", "cohort_name", "cohort_name", "cohort_name", "cohort_name", "cohort_name", "co…
$ group_level <chr> "acetaminophen", "acetaminophen", "acetaminophen", "acetaminophen", "acetaminophen", "acetami…
$ strata_name <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ strata_level <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ variable_name <chr> "Number records", "Number subjects", "Cohort start date", "Cohort start date", "Cohort start …
$ variable_level <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0 to 19", "0 to …
$ estimate_name <chr> "count", "count", "min", "q25", "median", "q75", "max", "min", "q25", "median", "q75", "max",…
$ estimate_type <chr> "integer", "integer", "date", "date", "date", "date", "date", "date", "date", "date", "date",…
$ estimate_value <chr> "5401", "2340", "1990-01-01", "1996-04-28", "2004-01-07", "2011-10-21", "2019-06-20", "1990-0…
$ additional_name <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
$ additional_level <chr> "overall", "overall", "overall", "overall", "overall", "overall", "overall", "overall", "over…
# Default characteristics table.
result |>
  tableCharacteristics()
CDM name
|
||||
---|---|---|---|---|
GiBleed
|
||||
Sex | Variable name | Variable level | Estimate name |
Cohort name
|
acetaminophen | ||||
overall | Number records | - | N | 5,401 |
Number subjects | - | N | 2,340 | |
Cohort start date | - | Median [Q25 - Q75] | 2004-01-07 [1996-04-28 - 2011-10-21] | |
Range | 1990-01-01 to 2019-06-20 | |||
Cohort end date | - | Median [Q25 - Q75] | 2004-01-21 [1996-05-12 - 2011-11-07] | |
Range | 1990-01-10 to 2019-06-24 | |||
Age | - | Median [Q25 - Q75] | 43 [32 - 55] | |
Mean (SD) | 44.58 (17.93) | |||
Range | 5 to 108 | |||
Age group | 0 to 19 | N (%) | 366 (6.78%) | |
20 to 39 | N (%) | 1,909 (35.35%) | ||
40 to 59 | N (%) | 2,117 (39.20%) | ||
60 to 79 | N (%) | 784 (14.52%) | ||
80 or above | N (%) | 225 (4.17%) | ||
Sex | Female | N (%) | 2,687 (49.75%) | |
Male | N (%) | 2,714 (50.25%) | ||
Prior observation | - | Median [Q25 - Q75] | 15,837 [11,937 - 20,419] | |
Mean (SD) | 16,464.35 (6,548.63) | |||
Range | 1,851 to 39,792 | |||
Future observation | - | Median [Q25 - Q75] | 5,193 [2,385 - 8,100] | |
Mean (SD) | 5,213.44 (3,203.25) | |||
Range | 0 to 10,720 | |||
Days in cohort | - | Median [Q25 - Q75] | 15 [8 - 15] | |
Mean (SD) | 16.95 (15.69) | |||
Range | 1 to 128 | |||
Number visits prior year | - | Median [Q25 - Q75] | 0.00 [0.00 - 0.00] | |
Mean (SD) | 0.01 (0.08) | |||
Range | 0.00 to 1.00 | |||
Conditions any time prior | Cardiac arrest | N (%) | 210 (3.89%) | |
Myocardial infarction | N (%) | 17 (0.31%) | ||
Cough | N (%) | 0 (0.00%) | ||
Fever | N (%) | 0 (0.00%) | ||
Headache | N (%) | 0 (0.00%) | ||
Medications prior year | Nitroglycerin | N (%) | 104 (1.93%) | |
Warfarin | N (%) | 8 (0.15%) | ||
Verapamil | N (%) | 8 (0.15%) | ||
Morphine | N (%) | 6 (0.11%) | ||
Atorvastatin | N (%) | 0 (0.00%) | ||
Female | Number records | - | N | 2,687 |
Number subjects | - | N | 1,168 | |
Cohort start date | - | Median [Q25 - Q75] | 2004-02-01 [1996-08-10 - 2011-07-11] | |
Range | 1990-01-02 to 2019-06-19 | |||
Cohort end date | - | Median [Q25 - Q75] | 2004-02-19 [1996-08-20 - 2011-07-19] | |
Range | 1990-01-16 to 2019-06-24 | |||
Age | - | Median [Q25 - Q75] | 42 [32 - 55] | |
Mean (SD) | 44.27 (18.08) | |||
Range | 5 to 108 | |||
Age group | 0 to 19 | N (%) | 181 (6.74%) | |
20 to 39 | N (%) | 980 (36.47%) | ||
40 to 59 | N (%) | 1,047 (38.97%) | ||
60 to 79 | N (%) | 358 (13.32%) | ||
80 or above | N (%) | 121 (4.50%) | ||
Sex | Female | N (%) | 2,687 (100.00%) | |
Prior observation | - | Median [Q25 - Q75] | 15,632 [11,772 - 20,152] | |
Mean (SD) | 16,353.62 (6,603.54) | |||
Range | 1,851 to 39,792 | |||
Future observation | - | Median [Q25 - Q75] | 5,235 [2,602 - 8,028] | |
Mean (SD) | 5,268.14 (3,141.99) | |||
Range | 0 to 10,713 | |||
Days in cohort | - | Median [Q25 - Q75] | 15 [8 - 15] | |
Mean (SD) | 17.28 (16.24) | |||
Range | 1 to 97 | |||
Number visits prior year | - | Median [Q25 - Q75] | 0.00 [0.00 - 0.00] | |
Mean (SD) | 0.01 (0.08) | |||
Range | 0.00 to 1.00 | |||
Conditions any time prior | Cardiac arrest | N (%) | 120 (4.47%) | |
Myocardial infarction | N (%) | 1 (0.04%) | ||
Cough | N (%) | 0 (0.00%) | ||
Fever | N (%) | 0 (0.00%) | ||
Headache | N (%) | 0 (0.00%) | ||
Medications prior year | Nitroglycerin | N (%) | 21 (0.78%) | |
Warfarin | N (%) | 2 (0.07%) | ||
Verapamil | N (%) | 2 (0.07%) | ||
Morphine | N (%) | 4 (0.15%) | ||
Atorvastatin | N (%) | 0 (0.00%) | ||
Male | Number records | - | N | 2,714 |
Number subjects | - | N | 1,172 | |
Cohort start date | - | Median [Q25 - Q75] | 2003-12-10 [1996-03-11 - 2012-02-06] | |
Range | 1990-01-01 to 2019-06-20 | |||
Cohort end date | - | Median [Q25 - Q75] | 2003-12-26 [1996-03-27 - 2012-02-17] | |
Range | 1990-01-10 to 2019-06-20 | |||
Age | - | Median [Q25 - Q75] | 44 [32 - 56] | |
Mean (SD) | 44.88 (17.78) | |||
Range | 5 to 105 | |||
Age group | 0 to 19 | N (%) | 185 (6.82%) | |
20 to 39 | N (%) | 929 (34.23%) | ||
40 to 59 | N (%) | 1,070 (39.43%) | ||
60 to 79 | N (%) | 426 (15.70%) | ||
80 or above | N (%) | 104 (3.83%) | ||
Sex | Male | N (%) | 2,714 (100.00%) | |
Prior observation | - | Median [Q25 - Q75] | 16,107 [12,010 - 20,689] | |
Mean (SD) | 16,573.97 (6,493.16) | |||
Range | 2,070 to 38,529 | |||
Future observation | - | Median [Q25 - Q75] | 5,101 [2,223 - 8,132] | |
Mean (SD) | 5,159.29 (3,262.43) | |||
Range | 0 to 10,720 | |||
Days in cohort | - | Median [Q25 - Q75] | 15 [8 - 15] | |
Mean (SD) | 16.63 (15.12) | |||
Range | 1 to 128 | |||
Number visits prior year | - | Median [Q25 - Q75] | 0.00 [0.00 - 0.00] | |
Mean (SD) | 0.00 (0.07) | |||
Range | 0.00 to 1.00 | |||
Conditions any time prior | Cardiac arrest | N (%) | 90 (3.32%) | |
Myocardial infarction | N (%) | 16 (0.59%) | ||
Cough | N (%) | 0 (0.00%) | ||
Fever | N (%) | 0 (0.00%) | ||
Headache | N (%) | 0 (0.00%) | ||
Medications prior year | Nitroglycerin | N (%) | 83 (3.06%) | |
Warfarin | N (%) | 6 (0.22%) | ||
Verapamil | N (%) | 6 (0.22%) | ||
Morphine | N (%) | 2 (0.07%) | ||
Atorvastatin | N (%) | 0 (0.00%) |
# Same table with CDM name and sex as column headers, grouped by cohort name.
result |>
  tableCharacteristics(
    header = c("cdm_name", "sex"),
    groupColumn = "cohort_name"
  )
CDM name
|
|||||
---|---|---|---|---|---|
GiBleed
|
|||||
Variable name | Variable level | Estimate name |
Sex
|
||
overall | Female | Male | |||
acetaminophen | |||||
Number records | - | N | 5,401 | 2,687 | 2,714 |
Number subjects | - | N | 2,340 | 1,168 | 1,172 |
Cohort start date | - | Median [Q25 - Q75] | 2004-01-07 [1996-04-28 - 2011-10-21] | 2004-02-01 [1996-08-10 - 2011-07-11] | 2003-12-10 [1996-03-11 - 2012-02-06] |
Range | 1990-01-01 to 2019-06-20 | 1990-01-02 to 2019-06-19 | 1990-01-01 to 2019-06-20 | ||
Cohort end date | - | Median [Q25 - Q75] | 2004-01-21 [1996-05-12 - 2011-11-07] | 2004-02-19 [1996-08-20 - 2011-07-19] | 2003-12-26 [1996-03-27 - 2012-02-17] |
Range | 1990-01-10 to 2019-06-24 | 1990-01-16 to 2019-06-24 | 1990-01-10 to 2019-06-20 | ||
Age | - | Median [Q25 - Q75] | 43 [32 - 55] | 42 [32 - 55] | 44 [32 - 56] |
Mean (SD) | 44.58 (17.93) | 44.27 (18.08) | 44.88 (17.78) | ||
Range | 5 to 108 | 5 to 108 | 5 to 105 | ||
Age group | 0 to 19 | N (%) | 366 (6.78%) | 181 (6.74%) | 185 (6.82%) |
20 to 39 | N (%) | 1,909 (35.35%) | 980 (36.47%) | 929 (34.23%) | |
40 to 59 | N (%) | 2,117 (39.20%) | 1,047 (38.97%) | 1,070 (39.43%) | |
60 to 79 | N (%) | 784 (14.52%) | 358 (13.32%) | 426 (15.70%) | |
80 or above | N (%) | 225 (4.17%) | 121 (4.50%) | 104 (3.83%) | |
Sex | Female | N (%) | 2,687 (49.75%) | 2,687 (100.00%) | - |
Male | N (%) | 2,714 (50.25%) | - | 2,714 (100.00%) | |
Prior observation | - | Median [Q25 - Q75] | 15,837 [11,937 - 20,419] | 15,632 [11,772 - 20,152] | 16,107 [12,010 - 20,689] |
Mean (SD) | 16,464.35 (6,548.63) | 16,353.62 (6,603.54) | 16,573.97 (6,493.16) | ||
Range | 1,851 to 39,792 | 1,851 to 39,792 | 2,070 to 38,529 | ||
Future observation | - | Median [Q25 - Q75] | 5,193 [2,385 - 8,100] | 5,235 [2,602 - 8,028] | 5,101 [2,223 - 8,132] |
Mean (SD) | 5,213.44 (3,203.25) | 5,268.14 (3,141.99) | 5,159.29 (3,262.43) | ||
Range | 0 to 10,720 | 0 to 10,713 | 0 to 10,720 | ||
Days in cohort | - | Median [Q25 - Q75] | 15 [8 - 15] | 15 [8 - 15] | 15 [8 - 15] |
Mean (SD) | 16.95 (15.69) | 17.28 (16.24) | 16.63 (15.12) | ||
Range | 1 to 128 | 1 to 97 | 1 to 128 | ||
Number visits prior year | - | Median [Q25 - Q75] | 0.00 [0.00 - 0.00] | 0.00 [0.00 - 0.00] | 0.00 [0.00 - 0.00] |
Mean (SD) | 0.01 (0.08) | 0.01 (0.08) | 0.00 (0.07) | ||
Range | 0.00 to 1.00 | 0.00 to 1.00 | 0.00 to 1.00 | ||
Conditions any time prior | Cardiac arrest | N (%) | 210 (3.89%) | 120 (4.47%) | 90 (3.32%) |
Myocardial infarction | N (%) | 17 (0.31%) | 1 (0.04%) | 16 (0.59%) | |
Cough | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Fever | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Headache | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Medications prior year | Nitroglycerin | N (%) | 104 (1.93%) | 21 (0.78%) | 83 (3.06%) |
Warfarin | N (%) | 8 (0.15%) | 2 (0.07%) | 6 (0.22%) | |
Verapamil | N (%) | 8 (0.15%) | 2 (0.07%) | 6 (0.22%) | |
Morphine | N (%) | 6 (0.11%) | 4 (0.15%) | 2 (0.07%) | |
Atorvastatin | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) |
CDM name | Variable name | Variable level | Estimate name |
Sex
|
||
---|---|---|---|---|---|---|
overall | Female | Male | ||||
acetaminophen | ||||||
GiBleed | Number records | - | N | 5,401 | 2,687 | 2,714 |
Number subjects | - | N | 2,340 | 1,168 | 1,172 | |
Cohort start date | - | Median [Q25 - Q75] | 2004-01-07 [1996-04-28 - 2011-10-21] | 2004-02-01 [1996-08-10 - 2011-07-11] | 2003-12-10 [1996-03-11 - 2012-02-06] | |
Cohort end date | - | Median [Q25 - Q75] | 2004-01-21 [1996-05-12 - 2011-11-07] | 2004-02-19 [1996-08-20 - 2011-07-19] | 2003-12-26 [1996-03-27 - 2012-02-17] | |
Age | - | Median [Q25 - Q75] | 43 [32 - 55] | 42 [32 - 55] | 44 [32 - 56] | |
Age group | 0 to 19 | N (%) | 366 (6.78%) | 181 (6.74%) | 185 (6.82%) | |
20 to 39 | N (%) | 1,909 (35.35%) | 980 (36.47%) | 929 (34.23%) | ||
40 to 59 | N (%) | 2,117 (39.20%) | 1,047 (38.97%) | 1,070 (39.43%) | ||
60 to 79 | N (%) | 784 (14.52%) | 358 (13.32%) | 426 (15.70%) | ||
80 or above | N (%) | 225 (4.17%) | 121 (4.50%) | 104 (3.83%) | ||
Sex | Female | N (%) | 2,687 (49.75%) | 2,687 (100.00%) | - | |
Male | N (%) | 2,714 (50.25%) | - | 2,714 (100.00%) | ||
Prior observation | - | Median [Q25 - Q75] | 15,837 [11,937 - 20,419] | 15,632 [11,772 - 20,152] | 16,107 [12,010 - 20,689] | |
Future observation | - | Median [Q25 - Q75] | 5,193 [2,385 - 8,100] | 5,235 [2,602 - 8,028] | 5,101 [2,223 - 8,132] | |
Days in cohort | - | Median [Q25 - Q75] | 15 [8 - 15] | 15 [8 - 15] | 15 [8 - 15] | |
Number visits prior year | - | Median [Q25 - Q75] | 0.00 [0.00 - 0.00] | 0.00 [0.00 - 0.00] | 0.00 [0.00 - 0.00] | |
Conditions any time prior | Cardiac arrest | N (%) | 210 (3.89%) | 120 (4.47%) | 90 (3.32%) | |
Myocardial infarction | N (%) | 17 (0.31%) | 1 (0.04%) | 16 (0.59%) | ||
Cough | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
Fever | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
Headache | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
Medications prior year | Nitroglycerin | N (%) | 104 (1.93%) | 21 (0.78%) | 83 (3.06%) | |
Warfarin | N (%) | 8 (0.15%) | 2 (0.07%) | 6 (0.22%) | ||
Verapamil | N (%) | 8 (0.15%) | 2 (0.07%) | 6 (0.22%) | ||
Morphine | N (%) | 6 (0.11%) | 4 (0.15%) | 2 (0.07%) | ||
Atorvastatin | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) |
Characterise the ‘simvastatin’ cohort with:
Suppress counts under 10
Visualise it in a nice table
Can you stratify the result by age group? (PatientProfiles::addAge()
allows you to add an age_group column to any table).
Variable name | Variable level | Estimate name |
Age group
|
||
---|---|---|---|---|---|
overall | 0 to 49 | 50 or above | |||
36567_simvastatin | |||||
Number records | - | N | 182 | 61 | 121 |
Number subjects | - | N | 182 | 61 | 121 |
Cohort start date | - | Median [Q25 - Q75] | 2002-12-21 [1992-07-12 - 2011-12-20] | 1995-06-24 [1992-03-06 - 2003-02-25] | 2007-02-25 [1993-01-23 - 2014-03-16] |
Cohort end date | - | Median [Q25 - Q75] | 2002-12-21 [1992-07-12 - 2011-12-20] | 1995-06-24 [1992-03-06 - 2003-02-25] | 2007-02-25 [1993-01-23 - 2014-03-16] |
Age | - | Median [Q25 - Q75] | 57 [42 - 72] | 37 [33 - 42] | 68 [57 - 77] |
Age group | 0 to 49 | N (%) | 61 (33.52%) | 61 (100.00%) | - |
50 or above | N (%) | 121 (66.48%) | - | 121 (100.00%) | |
Sex | Female | N (%) | 47 (25.82%) | 19 (31.15%) | 28 (23.14%) |
Male | N (%) | 135 (74.18%) | 42 (68.85%) | 93 (76.86%) | |
Prior observation | - | Median [Q25 - Q75] | 21,136 [15,351 - 26,352] | 13,594 [12,138 - 15,351] | 24,955 [21,140 - 28,280] |
Future observation | - | Median [Q25 - Q75] | 3,238 [1,503 - 6,678] | 6,391 [3,466 - 9,072] | 2,275 [1,292 - 4,824] |
Days in cohort | - | Median [Q25 - Q75] | 1 [1 - 1] | 1 [1 - 1] | 1 [1 - 1] |
Number conditions on index | - | Median [Q25 - Q75] | 1.00 [0.00 - 1.00] | 1.00 [0.00 - 1.00] | 1.00 [1.00 - 1.00] |
Conditions any time prior | Myocardial infarction | N (%) | <10 | <10 | <10 |
Cardiac arrest | N (%) | <10 | <10 | <10 | |
Cough | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Fever | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Headache | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Medications prior year | Nitroglycerin | N (%) | 155 (85.16%) | 44 (72.13%) | 111 (91.74%) |
Atorvastatin | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Morphine | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Verapamil | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | |
Warfarin | N (%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) |
# Characterise the simvastatin cohort overall and stratified by age group
# (0 to 49 / 50 or above).
#
# `conditions` and `medications` are codelists (named lists of concept ids),
# not cohort tables in the cdm, so they are passed through
# conceptIntersectFlag rather than cohortIntersectFlag.
result <- cdm$simvastatin_cohort |>
  # adds the age_group column used as strata below
  PatientProfiles::addAge(ageGroup = list(c(0, 49), c(50, Inf))) |>
  summariseCharacteristics(
    strata = list("age_group"),
    demographics = TRUE,
    ageGroup = list(c(0, 49), c(50, Inf)),
    tableIntersectCount = list(
      "Number conditions on index" = list(
        tableName = "condition_occurrence",
        window = c(0, 0)
      )
    ),
    conceptIntersectFlag = list(
      "Conditions any time prior" = list(
        conceptSet = conditions,
        window = c(-Inf, 0)
      ),
      "Medications prior year" = list(
        conceptSet = medications,
        window = c(-365, 0)
      )
    )
  )

# Suppress counts under 10, drop min/max/mean/sd so that only counts and
# median [Q25 - Q75] remain, and show age group as the column header.
result |>
  suppress(minCellCount = 10) |>
  dplyr::filter(!estimate_name %in% c("min", "max", "mean", "sd")) |>
  tableCharacteristics(
    header = "age_group",
    groupColumn = "cohort_name",
    hide = c("table", "window", "value", "cdm_name")
  )
Large scale characterisation is a data-driven approach to characterise data and find which are the most frequent concepts in the desired windows of observation.
# Top 10 concepts per window, restricted to results computed on the
# drug_exposure table.
tableTopLargeScaleCharacteristics(
  filterSettings(result, table_name == "drug_exposure"),
  topConcepts = 10
)
Top |
Window
|
||
---|---|---|---|
-365 to -1 | 0 to 0 | 1 to 365 | |
1 | tetanus and diphtheria toxoids, adsorbed, preservative free, for adult use (40213227) 513 (9.5%) |
Acetaminophen 325 MG Oral Tablet (1127433) 4249 (78.7%) |
Acetaminophen 325 MG Oral Tablet (1127433) 4296 (79.8%) |
2 | hepatitis A vaccine, adult dosage (40213296) 208 (3.9%) |
Acetaminophen 21.7 MG/ML / Dextromethorphan Hydrobromide 1 MG/ML / doxylamine succinate 0.417 MG/ML Oral Solution (40229134) 874 (16.2%) |
Acetaminophen 21.7 MG/ML / Dextromethorphan Hydrobromide 1 MG/ML / doxylamine succinate 0.417 MG/ML Oral Solution (40229134) 908 (16.9%) |
3 | Acetaminophen 325 MG Oral Tablet (1127433) 172 (3.2%) |
Acetaminophen 325 MG / Hydrocodone Bitartrate 7.5 MG Oral Tablet (40162522) 153 (2.8%) |
tetanus and diphtheria toxoids, adsorbed, preservative free, for adult use (40213227) 473 (8.8%) |
4 | zoster vaccine, live (40213260) 145 (2.7%) |
Acetaminophen 325 MG / Oxycodone Hydrochloride 5 MG Oral Tablet (40231925) 118 (2.2%) |
hepatitis A vaccine, adult dosage (40213296) 244 (4.5%) |
5 | Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet (1713671) 123 (2.3%) |
Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray (19126352) 98 (1.8%) |
Ibuprofen 200 MG Oral Tablet (19078461) 174 (3.2%) |
6 | Ibuprofen 200 MG Oral Tablet (19078461) 109 (2.0%) |
Ibuprofen 200 MG Oral Tablet (19078461) 95 (1.8%) |
hepatitis B vaccine, adult dosage (40213306) 164 (3.0%) |
7 | Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray (19126352) 104 (1.9%) |
clopidogrel 75 MG Oral Tablet (19075601) 86 (1.6%) |
Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet (1713671) 152 (2.8%) |
8 | clopidogrel 75 MG Oral Tablet (19075601) 101 (1.9%) |
Amoxicillin 500 MG Oral Tablet (19073188) 83 (1.5%) |
Naproxen sodium 220 MG Oral Tablet (1115171) 120 (2.2%) |
9 | Naproxen sodium 220 MG Oral Tablet (1115171) 90 (1.7%) |
Naproxen sodium 220 MG Oral Tablet (1115171) 74 (1.4%) |
zoster vaccine, live (40213260) 111 (2.1%) |
10 | hepatitis B vaccine, adult dosage (40213306) 75 (1.4%) |
Acetaminophen 750 MG / Hydrocodone Bitartrate 7.5 MG Oral Tablet (19133768) 61 (1.1%) |
Amoxicillin 500 MG Oral Tablet (19073188) 97 (1.8%) |
# Full large scale characteristics table.
result |>
  tableLargeScaleCharacteristics()
# Compare the windows (variable_level) side by side.
result |>
  tableLargeScaleCharacteristics(compareBy = "variable_level")
# Same comparison with the "-365 to -1" window as the SMD reference.
result |>
  tableLargeScaleCharacteristics(
    compareBy = "variable_level",
    smdReference = "-365 to -1"
  )
# Plot of the large scale characteristics.
result |>
  plotLargeScaleCharacteristics()
# Compared plot, coloured by window and faceted by source table.
result |>
  plotComparedLargeScaleCharacteristics(
    colour = "variable_level",
    facet = "table_name"
  )
# The same compared plot converted to an interactive plotly widget.
ggplotly(
  plotComparedLargeScaleCharacteristics(
    result = result,
    colour = "variable_level",
    facet = "table_name"
  )
)