Here, we’re just setting a few options.

knitr::opts_chunk$set(
  warning = TRUE, # show warnings during codebook generation
  message = TRUE, # show messages during codebook generation
  error = TRUE, # do not interrupt codebook generation in case of errors,
                # usually better for debugging
  echo = TRUE  # show R code
)
ggplot2::theme_set(ggplot2::theme_bw())
pander::panderOptions("table.split.table", Inf)

Now, we’re preparing our data for the codebook.

library(codebook)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ readr   1.2.0
## ✔ tibble  1.4.2     ✔ purrr   0.2.5
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ ggplot2 3.1.0     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(labelled)
## 
## LABELLED 2.0.0: BREAKING CHANGE
## 
## Following version 2.0.0 of `haven`, `labelled()` and `labelled_spss()` now produce objects with class 'haven_labelled' and 'haven_labelled_spss', due to conflict between the previous 'labelled' class and the 'labelled' class used by `Hmisc`.
## 
## A new function `update_labelled()` could be used to convert data imported with an older version of `haven`/`labelled` to the new classes.
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
codebook_data <- rio::import("https://osf.io/s87kd/download", "csv")
bfi_data <- rio::import("https://osf.io/s87kd/download", "csv")
dict <- rio::import("https://osf.io/cs678/download", "csv")

var_label(codebook_data) <- dict %>% dplyr::select(variable, label) %>% dict_to_list()

val_labels(codebook_data$gender) <- c("male" = 1, "female" = 2)
val_labels(codebook_data$education) <- c("in high school" = 1,
   "finished high school" = 2,
              "some college" = 3, 
               "college graduate" = 4, 
              "graduate degree" = 5)

add_likert_labels <- function(x) {
  val_labels(x) <- c("Very Inaccurate" = 1, 
                  "Moderately Inaccurate" = 2, 
                  "Slightly Inaccurate" = 3,
                  "Slightly Accurate" = 4,
                  "Moderately Accurate" = 5,
                  "Very Accurate" = 6)
  x
}

likert_items <- dict %>% filter(Big6 != "") %>% pull(variable)

codebook_data <- codebook_data %>% mutate_at(likert_items,  add_likert_labels)

codebook_data$extraversion <- codebook_data %>% dplyr::select(E1:E5) %>% aggregate_and_document_scale()

reversed_items <- dict %>% filter(Keying == -1) %>% pull(variable)

codebook_data <- codebook_data %>% 
  rename_at(reversed_items,  add_R)

codebook_data <- codebook_data %>% 
    mutate_at(vars(matches("\\dR$")), reverse_labelled_values)

codebook_data$extraversion <- codebook_data %>% dplyr::select(E1R:E5) %>% aggregate_and_document_scale()

codebook_data$plasticity <- codebook_data %>% dplyr::select(E1R:E5, O1:O5R) %>% aggregate_and_document_scale() 




# omit the following lines, if your missing values are already properly labelled
codebook_data <- detect_missing(codebook_data,
    only_labelled = TRUE, # only labelled values are autodetected as
                                   # missing
    negative_values_are_missing = FALSE, # negative values are missing values
    ninety_nine_problems = TRUE,   # 99/999 are missing values, if they
                                   # are more than 5 MAD from the median
    )

# If you are not using formr, the codebook package needs to guess which items
# form a scale. The following line finds item aggregates with names like this:
# scale = scale_1 + scale_2R + scale_3R
# identifying these aggregates allows the codebook function to
# automatically compute reliabilities.
# However, it will not reverse items automatically.
codebook_data <- detect_scales(codebook_data)
## Warning in detect_scales(codebook_data): A items found, but no aggregate
## Warning in detect_scales(codebook_data): C items found, but no aggregate
## Warning in detect_scales(codebook_data): E items found, but no aggregate
## Warning in detect_scales(codebook_data): N items found, but no aggregate
## Warning in detect_scales(codebook_data): O items found, but no aggregate

Create codebook

metadata(codebook_data)$name <- "25 Personality items representing 5 factors"
metadata(codebook_data)$description <- "25 personality self report items taken from the International Personality Item Pool (ipip.ori.org)[...]"
metadata(codebook_data)$identifier <- "https://dx.doi.org/10.17605/OSF.IO/K39BG"
metadata(codebook_data)$creator <- "William Revelle"
metadata(codebook_data)$citation <- "Revelle, W., Wilt, J., and Rosenthal, A. (2010) Individual Differences in Cognition: New Methods for examining the Personality-Cognition Link In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer."
metadata(codebook_data)$url <- "https://cran.r-project.org/web/packages/psych/index.html"
metadata(codebook_data)$datePublished <- "2010-01-01"
metadata(codebook_data)$temporalCoverage <- "Spring 2010" 
metadata(codebook_data)$spatialCoverage <- "Online" 

codebook(codebook_data)
## Warning in codebook(codebook_data): The variables session, created, ended
## have to be defined for automatic survey repetition detection to work. Set
## to no repetition by default.
knitr::asis_output(data_info)

Metadata

Description

if (exists("name", meta)) {
  glue::glue(
    "__Dataset name__: {name}",
    .envir = meta)
}

Dataset name: 25 Personality items representing 5 factors

cat(description)

25 personality self report items taken from the International Personality Item Pool (ipip.ori.org)[…]

  • Temporal Coverage: Spring 2010
  • Spatial Coverage: Online
  • Citation: Revelle, W., Wilt, J., and Rosenthal, A. (2010) Individual Differences in Cognition: New Methods for examining the Personality-Cognition Link In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer.
  • URL: https://cran.r-project.org/web/packages/psych/index.html
  • Identifier: https://dx.doi.org/10.17605/OSF.IO/K39BG
  • Date published: 2010-01-01

  • Creator:William Revelle

meta <- meta[setdiff(names(meta),
                     c("creator", "datePublished", "identifier",
                       "url", "citation", "spatialCoverage", 
                       "temporalCoverage", "description", "name"))]
pander::pander(meta)
  • keywords: A1R, A2, A3, A4, A5, C1, C2, C3, C4R, C5R, E1R, E2R, E3, E4, E5, N1R, N2R, N3R, N4R, N5R, O1, O2R, O3, O4, O5R, gender, education, age, extraversion and plasticity
knitr::asis_output(survey_overview)

Variables

knitr::asis_output(paste0(scales_items, sep = "\n\n\n", collapse = "\n\n\n"))

A1R

Am indifferent to the feelings of others.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

16 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
A1R Am indifferent to the feelings of others. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
16 2784 2800 4.59 1.41 1 4 5 6 6 ▁▂▁▃▃▁▇▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

A2

Inquire about others’ well-being.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

27 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
A2 Inquire about others’ well-being. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
27 2773 2800 4.8 1.17 1 4 5 6 6 ▁▁▁▁▅▁▇▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

A3

Know how to comfort others.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

26 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
A3 Know how to comfort others. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
26 2774 2800 4.6 1.3 1 4 5 6 6 ▁▂▁▂▅▁▇▆
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

A4

Love children.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

19 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
A4 Love children. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
19 2781 2800 4.7 1.48 1 4 5 6 6 ▁▂▁▁▃▁▅▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

A5

Make people feel at ease.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

16 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
A5 Make people feel at ease. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
16 2784 2800 4.56 1.26 1 4 5 5 6 ▁▂▁▂▅▁▇▆
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

C1

Am exacting in my work.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

21 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
C1 Am exacting in my work. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
21 2779 2800 4.5 1.24 1 4 5 5 6 ▁▁▁▂▅▁▇▅
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

C2

Continue until everything is perfect.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

24 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
C2 Continue until everything is perfect. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
24 2776 2800 4.37 1.32 1 4 5 5 6 ▁▂▁▂▆▁▇▅
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

C3

Do things according to a plan.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

20 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
C3 Do things according to a plan. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
20 2780 2800 4.3 1.29 1 4 5 5 6 ▁▂▁▂▆▁▇▅
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 1
  • Moderately Inaccurate: 2
  • Slightly Inaccurate: 3
  • Slightly Accurate: 4
  • Moderately Accurate: 5
  • Very Accurate: 6

C4R

Do things in a half-way manner.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

26 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
C4R Do things in a half-way manner. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
26 2774 2800 4.45 1.38 1 3 5 6 6 ▁▂▁▅▅▁▇▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

C5R

Waste my time.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

16 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
C5R Waste my time. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
16 2784 2800 3.7 1.63 1 2 4 5 6 ▃▆▁▇▅▁▇▆
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

N1R

Get angry easily.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

22 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
N1R Get angry easily. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
22 2778 2800 4.07 1.57 1 3 4 5 6 ▂▅▁▆▅▁▇▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

N2R

Get irritated easily.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

21 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
N2R Get irritated easily. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
21 2779 2800 3.49 1.53 1 2 3 5 6 ▃▆▁▇▅▁▆▃
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

N3R

Have frequent mood swings.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

11 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
N3R Have frequent mood swings. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
11 2789 2800 3.78 1.6 1 3 4 5 6 ▃▆▁▇▅▁▇▆
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

N4R

Often feel blue.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

36 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
N4R Often feel blue. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
36 2764 2800 3.81 1.57 1 3 4 5 6 ▃▅▁▇▅▁▇▆
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

N5R

Panic easily.

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

29 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
N5R Panic easily. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
29 2771 2800 4.03 1.62 1 3 4 5 6 ▃▃▁▆▅▁▇▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • Very Inaccurate: 6
  • Moderately Inaccurate: 5
  • Slightly Inaccurate: 4
  • Slightly Accurate: 3
  • Moderately Accurate: 2
  • Very Accurate: 1

gender

gender

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
gender gender integer 1. male,
2. female
0 2800 2800 1.67 0.47 1 1 2 2 2 ▃▁▁▁▁▁▁▇
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • male: 1
  • female: 2

education

education

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

223 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
education education integer 1. in high school,
2. finished high school,
3. some college,
4. college graduate,
5. graduate degree
223 2577 2800 3.19 1.11 1 3 3 4 5 ▂▂▁▇▁▂▁▃
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}

Value labels

if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
  • in high school: 1
  • finished high school: 2
  • some college: 3
  • college graduate: 4
  • graduate degree: 5

age

age

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is.numeric(item_nomiss) || many_labels
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (is.numeric(item_nomiss) || dplyr::n_distinct(item_nomiss) < 20) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
    cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name label data_type missing complete n mean sd p0 p25 p50 p75 p100 hist
age age integer 0 2800 2800 28.78 11.13 3 20 26 35 86 ▁▇▆▃▂▁▁▁
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Scale: extraversion

Overview

Reliability: Cronbach’s α [95% CI] = 0.76 [0.75;0.78].

Missing: 87.

old_height <- knitr::opts_chunk$get("fig.height")
new_height <- length(scale_info$scale_item_names)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
new_height <- ifelse(is.na(new_height) | is.nan(new_height), 
                     old_height, new_height)
knitr::opts_chunk$set(fig.height = new_height)
likert_plot <- likert_from_items(items)
if (!is.null(likert_plot)) {
  graphics::plot(likert_plot)
}

knitr::opts_chunk$set(fig.height = old_height)
binwidth <- mean(diff(sort(unique(scale))))

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
dist_plot <- plot_labelled(scale, scale_name, wrap_at)

choices <- attributes(items[[1]])$item$choices
breaks <- as.numeric(names(choices))
if (length(breaks)) {
  suppressMessages( # ignore message about overwriting x axis
  dist_plot <- dist_plot +
        ggplot2::scale_x_continuous("values", 
                                breaks = breaks, 
                                labels = stringr::str_wrap(unlist(choices), ceiling(wrap_at * 0.21))) +
      ggplot2::expand_limits(x = range(breaks)))
  
}

dist_plot

Reliability details

for (i in seq_along(reliabilities)) {
  rel <- reliabilities[[i]]
  cat(knitr::knit_print(rel, indent = paste0(indent, "####")))
}
Reliability
95% Confidence Interval
if (!is.null(x$total$ase)) {
  pander::pander(data.frame(lower = x$total$raw_alpha - 1.96 * x$total$ase, 
             estimate = x$total$raw_alpha, 
             upper = x$total$raw_alpha + 1.96 * 
  x$total$ase))
}
lower estimate upper
0.748 0.7617 0.7755
pander::pander(x$total)
raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
0.7617 0.7618 0.7266 0.3901 3.198 0.007027 4.145 1.061 0.3818
Reliability if an item is dropped:
rownames(x$alpha.drop) <- recursive_escape(rownames(x$alpha.drop))
pander::pander(x$alpha.drop)
  raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
E1R 0.7257 0.7254 0.6731 0.3978 2.642 0.00837 0.004394 0.3818
E2R 0.6902 0.6931 0.6342 0.3608 2.258 0.009509 0.002792 0.3546
E3 0.7279 0.7262 0.6737 0.3988 2.653 0.008241 0.007098 0.396
E4 0.7019 0.7032 0.6464 0.372 2.37 0.009073 0.003275 0.3767
E5 0.7436 0.7442 0.6914 0.4211 2.909 0.007824 0.004344 0.419
Item statistics
rownames(x$item.stats) <- recursive_escape(rownames(x$item.stats))
pander::pander(x$item.stats)
  n raw.r std.r r.cor r.drop mean sd
E1R 2777 0.7238 0.7027 0.5882 0.5163 4.026 1.632
E2R 2784 0.7797 0.7647 0.6936 0.6054 3.858 1.605
E3 2775 0.683 0.7011 0.5827 0.5046 4.001 1.353
E4 2791 0.7467 0.7459 0.6625 0.578 4.422 1.458
E5 2779 0.6432 0.6637 0.5229 0.4542 4.416 1.335
Non missing response frequency for each item
rownames(x$response.freq) <- recursive_escape(rownames(x$response.freq))
pander::pander(x$response.freq)
  1 2 3 4 5 6 miss
E1R 0.08678 0.1322 0.162 0.1455 0.2348 0.2387 0.008214
E2R 0.09124 0.1383 0.2152 0.1232 0.2407 0.1915 0.005714
E3 0.05369 0.1056 0.1485 0.2977 0.2677 0.1268 0.008929
E4 0.05016 0.09387 0.0971 0.1612 0.3375 0.2601 0.003214
E5 0.03418 0.07953 0.1036 0.2227 0.3383 0.2217 0.0075

Summary statistics

for (i in seq_along(names(items))) {
  attributes(items[[i]]) = recursive_escape(attributes(items[[i]]))
}
escaped_table(codebook_table(items))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
E1R Don’t talk a lot. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
23 2777 2800 4.03 1.63 1 3 4 5 6 ▃▅▁▆▅▁▇▇
E2R Find it difficult to approach others. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
16 2784 2800 3.86 1.61 1 3 4 5 6 ▃▅▁▇▅▁▇▆
E3 Know how to captivate people. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
25 2775 2800 4 1.35 1 3 4 5 6 ▂▃▁▃▇▁▇▃
E4 Make friends easily. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
9 2791 2800 4.42 1.46 1 4 5 6 6 ▁▂▁▂▃▁▇▆
E5 Take charge. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
21 2779 2800 4.42 1.33 1 4 5 5 6 ▁▂▁▂▅▁▇▅

Scale: plasticity

Overview

Reliability: Cronbach’s α [95% CI] = 0.71 [0.69;0.72].

Missing: 149.

old_height <- knitr::opts_chunk$get("fig.height")
new_height <- length(scale_info$scale_item_names)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
new_height <- ifelse(is.na(new_height) | is.nan(new_height), 
                     old_height, new_height)
knitr::opts_chunk$set(fig.height = new_height)
likert_plot <- likert_from_items(items)
if (!is.null(likert_plot)) {
  graphics::plot(likert_plot)
}

knitr::opts_chunk$set(fig.height = old_height)
binwidth <- mean(diff(sort(unique(scale))))

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
dist_plot <- plot_labelled(scale, scale_name, wrap_at)

choices <- attributes(items[[1]])$item$choices
breaks <- as.numeric(names(choices))
if (length(breaks)) {
  suppressMessages( # ignore message about overwriting x axis
  dist_plot <- dist_plot +
        ggplot2::scale_x_continuous("values", 
                                breaks = breaks, 
                                labels = stringr::str_wrap(unlist(choices), ceiling(wrap_at * 0.21))) +
      ggplot2::expand_limits(x = range(breaks)))
  
}

dist_plot

Reliability details

for (i in seq_along(reliabilities)) {
  rel <- reliabilities[[i]]
  cat(knitr::knit_print(rel, indent = paste0(indent, "####")))
}
Reliability
95% Confidence Interval
if (!is.null(x$total$ase)) {
  pander::pander(data.frame(lower = x$total$raw_alpha - 1.96 * x$total$ase, 
             estimate = x$total$raw_alpha, 
             upper = x$total$raw_alpha + 1.96 * 
  x$total$ase))
}
lower estimate upper
0.6923 0.7083 0.7242
pander::pander(x$total)
raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
0.7083 0.7112 0.7379 0.1976 2.463 0.008133 4.366 0.7322 0.2068
Reliability if an item is dropped:
rownames(x$alpha.drop) <- recursive_escape(rownames(x$alpha.drop))
pander::pander(x$alpha.drop)
  raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
E1R 0.6774 0.6845 0.7102 0.1942 2.169 0.009125 0.02631 0.2005
E2R 0.6667 0.6763 0.6955 0.1884 2.089 0.009438 0.02228 0.2005
E3 0.659 0.662 0.6917 0.1787 1.958 0.009599 0.02692 0.1864
E4 0.6807 0.6881 0.706 0.1969 2.207 0.009006 0.02164 0.2039
E5 0.6706 0.6744 0.706 0.1871 2.071 0.009274 0.02899 0.1864
O1 0.6807 0.6788 0.708 0.1902 2.113 0.008907 0.03156 0.2005
O2R 0.7166 0.7136 0.7351 0.2168 2.492 0.007848 0.02876 0.2236
O3 0.663 0.6603 0.6905 0.1776 1.943 0.009392 0.03058 0.1481
O4 0.7322 0.7378 0.7529 0.2382 2.814 0.007579 0.02139 0.25
O5R 0.7004 0.7027 0.7254 0.208 2.364 0.008319 0.0301 0.216
Item statistics
rownames(x$item.stats) <- recursive_escape(rownames(x$item.stats))
pander::pander(x$item.stats)
  n raw.r std.r r.cor r.drop mean sd
E1R 2777 0.5904 0.5503 0.4837 0.4147 4.026 1.632
E2R 2784 0.6301 0.5901 0.5555 0.4669 3.858 1.605
E3 2775 0.6526 0.6562 0.6194 0.5259 4.001 1.353
E4 2791 0.5582 0.532 0.4816 0.3965 4.422 1.458
E5 2779 0.5976 0.5991 0.5333 0.4598 4.416 1.335
O1 2778 0.5381 0.578 0.5066 0.4142 4.816 1.13
O2R 2800 0.4047 0.3959 0.2744 0.2041 4.287 1.565
O3 2772 0.6372 0.6639 0.6235 0.5219 4.438 1.221
O4 2786 0.2084 0.2498 0.09655 0.04146 4.892 1.221
O5R 2780 0.44 0.456 0.3535 0.2762 4.51 1.328
Non missing response frequency for each item
rownames(x$response.freq) <- recursive_escape(rownames(x$response.freq))
pander::pander(x$response.freq)
  1 2 3 4 5 6 miss
E1R 0.08678 0.1322 0.162 0.1455 0.2348 0.2387 0.008214
E2R 0.09124 0.1383 0.2152 0.1232 0.2407 0.1915 0.005714
E3 0.05369 0.1056 0.1485 0.2977 0.2677 0.1268 0.008929
E4 0.05016 0.09387 0.0971 0.1612 0.3375 0.2601 0.003214
E5 0.03418 0.07953 0.1036 0.2227 0.3383 0.2217 0.0075
O1 0.007919 0.03708 0.07559 0.2181 0.333 0.3283 0.007857
O2R 0.06393 0.09857 0.1554 0.1386 0.2561 0.2875 0
O3 0.02742 0.05231 0.1053 0.2796 0.3402 0.1952 0.01
O4 0.01974 0.04487 0.05528 0.1726 0.3184 0.3891 0.005
O5R 0.02518 0.06871 0.1309 0.1892 0.3176 0.2683 0.007143

Summary statistics

for (i in seq_along(names(items))) {
  attributes(items[[i]]) = recursive_escape(attributes(items[[i]]))
}
escaped_table(codebook_table(items))
name label data_type value_labels missing complete n mean sd p0 p25 p50 p75 p100 hist
E1R Don’t talk a lot. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
23 2777 2800 4.03 1.63 1 3 4 5 6 ▃▅▁▆▅▁▇▇
E2R Find it difficult to approach others. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
16 2784 2800 3.86 1.61 1 3 4 5 6 ▃▅▁▇▅▁▇▆
E3 Know how to captivate people. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
25 2775 2800 4 1.35 1 3 4 5 6 ▂▃▁▃▇▁▇▃
E4 Make friends easily. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
9 2791 2800 4.42 1.46 1 4 5 6 6 ▁▂▁▂▃▁▇▆
E5 Take charge. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
21 2779 2800 4.42 1.33 1 4 5 5 6 ▁▂▁▂▅▁▇▅
O1 Am full of ideas. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
22 2778 2800 4.82 1.13 1 4 5 6 6 ▁▁▁▂▅▁▇▇
O2R Avoid difficult reading material. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
0 2800 2800 4.29 1.57 1 3 5 6 6 ▂▃▁▅▃▁▇▇
O3 Carry the conversation to a higher level. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
28 2772 2800 4.44 1.22 1 4 5 5 6 ▁▁▁▂▆▁▇▅
O4 Spend time reflecting on things. integer 1. Very Inaccurate,
2. Moderately Inaccurate,
3. Slightly Inaccurate,
4. Slightly Accurate,
5. Moderately Accurate,
6. Very Accurate
14 2786 2800 4.89 1.22 1 4 5 6 6 ▁▁▁▁▃▁▆▇
O5R Will not probe deeply into a subject. numeric 6. Very Inaccurate,
5. Moderately Inaccurate,
4. Slightly Inaccurate,
3. Slightly Accurate,
2. Moderately Accurate,
1. Very Accurate
20 2780 2800 4.51 1.33 1 4 5 6 6 ▁▂▁▃▅▁▇▇
missingness_report

Missingness report

Among those who finished the survey. Only variables that have missing values are shown.

if (!exists("ended", results) ||
  !exists("expired", results)) {
  warning("Could not figure out who finished the surveys, because the ",
          "variables expired and ended were missing.")
}
## Warning: Could not figure out who finished the surveys, because the
## variables expired and ended were missing.
if (length(md_pattern)) {
  if (knitr::is_html_output()) {
    rmarkdown::paged_table(md_pattern, options = list(rows.print = 10))
  } else {
    knitr::kable(md_pattern)
  }
}
items

Codebook table

export_table(metadata_table)
jsonld
bfi_meta <- metadata_list(codebook_data)
write_json(bfi_meta, "/home/eleftheriadou/Users/EleftheriadouIoannaIro/bfi-dataset/dataset_description.json", pretty = TRUE)
rio::export(codebook_data, "/home/eleftheriadou/Users/EleftheriadouIoannaIro/bfi-dataset/processed_data/bfi-codebook_data.tsv")