(original) (raw)

1 #' Identify Issue Rows
2 #'
3 #' This function takes a `diffdf` object and a dataframe and subsets
4 #' the `data.frame` for problem rows as identified in the comparison object.
5 #' If \code{vars} has been specified only issue rows associated with those
6 #' variable(s) will be returned.
7 #' @param df dataframe to be subsetted
8 #' @param diff diffdf object
9 #' @param vars (optional) character vector containing names of issue variables to subset dataframe
10 #' on. A value of NULL (default) will be taken to mean available issue variables.
11 #' @examples
12 #' iris2 <- iris
13 #' for (i in 1:3) iris2[i, i] <- 99
14 #' x <- diffdf(iris, iris2, suppress_warnings = TRUE)
15 #' diffdf_issuerows(iris, x)
16 #' diffdf_issuerows(iris2, x)
17 #' diffdf_issuerows(iris2, x, vars = "Sepal.Length")
18 #' diffdf_issuerows(iris2, x, vars = c("Sepal.Length", "Sepal.Width"))
19 #' @details
20 #' Note that `diffdf_issuerows` can be used to subset against any dataframe. The only
21 #' requirement is that the original variables specified in the keys argument to diffdf
22 #' are present on the dataframe you are subsetting against. However please note that if
23 #' no keys were specified in diffdf then the row number is used. This means using
24 #' `diffdf_issuerows` without a keys against an arbitrary dataset can easily result in
25 #' nonsense rows being returned. It is always recommended to supply keys to diffdf.
26 #' @export
27 diffdf_issuerows <- function(df, diff, vars = NULL) {
28
29 19_x_ assertthat::assert_that(
30 19_x_ class(diff)[[1]] == "diffdf"
31 )
32
33
34 19_x_ KEYS_ATT <- attr(diff, "keys")
35
36 19_x_ assertthat::assert_that(
37 19_x_ !is.null(KEYS_ATT),
38 19_x_ msg = "diff is missing the keys attribute"
39 )
40
41
42 19_x_ issue_vars <- names(diff)[grep("^VarDiff_", names(diff))]
43
44 19_x_ if (is.null(vars)) {
45 15_x_ vars <- issue_vars
46 } else {
47 4_x_ vars <- paste0("VarDiff_", vars)
48 }
49
50 19_x_ if (length(issue_vars) == 0 |
51 7_x_ return(df[FALSE, ])
52 }
53
54 12_x_ KEEP <- mapply(
55 12_x_ FUN = get_issue_dataset,
56 12_x_ issue = vars,
57 12_x_ diff = list(diff),
58 12_x_ SIMPLIFY = FALSE
59 )
60
61 12_x_ KEEP <- recursive_reduce(KEEP, rbind)
62 12_x_ KEEP <- KEEP[!duplicated(KEEP), ]
63
64 12_x_ if (KEYS_ATT$is_derived) {
65 2_x_ df[[KEYS_ATT$value]] <- seq_len(nrow(df))
66 }
67
68 12_x_ keys <- KEYS_ATT$value
69
70 12_x_ if (any(!keys %in% names(df))) {
71 1_x_ stop("df does not contain all variables specified as keys in diff")
72 }
73
74 11_x_ RET <- merge(
75 11_x_ x = df,
76 11_x_ y = KEEP,
77 11_x_ sort = TRUE
78 )
79
80 11_x_ RET <- RET[do.call("order", RET[keys]), ]
81
82 11_x_ if (KEYS_ATT$is_derived) {
83 2_x_ keep_vars <- !names(RET) %in% KEYS_ATT$value
84 2_x_ RET <- RET[, keep_vars, drop = FALSE]
85 }
86
87 11_x_ return(RET)
88 }
89
90
91
92
93 #' get_issue_dataset
94 #'
95 #' Internal function used by `diffdf_issuerows` to extract the dataframe
96 #' from each a target issue. In particular it also strips off any
97 #' non-key variables
98 #' @param issue name of issue to extract the dataset from diff
99 #' @param diff diffdf object which contains issues
100 #' @keywords internal
101 get_issue_dataset <- function(issue, diff) {
102 20_x_ issue_df <- diff[[issue]]
103 20_x_ keep <- names(issue_df)[!(names(issue_df) %in% c("BASE", "COMPARE", "VARIABLE"))]
104 20_x_ issue_df[, keep, drop = FALSE]
105 }
1 #' diffdf
2 #' @description
3 #' Compares 2 dataframes and outputs any differences.
4 #' @param base input dataframe
5 #' @param compare comparison dataframe
6 #' @param keys vector of variables (as strings) that defines a unique row in
7 #' the base and compare dataframes
8 #' @param strict_numeric Flag for strict numeric to numeric comparisons
9 #' (default = TRUE). If False diffdf will cast integer to double where
10 #' required for comparisons. Note that variables specified in the keys
11 #' will never be casted.
12 #' @param strict_factor Flag for strict factor to character comparisons
13 #' (default = TRUE). If False diffdf will cast factors to characters where
14 #' required for comparisons. Note that variables specified in the keys will
15 #' never be casted.
16 #' @param suppress_warnings Do you want to suppress warnings? (logical)
17 #' @param file Location and name of a text file to output the results to.
18 #' Setting to NULL will cause no file to be produced.
19 #' @param tolerance Set tolerance for numeric comparisons. Note that
20 #' comparisons fail if (x-y)/scale > tolerance.
21 #' @param scale Set scale for numeric comparisons. Note that comparisons fail
22 #' if (x-y)/scale > tolerance. Setting as NULL is a slightly more efficient
23 #' version of scale = 1.
24 #' @param check_column_order Should the column ordering be checked? (logical)
25 #' @param check_df_class Do you want to check for differences in the class
26 #' between `base` and `compare`? (logical)
27 #' @examples
28 #' x <- subset(iris, -Species)
29 #' x[1, 2] <- 5
30 #' COMPARE <- diffdf(iris, x)
31 #' print(COMPARE)
32 #'
33 #' #### Sample data frames
34 #'
35 #' DF1 <- data.frame(
36 #' id = c(1, 2, 3, 4, 5, 6),
37 #' v1 = letters[1:6],
38 #' v2 = c(NA, NA, 1, 2, 3, NA)
39 #' )
40 #'
41 #' DF2 <- data.frame(
42 #' id = c(1, 2, 3, 4, 5, 7),
43 #' v1 = letters[1:6],
44 #' v2 = c(NA, NA, 1, 2, NA, NA),
45 #' v3 = c(NA, NA, 1, 2, NA, 4)
46 #' )
47 #'
48 #' diffdf(DF1, DF1, keys = "id")
49 #'
50 #' # We can control matching with scale/location for example:
51 #'
52 #' DF1 <- data.frame(
53 #' id = c(1, 2, 3, 4, 5, 6),
54 #' v1 = letters[1:6],
55 #' v2 = c(1, 2, 3, 4, 5, 6)
56 #' )
57 #' DF2 <- data.frame(
58 #' id = c(1, 2, 3, 4, 5, 6),
59 #' v1 = letters[1:6],
60 #' v2 = c(1.1, 2, 3, 4, 5, 6)
61 #' )
62 #'
63 #' diffdf(DF1, DF2, keys = "id")
64 #' diffdf(DF1, DF2, keys = "id", tolerance = 0.2)
65 #' diffdf(DF1, DF2, keys = "id", scale = 10, tolerance = 0.2)
66 #'
67 #' # We can use strict_factor to compare factors with characters for example:
68 #'
69 #' DF1 <- data.frame(
70 #' id = c(1, 2, 3, 4, 5, 6),
71 #' v1 = letters[1:6],
72 #' v2 = c(NA, NA, 1, 2, 3, NA),
73 #' stringsAsFactors = FALSE
74 #' )
75 #'
76 #' DF2 <- data.frame(
77 #' id = c(1, 2, 3, 4, 5, 6),
78 #' v1 = letters[1:6],
79 #' v2 = c(NA, NA, 1, 2, 3, NA)
80 #' )
81 #'
82 #' diffdf(DF1, DF2, keys = "id", strict_factor = TRUE)
83 #' diffdf(DF1, DF2, keys = "id", strict_factor = FALSE)
84 #'
85 #' @export
86 diffdf <- function(
87 base,
88 compare,
89 keys = NULL,
90 suppress_warnings = FALSE,
91 strict_numeric = TRUE,
92 strict_factor = TRUE,
93 file = NULL,
94 tolerance = sqrt(.Machine$double.eps),
95 scale = NULL,
96 check_column_order = FALSE,
97 check_df_class = FALSE
98 ) {
99
100 124_x_ assertthat::assert_that(
101 124_x_ assertthat::is.flag(check_df_class),
102 124_x_ !is.na(check_df_class),
103 124_x_ msg = "`check_df_class` must be a length 1 logical"
104 )
105
106 124_x_ BASE <- base
107 124_x_ COMP <- compare
108 124_x_ KEYS <- keys
109 124_x_ SUPWARN <- suppress_warnings
110
111 ### Initatiate output object
112 124_x_ COMPARE <- list()
113 124_x_ class(COMPARE) <- c("diffdf", "list")
114
115
116 124_x_ BASE_NAME <- deparse(substitute(base))
117 124_x_ COMP_NAME <- deparse(substitute(compare))
118 124_x_ COMPARE[["DataSummary"]] <- construct_issue(
119 124_x_ value = describe_dataframe(BASE, COMP, BASE_NAME, COMP_NAME),
120 124_x_ message = "Summary of BASE and COMPARE"
121 )
122
123
124 124_x_ is_derived <- FALSE
125
126 ### If no key is suplied match values based upon row number
127 124_x_ if (is.null(KEYS)) {
128 102_x_ is_derived <- TRUE
129 102_x_ keyname <- generate_keyname(BASE, COMP)
130 102_x_ BASE[[keyname]] <- seq_len(nrow(BASE))
131 102_x_ COMP[[keyname]] <- seq_len(nrow(COMP))
132 102_x_ KEYS <- keyname
133 }
134 124_x_ attr(COMPARE, "keys") <- list(value = KEYS, is_derived = is_derived)
135
136 124_x_ assertthat::assert_that(
137 124_x_ is.numeric(tolerance),
138 124_x_ is.numeric(scale) |
139 )
140
141 120_x_ missing_keys_base <- KEYS[!KEYS %in% names(BASE)]
142 120_x_ assertthat::assert_that(
143 120_x_ length(missing_keys_base) == 0,
144 120_x_ msg = sprintf(
145 120_x_ "The following KEYS are not available in BASE:\n %s",
146 120_x_ paste(missing_keys_base, collapse = "\n ")
147 )
148 )
149
150 119_x_ missing_keys_comp <- KEYS[!KEYS %in% names(COMP)]
151 119_x_ assertthat::assert_that(
152 119_x_ length(missing_keys_comp) == 0,
153 119_x_ msg = sprintf(
154 119_x_ "The following KEYS are not available in COMPARE:\n %s",
155 119_x_ paste(missing_keys_comp, collapse = "\n ")
156 )
157 )
158
159 118_x_ assertthat::assert_that(
160 118_x_ has_unique_rows(BASE, KEYS),
161 118_x_ msg = "BY variables in BASE do not result in unique observations"
162 )
163
164 117_x_ assertthat::assert_that(
165 117_x_ has_unique_rows(COMP, KEYS),
166 117_x_ msg = "BY variables in COMPARE do not result in unique observations"
167 )
168
169
170
171 #### Check essential variable properties (class & mode)
172
173 117_x_ COMPARE[["UnsupportedColsBase"]] <- construct_issue(
174 117_x_ value = identify_unsupported_cols(BASE),
175 117_x_ message = "There are columns in BASE with unsupported modes !!"
176 )
177
178
179 117_x_ COMPARE[["UnsupportedColsComp"]] <- construct_issue(
180 117_x_ value = identify_unsupported_cols(COMP),
181 117_x_ message = "There are columns in COMPARE with unsupported modes !!"
182 )
183
184
185 # cast variables if strict is off
186 117_x_ if (!strict_factor |
187 9_x_ casted_df <- cast_variables(
188 9_x_ BASE = BASE,
189 9_x_ COMPARE = COMP,
190 9_x_ ignore_vars = KEYS,
191 9_x_ cast_integers = !strict_numeric,
192 9_x_ cast_factors = !strict_factor
193 )
194
195 9_x_ BASE <- casted_df$BASE
196 9_x_ COMP <- casted_df$COMP
197 }
198
199
200 117_x_ COMPARE[["VarModeDiffs"]] <- construct_issue(
201 117_x_ value = identify_mode_differences(BASE, COMP),
202 117_x_ message = "There are columns in BASE and COMPARE with different modes !!"
203 )
204
205
206 117_x_ COMPARE[["VarClassDiffs"]] <- construct_issue(
207 117_x_ value = identify_class_differences(BASE, COMP),
208 117_x_ message = "There are columns in BASE and COMPARE with different classes !!"
209 )
210
211
212
213
214 ##### Check Validity of Keys
215
216 117_x_ BASE_keys <- names(BASE)[names(BASE) %in% KEYS]
217 117_x_ COMP_keys <- names(COMP)[names(COMP) %in% KEYS]
218
219 117_x_ assertthat::assert_that(
220 117_x_ length(BASE_keys) == length(KEYS),
221 117_x_ msg = "BASE is missing variables specified in KEYS"
222 )
223
224 117_x_ assertthat::assert_that(
225 117_x_ length(COMP_keys) == length(KEYS),
226 117_x_ msg = "COMP is missing variables specified in KEYS"
227 )
228
229
230 117_x_ assert_valid_keys(
231 117_x_ COMPARE, KEYS, "UnsupportedColsBase",
232 117_x_ "The following KEYS in BASE have an unsupported mode (see `?mode()`)"
233 )
234 116_x_ assert_valid_keys(
235 116_x_ COMPARE, KEYS, "UnsupportedColsComp",
236 116_x_ "The following KEYS in COMPARE have an unsupported mode (see `?mode()`)"
237 )
238 115_x_ assert_valid_keys(
239 115_x_ COMPARE, KEYS, "VarModeDiffs",
240 115_x_ "The following KEYS have different modes between BASE and COMPARE"
241 )
242 113_x_ assert_valid_keys(
243 113_x_ COMPARE, KEYS, "VarClassDiffs",
244 113_x_ "The following KEYS have different classes between BASE and COMPARE"
245 )
246
247
248 112_x_ exclude_cols <- c(
249 112_x_ COMPARE[["UnsupportedColsBase"]]$VARIABLE,
250 112_x_ COMPARE[["UnsupportedColsComp"]]$VARIABLE,
251 112_x_ COMPARE[["VarClassDiffs"]]$VARIABLE,
252 112_x_ COMPARE[["VarModeDiffs"]]$VARIABLE
253 )
254
255 112_x_ if (check_column_order) {
256 5_x_ if (attr(COMPARE, "keys")$is_derived) {
257 4_x_ keep_vars_base <- !(names(BASE) %in% attr(COMPARE, "keys")$value)
258 4_x_ keep_vars_comp <- !(names(COMP) %in% attr(COMPARE, "keys")$value)
259 } else {
260 1_x_ keep_vars_base <- TRUE
261 1_x_ keep_vars_comp <- TRUE
262 }
263 5_x_ COMPARE[["ColumnOrder"]] <- construct_issue(
264 5_x_ value = identify_column_order_differences(
265 5_x_ BASE[, keep_vars_base, drop = FALSE],
266 5_x_ COMP[, keep_vars_comp, drop = FALSE]
267 ),
268 5_x_ message = "There are differences in the column ordering between BASE and COMPARE !!"
269 )
270 }
271
272
273 ##### Check Attributes
274 112_x_ COMPARE[["AttribDiffs"]] <- construct_issue(
275 112_x_ value = identify_att_differences(BASE, COMP, exclude_cols),
276 112_x_ message = "There are columns in BASE and COMPARE with differing attributes !!"
277 )
278
279
280 ##### Check data
281
282 112_x_ BASE <- factor_to_character(BASE, KEYS)
283 112_x_ COMP <- factor_to_character(COMP, KEYS)
284
285
286 112_x_ COMPARE[["ExtRowsBase"]] <- construct_issue(
287 112_x_ value = identify_extra_rows(BASE, COMP, KEYS),
288 112_x_ message = "There are rows in BASE that are not in COMPARE !!"
289 )
290
291
292 112_x_ COMPARE[["ExtRowsComp"]] <- construct_issue(
293 112_x_ value = identify_extra_rows(COMP, BASE, KEYS),
294 112_x_ message = "There are rows in COMPARE that are not in BASE !!"
295 )
296
297
298 112_x_ COMPARE[["ExtColsBase"]] <- construct_issue(
299 112_x_ value = identify_extra_cols(BASE, COMP),
300 112_x_ message = "There are columns in BASE that are not in COMPARE !!"
301 )
302
303
304 112_x_ COMPARE[["ExtColsComp"]] <- construct_issue(
305 112_x_ value = identify_extra_cols(COMP, BASE),
306 112_x_ message = "There are columns in COMPARE that are not in BASE !!"
307 )
308
309
310 112_x_ VALUE_DIFFERENCES <- identify_differences(
311 112_x_ BASE, COMP, KEYS, exclude_cols,
312 112_x_ tolerance = tolerance,
313 112_x_ scale = scale
314 )
315
316
317
318 ## Summarise the number of mismatching rows per variable
319
320 112_x_ if (length(VALUE_DIFFERENCES)) {
321 106_x_ NDIFF <- sapply(VALUE_DIFFERENCES, nrow)
322 106_x_ COMPARE[["NumDiff"]] <- construct_issue(
323 106_x_ value = convert_to_issue(NDIFF),
324 106_x_ message = "Not all Values Compared Equal"
325 )
326 }
327
328
329 112_x_ for (i in names(VALUE_DIFFERENCES)) {
330 917_x_ COMPARE[[paste0("VarDiff_", i)]] <- construct_issue(
331 917_x_ value = VALUE_DIFFERENCES[[i]],
332 917_x_ message = NULL
333 )
334 }
335
336
337 # suppress warning message of data summary if user didn't request to check it
338 # we leave the issue in the main compare object though for printing purposes
339 112_x_ COMPARE_WARNINGS <- COMPARE
340 112_x_ attr(COMPARE_WARNINGS[["DataSummary"]], "message") <- c(
341 112_x_ "There are differences between the class of BASE and COMPARE"
342 )
343 112_x_ if (!check_df_class |
344 110_x_ COMPARE_WARNINGS["DataSummary"] <- NULL
345 }
346
347 # Get all issue messages, remove blank message, and collapse into single string
348 112_x_ ISSUE_MSGS <- sapply(COMPARE_WARNINGS, function(x) get_issue_message(x))
349 112_x_ ISSUE_MSGS <- Filter(function(x) !is.null(x), ISSUE_MSGS)
350 112_x_ ISSUE_MSGS <- Filter(function(x) x != "", ISSUE_MSGS)
351
352 112_x_ if (length(ISSUE_MSGS) != 0) {
353 73_x_ if (!SUPWARN) {
354 45_x_ ISSUE_MSGS <- paste(ISSUE_MSGS, collapse = "\n")
355 45_x_ warning(c("\n", ISSUE_MSGS))
356 }
357 }
358
359 # If the classes are the same and it is the only entry in the compare
360 # object then remove it in order to trigger "no issues found"
361 112_x_ if (identical(class(base), class(compare)) && length(COMPARE) == 1) {
362 38_x_ COMPARE["DataSummary"] <- NULL
363 }
364
365 # If the summary is the only item and the user didn't want to check classes
366 # then remove the object to trigger the "no issues found"
367 112_x_ if (!check_df_class && length(COMPARE) == 1) {
368 1_x_ COMPARE["DataSummary"] <- NULL
369 }
370
371
372 112_x_ if (!is.null(file)) {
373 ! x <- print(COMPARE, as_string = TRUE)
374
375 ! tryCatch(
376 {
377 ! sink(file)
378 ! cat(x, sep = "\n")
379 ! sink()
380 },
381 ! warning = function(w) {
382 ! sink()
383 ! warning(w)
384 },
385 ! error = function(e) {
386 ! sink()
387 ! stop(e)
388 }
389 )
390 ! return(invisible(COMPARE))
391 }
392
393 112_x_ return(COMPARE)
394 }
395
396
397
398
399 #' diffdf_has_issues
400 #'
401 #' Utility function which returns TRUE if an diffdf
402 #' object has issues or FALSE if an diffdf object does not have issues
403 #' @param x diffdf object
404 #' @examples
405 #'
406 #' # Example with no issues
407 #' x <- diffdf(iris, iris)
408 #' diffdf_has_issues(x)
409 #'
410 #' # Example with issues
411 #' iris2 <- iris
412 #' iris2[2, 2] <- NA
413 #' x <- diffdf(iris, iris2, suppress_warnings = TRUE)
414 #' diffdf_has_issues(x)
415 #' @export
416 diffdf_has_issues <- function(x) {
417 ! if (class(x)[[1]] != "diffdf") stop("x is not an diffdf object")
418 3_x_ return(length(x) != 0)
419 }
420
421
422 #' Assert that keys are valid
423 #'
424 #' Utility function to check that user provided "keys" aren't listed as a problem
425 #' variable of the current list of issues.
426 #' @param COMPARE (`list`)\cr A named list of which each element is a `data.frame` with the
427 #' column `VARIABLE`
428 #' @param KEYS (`character`)\cr name of key variables to check to make sure they don't contain
429 #' any issues
430 #' @param component (`character`)\cr name of the component within `COMPARE` to check against
431 #' @param msg (`character`)\cr error message to print if any of `KEYS` are found within
432 #' `COMPARE[component]$VARIABLE`
433 #' @keywords internal
434 assert_valid_keys <- function(COMPARE, KEYS, component, msg) {
435 461_x_ keys_reduced <- KEYS[KEYS %in% COMPARE[[component]]$VARIABLE]
436 461_x_ assertthat::assert_that(
437 461_x_ length(keys_reduced) == 0,
438 461_x_ msg = sprintf(
439 461_x_ "%s:\n%s",
440 461_x_ msg,
441 461_x_ paste0("`", paste0(keys_reduced, collapse = "`, `"), "`")
442 )
443 )
444 }
1 #' factor_to_character
2 #'
3 #' Takes a dataframe and converts any factor variables to character
4 #' @param dsin input dataframe
5 #' @param vars variables to consider for conversion. Default NULL will consider
6 #' every variable within the dataset
7 #' @keywords internal
8 factor_to_character <- function(dsin, vars = NULL) {
9 ! if (is.null(vars)) vars <- names(dsin)
10
11 224_x_ for (var in vars) {
12 236_x_ if (is.factor(dsin[[var]])) {
13 ! dsin[[var]] <- as.character(dsin[[var]])
14 }
15 }
16 224_x_ return(dsin)
17 }
18
19
20
21
22 #' has_unique_rows
23 #'
24 #' Check if a data sets rows are unique
25 #' @param DAT input data set (data frame)
26 #' @param KEYS Set of keys which should be unique
27 #' @keywords internal
28 has_unique_rows <- function(DAT, KEYS) {
29 235_x_ DUPS <- duplicated(subset(DAT, select = KEYS))
30 235_x_ NDUPS <- sum(DUPS)
31 235_x_ return(NDUPS == 0)
32 }
33
34 #' convert_to_issue
35 #'
36 #' converts the count value into the correct issue format
37 #' @param datin data inputted
38 #' @importFrom tibble rownames_to_column
39 #' @keywords internal
40 convert_to_issue <- function(datin) {
41 106_x_ datin_tibble <- tibble(
42 106_x_ `Variable` = names(datin),
43 106_x_ `No of Differences` = datin
44 )
45
46 106_x_ datin_tibble_reduced <- datin_tibble[datin_tibble[["No of Differences"]] > 0, , drop = FALSE]
47 106_x_ return(datin_tibble_reduced)
48 }
49
50 #' Describe the datasets being compared
51 #'
52 #' This function is used to produce a basic summary table of the core
53 #' features of the two `data.frame`'s being compared.
54 #' @param base (`data.frame`)\cr base dataset to be described
55 #' @param comp (`data.frame`)\cr comparison dataset to be described
56 #' @param base_name (`character`)\cr name of the base dataset
57 #' @param comp_name (`character`)\cr name of the comparison dataset
58 #' @keywords internal
59 describe_dataframe <- function(base, comp, base_name, comp_name) {
60 124_x_ tibble(
61 124_x_ PROPERTY = list(
62 124_x_ "Name",
63 124_x_ "Class",
64 124_x_ "Rows(#)",
65 124_x_ "Columns(#)"
66 ),
67 124_x_ BASE = c(
68 124_x_ base_name,
69 124_x_ paste(class(base), collapse = ", "),
70 124_x_ as.character(nrow(base)),
71 124_x_ as.character(ncol(base))
72 ),
73 124_x_ COMP = c(
74 124_x_ comp_name,
75 124_x_ paste(class(comp), collapse = ", "),
76 124_x_ as.character(nrow(comp)),
77 124_x_ as.character(ncol(comp))
78 )
79 )
80 }
1 #' identify_extra_rows
2 #'
3 #' Identifies rows that are in a baseline dataset but not in a comparator dataset
4 #' @param DS1 Baseline dataset (data frame)
5 #' @param DS2 Comparator dataset (data frame)
6 #' @param KEYS List of variables that define a unique row within the datasets (strings)
7 #' @keywords internal
8 identify_extra_rows <- function(DS1, DS2, KEYS) {
9 224_x_ if (nrow(DS2) == 0 |
10 10_x_ return(DS1[, KEYS, drop = FALSE])
11 }
12 214_x_ DS2[["..FLAG.."]] <- "Y"
13 214_x_ dat <- merge(
14 214_x_ subset(DS1, select = KEYS),
15 214_x_ subset(DS2, select = c(KEYS, "..FLAG..")),
16 214_x_ by = KEYS, all.x = TRUE,
17 214_x_ sort = TRUE
18 )
19 214_x_ dat <- dat[do.call("order", dat[KEYS]), ]
20
21 214_x_ dat[is.na(dat[["..FLAG.."]]), KEYS, drop = FALSE]
22 }
23
24
25
26 #' identify_extra_cols
27 #'
28 #' Identifies columns that are in a baseline dataset but not in a comparator dataset
29 #' @param DS1 Baseline dataset (data frame)
30 #' @param DS2 Comparator dataset (data frame)
31 #' @importFrom tibble tibble
32 #' @keywords internal
33 identify_extra_cols <- function(DS1, DS2) {
34 224_x_ match.cols <- sapply(names(DS1), "%in%", names(DS2))
35 224_x_ assertthat::assert_that(
36 224_x_ all(is.logical(match.cols)),
37 224_x_ msg = "Assumption of logical return type is not true"
38 )
39 224_x_ tibble(
40 224_x_ COLUMNS = names(DS1)[!match.cols]
41 )
42 }
43
44
45
46
47
48
49
50 #' identify_matching_cols
51 #'
52 #' Identifies columns with the same name in two data frames
53 #' @param DS1 Input dataset 1 (data frame)
54 #' @param DS2 Input dataset 2 (data frame)
55 #' @param EXCLUDE Columns to ignore
56 #' @keywords internal
57 identify_matching_cols <- function(DS1, DS2, EXCLUDE = "") {
58 768_x_ match_cols <- sapply(names(DS1), "%in%", names(DS2))
59 768_x_ exclude_cols <- sapply(names(DS1), "%in%", EXCLUDE)
60 768_x_ names(DS1)[match_cols & !exclude_cols]
61 }
62
63
64
65
66
67 #' identify_unsupported_cols
68 #'
69 #' Identifies any columns for which the package is not setup to handle
70 #' @param dsin input dataset
71 #' @keywords internal
72 identify_unsupported_cols <- function(dsin) {
73 234_x_ dat <- subset(
74 234_x_ identify_properties(dsin),
75 234_x_ select = c("VARIABLE", "MODE")
76 )
77
78 234_x_ dat[!dat[["MODE"]] %in% c("numeric", "character", "logical"), , drop = FALSE]
79 }
80
81
82
83 #' identify_mode_differences
84 #'
85 #' Identifies any mode differences between two data frames
86 #' @param BASE Base dataset for comparison (data.frame)
87 #' @param COMP Comparator dataset to compare base against (data.frame)
88 #' @keywords internal
89 identify_mode_differences <- function(BASE, COMP) {
90 417_x_ matching_cols <- identify_matching_cols(BASE, COMP)
91
92 417_x_ dat <- merge(
93 417_x_ x = identify_properties(BASE),
94 417_x_ y = identify_properties(COMP),
95 417_x_ by = "VARIABLE",
96 417_x_ all = TRUE,
97 417_x_ suffixes = c(".BASE", ".COMP"),
98 417_x_ sort = TRUE
99 )
100 417_x_ dat <- subset(dat, select = c("VARIABLE", "MODE.BASE", "MODE.COMP"))
101
102 417_x_ KEEP1 <- dat[["VARIABLE"]] %in% matching_cols
103 417_x_ KEEP2 <- dat[["MODE.BASE"]] != dat[["MODE.COMP"]]
104
105 417_x_ dat[KEEP1 & KEEP2, , drop = FALSE]
106 }
107
108
109
110 #' identify_class_differences
111 #'
112 #' Identifies any class differences between two data frames
113 #' @param BASE Base dataset for comparison (data.frame)
114 #' @param COMP Comparator dataset to compare base against (data.frame)
115 #' @keywords internal
116 identify_class_differences <- function(BASE, COMP) {
117 117_x_ matching_cols <- identify_matching_cols(BASE, COMP)
118
119 117_x_ dat <- merge(
120 117_x_ x = identify_properties(BASE),
121 117_x_ y = identify_properties(COMP),
122 117_x_ by = "VARIABLE",
123 117_x_ all = TRUE,
124 117_x_ sort = TRUE,
125 117_x_ suffixes = c(".BASE", ".COMP")
126 )
127
128 117_x_ dat <- subset(dat, select = c("VARIABLE", "CLASS.BASE", "CLASS.COMP"))
129
130 117_x_ KEEP1 <- dat[["VARIABLE"]] %in% matching_cols
131 117_x_ KEEP2 <- !mapply(
132 117_x_ identical,
133 117_x_ dat[["CLASS.BASE"]],
134 117_x_ dat[["CLASS.COMP"]]
135 )
136
137 117_x_ dat[KEEP1 & KEEP2, , drop = FALSE]
138 }
139
140
141
142 #' Identify differences in attributes
143 #'
144 #' Identifies any attribute differences between two data frames
145 #' @param BASE Base dataset for comparison (data.frame)
146 #' @param COMP Comparator dataset to compare base against (data.frame)
147 #' @param exclude_cols Columns to exclude from comparison
148 #' @importFrom tibble tibble
149 #' @keywords internal
150 identify_att_differences <- function(BASE, COMP, exclude_cols = "") {
151 122_x_ matching_cols <- identify_matching_cols(BASE, COMP, exclude_cols)
152
153 122_x_ PROPS <- merge(
154 122_x_ x = identify_properties(BASE),
155 122_x_ y = identify_properties(COMP),
156 122_x_ by = "VARIABLE",
157 122_x_ all = TRUE,
158 122_x_ sort = TRUE,
159 122_x_ suffixes = c(".BASE", ".COMP")
160 )
161
162 122_x_ PROPS <- subset(PROPS, select = c("VARIABLE", "ATTRIBS.BASE", "ATTRIBS.COMP"))
163
164 122_x_ PROPS <- PROPS[PROPS[["VARIABLE"]] %in% matching_cols, , drop = FALSE]
165
166
167 ### Setup dummy return value
168 122_x_ RETURN <- tibble(
169 122_x_ VARIABLE = character(),
170 122_x_ ATTR_NAME = character(),
171 122_x_ VALUES.BASE = list(),
172 122_x_ VALUES.COMP = list()
173 )
174
175 122_x_ for (i in PROPS[["VARIABLE"]]) {
176 1157_x_ PROPS_filt <- PROPS[PROPS[["VARIABLE"]] == i, , drop = FALSE]
177
178 ### Get a vector of all available attributes across both variables
179 1157_x_ ATTRIB_NAMES <- unique(c(
180 1157_x_ names(PROPS_filt[["ATTRIBS.BASE"]][[1]]),
181 1157_x_ names(PROPS_filt[["ATTRIBS.COMP"]][[1]])
182 ))
183
184 ### If variable has no attributes move onto the next variable
185 850_x_ if (is.null(ATTRIB_NAMES)) next()
186
187 ### Loop over each attribute checking if they are identical and outputing
188 ### anyones that arn't
189 307_x_ for (j in ATTRIB_NAMES) {
190 528_x_ ATTRIB_BASE <- PROPS_filt[["ATTRIBS.BASE"]][[1]][j]
191 528_x_ ATTRIB_COMP <- PROPS_filt[["ATTRIBS.COMP"]][[1]][j]
192
193 528_x_ if (!identical(ATTRIB_BASE, ATTRIB_COMP)) {
194 64_x_ ATT_DIFFS <- tibble(
195 64_x_ VARIABLE = i,
196 64_x_ ATTR_NAME = j,
197 64_x_ VALUES.BASE = ifelse(is.null(ATTRIB_BASE), list(), ATTRIB_BASE),
198 64_x_ VALUES.COMP = ifelse(is.null(ATTRIB_COMP), list(), ATTRIB_COMP)
199 )
200
201 64_x_ RETURN <- rbind(RETURN, ATT_DIFFS)
202 }
203 }
204 }
205 122_x_ return(RETURN)
206 }
207
208
209
210
211
212 #' identify_differences
213 #'
214 #' Compares each column within 2 datasets to identify any values which they
215 #' mismatch on.
216 #' @param BASE Base dataset for comparison (data.frame)
217 #' @param COMP Comparator dataset to compare base against (data.frame)
218 #' @param KEYS List of variables that define a unique row within the datasets (strings)
219 #' @param exclude_cols Columns to exclude from comparison
220 #' @param tolerance Level of tolerance for numeric differences between two variables
221 #' @param scale Scale that tolerance should be set on. If NULL assume absolute
222 #' @keywords internal
223 identify_differences <- function(
224 BASE,
225 COMP,
226 KEYS,
227 exclude_cols,
228 tolerance = sqrt(.Machine$double.eps),
229 scale = NULL
230 ) {
231
232 112_x_ matching_cols <- identify_matching_cols(BASE, COMP, c(KEYS, exclude_cols))
233
234 112_x_ if (length(matching_cols) == 0) {
235 2_x_ return(tibble())
236 }
237
238 110_x_ DAT <- merge(
239 110_x_ x = BASE,
240 110_x_ y = COMP,
241 110_x_ by = KEYS,
242 110_x_ suffix = c(".x", ".y"),
243 110_x_ sort = TRUE
244 )
245 110_x_ if (nrow(DAT) == 0) {
246 4_x_ return(tibble())
247 }
248 106_x_ DAT <- DAT[do.call("order", DAT[KEYS]), ]
249
250 106_x_ matching_list <- mapply(
251 106_x_ is_variable_different,
252 106_x_ matching_cols,
253 106_x_ MoreArgs = list(
254 106_x_ keynames = KEYS,
255 106_x_ datain = DAT,
256 106_x_ tolerance = tolerance,
257 106_x_ scale = scale
258 ),
259 106_x_ SIMPLIFY = FALSE
260 )
261
262 106_x_ matching_list
263 }
264
265
266
267
268
269
270
271
272
273 #' identify_properties
274 #'
275 #' Returns a dataframe of metadata for a given dataset.
276 #' Returned values include variable names , class , mode , type & attributes
277 #' @param dsin input dataframe that you want to get the metadata from
278 #' @importFrom tibble tibble
279 #' @keywords internal
280 identify_properties <- function(dsin) {
281 ### If missing or null return empty dataset
282 1546_x_ if (is.null(dsin)) {
283 ! x <- tibble(
284 ! VARIABLE = character(),
285 ! CLASS = list(),
286 ! MODE = character(),
287 ! TYPE = character(),
288 ! ATTRIBS = list()
289 )
290 ! return(x)
291 }
292
293 1546_x_ tibble(
294 1546_x_ VARIABLE = names(dsin),
295 1546_x_ CLASS = lapply(dsin, class),
296 1546_x_ MODE = sapply(dsin, mode),
297 1546_x_ TYPE = sapply(dsin, typeof),
298 1546_x_ ATTRIBS = lapply(dsin, attributes)
299 )
300 }
301
302
303 #' Find column ordering differences
304 #'
305 #' Compares two datasets and outputs a table listing any differences in the column
306 #' orders between the two datasets. Columns that are not contained within both
307 #' are ignored however column ordering is derived prior to removing these columns.
308 #'
309 #' @param BASE (`data.frame`)\cr Base dataset for comparison
310 #' @param COMP (`data.frame`)\cr Comparator dataset to compare base against
311 #' @keywords internal
312 identify_column_order_differences <- function(BASE, COMP) {
313 8_x_ base_cols <- tibble(
314 8_x_ COLUMN = names(BASE),
315 8_x_ "BASE-INDEX" = seq_along(names(BASE))
316 )
317 8_x_ comp_cols <- tibble(
318 8_x_ COLUMN = names(COMP),
319 8_x_ "COMPARE-INDEX" = seq_along(names(COMP))
320 )
321 8_x_ col_index <- merge(
322 8_x_ base_cols,
323 8_x_ comp_cols,
324 8_x_ by = c("COLUMN"),
325 8_x_ all = TRUE,
326 8_x_ sort = FALSE
327 )
328 8_x_ keep_rows <- col_index[["BASE-INDEX"]] != col_index[["COMPARE-INDEX"]]
329 8_x_ keep_rows[is.na(keep_rows)] <- FALSE
330 8_x_ col_index[keep_rows, , drop = FALSE]
331 }
1 #' sort_then_join
2 #'
3 #' Convenience function to sort two strings and paste them together
4 #' @param string1 first string
5 #' @param string2 second string
6 #' @keywords internal
7 sort_then_join <- function(string1, string2) {
8 14_x_ paste0(sort(c(string1, string2)), collapse = "")
9 }
10
11
12 #' class_merge
13 #'
14 #' Convenience function to put all classes an object has into one string
15 #' @param x an object
16 #' @keywords internal
17 class_merge <- function(x) {
18 304_x_ paste(class(x), collapse = "_")
19 }
20
21
22 get_message <- function(colname, whichdat, totype) {
23 12_x_ message(paste0(
24 12_x_ "NOTE: Variable ", colname, " in ", tolower(whichdat), " was casted to ", totype
25 ))
26 }
27
28
29 #' get_casted_vector
30 #'
31 #' casts a vector depending on its type and input
32 #' @param colin column to cast
33 #' @param colname name of vector
34 #' @param whichdat whether base or compare is being casted (used for messages)
35 #' @importFrom methods is
36 #' @keywords internal
37 get_casted_vector <- function(colin, colname, whichdat) {
38 24_x_ if (is(colin, "factor")) {
39 6_x_ get_message(colname, whichdat, "character")
40 6_x_ return(as.character(colin))
41 }
42
43 18_x_ if (is(colin, "integer")) {
44 6_x_ get_message(colname, whichdat, "numeric")
45 6_x_ return(as.numeric(colin))
46 }
47
48 12_x_ colin
49 }
50
51
52
53 #' get_casted_dataset
54 #'
55 #' Internal utility function to loop across a dataset casting all target
56 #' variables
57 #' @param df dataset to be casted
58 #' @param columns columns to be casted
59 #' @param whichdat whether base or compare is being casted (used for messages)
60 #' @keywords internal
61 get_casted_dataset <- function(df, columns, whichdat) {
62 22_x_ for (col in columns) {
63 24_x_ df[[col]] <- get_casted_vector(df[[col]], col, whichdat)
64 }
65 22_x_ return(df)
66 }
67
68
69
70
71
72
73 #' cast_variables
74 #'
75 #' Function to cast datasets columns if they have differing types
76 #' Restricted to specific cases, currently integer and double, and character and factor
77 #' @param BASE base dataset
78 #' @param COMPARE comparison dataset
79 #' @param ignore_vars Variables not to be considered for casting
80 #' @param cast_integers Logical - Whether integers should be cased to double when compared to doubles
81 #' @param cast_factors Logical - Whether characters should be casted to characters when compared to characters
82 #' @keywords internal
83 cast_variables <- function(
84 BASE,
85 COMPARE,
86 ignore_vars = NULL,
87 cast_integers = FALSE,
88 cast_factors = FALSE
89 ) {
90
91 13_x_ allowed_class_casts <- c("integernumeric", "characterfactor")[c(cast_integers, cast_factors)]
92
93 13_x_ BASE_class <- data.frame(
94 13_x_ class_BASE = sapply(BASE, class_merge),
95 13_x_ colname = names(BASE),
96 13_x_ stringsAsFactors = FALSE
97 )
98 13_x_ BASE_class <- BASE_class[!BASE_class[["colname"]] %in% ignore_vars, , drop = FALSE]
99
100
101 13_x_ COMPARE_class <- data.frame(
102 13_x_ class_COMPARE = sapply(COMPARE, class_merge),
103 13_x_ colname = names(COMPARE),
104 13_x_ stringsAsFactors = FALSE
105 )
106 13_x_ COMPARE_class <- COMPARE_class[!COMPARE_class[["colname"]] %in% ignore_vars, , drop = FALSE]
107
108 13_x_ common_class <- merge(
109 13_x_ x = BASE_class,
110 13_x_ y = COMPARE_class,
111 13_x_ by = "colname"
112 )
113
114
115 13_x_ diff_class <- common_class[common_class[["class_BASE"]] != common_class[["class_COMPARE"]], , drop = FALSE]
116
117
118 13_x_ diff_class$classmerge <- mapply(
119 13_x_ sort_then_join,
120 13_x_ diff_class$class_COMPARE,
121 13_x_ diff_class$class_BASE
122 )
123
124
125 13_x_ cast_columns <- diff_class[diff_class[["classmerge"]] %in% allowed_class_casts, , drop = FALSE]
126
127
128 13_x_ DATASETS <- list(
129 13_x_ "BASE" = BASE,
130 13_x_ "COMPARE" = COMPARE
131 )
132
133 13_x_ if (nrow(cast_columns) == 0) {
134 2_x_ return(DATASETS)
135 }
136
137
138 11_x_ for (i in names(DATASETS)) {
139 22_x_ DATASETS[[i]] <- get_casted_dataset(DATASETS[[i]], cast_columns$colname, i)
140 }
141
142 11_x_ return(DATASETS)
143 }
1 #' Pad String
2 #'
3 #' Utility function used to replicate `str_pad`. Adds white space to either end
4 #' of a string to get it to equal the desired length
5 #' @param x string
6 #' @param width desired length
7 #' @keywords internal
8 string_pad <- function(x, width) {
9 1027_x_ if (nchar(x) >= width) {
10 ! return(x)
11 }
12 1027_x_ width <- width - nchar(x)
13 1027_x_ left <- paste0(rep(" ", floor(width / 2)), collapse = "")
14 1027_x_ right <- paste0(rep(" ", ceiling(width / 2)), collapse = "")
15 1027_x_ paste0(left, x, right, collapse = "")
16 }
17
18
19 #' recursive_reduce
20 #'
21 #' Utility function used to replicated `purrr::reduce`. Recursively applies a
22 #' function to a list of elements until only 1 element remains
23 #' @param .l list of values to apply a function to
24 #' @param .f function to apply to each each element of the list in turn. See details.
25 #' @details
26 #' This function is essentially performing the following operation:
27 #' ```
28 #' .l[[1]] <- .f( .l[[1]] , .l[[2]]) ; .l[[1]] <- .f( .l[[1]] , .l[[3]])
29 #' ```
30 #' @keywords internal
31 recursive_reduce <- function(.l, .f) {
32 365_x_ if (length(.l) != 1) {
33 233_x_ .l[[2]] <- .f(.l[[1]], .l[[2]])
34 233_x_ return(recursive_reduce(.l[-1], .f))
35 } else {
36 132_x_ return(.l[[1]])
37 }
38 }
39
40 #' invert
41 #'
42 #' Utility function used to replicated `purrr::transpose`. Turns a list inside
43 #' out.
44 #' @param x list
45 #' @keywords internal
46 invert <- function(x) {
47 40_x_ x2 <- list()
48 40_x_ cnames <- names(x)
49 40_x_ tnames <- names(x[[1]])
50 40_x_ for (i in tnames) {
51 120_x_ x2[[i]] <- list()
52 120_x_ for (j in cnames) {
53 345_x_ x2[[i]][[j]] <- x[[j]][[i]]
54 }
55 }
56 40_x_ return(x2)
57 }
58
59
60
61
62 #' as_ascii_table
63 #'
64 #' This function takes a `data.frame` and attempts to convert it into
65 #' a simple ascii format suitable for printing to the screen
66 #' It is assumed all variable values have a `as.character()` method
67 #' in order to cast them to character.
68 #' @param dat Input dataset to convert into a ascii table
69 #' @param line_prefix Symbols to prefix in front of every line of the table
70 #' @keywords internal
71 as_ascii_table <- function(dat, line_prefix = " ") {
72 40_x_ n_col <- ncol(dat)
73 40_x_ n_row <- nrow(dat)
74
75 ## Convert every value to character and crop to a suitable length
76 40_x_ dat_char <- lapply(dat, as_fmt_char)
77
78
79 40_x_ hold <- list()
80 40_x_ COLS <- colnames(dat)
81
82 ### For each column extract core elements (width, values , title) and pad out
83 ### each string to be a suitable length
84 40_x_ for (i in seq_len(n_col)) {
85 115_x_ COL <- COLS[i]
86 115_x_ VALUES <- dat_char[[i]]
87
88 115_x_ JOINT <- c(COL, VALUES)
89 115_x_ WIDTH <- max(sapply(JOINT, nchar)) + 2
90
91 115_x_ hold[[COL]] <- list()
92 115_x_ hold[[COL]]$WIDTH <- WIDTH
93 115_x_ hold[[COL]]$VALUES <- sapply(VALUES, string_pad, width = WIDTH)
94 115_x_ hold[[COL]]$HEADER <- sapply(COL, string_pad, width = WIDTH)
95 }
96
97 ### Collapse into a single value per component ( title , values, width )
98 40_x_ thold <- invert(hold)
99 40_x_ tvals <- recursive_reduce(thold$VALUES, paste0)
100 40_x_ thead <- recursive_reduce(thold$HEADER, paste0)
101 40_x_ twidth <- recursive_reduce(thold$WIDTH, sum)
102
103 ### Create header and footer lines
104 40_x_ TLINE <- paste0(rep("=", twidth), collapse = "")
105 40_x_ LINE <- paste0(rep("-", twidth), collapse = "")
106 40_x_ FVALS <- paste0(line_prefix, tvals, collapse = "\n")
107
108 ### Output table
109 40_x_ paste0(
110 40_x_ line_prefix, TLINE, "\n",
111 40_x_ line_prefix, thead, "\n",
112 40_x_ line_prefix, LINE, "\n",
113 40_x_ FVALS, "\n",
114 40_x_ line_prefix, LINE
115 )
116 }
117
118
119 #' as_character
120 #'
121 #' Stub function to enable mocking in unit tests
122 as_character <- as.character
123
124 #' Format vector to printable string
125 #'
126 #' Coerces a vector of any type into a printable string. The most
127 #' significant transformation is performed on existing character
128 #' vectors which will be truncated, have newlines converted
129 #' to explicit symbols and will be wrapped in quotes if they
130 #' contain white space.
131 #'
132 #' @param x (`vector`) \cr vector to be converted to character
133 #' @param add_quotes (`logical`) \cr if true will wrap strings that contain
134 #' whitespace with quotes
135 #' @param crop_at (`numeric`) \cr specifies the limit at which strings should
136 #' be truncated to
137 #' @param ... additional arguments (not currently used)
138 #'
139 #' @name as_fmt_char
140 #' @keywords internal
141 as_fmt_char <- function(x, ...) {
142 180_x_ UseMethod("as_fmt_char")
143 }
144
145 #' @rdname as_fmt_char
146 #' @export
147 as_fmt_char.numeric <- function(x, ...) {
148 34_x_ format(x, digits = 7, justify = "right")
149 }
150
151 #' @rdname as_fmt_char
152 #' @export
153 as_fmt_char.NULL <- function(x, ...) {
154 ! ""
155 }
156
157 #' @importFrom utils capture.output
158 #' @rdname as_fmt_char
159 #' @export
160 as_fmt_char.list <- function(x, ...) {
161 15_x_ vapply(
162 15_x_ x,
163 15_x_ function(x) {
164 54_x_ if (is.numeric(x)) {
165 ! return(as_fmt_char(x))
166 }
167 54_x_ if (is.character(x) & length(x) == 1) {
168 54_x_ return(as_fmt_char(x))
169 }
170 ! as_fmt_char(
171 ! paste(capture.output(dput(x)), collapse = " "),
172 ! add_quotes = FALSE
173 )
174 },
175 15_x_ character(1)
176 )
177 }
178
179 #' @rdname as_fmt_char
180 #' @export
181 as_fmt_char.factor <- function(x, ...) {
182 3_x_ as_fmt_char(as.character(x))
183 }
184
185 #' @rdname as_fmt_char
186 #' @export
187 as_fmt_char.character <- function(x, add_quotes = TRUE, crop_at = 30, ...) {
188 123_x_ needs_quotes <- grepl("\\s", x) & add_quotes
189
190 123_x_ x[is.na(x)] <- ""
191
192 # Replace \nl \cr with tags to stop print message splitting over
193 # multiple lines
194 123_x_ x <- gsub("\x0D", "", x)
195 123_x_ x <- gsub("\x0A", "", x)
196
197 123_x_ charlength <- vapply(x, nchar, numeric(1))
198 123_x_ x <- substr(x, 1, crop_at)
199 123_x_ x[charlength > crop_at] <- paste0(x[charlength > crop_at], "...")
200
201 # Add enclosing " " around strings with white space so that it can be
202 # clearly identified in the printed output
203 123_x_ x[needs_quotes] <- paste0('"', x[needs_quotes], '"')
204
205 123_x_ return(x)
206 }
207
208
209 #' @rdname as_fmt_char
210 #' @export
211 as_fmt_char.default <- function(x, ...) {
212 6_x_ x_char <- as_character(x)
213 6_x_ assertthat::assert_that(
214 6_x_ is.character(x_char),
215 6_x_ msg = sprintf(
216 6_x_ "Unable to convert class `'%s'` to character for printing purposes",
217 6_x_ paste(class(x), collapse = "', '")
218 )
219 )
220 5_x_ as_fmt_char.character(x_char, add_quotes = FALSE)
221 }
222
223
224 #' @rdname as_fmt_char
225 #' @export
226 as_fmt_char.POSIXt <- function(x, ...) {
227 4_x_ format(x, "%Y-%m-%d %H:%M:%S %Z")
228 }
229
230
231 #' get_table
232 #'
233 #' Generate nice looking table from a data frame
234 #' @param dsin dataset
235 #' @inheritParams print.diffdf
236 #' @keywords internal
237 get_table <- function(dsin, row_limit = 10) {
238 37_x_ if (nrow(dsin) == 0) {
239 ! return("")
240 }
241 37_x_ if (!is.null(row_limit)) {
242 34_x_ assertthat::assert_that(
243 34_x_ assertthat::is.number(row_limit),
244 34_x_ row_limit > 0,
245 34_x_ msg = "row_limit must be a positive integer"
246 )
247 }
248 37_x_ if (is.null(row_limit)) {
249 3_x_ display_table <- dsin
250 } else {
251 34_x_ display_table <- subset(dsin, seq_len(nrow(dsin)) < (row_limit + 1))
252 }
253
254 37_x_ add_message <- if (!is.null(row_limit) && nrow(dsin) > row_limit) {
255 5_x_ paste0(
256 5_x_ "First ",
257 5_x_ row_limit,
258 5_x_ " of ",
259 5_x_ nrow(dsin),
260 5_x_ " rows are shown in table below"
261 )
262 } else {
263 32_x_ NULL
264 }
265
266 37_x_ msg <- paste(
267 37_x_ c(
268 37_x_ add_message,
269 37_x_ as_ascii_table(display_table)
270 ),
271 37_x_ collapse = "\n"
272 )
273 37_x_ return(msg)
274 }
1 #' is_variable_different
2 #'
3 #' This subsets the data set on the variable name, picks out differences and returns a `tibble`
4 #' of differences for the given variable
5 #' @importFrom tibble as_tibble
6 #' @param variablename name of variable being compared
7 #' @param keynames name of keys
8 #' @param datain Inputted dataset with base and compare vectors
9 #' @param ... Additional arguments which might be passed through (numerical accuracy)
10 #' @return A boolean vector which is T if target and current are different
11 #' @keywords internal
12 is_variable_different <- function(variablename, keynames, datain, ...) {
13 917_x_ xvar <- paste0(variablename, ".x")
14 917_x_ yvar <- paste0(variablename, ".y")
15
16 917_x_ assertthat::assert_that(
17 917_x_ xvar %in% names(datain) && yvar %in% names(datain),
18 917_x_ msg = "Variable does not exist within input dataset"
19 )
20
21 917_x_ target <- datain[[xvar]]
22 917_x_ current <- datain[[yvar]]
23 917_x_ outvect <- find_difference(target, current, ...)
24
25 917_x_ datain[["VARIABLE"]] <- variablename
26
27 917_x_ names(datain)[names(datain) %in% c(xvar, yvar)] <- c("BASE", "COMPARE")
28
29 917_x_ x <- as_tibble(
30 917_x_ subset(
31 917_x_ datain,
32 917_x_ outvect,
33 917_x_ select = c("VARIABLE", keynames, "BASE", "COMPARE")
34 )
35 )
36
37 917_x_ return(x)
38 }
39
40 #' compare_vectors
41 #'
42 #' Compare two vectors looking for differences
43 #'
44 #' @param target the base vector
45 #' @param current a vector to compare target to
46 #' @param ... Additional arguments which might be passed through (numerical accuracy)
47 #' @keywords internal
48 compare_vectors <- function(target, current, ...) {
49 950_x_ UseMethod("compare_vectors")
50 }
51
52
53 #' find_difference
54 #'
55 #' This determines if two vectors are different. It expects vectors of the same
56 #' length and type, and is intended to be used after checks have already been done
57 #' Initially picks out any `NA`'s (matching `NA`'s count as a match)
58 #' Then compares remaining vector
59 #'
60 #' @param target the base vector
61 #' @param current a vector to compare target to
62 #' @param ... Additional arguments which might be passed through (numerical accuracy)
63 #' @keywords internal
64 find_difference <- function(target, current, ...) {
65 953_x_ if (length(target) != length(current)) {
66 2_x_ warning("Inputs are not of the same length")
67 2_x_ return(NULL)
68 }
69
70 951_x_ if (is.null(target) |
71 1_x_ return(is.null(target) != is.null(current))
72 }
73
74 ### Initalise output, assume problem unless evidence otherwise
75 950_x_ return_vector <- rep(TRUE, length(target))
76
77 950_x_ nas_t <- is.na(target)
78 950_x_ nas_c <- is.na(current)
79
80 ## compare missing values
81 950_x_ nacompare <- nas_t != nas_c
82 950_x_ naselect <- nas_t | nas_c
83 950_x_ return_vector[naselect] <- nacompare[naselect]
84
85 ## compare non-missing values
86 950_x_ selectvector <- as.logical((!nas_t) * (!nas_c))
87
88 950_x_ comparevect <- compare_vectors(
89 950_x_ target[selectvector],
90 950_x_ current[selectvector],
91 ...
92 )
93
94 950_x_ return_vector[selectvector] <- comparevect
95
96 950_x_ return(return_vector)
97 }
98
99
100
101
102
103
104
105 #' compare_vectors.default
106 #'
107 #' Default method, if the vector is not numeric or factor. Basic comparison
108 #' @param target the base vector
109 #' @param current a vector to compare target to
110 #' @param ... Additional arguments which might be passed through (numerical accuracy)
111 #' @keywords internal
112 compare_vectors.default <- function(target, current, ...) {
113 408_x_ target != current
114 }
115
116
117
118
119 #' compare_vectors.factor
120 #'
121 #' Compares factors. Sets them as character and then compares
122 #' @param target the base vector
123 #' @param current a vector to compare target to
124 #' @param ... Additional arguments which might be passed through (numerical accuracy)
125 #' @keywords internal
126 compare_vectors.factor <- function(target, current, ...) {
127 83_x_ as.character(target) != as.character(current)
128 }
129
130
131
132
133
134 #' compare_vectors.numeric
135 #'
136 #' This is a modified version of the all.equal function
137 #' which returns a vector rather than a message
138 #' @param target the base vector
139 #' @param current a vector to compare target to
140 #' @param tolerance Level of tolerance for differences between two variables
141 #' @param scale Scale that tolerance should be set on. If NULL assume absolute
142 #' @param ... Not used
143 #' @keywords internal
144 compare_vectors.numeric <- function(
145 target,
146 current,
147 tolerance = sqrt(.Machine$double.eps),
148 scale = NULL,
149 ...
150 ) {
151
152 459_x_ out <- target == current
153
154 459_x_ if (all(out)) {
155 433_x_ return(!out)
156 }
157
158 26_x_ if (is.integer(target) |
159 7_x_ target <- as.double(target)
160 7_x_ current <- as.double(current)
161 }
162
163 26_x_ xy <- abs(target - current)
164
165 26_x_ if (!is.null(scale)) {
166 4_x_ xy <- xy / scale
167 }
168
169 26_x_ return(xy > tolerance)
170 }
171
172 #' compare_vectors.int64
173 #'
174 #' Handle int64 vectors. Uses numeric comparison
175 #' @param target the base vector
176 #' @param current a vector to compare target to
177 #' @param tolerance Level of tolerance for differences between two variables
178 #' @param scale Scale that tolerance should be set on. If NULL assume absolute
179 #' @param ... Not used
180 #' @keywords internal
181 compare_vectors.integer64 <- function(
182 target,
183 current,
184 tolerance = sqrt(.Machine$double.eps),
185 scale = NULL,
186 ...
187 ) {
188 4_x_ compare_vectors.numeric(target, current, tolerance, scale)
189 }
1 #' construct_issue
2 #'
3 #' Make an s3 object with class issue and possible additional class,
4 #' and assign other arguments to attributes
5 #' @param value the value of the object
6 #' @param message the value of the message attribute
7 #' @param add_class additional class to add
8 #' @keywords internal
9 construct_issue <- function(value, message, add_class = NULL) {
10 2180_x_ x <- value
11
12 ### If nothing has been provided return nothing !
13 2180_x_ if (nrow(x) == 0) {
14 1913_x_ return(NULL)
15 }
16
17 267_x_ class(x) <- c(add_class, "issue", class(x))
18 267_x_ attributes(x)[["message"]] <- message
19 267_x_ return(x)
20 }
21
22
23 #' get_issue_message
24 #'
25 #' Simple function to grab the issue message
26 #' @param object inputted object of class issue
27 #' @param ... other arguments
28 #' @keywords internal
29 get_issue_message <- function(object, ...) {
30 134_x_ return(attr(object, "message"))
31 }
32
33
34 #' get_print_message
35 #'
36 #' Get the required text depending on type of issue
37 #' @param object inputted object of class issue
38 #' @param ... other arguments
39 #' @keywords internal
40 get_print_message <- function(object, ...) {
41 37_x_ UseMethod("get_print_message", object)
42 }
43
44
45 #' get_print_message.default
46 #'
47 #' Errors, as this should only ever be given an issue
48 #' @param object issue
49 #' @param ... Not used
50 #' @keywords internal
51 get_print_message.default <- function(object, ...) {
52 ! stop("Error: An issue has not been provided to this function!")
53 }
54
55
56 #' get_print_message.issue
57 #'
58 #' Get text from a basic issue, based on the class of the value of the issue
59 #'
60 #' @param object an object of class issue_basic
61 #' @inheritParams print.diffdf
62 #' @keywords internal
63 get_print_message.issue <- function(object, row_limit, ...) {
64 37_x_ paste(
65 37_x_ c(
66 37_x_ attr(object, "message"),
67 37_x_ get_table(object, row_limit = row_limit),
68 37_x_ "\n\n"
69 ),
70 37_x_ collapse = "\n"
71 )
72 }
1 #' Print diffdf objects
2 #'
3 #' Print nicely formatted version of an diffdf object
4 #' @param x comparison object created by diffdf().
5 #' @param ... Additional arguments (not used)
6 #' @param row_limit Max row limit for difference tables (NULL to show all rows)
7 #' @param as_string Return printed message as an R character vector?
8 #' @examples
9 #' x <- subset(iris, -Species)
10 #' x[1, 2] <- 5
11 #' COMPARE <- diffdf(iris, x)
12 #' print(COMPARE)
13 #' print(COMPARE, row_limit = 5)
14 #' @export
15 print.diffdf <- function(x, row_limit = 10, as_string = FALSE, ...) {
16 25_x_ if (!is.null(row_limit)) {
17 24_x_ assertthat::assert_that(
18 24_x_ assertthat::is.number(row_limit),
19 24_x_ row_limit > 0,
20 24_x_ msg = "row_limit must be a positive integer"
21 )
22 }
23 21_x_ assertthat::assert_that(
24 21_x_ assertthat::is.flag(as_string)
25 )
26 19_x_ COMPARE <- x
27
28 19_x_ if (length(COMPARE) == 0) {
29 6_x_ outtext <- "No issues were found!\n"
30 } else {
31 13_x_ start_text <- paste0("Differences found between the objects!\n\n")
32 13_x_ end_text <- lapply(COMPARE, function(x) get_print_message(x, row_limit))
33 13_x_ end_text <- paste0(unlist(end_text), collapse = "")
34 13_x_ outtext <- paste0(start_text, end_text)
35 }
36 19_x_ if (as_string) {
37 7_x_ return(strsplit(outtext, "\n")[[1]])
38 } else {
39 12_x_ cat(outtext)
40 12_x_ return(invisible(COMPARE))
41 }
42 }
1 #' Generate unique key name
2 #'
3 #' Function to generate a name for the keys if not provided
4 #'
5 #' @param BASE base dataset
6 #' @param COMP comparison dataset
7 #' @param replace_names a vector of replacement names. Used for recursion, should be edited in function for clarity
8 #'
9 #' @keywords internal
10 generate_keyname <- function(
11 BASE,
12 COMP,
13 replace_names = c("..ROWNUMBER..", "..RN..", "..ROWN..", "..N..")
14 ) {
15 193_x_ assertthat::assert_that(
16 193_x_ is(replace_names, "character"),
17 193_x_ msg = "replace_names is not a character vector"
18 )
19
20 193_x_ assertthat::assert_that(
21 193_x_ length(replace_names) != 0,
22 193_x_ msg = "All default row names are in use in BASE/COMPARE. Please provide a KEY argument"
23 )
24
25
26 185_x_ key_name <- replace_names[1]
27
28 185_x_ if (!is.null(BASE[[key_name]]) |
29 66_x_ key_name <- generate_keyname(BASE, COMP, replace_names[-1])
30 }
31 153_x_ return(key_name)
32 }