(original) (raw)

1	#' Identify Issue Rows
2	#'
3	#' This function takes a `diffdf` object and a dataframe and subsets
4	#' the `data.frame` for problem rows as identified in the comparison object.
5	#' If \code{vars} has been specified only issue rows associated with those
6	#' variable(s) will be returned.
7	#' @param df dataframe to be subsetted
8	#' @param diff diffdf object
9	#' @param vars (optional) character vector containing names of issue variables to subset dataframe
10	#' on. A value of NULL (default) will be taken to mean available issue variables.
11	#' @examples
12	#' iris2 <- iris
13	#' for (i in 1:3) iris2[i, i] <- 99
14	#' x <- diffdf(iris, iris2, suppress_warnings = TRUE)
15	#' diffdf_issuerows(iris, x)
16	#' diffdf_issuerows(iris2, x)
17	#' diffdf_issuerows(iris2, x, vars = "Sepal.Length")
18	#' diffdf_issuerows(iris2, x, vars = c("Sepal.Length", "Sepal.Width"))
19	#' @details
20	#' Note that `diffdf_issuerows` can be used to subset against any dataframe. The only
21	#' requirement is that the original variables specified in the keys argument to diffdf
22	#' are present on the dataframe you are subsetting against. However please note that if
23	#' no keys were specified in diffdf then the row number is used. This means using
24	#' `diffdf_issuerows` without a keys against an arbitrary dataset can easily result in
25	#' nonsense rows being returned. It is always recommended to supply keys to diffdf.
26	#' @export
27	diffdf_issuerows <- function(df, diff, vars = NULL) {
28
29	19_x_	assertthat::assert_that(
30	19_x_	class(diff)[[1]] == "diffdf"
31	)
32
33
34	19_x_	KEYS_ATT <- attr(diff, "keys")
35
36	19_x_	assertthat::assert_that(
37	19_x_	!is.null(KEYS_ATT),
38	19_x_	msg = "diff is missing the keys attribute"
39	)
40
41
42	19_x_	issue_vars <- names(diff)[grep("^VarDiff_", names(diff))]
43
44	19_x_	if (is.null(vars)) {
45	15_x_	vars <- issue_vars
46	} else {
47	4_x_	vars <- paste0("VarDiff_", vars)
48	}
49
50	19_x_	if (length(issue_vars) == 0 \|
51	7_x_	return(df[FALSE, ])
52	}
53
54	12_x_	KEEP <- mapply(
55	12_x_	FUN = get_issue_dataset,
56	12_x_	issue = vars,
57	12_x_	diff = list(diff),
58	12_x_	SIMPLIFY = FALSE
59	)
60
61	12_x_	KEEP <- recursive_reduce(KEEP, rbind)
62	12_x_	KEEP <- KEEP[!duplicated(KEEP), ]
63
64	12_x_	if (KEYS_ATT$is_derived) {
65	2_x_	df[[KEYS_ATT$value]] <- seq_len(nrow(df))
66	}
67
68	12_x_	keys <- KEYS_ATT$value
69
70	12_x_	if (any(!keys %in% names(df))) {
71	1_x_	stop("df does not contain all variables specified as keys in diff")
72	}
73
74	11_x_	RET <- merge(
75	11_x_	x = df,
76	11_x_	y = KEEP,
77	11_x_	sort = TRUE
78	)
79
80	11_x_	RET <- RET[do.call("order", RET[keys]), ]
81
82	11_x_	if (KEYS_ATT$is_derived) {
83	2_x_	keep_vars <- !names(RET) %in% KEYS_ATT$value
84	2_x_	RET <- RET[, keep_vars, drop = FALSE]
85	}
86
87	11_x_	return(RET)
88	}
89
90
91
92
93	#' get_issue_dataset
94	#'
95	#' Internal function used by `diffdf_issuerows` to extract the dataframe
96	#' from each a target issue. In particular it also strips off any
97	#' non-key variables
98	#' @param issue name of issue to extract the dataset from diff
99	#' @param diff diffdf object which contains issues
100	#' @keywords internal
101	get_issue_dataset <- function(issue, diff) {
102	20_x_	issue_df <- diff[[issue]]
103	20_x_	keep <- names(issue_df)[!(names(issue_df) %in% c("BASE", "COMPARE", "VARIABLE"))]
104	20_x_	issue_df[, keep, drop = FALSE]
105	}

1	#' diffdf
2	#' @description
3	#' Compares 2 dataframes and outputs any differences.
4	#' @param base input dataframe
5	#' @param compare comparison dataframe
6	#' @param keys vector of variables (as strings) that defines a unique row in
7	#' the base and compare dataframes
8	#' @param strict_numeric Flag for strict numeric to numeric comparisons
9	#' (default = TRUE). If False diffdf will cast integer to double where
10	#' required for comparisons. Note that variables specified in the keys
11	#' will never be casted.
12	#' @param strict_factor Flag for strict factor to character comparisons
13	#' (default = TRUE). If False diffdf will cast factors to characters where
14	#' required for comparisons. Note that variables specified in the keys will
15	#' never be casted.
16	#' @param suppress_warnings Do you want to suppress warnings? (logical)
17	#' @param file Location and name of a text file to output the results to.
18	#' Setting to NULL will cause no file to be produced.
19	#' @param tolerance Set tolerance for numeric comparisons. Note that
20	#' comparisons fail if (x-y)/scale > tolerance.
21	#' @param scale Set scale for numeric comparisons. Note that comparisons fail
22	#' if (x-y)/scale > tolerance. Setting as NULL is a slightly more efficient
23	#' version of scale = 1.
24	#' @param check_column_order Should the column ordering be checked? (logical)
25	#' @param check_df_class Do you want to check for differences in the class
26	#' between `base` and `compare`? (logical)
27	#' @examples
28	#' x <- subset(iris, -Species)
29	#' x[1, 2] <- 5
30	#' COMPARE <- diffdf(iris, x)
31	#' print(COMPARE)
32	#'
33	#' #### Sample data frames
34	#'
35	#' DF1 <- data.frame(
36	#' id = c(1, 2, 3, 4, 5, 6),
37	#' v1 = letters[1:6],
38	#' v2 = c(NA, NA, 1, 2, 3, NA)
39	#' )
40	#'
41	#' DF2 <- data.frame(
42	#' id = c(1, 2, 3, 4, 5, 7),
43	#' v1 = letters[1:6],
44	#' v2 = c(NA, NA, 1, 2, NA, NA),
45	#' v3 = c(NA, NA, 1, 2, NA, 4)
46	#' )
47	#'
48	#' diffdf(DF1, DF1, keys = "id")
49	#'
50	#' # We can control matching with scale/location for example:
51	#'
52	#' DF1 <- data.frame(
53	#' id = c(1, 2, 3, 4, 5, 6),
54	#' v1 = letters[1:6],
55	#' v2 = c(1, 2, 3, 4, 5, 6)
56	#' )
57	#' DF2 <- data.frame(
58	#' id = c(1, 2, 3, 4, 5, 6),
59	#' v1 = letters[1:6],
60	#' v2 = c(1.1, 2, 3, 4, 5, 6)
61	#' )
62	#'
63	#' diffdf(DF1, DF2, keys = "id")
64	#' diffdf(DF1, DF2, keys = "id", tolerance = 0.2)
65	#' diffdf(DF1, DF2, keys = "id", scale = 10, tolerance = 0.2)
66	#'
67	#' # We can use strict_factor to compare factors with characters for example:
68	#'
69	#' DF1 <- data.frame(
70	#' id = c(1, 2, 3, 4, 5, 6),
71	#' v1 = letters[1:6],
72	#' v2 = c(NA, NA, 1, 2, 3, NA),
73	#' stringsAsFactors = FALSE
74	#' )
75	#'
76	#' DF2 <- data.frame(
77	#' id = c(1, 2, 3, 4, 5, 6),
78	#' v1 = letters[1:6],
79	#' v2 = c(NA, NA, 1, 2, 3, NA)
80	#' )
81	#'
82	#' diffdf(DF1, DF2, keys = "id", strict_factor = TRUE)
83	#' diffdf(DF1, DF2, keys = "id", strict_factor = FALSE)
84	#'
85	#' @export
86	diffdf <- function(
87	base,
88	compare,
89	keys = NULL,
90	suppress_warnings = FALSE,
91	strict_numeric = TRUE,
92	strict_factor = TRUE,
93	file = NULL,
94	tolerance = sqrt(.Machine$double.eps),
95	scale = NULL,
96	check_column_order = FALSE,
97	check_df_class = FALSE
98	) {
99
100	124_x_	assertthat::assert_that(
101	124_x_	assertthat::is.flag(check_df_class),
102	124_x_	!is.na(check_df_class),
103	124_x_	msg = "`check_df_class` must be a length 1 logical"
104	)
105
106	124_x_	BASE <- base
107	124_x_	COMP <- compare
108	124_x_	KEYS <- keys
109	124_x_	SUPWARN <- suppress_warnings
110
111	### Initatiate output object
112	124_x_	COMPARE <- list()
113	124_x_	class(COMPARE) <- c("diffdf", "list")
114
115
116	124_x_	BASE_NAME <- deparse(substitute(base))
117	124_x_	COMP_NAME <- deparse(substitute(compare))
118	124_x_	COMPARE[["DataSummary"]] <- construct_issue(
119	124_x_	value = describe_dataframe(BASE, COMP, BASE_NAME, COMP_NAME),
120	124_x_	message = "Summary of BASE and COMPARE"
121	)
122
123
124	124_x_	is_derived <- FALSE
125
126	### If no key is suplied match values based upon row number
127	124_x_	if (is.null(KEYS)) {
128	102_x_	is_derived <- TRUE
129	102_x_	keyname <- generate_keyname(BASE, COMP)
130	102_x_	BASE[[keyname]] <- seq_len(nrow(BASE))
131	102_x_	COMP[[keyname]] <- seq_len(nrow(COMP))
132	102_x_	KEYS <- keyname
133	}
134	124_x_	attr(COMPARE, "keys") <- list(value = KEYS, is_derived = is_derived)
135
136	124_x_	assertthat::assert_that(
137	124_x_	is.numeric(tolerance),
138	124_x_	is.numeric(scale) \|
139	)
140
141	120_x_	missing_keys_base <- KEYS[!KEYS %in% names(BASE)]
142	120_x_	assertthat::assert_that(
143	120_x_	length(missing_keys_base) == 0,
144	120_x_	msg = sprintf(
145	120_x_	"The following KEYS are not available in BASE:\n %s",
146	120_x_	paste(missing_keys_base, collapse = "\n ")
147	)
148	)
149
150	119_x_	missing_keys_comp <- KEYS[!KEYS %in% names(COMP)]
151	119_x_	assertthat::assert_that(
152	119_x_	length(missing_keys_comp) == 0,
153	119_x_	msg = sprintf(
154	119_x_	"The following KEYS are not available in COMPARE:\n %s",
155	119_x_	paste(missing_keys_comp, collapse = "\n ")
156	)
157	)
158
159	118_x_	assertthat::assert_that(
160	118_x_	has_unique_rows(BASE, KEYS),
161	118_x_	msg = "BY variables in BASE do not result in unique observations"
162	)
163
164	117_x_	assertthat::assert_that(
165	117_x_	has_unique_rows(COMP, KEYS),
166	117_x_	msg = "BY variables in COMPARE do not result in unique observations"
167	)
168
169
170
171	#### Check essential variable properties (class & mode)
172
173	117_x_	COMPARE[["UnsupportedColsBase"]] <- construct_issue(
174	117_x_	value = identify_unsupported_cols(BASE),
175	117_x_	message = "There are columns in BASE with unsupported modes !!"
176	)
177
178
179	117_x_	COMPARE[["UnsupportedColsComp"]] <- construct_issue(
180	117_x_	value = identify_unsupported_cols(COMP),
181	117_x_	message = "There are columns in COMPARE with unsupported modes !!"
182	)
183
184
185	# cast variables if strict is off
186	117_x_	if (!strict_factor \|
187	9_x_	casted_df <- cast_variables(
188	9_x_	BASE = BASE,
189	9_x_	COMPARE = COMP,
190	9_x_	ignore_vars = KEYS,
191	9_x_	cast_integers = !strict_numeric,
192	9_x_	cast_factors = !strict_factor
193	)
194
195	9_x_	BASE <- casted_df$BASE
196	9_x_	COMP <- casted_df$COMP
197	}
198
199
200	117_x_	COMPARE[["VarModeDiffs"]] <- construct_issue(
201	117_x_	value = identify_mode_differences(BASE, COMP),
202	117_x_	message = "There are columns in BASE and COMPARE with different modes !!"
203	)
204
205
206	117_x_	COMPARE[["VarClassDiffs"]] <- construct_issue(
207	117_x_	value = identify_class_differences(BASE, COMP),
208	117_x_	message = "There are columns in BASE and COMPARE with different classes !!"
209	)
210
211
212
213
214	##### Check Validity of Keys
215
216	117_x_	BASE_keys <- names(BASE)[names(BASE) %in% KEYS]
217	117_x_	COMP_keys <- names(COMP)[names(COMP) %in% KEYS]
218
219	117_x_	assertthat::assert_that(
220	117_x_	length(BASE_keys) == length(KEYS),
221	117_x_	msg = "BASE is missing variables specified in KEYS"
222	)
223
224	117_x_	assertthat::assert_that(
225	117_x_	length(COMP_keys) == length(KEYS),
226	117_x_	msg = "COMP is missing variables specified in KEYS"
227	)
228
229
230	117_x_	assert_valid_keys(
231	117_x_	COMPARE, KEYS, "UnsupportedColsBase",
232	117_x_	"The following KEYS in BASE have an unsupported mode (see `?mode()`)"
233	)
234	116_x_	assert_valid_keys(
235	116_x_	COMPARE, KEYS, "UnsupportedColsComp",
236	116_x_	"The following KEYS in COMPARE have an unsupported mode (see `?mode()`)"
237	)
238	115_x_	assert_valid_keys(
239	115_x_	COMPARE, KEYS, "VarModeDiffs",
240	115_x_	"The following KEYS have different modes between BASE and COMPARE"
241	)
242	113_x_	assert_valid_keys(
243	113_x_	COMPARE, KEYS, "VarClassDiffs",
244	113_x_	"The following KEYS have different classes between BASE and COMPARE"
245	)
246
247
248	112_x_	exclude_cols <- c(
249	112_x_	COMPARE[["UnsupportedColsBase"]]$VARIABLE,
250	112_x_	COMPARE[["UnsupportedColsComp"]]$VARIABLE,
251	112_x_	COMPARE[["VarClassDiffs"]]$VARIABLE,
252	112_x_	COMPARE[["VarModeDiffs"]]$VARIABLE
253	)
254
255	112_x_	if (check_column_order) {
256	5_x_	if (attr(COMPARE, "keys")$is_derived) {
257	4_x_	keep_vars_base <- !(names(BASE) %in% attr(COMPARE, "keys")$value)
258	4_x_	keep_vars_comp <- !(names(COMP) %in% attr(COMPARE, "keys")$value)
259	} else {
260	1_x_	keep_vars_base <- TRUE
261	1_x_	keep_vars_comp <- TRUE
262	}
263	5_x_	COMPARE[["ColumnOrder"]] <- construct_issue(
264	5_x_	value = identify_column_order_differences(
265	5_x_	BASE[, keep_vars_base, drop = FALSE],
266	5_x_	COMP[, keep_vars_comp, drop = FALSE]
267	),
268	5_x_	message = "There are differences in the column ordering between BASE and COMPARE !!"
269	)
270	}
271
272
273	##### Check Attributes
274	112_x_	COMPARE[["AttribDiffs"]] <- construct_issue(
275	112_x_	value = identify_att_differences(BASE, COMP, exclude_cols),
276	112_x_	message = "There are columns in BASE and COMPARE with differing attributes !!"
277	)
278
279
280	##### Check data
281
282	112_x_	BASE <- factor_to_character(BASE, KEYS)
283	112_x_	COMP <- factor_to_character(COMP, KEYS)
284
285
286	112_x_	COMPARE[["ExtRowsBase"]] <- construct_issue(
287	112_x_	value = identify_extra_rows(BASE, COMP, KEYS),
288	112_x_	message = "There are rows in BASE that are not in COMPARE !!"
289	)
290
291
292	112_x_	COMPARE[["ExtRowsComp"]] <- construct_issue(
293	112_x_	value = identify_extra_rows(COMP, BASE, KEYS),
294	112_x_	message = "There are rows in COMPARE that are not in BASE !!"
295	)
296
297
298	112_x_	COMPARE[["ExtColsBase"]] <- construct_issue(
299	112_x_	value = identify_extra_cols(BASE, COMP),
300	112_x_	message = "There are columns in BASE that are not in COMPARE !!"
301	)
302
303
304	112_x_	COMPARE[["ExtColsComp"]] <- construct_issue(
305	112_x_	value = identify_extra_cols(COMP, BASE),
306	112_x_	message = "There are columns in COMPARE that are not in BASE !!"
307	)
308
309
310	112_x_	VALUE_DIFFERENCES <- identify_differences(
311	112_x_	BASE, COMP, KEYS, exclude_cols,
312	112_x_	tolerance = tolerance,
313	112_x_	scale = scale
314	)
315
316
317
318	## Summarise the number of mismatching rows per variable
319
320	112_x_	if (length(VALUE_DIFFERENCES)) {
321	106_x_	NDIFF <- sapply(VALUE_DIFFERENCES, nrow)
322	106_x_	COMPARE[["NumDiff"]] <- construct_issue(
323	106_x_	value = convert_to_issue(NDIFF),
324	106_x_	message = "Not all Values Compared Equal"
325	)
326	}
327
328
329	112_x_	for (i in names(VALUE_DIFFERENCES)) {
330	917_x_	COMPARE[[paste0("VarDiff_", i)]] <- construct_issue(
331	917_x_	value = VALUE_DIFFERENCES[[i]],
332	917_x_	message = NULL
333	)
334	}
335
336
337	# suppress warning message of data summary if user didn't request to check it
338	# we leave the issue in the main compare object though for printing purposes
339	112_x_	COMPARE_WARNINGS <- COMPARE
340	112_x_	attr(COMPARE_WARNINGS[["DataSummary"]], "message") <- c(
341	112_x_	"There are differences between the class of BASE and COMPARE"
342	)
343	112_x_	if (!check_df_class \|
344	110_x_	COMPARE_WARNINGS["DataSummary"] <- NULL
345	}
346
347	# Get all issue messages, remove blank message, and collapse into single string
348	112_x_	ISSUE_MSGS <- sapply(COMPARE_WARNINGS, function(x) get_issue_message(x))
349	112_x_	ISSUE_MSGS <- Filter(function(x) !is.null(x), ISSUE_MSGS)
350	112_x_	ISSUE_MSGS <- Filter(function(x) x != "", ISSUE_MSGS)
351
352	112_x_	if (length(ISSUE_MSGS) != 0) {
353	73_x_	if (!SUPWARN) {
354	45_x_	ISSUE_MSGS <- paste(ISSUE_MSGS, collapse = "\n")
355	45_x_	warning(c("\n", ISSUE_MSGS))
356	}
357	}
358
359	# If the classes are the same and it is the only entry in the compare
360	# object then remove it in order to trigger "no issues found"
361	112_x_	if (identical(class(base), class(compare)) && length(COMPARE) == 1) {
362	38_x_	COMPARE["DataSummary"] <- NULL
363	}
364
365	# If the summary is the only item and the user didn't want to check classes
366	# then remove the object to trigger the "no issues found"
367	112_x_	if (!check_df_class && length(COMPARE) == 1) {
368	1_x_	COMPARE["DataSummary"] <- NULL
369	}
370
371
372	112_x_	if (!is.null(file)) {
373	!	x <- print(COMPARE, as_string = TRUE)
374
375	!	tryCatch(
376	{
377	!	sink(file)
378	!	cat(x, sep = "\n")
379	!	sink()
380	},
381	!	warning = function(w) {
382	!	sink()
383	!	warning(w)
384	},
385	!	error = function(e) {
386	!	sink()
387	!	stop(e)
388	}
389	)
390	!	return(invisible(COMPARE))
391	}
392
393	112_x_	return(COMPARE)
394	}
395
396
397
398
399	#' diffdf_has_issues
400	#'
401	#' Utility function which returns TRUE if an diffdf
402	#' object has issues or FALSE if an diffdf object does not have issues
403	#' @param x diffdf object
404	#' @examples
405	#'
406	#' # Example with no issues
407	#' x <- diffdf(iris, iris)
408	#' diffdf_has_issues(x)
409	#'
410	#' # Example with issues
411	#' iris2 <- iris
412	#' iris2[2, 2] <- NA
413	#' x <- diffdf(iris, iris2, suppress_warnings = TRUE)
414	#' diffdf_has_issues(x)
415	#' @export
416	diffdf_has_issues <- function(x) {
417	!	if (class(x)[[1]] != "diffdf") stop("x is not an diffdf object")
418	3_x_	return(length(x) != 0)
419	}
420
421
422	#' Assert that keys are valid
423	#'
424	#' Utility function to check that user provided "keys" aren't listed as a problem
425	#' variable of the current list of issues.
426	#' @param COMPARE (`list`)\cr A named list of which each element is a `data.frame` with the
427	#' column `VARIABLE`
428	#' @param KEYS (`character`)\cr name of key variables to check to make sure they don't contain
429	#' any issues
430	#' @param component (`character`)\cr name of the component within `COMPARE` to check against
431	#' @param msg (`character`)\cr error message to print if any of `KEYS` are found within
432	#' `COMPARE[component]$VARIABLE`
433	#' @keywords internal
434	assert_valid_keys <- function(COMPARE, KEYS, component, msg) {
435	461_x_	keys_reduced <- KEYS[KEYS %in% COMPARE[[component]]$VARIABLE]
436	461_x_	assertthat::assert_that(
437	461_x_	length(keys_reduced) == 0,
438	461_x_	msg = sprintf(
439	461_x_	"%s:\n%s",
440	461_x_	msg,
441	461_x_	paste0("`", paste0(keys_reduced, collapse = "`, `"), "`")
442	)
443	)
444	}

1	#' factor_to_character
2	#'
3	#' Takes a dataframe and converts any factor variables to character
4	#' @param dsin input dataframe
5	#' @param vars variables to consider for conversion. Default NULL will consider
6	#' every variable within the dataset
7	#' @keywords internal
8	factor_to_character <- function(dsin, vars = NULL) {
9	!	if (is.null(vars)) vars <- names(dsin)
10
11	224_x_	for (var in vars) {
12	236_x_	if (is.factor(dsin[[var]])) {
13	!	dsin[[var]] <- as.character(dsin[[var]])
14	}
15	}
16	224_x_	return(dsin)
17	}
18
19
20
21
22	#' has_unique_rows
23	#'
24	#' Check if a data sets rows are unique
25	#' @param DAT input data set (data frame)
26	#' @param KEYS Set of keys which should be unique
27	#' @keywords internal
28	has_unique_rows <- function(DAT, KEYS) {
29	235_x_	DUPS <- duplicated(subset(DAT, select = KEYS))
30	235_x_	NDUPS <- sum(DUPS)
31	235_x_	return(NDUPS == 0)
32	}
33
34	#' convert_to_issue
35	#'
36	#' converts the count value into the correct issue format
37	#' @param datin data inputted
38	#' @importFrom tibble rownames_to_column
39	#' @keywords internal
40	convert_to_issue <- function(datin) {
41	106_x_	datin_tibble <- tibble(
42	106_x_	`Variable` = names(datin),
43	106_x_	`No of Differences` = datin
44	)
45
46	106_x_	datin_tibble_reduced <- datin_tibble[datin_tibble[["No of Differences"]] > 0, , drop = FALSE]
47	106_x_	return(datin_tibble_reduced)
48	}
49
50	#' Describe the datasets being compared
51	#'
52	#' This function is used to produce a basic summary table of the core
53	#' features of the two `data.frame`'s being compared.
54	#' @param base (`data.frame`)\cr base dataset to be described
55	#' @param comp (`data.frame`)\cr comparison dataset to be described
56	#' @param base_name (`character`)\cr name of the base dataset
57	#' @param comp_name (`character`)\cr name of the comparison dataset
58	#' @keywords internal
59	describe_dataframe <- function(base, comp, base_name, comp_name) {
60	124_x_	tibble(
61	124_x_	PROPERTY = list(
62	124_x_	"Name",
63	124_x_	"Class",
64	124_x_	"Rows(#)",
65	124_x_	"Columns(#)"
66	),
67	124_x_	BASE = c(
68	124_x_	base_name,
69	124_x_	paste(class(base), collapse = ", "),
70	124_x_	as.character(nrow(base)),
71	124_x_	as.character(ncol(base))
72	),
73	124_x_	COMP = c(
74	124_x_	comp_name,
75	124_x_	paste(class(comp), collapse = ", "),
76	124_x_	as.character(nrow(comp)),
77	124_x_	as.character(ncol(comp))
78	)
79	)
80	}

1	#' identify_extra_rows
2	#'
3	#' Identifies rows that are in a baseline dataset but not in a comparator dataset
4	#' @param DS1 Baseline dataset (data frame)
5	#' @param DS2 Comparator dataset (data frame)
6	#' @param KEYS List of variables that define a unique row within the datasets (strings)
7	#' @keywords internal
8	identify_extra_rows <- function(DS1, DS2, KEYS) {
9	224_x_	if (nrow(DS2) == 0 \|
10	10_x_	return(DS1[, KEYS, drop = FALSE])
11	}
12	214_x_	DS2[["..FLAG.."]] <- "Y"
13	214_x_	dat <- merge(
14	214_x_	subset(DS1, select = KEYS),
15	214_x_	subset(DS2, select = c(KEYS, "..FLAG..")),
16	214_x_	by = KEYS, all.x = TRUE,
17	214_x_	sort = TRUE
18	)
19	214_x_	dat <- dat[do.call("order", dat[KEYS]), ]
20
21	214_x_	dat[is.na(dat[["..FLAG.."]]), KEYS, drop = FALSE]
22	}
23
24
25
26	#' identify_extra_cols
27	#'
28	#' Identifies columns that are in a baseline dataset but not in a comparator dataset
29	#' @param DS1 Baseline dataset (data frame)
30	#' @param DS2 Comparator dataset (data frame)
31	#' @importFrom tibble tibble
32	#' @keywords internal
33	identify_extra_cols <- function(DS1, DS2) {
34	224_x_	match.cols <- sapply(names(DS1), "%in%", names(DS2))
35	224_x_	assertthat::assert_that(
36	224_x_	all(is.logical(match.cols)),
37	224_x_	msg = "Assumption of logical return type is not true"
38	)
39	224_x_	tibble(
40	224_x_	COLUMNS = names(DS1)[!match.cols]
41	)
42	}
43
44
45
46
47
48
49
50	#' identify_matching_cols
51	#'
52	#' Identifies columns with the same name in two data frames
53	#' @param DS1 Input dataset 1 (data frame)
54	#' @param DS2 Input dataset 2 (data frame)
55	#' @param EXCLUDE Columns to ignore
56	#' @keywords internal
57	identify_matching_cols <- function(DS1, DS2, EXCLUDE = "") {
58	768_x_	match_cols <- sapply(names(DS1), "%in%", names(DS2))
59	768_x_	exclude_cols <- sapply(names(DS1), "%in%", EXCLUDE)
60	768_x_	names(DS1)[match_cols & !exclude_cols]
61	}
62
63
64
65
66
67	#' identify_unsupported_cols
68	#'
69	#' Identifies any columns for which the package is not setup to handle
70	#' @param dsin input dataset
71	#' @keywords internal
72	identify_unsupported_cols <- function(dsin) {
73	234_x_	dat <- subset(
74	234_x_	identify_properties(dsin),
75	234_x_	select = c("VARIABLE", "MODE")
76	)
77
78	234_x_	dat[!dat[["MODE"]] %in% c("numeric", "character", "logical"), , drop = FALSE]
79	}
80
81
82
83	#' identify_mode_differences
84	#'
85	#' Identifies any mode differences between two data frames
86	#' @param BASE Base dataset for comparison (data.frame)
87	#' @param COMP Comparator dataset to compare base against (data.frame)
88	#' @keywords internal
89	identify_mode_differences <- function(BASE, COMP) {
90	417_x_	matching_cols <- identify_matching_cols(BASE, COMP)
91
92	417_x_	dat <- merge(
93	417_x_	x = identify_properties(BASE),
94	417_x_	y = identify_properties(COMP),
95	417_x_	by = "VARIABLE",
96	417_x_	all = TRUE,
97	417_x_	suffixes = c(".BASE", ".COMP"),
98	417_x_	sort = TRUE
99	)
100	417_x_	dat <- subset(dat, select = c("VARIABLE", "MODE.BASE", "MODE.COMP"))
101
102	417_x_	KEEP1 <- dat[["VARIABLE"]] %in% matching_cols
103	417_x_	KEEP2 <- dat[["MODE.BASE"]] != dat[["MODE.COMP"]]
104
105	417_x_	dat[KEEP1 & KEEP2, , drop = FALSE]
106	}
107
108
109
110	#' identify_class_differences
111	#'
112	#' Identifies any class differences between two data frames
113	#' @param BASE Base dataset for comparison (data.frame)
114	#' @param COMP Comparator dataset to compare base against (data.frame)
115	#' @keywords internal
116	identify_class_differences <- function(BASE, COMP) {
117	117_x_	matching_cols <- identify_matching_cols(BASE, COMP)
118
119	117_x_	dat <- merge(
120	117_x_	x = identify_properties(BASE),
121	117_x_	y = identify_properties(COMP),
122	117_x_	by = "VARIABLE",
123	117_x_	all = TRUE,
124	117_x_	sort = TRUE,
125	117_x_	suffixes = c(".BASE", ".COMP")
126	)
127
128	117_x_	dat <- subset(dat, select = c("VARIABLE", "CLASS.BASE", "CLASS.COMP"))
129
130	117_x_	KEEP1 <- dat[["VARIABLE"]] %in% matching_cols
131	117_x_	KEEP2 <- !mapply(
132	117_x_	identical,
133	117_x_	dat[["CLASS.BASE"]],
134	117_x_	dat[["CLASS.COMP"]]
135	)
136
137	117_x_	dat[KEEP1 & KEEP2, , drop = FALSE]
138	}
139
140
141
142	#' Identify differences in attributes
143	#'
144	#' Identifies any attribute differences between two data frames
145	#' @param BASE Base dataset for comparison (data.frame)
146	#' @param COMP Comparator dataset to compare base against (data.frame)
147	#' @param exclude_cols Columns to exclude from comparison
148	#' @importFrom tibble tibble
149	#' @keywords internal
150	identify_att_differences <- function(BASE, COMP, exclude_cols = "") {
151	122_x_	matching_cols <- identify_matching_cols(BASE, COMP, exclude_cols)
152
153	122_x_	PROPS <- merge(
154	122_x_	x = identify_properties(BASE),
155	122_x_	y = identify_properties(COMP),
156	122_x_	by = "VARIABLE",
157	122_x_	all = TRUE,
158	122_x_	sort = TRUE,
159	122_x_	suffixes = c(".BASE", ".COMP")
160	)
161
162	122_x_	PROPS <- subset(PROPS, select = c("VARIABLE", "ATTRIBS.BASE", "ATTRIBS.COMP"))
163
164	122_x_	PROPS <- PROPS[PROPS[["VARIABLE"]] %in% matching_cols, , drop = FALSE]
165
166
167	### Setup dummy return value
168	122_x_	RETURN <- tibble(
169	122_x_	VARIABLE = character(),
170	122_x_	ATTR_NAME = character(),
171	122_x_	VALUES.BASE = list(),
172	122_x_	VALUES.COMP = list()
173	)
174
175	122_x_	for (i in PROPS[["VARIABLE"]]) {
176	1157_x_	PROPS_filt <- PROPS[PROPS[["VARIABLE"]] == i, , drop = FALSE]
177
178	### Get a vector of all available attributes across both variables
179	1157_x_	ATTRIB_NAMES <- unique(c(
180	1157_x_	names(PROPS_filt[["ATTRIBS.BASE"]][[1]]),
181	1157_x_	names(PROPS_filt[["ATTRIBS.COMP"]][[1]])
182	))
183
184	### If variable has no attributes move onto the next variable
185	850_x_	if (is.null(ATTRIB_NAMES)) next()
186
187	### Loop over each attribute checking if they are identical and outputing
188	### anyones that arn't
189	307_x_	for (j in ATTRIB_NAMES) {
190	528_x_	ATTRIB_BASE <- PROPS_filt[["ATTRIBS.BASE"]][[1]][j]
191	528_x_	ATTRIB_COMP <- PROPS_filt[["ATTRIBS.COMP"]][[1]][j]
192
193	528_x_	if (!identical(ATTRIB_BASE, ATTRIB_COMP)) {
194	64_x_	ATT_DIFFS <- tibble(
195	64_x_	VARIABLE = i,
196	64_x_	ATTR_NAME = j,
197	64_x_	VALUES.BASE = ifelse(is.null(ATTRIB_BASE), list(), ATTRIB_BASE),
198	64_x_	VALUES.COMP = ifelse(is.null(ATTRIB_COMP), list(), ATTRIB_COMP)
199	)
200
201	64_x_	RETURN <- rbind(RETURN, ATT_DIFFS)
202	}
203	}
204	}
205	122_x_	return(RETURN)
206	}
207
208
209
210
211
212	#' identify_differences
213	#'
214	#' Compares each column within 2 datasets to identify any values which they
215	#' mismatch on.
216	#' @param BASE Base dataset for comparison (data.frame)
217	#' @param COMP Comparator dataset to compare base against (data.frame)
218	#' @param KEYS List of variables that define a unique row within the datasets (strings)
219	#' @param exclude_cols Columns to exclude from comparison
220	#' @param tolerance Level of tolerance for numeric differences between two variables
221	#' @param scale Scale that tolerance should be set on. If NULL assume absolute
222	#' @keywords internal
223	identify_differences <- function(
224	BASE,
225	COMP,
226	KEYS,
227	exclude_cols,
228	tolerance = sqrt(.Machine$double.eps),
229	scale = NULL
230	) {
231
232	112_x_	matching_cols <- identify_matching_cols(BASE, COMP, c(KEYS, exclude_cols))
233
234	112_x_	if (length(matching_cols) == 0) {
235	2_x_	return(tibble())
236	}
237
238	110_x_	DAT <- merge(
239	110_x_	x = BASE,
240	110_x_	y = COMP,
241	110_x_	by = KEYS,
242	110_x_	suffix = c(".x", ".y"),
243	110_x_	sort = TRUE
244	)
245	110_x_	if (nrow(DAT) == 0) {
246	4_x_	return(tibble())
247	}
248	106_x_	DAT <- DAT[do.call("order", DAT[KEYS]), ]
249
250	106_x_	matching_list <- mapply(
251	106_x_	is_variable_different,
252	106_x_	matching_cols,
253	106_x_	MoreArgs = list(
254	106_x_	keynames = KEYS,
255	106_x_	datain = DAT,
256	106_x_	tolerance = tolerance,
257	106_x_	scale = scale
258	),
259	106_x_	SIMPLIFY = FALSE
260	)
261
262	106_x_	matching_list
263	}
264
265
266
267
268
269
270
271
272
273	#' identify_properties
274	#'
275	#' Returns a dataframe of metadata for a given dataset.
276	#' Returned values include variable names , class , mode , type & attributes
277	#' @param dsin input dataframe that you want to get the metadata from
278	#' @importFrom tibble tibble
279	#' @keywords internal
280	identify_properties <- function(dsin) {
281	### If missing or null return empty dataset
282	1546_x_	if (is.null(dsin)) {
283	!	x <- tibble(
284	!	VARIABLE = character(),
285	!	CLASS = list(),
286	!	MODE = character(),
287	!	TYPE = character(),
288	!	ATTRIBS = list()
289	)
290	!	return(x)
291	}
292
293	1546_x_	tibble(
294	1546_x_	VARIABLE = names(dsin),
295	1546_x_	CLASS = lapply(dsin, class),
296	1546_x_	MODE = sapply(dsin, mode),
297	1546_x_	TYPE = sapply(dsin, typeof),
298	1546_x_	ATTRIBS = lapply(dsin, attributes)
299	)
300	}
301
302
303	#' Find column ordering differences
304	#'
305	#' Compares two datasets and outputs a table listing any differences in the column
306	#' orders between the two datasets. Columns that are not contained within both
307	#' are ignored however column ordering is derived prior to removing these columns.
308	#'
309	#' @param BASE (`data.frame`)\cr Base dataset for comparison
310	#' @param COMP (`data.frame`)\cr Comparator dataset to compare base against
311	#' @keywords internal
312	identify_column_order_differences <- function(BASE, COMP) {
313	8_x_	base_cols <- tibble(
314	8_x_	COLUMN = names(BASE),
315	8_x_	"BASE-INDEX" = seq_along(names(BASE))
316	)
317	8_x_	comp_cols <- tibble(
318	8_x_	COLUMN = names(COMP),
319	8_x_	"COMPARE-INDEX" = seq_along(names(COMP))
320	)
321	8_x_	col_index <- merge(
322	8_x_	base_cols,
323	8_x_	comp_cols,
324	8_x_	by = c("COLUMN"),
325	8_x_	all = TRUE,
326	8_x_	sort = FALSE
327	)
328	8_x_	keep_rows <- col_index[["BASE-INDEX"]] != col_index[["COMPARE-INDEX"]]
329	8_x_	keep_rows[is.na(keep_rows)] <- FALSE
330	8_x_	col_index[keep_rows, , drop = FALSE]
331	}

1	#' sort_then_join
2	#'
3	#' Convenience function to sort two strings and paste them together
4	#' @param string1 first string
5	#' @param string2 second string
6	#' @keywords internal
7	sort_then_join <- function(string1, string2) {
8	14_x_	paste0(sort(c(string1, string2)), collapse = "")
9	}
10
11
12	#' class_merge
13	#'
14	#' Convenience function to put all classes an object has into one string
15	#' @param x an object
16	#' @keywords internal
17	class_merge <- function(x) {
18	304_x_	paste(class(x), collapse = "_")
19	}
20
21
22	get_message <- function(colname, whichdat, totype) {
23	12_x_	message(paste0(
24	12_x_	"NOTE: Variable ", colname, " in ", tolower(whichdat), " was casted to ", totype
25	))
26	}
27
28
29	#' get_casted_vector
30	#'
31	#' casts a vector depending on its type and input
32	#' @param colin column to cast
33	#' @param colname name of vector
34	#' @param whichdat whether base or compare is being casted (used for messages)
35	#' @importFrom methods is
36	#' @keywords internal
37	get_casted_vector <- function(colin, colname, whichdat) {
38	24_x_	if (is(colin, "factor")) {
39	6_x_	get_message(colname, whichdat, "character")
40	6_x_	return(as.character(colin))
41	}
42
43	18_x_	if (is(colin, "integer")) {
44	6_x_	get_message(colname, whichdat, "numeric")
45	6_x_	return(as.numeric(colin))
46	}
47
48	12_x_	colin
49	}
50
51
52
53	#' get_casted_dataset
54	#'
55	#' Internal utility function to loop across a dataset casting all target
56	#' variables
57	#' @param df dataset to be casted
58	#' @param columns columns to be casted
59	#' @param whichdat whether base or compare is being casted (used for messages)
60	#' @keywords internal
61	get_casted_dataset <- function(df, columns, whichdat) {
62	22_x_	for (col in columns) {
63	24_x_	df[[col]] <- get_casted_vector(df[[col]], col, whichdat)
64	}
65	22_x_	return(df)
66	}
67
68
69
70
71
72
73	#' cast_variables
74	#'
75	#' Function to cast datasets columns if they have differing types
76	#' Restricted to specific cases, currently integer and double, and character and factor
77	#' @param BASE base dataset
78	#' @param COMPARE comparison dataset
79	#' @param ignore_vars Variables not to be considered for casting
80	#' @param cast_integers Logical - Whether integers should be cased to double when compared to doubles
81	#' @param cast_factors Logical - Whether characters should be casted to characters when compared to characters
82	#' @keywords internal
83	cast_variables <- function(
84	BASE,
85	COMPARE,
86	ignore_vars = NULL,
87	cast_integers = FALSE,
88	cast_factors = FALSE
89	) {
90
91	13_x_	allowed_class_casts <- c("integernumeric", "characterfactor")[c(cast_integers, cast_factors)]
92
93	13_x_	BASE_class <- data.frame(
94	13_x_	class_BASE = sapply(BASE, class_merge),
95	13_x_	colname = names(BASE),
96	13_x_	stringsAsFactors = FALSE
97	)
98	13_x_	BASE_class <- BASE_class[!BASE_class[["colname"]] %in% ignore_vars, , drop = FALSE]
99
100
101	13_x_	COMPARE_class <- data.frame(
102	13_x_	class_COMPARE = sapply(COMPARE, class_merge),
103	13_x_	colname = names(COMPARE),
104	13_x_	stringsAsFactors = FALSE
105	)
106	13_x_	COMPARE_class <- COMPARE_class[!COMPARE_class[["colname"]] %in% ignore_vars, , drop = FALSE]
107
108	13_x_	common_class <- merge(
109	13_x_	x = BASE_class,
110	13_x_	y = COMPARE_class,
111	13_x_	by = "colname"
112	)
113
114
115	13_x_	diff_class <- common_class[common_class[["class_BASE"]] != common_class[["class_COMPARE"]], , drop = FALSE]
116
117
118	13_x_	diff_class$classmerge <- mapply(
119	13_x_	sort_then_join,
120	13_x_	diff_class$class_COMPARE,
121	13_x_	diff_class$class_BASE
122	)
123
124
125	13_x_	cast_columns <- diff_class[diff_class[["classmerge"]] %in% allowed_class_casts, , drop = FALSE]
126
127
128	13_x_	DATASETS <- list(
129	13_x_	"BASE" = BASE,
130	13_x_	"COMPARE" = COMPARE
131	)
132
133	13_x_	if (nrow(cast_columns) == 0) {
134	2_x_	return(DATASETS)
135	}
136
137
138	11_x_	for (i in names(DATASETS)) {
139	22_x_	DATASETS[[i]] <- get_casted_dataset(DATASETS[[i]], cast_columns$colname, i)
140	}
141
142	11_x_	return(DATASETS)
143	}

1	#' Pad String
2	#'
3	#' Utility function used to replicate `str_pad`. Adds white space to either end
4	#' of a string to get it to equal the desired length
5	#' @param x string
6	#' @param width desired length
7	#' @keywords internal
8	string_pad <- function(x, width) {
9	1027_x_	if (nchar(x) >= width) {
10	!	return(x)
11	}
12	1027_x_	width <- width - nchar(x)
13	1027_x_	left <- paste0(rep(" ", floor(width / 2)), collapse = "")
14	1027_x_	right <- paste0(rep(" ", ceiling(width / 2)), collapse = "")
15	1027_x_	paste0(left, x, right, collapse = "")
16	}
17
18
19	#' recursive_reduce
20	#'
21	#' Utility function used to replicated `purrr::reduce`. Recursively applies a
22	#' function to a list of elements until only 1 element remains
23	#' @param .l list of values to apply a function to
24	#' @param .f function to apply to each each element of the list in turn. See details.
25	#' @details
26	#' This function is essentially performing the following operation:
27	#' ```
28	#' .l[[1]] <- .f( .l[[1]] , .l[[2]]) ; .l[[1]] <- .f( .l[[1]] , .l[[3]])
29	#' ```
30	#' @keywords internal
31	recursive_reduce <- function(.l, .f) {
32	365_x_	if (length(.l) != 1) {
33	233_x_	.l[[2]] <- .f(.l[[1]], .l[[2]])
34	233_x_	return(recursive_reduce(.l[-1], .f))
35	} else {
36	132_x_	return(.l[[1]])
37	}
38	}
39
40	#' invert
41	#'
42	#' Utility function used to replicated `purrr::transpose`. Turns a list inside
43	#' out.
44	#' @param x list
45	#' @keywords internal
46	invert <- function(x) {
47	40_x_	x2 <- list()
48	40_x_	cnames <- names(x)
49	40_x_	tnames <- names(x[[1]])
50	40_x_	for (i in tnames) {
51	120_x_	x2[[i]] <- list()
52	120_x_	for (j in cnames) {
53	345_x_	x2[[i]][[j]] <- x[[j]][[i]]
54	}
55	}
56	40_x_	return(x2)
57	}
58
59
60
61
62	#' as_ascii_table
63	#'
64	#' This function takes a `data.frame` and attempts to convert it into
65	#' a simple ascii format suitable for printing to the screen
66	#' It is assumed all variable values have a `as.character()` method
67	#' in order to cast them to character.
68	#' @param dat Input dataset to convert into a ascii table
69	#' @param line_prefix Symbols to prefix in front of every line of the table
70	#' @keywords internal
71	as_ascii_table <- function(dat, line_prefix = " ") {
72	40_x_	n_col <- ncol(dat)
73	40_x_	n_row <- nrow(dat)
74
75	## Convert every value to character and crop to a suitable length
76	40_x_	dat_char <- lapply(dat, as_fmt_char)
77
78
79	40_x_	hold <- list()
80	40_x_	COLS <- colnames(dat)
81
82	### For each column extract core elements (width, values , title) and pad out
83	### each string to be a suitable length
84	40_x_	for (i in seq_len(n_col)) {
85	115_x_	COL <- COLS[i]
86	115_x_	VALUES <- dat_char[[i]]
87
88	115_x_	JOINT <- c(COL, VALUES)
89	115_x_	WIDTH <- max(sapply(JOINT, nchar)) + 2
90
91	115_x_	hold[[COL]] <- list()
92	115_x_	hold[[COL]]$WIDTH <- WIDTH
93	115_x_	hold[[COL]]$VALUES <- sapply(VALUES, string_pad, width = WIDTH)
94	115_x_	hold[[COL]]$HEADER <- sapply(COL, string_pad, width = WIDTH)
95	}
96
97	### Collapse into a single value per component ( title , values, width )
98	40_x_	thold <- invert(hold)
99	40_x_	tvals <- recursive_reduce(thold$VALUES, paste0)
100	40_x_	thead <- recursive_reduce(thold$HEADER, paste0)
101	40_x_	twidth <- recursive_reduce(thold$WIDTH, sum)
102
103	### Create header and footer lines
104	40_x_	TLINE <- paste0(rep("=", twidth), collapse = "")
105	40_x_	LINE <- paste0(rep("-", twidth), collapse = "")
106	40_x_	FVALS <- paste0(line_prefix, tvals, collapse = "\n")
107
108	### Output table
109	40_x_	paste0(
110	40_x_	line_prefix, TLINE, "\n",
111	40_x_	line_prefix, thead, "\n",
112	40_x_	line_prefix, LINE, "\n",
113	40_x_	FVALS, "\n",
114	40_x_	line_prefix, LINE
115	)
116	}
117
118
119	#' as_character
120	#'
121	#' Stub function to enable mocking in unit tests
122	as_character <- as.character
123
124	#' Format vector to printable string
125	#'
126	#' Coerces a vector of any type into a printable string. The most
127	#' significant transformation is performed on existing character
128	#' vectors which will be truncated, have newlines converted
129	#' to explicit symbols and will be wrapped in quotes if they
130	#' contain white space.
131	#'
132	#' @param x (`vector`) \cr vector to be converted to character
133	#' @param add_quotes (`logical`) \cr if true will wrap strings that contain
134	#' whitespace with quotes
135	#' @param crop_at (`numeric`) \cr specifies the limit at which strings should
136	#' be truncated to
137	#' @param ... additional arguments (not currently used)
138	#'
139	#' @name as_fmt_char
140	#' @keywords internal
141	as_fmt_char <- function(x, ...) {
142	180_x_	UseMethod("as_fmt_char")
143	}
144
145	#' @rdname as_fmt_char
146	#' @export
147	as_fmt_char.numeric <- function(x, ...) {
148	34_x_	format(x, digits = 7, justify = "right")
149	}
150
151	#' @rdname as_fmt_char
152	#' @export
153	as_fmt_char.NULL <- function(x, ...) {
154	!	""
155	}
156
157	#' @importFrom utils capture.output
158	#' @rdname as_fmt_char
159	#' @export
160	as_fmt_char.list <- function(x, ...) {
161	15_x_	vapply(
162	15_x_	x,
163	15_x_	function(x) {
164	54_x_	if (is.numeric(x)) {
165	!	return(as_fmt_char(x))
166	}
167	54_x_	if (is.character(x) & length(x) == 1) {
168	54_x_	return(as_fmt_char(x))
169	}
170	!	as_fmt_char(
171	!	paste(capture.output(dput(x)), collapse = " "),
172	!	add_quotes = FALSE
173	)
174	},
175	15_x_	character(1)
176	)
177	}
178
179	#' @rdname as_fmt_char
180	#' @export
181	as_fmt_char.factor <- function(x, ...) {
182	3_x_	as_fmt_char(as.character(x))
183	}
184
185	#' @rdname as_fmt_char
186	#' @export
187	as_fmt_char.character <- function(x, add_quotes = TRUE, crop_at = 30, ...) {
188	123_x_	needs_quotes <- grepl("\\s", x) & add_quotes
189
190	123_x_	x[is.na(x)] <- ""
191
192	# Replace \nl \cr with tags to stop print message splitting over
193	# multiple lines
194	123_x_	x <- gsub("\x0D", "", x)
195	123_x_	x <- gsub("\x0A", "", x)
196
197	123_x_	charlength <- vapply(x, nchar, numeric(1))
198	123_x_	x <- substr(x, 1, crop_at)
199	123_x_	x[charlength > crop_at] <- paste0(x[charlength > crop_at], "...")
200
201	# Add enclosing " " around strings with white space so that it can be
202	# clearly identified in the printed output
203	123_x_	x[needs_quotes] <- paste0('"', x[needs_quotes], '"')
204
205	123_x_	return(x)
206	}
207
208
209	#' @rdname as_fmt_char
210	#' @export
211	as_fmt_char.default <- function(x, ...) {
212	6_x_	x_char <- as_character(x)
213	6_x_	assertthat::assert_that(
214	6_x_	is.character(x_char),
215	6_x_	msg = sprintf(
216	6_x_	"Unable to convert class `'%s'` to character for printing purposes",
217	6_x_	paste(class(x), collapse = "', '")
218	)
219	)
220	5_x_	as_fmt_char.character(x_char, add_quotes = FALSE)
221	}
222
223
224	#' @rdname as_fmt_char
225	#' @export
226	as_fmt_char.POSIXt <- function(x, ...) {
227	4_x_	format(x, "%Y-%m-%d %H:%M:%S %Z")
228	}
229
230
231	#' get_table
232	#'
233	#' Generate nice looking table from a data frame
234	#' @param dsin dataset
235	#' @inheritParams print.diffdf
236	#' @keywords internal
237	get_table <- function(dsin, row_limit = 10) {
238	37_x_	if (nrow(dsin) == 0) {
239	!	return("")
240	}
241	37_x_	if (!is.null(row_limit)) {
242	34_x_	assertthat::assert_that(
243	34_x_	assertthat::is.number(row_limit),
244	34_x_	row_limit > 0,
245	34_x_	msg = "row_limit must be a positive integer"
246	)
247	}
248	37_x_	if (is.null(row_limit)) {
249	3_x_	display_table <- dsin
250	} else {
251	34_x_	display_table <- subset(dsin, seq_len(nrow(dsin)) < (row_limit + 1))
252	}
253
254	37_x_	add_message <- if (!is.null(row_limit) && nrow(dsin) > row_limit) {
255	5_x_	paste0(
256	5_x_	"First ",
257	5_x_	row_limit,
258	5_x_	" of ",
259	5_x_	nrow(dsin),
260	5_x_	" rows are shown in table below"
261	)
262	} else {
263	32_x_	NULL
264	}
265
266	37_x_	msg <- paste(
267	37_x_	c(
268	37_x_	add_message,
269	37_x_	as_ascii_table(display_table)
270	),
271	37_x_	collapse = "\n"
272	)
273	37_x_	return(msg)
274	}

1	#' is_variable_different
2	#'
3	#' This subsets the data set on the variable name, picks out differences and returns a `tibble`
4	#' of differences for the given variable
5	#' @importFrom tibble as_tibble
6	#' @param variablename name of variable being compared
7	#' @param keynames name of keys
8	#' @param datain Inputted dataset with base and compare vectors
9	#' @param ... Additional arguments which might be passed through (numerical accuracy)
10	#' @return A boolean vector which is T if target and current are different
11	#' @keywords internal
12	is_variable_different <- function(variablename, keynames, datain, ...) {
13	917_x_	xvar <- paste0(variablename, ".x")
14	917_x_	yvar <- paste0(variablename, ".y")
15
16	917_x_	assertthat::assert_that(
17	917_x_	xvar %in% names(datain) && yvar %in% names(datain),
18	917_x_	msg = "Variable does not exist within input dataset"
19	)
20
21	917_x_	target <- datain[[xvar]]
22	917_x_	current <- datain[[yvar]]
23	917_x_	outvect <- find_difference(target, current, ...)
24
25	917_x_	datain[["VARIABLE"]] <- variablename
26
27	917_x_	names(datain)[names(datain) %in% c(xvar, yvar)] <- c("BASE", "COMPARE")
28
29	917_x_	x <- as_tibble(
30	917_x_	subset(
31	917_x_	datain,
32	917_x_	outvect,
33	917_x_	select = c("VARIABLE", keynames, "BASE", "COMPARE")
34	)
35	)
36
37	917_x_	return(x)
38	}
39
40	#' compare_vectors
41	#'
42	#' Compare two vectors looking for differences
43	#'
44	#' @param target the base vector
45	#' @param current a vector to compare target to
46	#' @param ... Additional arguments which might be passed through (numerical accuracy)
47	#' @keywords internal
48	compare_vectors <- function(target, current, ...) {
49	950_x_	UseMethod("compare_vectors")
50	}
51
52
53	#' find_difference
54	#'
55	#' This determines if two vectors are different. It expects vectors of the same
56	#' length and type, and is intended to be used after checks have already been done
57	#' Initially picks out any `NA`'s (matching `NA`'s count as a match)
58	#' Then compares remaining vector
59	#'
60	#' @param target the base vector
61	#' @param current a vector to compare target to
62	#' @param ... Additional arguments which might be passed through (numerical accuracy)
63	#' @keywords internal
64	find_difference <- function(target, current, ...) {
65	953_x_	if (length(target) != length(current)) {
66	2_x_	warning("Inputs are not of the same length")
67	2_x_	return(NULL)
68	}
69
70	951_x_	if (is.null(target) \|
71	1_x_	return(is.null(target) != is.null(current))
72	}
73
74	### Initalise output, assume problem unless evidence otherwise
75	950_x_	return_vector <- rep(TRUE, length(target))
76
77	950_x_	nas_t <- is.na(target)
78	950_x_	nas_c <- is.na(current)
79
80	## compare missing values
81	950_x_	nacompare <- nas_t != nas_c
82	950_x_	naselect <- nas_t \| nas_c
83	950_x_	return_vector[naselect] <- nacompare[naselect]
84
85	## compare non-missing values
86	950_x_	selectvector <- as.logical((!nas_t) * (!nas_c))
87
88	950_x_	comparevect <- compare_vectors(
89	950_x_	target[selectvector],
90	950_x_	current[selectvector],
91	...
92	)
93
94	950_x_	return_vector[selectvector] <- comparevect
95
96	950_x_	return(return_vector)
97	}
98
99
100
101
102
103
104
105	#' compare_vectors.default
106	#'
107	#' Default method, if the vector is not numeric or factor. Basic comparison
108	#' @param target the base vector
109	#' @param current a vector to compare target to
110	#' @param ... Additional arguments which might be passed through (numerical accuracy)
111	#' @keywords internal
112	compare_vectors.default <- function(target, current, ...) {
113	408_x_	target != current
114	}
115
116
117
118
119	#' compare_vectors.factor
120	#'
121	#' Compares factors. Sets them as character and then compares
122	#' @param target the base vector
123	#' @param current a vector to compare target to
124	#' @param ... Additional arguments which might be passed through (numerical accuracy)
125	#' @keywords internal
126	compare_vectors.factor <- function(target, current, ...) {
127	83_x_	as.character(target) != as.character(current)
128	}
129
130
131
132
133
134	#' compare_vectors.numeric
135	#'
136	#' This is a modified version of the all.equal function
137	#' which returns a vector rather than a message
138	#' @param target the base vector
139	#' @param current a vector to compare target to
140	#' @param tolerance Level of tolerance for differences between two variables
141	#' @param scale Scale that tolerance should be set on. If NULL assume absolute
142	#' @param ... Not used
143	#' @keywords internal
144	compare_vectors.numeric <- function(
145	target,
146	current,
147	tolerance = sqrt(.Machine$double.eps),
148	scale = NULL,
149	...
150	) {
151
152	459_x_	out <- target == current
153
154	459_x_	if (all(out)) {
155	433_x_	return(!out)
156	}
157
158	26_x_	if (is.integer(target) \|
159	7_x_	target <- as.double(target)
160	7_x_	current <- as.double(current)
161	}
162
163	26_x_	xy <- abs(target - current)
164
165	26_x_	if (!is.null(scale)) {
166	4_x_	xy <- xy / scale
167	}
168
169	26_x_	return(xy > tolerance)
170	}
171
172	#' compare_vectors.int64
173	#'
174	#' Handle int64 vectors. Uses numeric comparison
175	#' @param target the base vector
176	#' @param current a vector to compare target to
177	#' @param tolerance Level of tolerance for differences between two variables
178	#' @param scale Scale that tolerance should be set on. If NULL assume absolute
179	#' @param ... Not used
180	#' @keywords internal
181	compare_vectors.integer64 <- function(
182	target,
183	current,
184	tolerance = sqrt(.Machine$double.eps),
185	scale = NULL,
186	...
187	) {
188	4_x_	compare_vectors.numeric(target, current, tolerance, scale)
189	}

1	#' construct_issue
2	#'
3	#' Make an s3 object with class issue and possible additional class,
4	#' and assign other arguments to attributes
5	#' @param value the value of the object
6	#' @param message the value of the message attribute
7	#' @param add_class additional class to add
8	#' @keywords internal
9	construct_issue <- function(value, message, add_class = NULL) {
10	2180_x_	x <- value
11
12	### If nothing has been provided return nothing !
13	2180_x_	if (nrow(x) == 0) {
14	1913_x_	return(NULL)
15	}
16
17	267_x_	class(x) <- c(add_class, "issue", class(x))
18	267_x_	attributes(x)[["message"]] <- message
19	267_x_	return(x)
20	}
21
22
23	#' get_issue_message
24	#'
25	#' Simple function to grab the issue message
26	#' @param object inputted object of class issue
27	#' @param ... other arguments
28	#' @keywords internal
29	get_issue_message <- function(object, ...) {
30	134_x_	return(attr(object, "message"))
31	}
32
33
34	#' get_print_message
35	#'
36	#' Get the required text depending on type of issue
37	#' @param object inputted object of class issue
38	#' @param ... other arguments
39	#' @keywords internal
40	get_print_message <- function(object, ...) {
41	37_x_	UseMethod("get_print_message", object)
42	}
43
44
45	#' get_print_message.default
46	#'
47	#' Errors, as this should only ever be given an issue
48	#' @param object issue
49	#' @param ... Not used
50	#' @keywords internal
51	get_print_message.default <- function(object, ...) {
52	!	stop("Error: An issue has not been provided to this function!")
53	}
54
55
56	#' get_print_message.issue
57	#'
58	#' Get text from a basic issue, based on the class of the value of the issue
59	#'
60	#' @param object an object of class issue_basic
61	#' @inheritParams print.diffdf
62	#' @keywords internal
63	get_print_message.issue <- function(object, row_limit, ...) {
64	37_x_	paste(
65	37_x_	c(
66	37_x_	attr(object, "message"),
67	37_x_	get_table(object, row_limit = row_limit),
68	37_x_	"\n\n"
69	),
70	37_x_	collapse = "\n"
71	)
72	}

1	#' Print diffdf objects
2	#'
3	#' Print nicely formatted version of an diffdf object
4	#' @param x comparison object created by diffdf().
5	#' @param ... Additional arguments (not used)
6	#' @param row_limit Max row limit for difference tables (NULL to show all rows)
7	#' @param as_string Return printed message as an R character vector?
8	#' @examples
9	#' x <- subset(iris, -Species)
10	#' x[1, 2] <- 5
11	#' COMPARE <- diffdf(iris, x)
12	#' print(COMPARE)
13	#' print(COMPARE, row_limit = 5)
14	#' @export
15	print.diffdf <- function(x, row_limit = 10, as_string = FALSE, ...) {
16	25_x_	if (!is.null(row_limit)) {
17	24_x_	assertthat::assert_that(
18	24_x_	assertthat::is.number(row_limit),
19	24_x_	row_limit > 0,
20	24_x_	msg = "row_limit must be a positive integer"
21	)
22	}
23	21_x_	assertthat::assert_that(
24	21_x_	assertthat::is.flag(as_string)
25	)
26	19_x_	COMPARE <- x
27
28	19_x_	if (length(COMPARE) == 0) {
29	6_x_	outtext <- "No issues were found!\n"
30	} else {
31	13_x_	start_text <- paste0("Differences found between the objects!\n\n")
32	13_x_	end_text <- lapply(COMPARE, function(x) get_print_message(x, row_limit))
33	13_x_	end_text <- paste0(unlist(end_text), collapse = "")
34	13_x_	outtext <- paste0(start_text, end_text)
35	}
36	19_x_	if (as_string) {
37	7_x_	return(strsplit(outtext, "\n")[[1]])
38	} else {
39	12_x_	cat(outtext)
40	12_x_	return(invisible(COMPARE))
41	}
42	}

1	#' Generate unique key name
2	#'
3	#' Function to generate a name for the keys if not provided
4	#'
5	#' @param BASE base dataset
6	#' @param COMP comparison dataset
7	#' @param replace_names a vector of replacement names. Used for recursion, should be edited in function for clarity
8	#'
9	#' @keywords internal
10	generate_keyname <- function(
11	BASE,
12	COMP,
13	replace_names = c("..ROWNUMBER..", "..RN..", "..ROWN..", "..N..")
14	) {
15	193_x_	assertthat::assert_that(
16	193_x_	is(replace_names, "character"),
17	193_x_	msg = "replace_names is not a character vector"
18	)
19
20	193_x_	assertthat::assert_that(
21	193_x_	length(replace_names) != 0,
22	193_x_	msg = "All default row names are in use in BASE/COMPARE. Please provide a KEY argument"
23	)
24
25
26	185_x_	key_name <- replace_names[1]
27
28	185_x_	if (!is.null(BASE[[key_name]]) \|
29	66_x_	key_name <- generate_keyname(BASE, COMP, replace_names[-1])
30	}
31	153_x_	return(key_name)
32	}