This function generates an evaluation data frame based on the provided data and predictions.
Arguments
- data
The data set containing high-dimensional data along with a unique identifier.
- prediction_df
The data set with 2D embeddings, IDs, and predicted hexagonal IDs.
- df_bin_centroids
The data set with coordinates of hexagonal bin centroids.
- df_bin
The data set with averaged/weighted high-dimensional data.
- col_start
The text that begins the column names of the high-dimensional data.
Examples
num_bins_x <- 4
shape_value <- 1.833091
hexbin_data_object <- extract_hexbin_mean(nldr_df = s_curve_noise_umap, num_bins_x,
shape_val = shape_value)
df_bin_centroids <- hexbin_data_object$hexdf_data
UMAP_data_with_hb_id <- s_curve_noise_umap |> dplyr::mutate(hb_id = hexbin_data_object$hb_data@cID)
df_all <- dplyr::bind_cols(s_curve_noise_training |> dplyr::select(-ID), UMAP_data_with_hb_id)
df_bin <- avg_highD_data(df_all)
pred_df_test <- predict_2d_embeddings(test_data = s_curve_noise_training,
df_bin_centroids = df_bin_centroids,
df_bin = df_bin, type_NLDR = "UMAP")
generate_eval_df(data = s_curve_noise, prediction_df = pred_df_test,
df_bin_centroids = df_bin_centroids, df_bin = df_bin, col_start = "x")
#> # A tibble: 1 × 4
#> number_of_bins number_of_observations total_error total_mse
#> <int> <int> <dbl> <dbl>
#> 1 3128 75 -1685. 0.185