Spaces:

cjerzak
/

Multiscaler

Running

App Files Files Community

Multiscaler / app.R

cjerzak

Update app.R

3633b24 verified about 1 month ago

raw

history blame

13.6 kB

	# setwd('~/Dropbox/ImageSeq/')

	options(error = NULL)
	library(shiny)
	library(dplyr)
	library(fields) # For image.plot in heatMap
	library(akima) # For interpolation

	# Load the data from sm.csv
	# The path is relative, so the file is looked up in the current working
	# directory. Columns referenced later in this file: MaxImageDims,
	# application, optimizeImageRep, and the metric columns listed in
	# `metric_choices`.
	sm <- read.csv("sm.csv")

	# Coerce a vector to numeric by way of its character representation
	# (so a factor yields its labels' values rather than its integer codes).
	f2n <- function(x) {
	  as_text <- as.character(x)
	  as.numeric(as_text)
	}

	# Compute MaxImageDimsLeft and MaxImageDimsRight from MaxImageDims
	# MaxImageDims holds one or two sizes joined by "_". The sizes are sorted
	# ascending: Left is the smallest, Right the second smallest (NA when only
	# one size is present).
	# as.character() guards against the column having been read as a factor or
	# as a plain number (no "_" in any row), which strsplit() would reject.
	sorted_dims <- lapply(
	  strsplit(as.character(sm$MaxImageDims), split = "_", fixed = TRUE),
	  function(parts) sort(f2n(parts))
	)
	# vapply keeps the result numeric even for a zero-row table.
	sm$MaxImageDimsLeft <- vapply(sorted_dims, function(d) d[1], numeric(1))
	sm$MaxImageDimsRight <- vapply(sorted_dims, function(d) d[2], numeric(1))

	# Heatmap function with optimal_point parameter
	#
	# Interpolates scattered (x, y, z) points onto an N-by-N regular grid with
	# akima::interp (duplicate locations are averaged) and draws the grid as an
	# image. Called for its plotting side effects.
	#
	# Arguments:
	#   x, y, z        Scattered coordinates and values to interpolate.
	#   main, xlab, ylab, cex.lab, cex.main
	#                  Title / axis label text and sizes.
	#   N              Number of grid points per axis (required, no default).
	#   yaxt           Forwarded to the image function.
	#   horizontal, legend.width, legend.only
	#                  Legend options forwarded to the image function.
	#   useLog         Characters "x"/"y" are forwarded as the `log` argument;
	#                  if it contains "z" the colour scale shows log(z), with
	#                  values below 0.001 raised to 0.001 first.
	#   xlim, ylim     Axis limits; default to the range of the grid.
	#   zlim           Colour-scale limits, forwarded unchanged.
	#   add.legend     TRUE draws with fields::image.plot, FALSE with
	#                  graphics::image.
	#   vline, hline   Optional positions of thick reference lines, coloured by
	#                  col_vline / col_hline.
	#   myCol          Colour palette forwarded as `col`.
	#   includeMarginals, marginalJitterSD_x, marginalJitterSD_y
	#                  If TRUE, jittered tick marks for the observed x and y
	#                  values are drawn near the lower axis limits; jitter SD is
	#                  the given fraction of sd(x) / sd(y).
	#   openBrowser    If TRUE, enters browser() for debugging before plotting.
	#   optimal_point  Optional list with elements x and y; marked with a green
	#                  star.
	heatMap <- function(x, y, z,
	                    main = "",
	                    N, yaxt = NULL,
	                    xlab = "",
	                    ylab = "",
	                    horizontal = FALSE,
	                    useLog = "",
	                    legend.width = 1,
	                    ylim = NULL,
	                    xlim = NULL,
	                    zlim = NULL,
	                    add.legend = TRUE,
	                    legend.only = FALSE,
	                    vline = NULL,
	                    col_vline = "black",
	                    hline = NULL,
	                    col_hline = "black",
	                    cex.lab = 2,
	                    cex.main = 2,
	                    myCol = NULL,
	                    includeMarginals = FALSE,
	                    marginalJitterSD_x = 0.01,
	                    marginalJitterSD_y = 0.01,
	                    openBrowser = FALSE,
	                    optimal_point = NULL) {
	  if (openBrowser) { browser() }

	  # Regular N x N grid spanning the observed x and y ranges.
	  s_ <- akima::interp(x = x, y = y, z = z,
	                      xo = seq(min(x), max(x), length.out = N),
	                      yo = seq(min(y), max(y), length.out = N),
	                      duplicate = "mean")
	  if (is.null(xlim)) { xlim <- range(s_$x, finite = TRUE) }
	  if (is.null(ylim)) { ylim <- range(s_$y, finite = TRUE) }
	  imageFxn <- if (add.legend) fields::image.plot else graphics::image

	  if (!grepl(useLog, pattern = "z")) {
	    imageFxn(s_, xlab = xlab, ylab = ylab, log = useLog, cex.lab = cex.lab, main = main,
	             cex.main = cex.main, col = myCol, xlim = xlim, ylim = ylim,
	             legend.width = legend.width, horizontal = horizontal, yaxt = yaxt,
	             zlim = zlim, legend.only = legend.only)
	  } else {
	    # Log colour scale: strip "z" so only axis letters reach `log`, then
	    # build ten log-spaced legend ticks labelled on the original scale.
	    useLog <- gsub(useLog, pattern = "z", replacement = "")
	    zTicks <- summary(c(s_$z))
	    ep_ <- 0.001 # floor that keeps log() finite
	    zTicks[zTicks < ep_] <- ep_
	    zTicks <- exp(seq(log(min(zTicks)), log(max(zTicks)), length.out = 10))
	    zTicks <- round(zTicks, abs(min(log(zTicks, base = 10))))
	    s_$z[s_$z < ep_] <- ep_
	    # NOTE(review): zlim is forwarded unchanged although the image shows
	    # log(z) -- confirm callers supply zlim on the log scale.
	    imageFxn(s_$x, s_$y, log(s_$z), yaxt = yaxt,
	             axis.args = list(at = log(zTicks), labels = zTicks),
	             main = main, cex.main = cex.main, xlab = xlab, ylab = ylab,
	             log = useLog, cex.lab = cex.lab, xlim = xlim, ylim = ylim,
	             horizontal = horizontal, col = myCol, legend.width = legend.width,
	             zlim = zlim, legend.only = legend.only)
	  }
	  if (!is.null(vline)) { abline(v = vline, lwd = 10, col = col_vline) }
	  if (!is.null(hline)) { abline(h = hline, lwd = 10, col = col_hline) }

	  if (includeMarginals) {
	    # One jittered tick per observation; counts follow the vector drawn.
	    points(x + rnorm(length(x), sd = marginalJitterSD_x * sd(x)),
	           rep(ylim[1] * 1.1, length(x)), pch = "|", col = "darkgray")
	    points(rep(xlim[1] * 1.1, length(y)),
	           y + rnorm(length(y), sd = sd(y) * marginalJitterSD_y), pch = "-", col = "darkgray")
	  }

	  # Add green star at optimal point if provided
	  if (!is.null(optimal_point)) {
	    points(optimal_point$x, optimal_point$y, pch = 8, col = "green", cex = 3, lwd = 4)
	  }
	}

	##############################################################################
	# Metric dropdown entries as a named character vector:
	# names are the labels shown to the user, values are the matching column
	# names in the dataset.
	##############################################################################
	metric_choices <- local({
	  # Each row: display label, dataset column.
	  label_code_pairs <- rbind(
	    c("Mean AUTOC RATE Ratio", "AUTOC_rate_std_ratio_mean"),
	    c("Mean AUTOC RATE", "AUTOC_rate_mean"),
	    c("Mean SD of AUTOC RATE", "AUTOC_rate_std_mean"),
	    c("Mean AUTOC RATE Ratio with PC", "AUTOC_rate_std_ratio_mean_pc"),
	    c("Mean AUTOC RATE with PC", "AUTOC_rate_mean_pc"),
	    c("Mean SD of AUTOC RATE with PC", "AUTOC_rate_std_mean_pc"),
	    c("Mean Variable Importance (Image 1)", "MeanVImportHalf1"),
	    c("Mean Variable Importance (Image 2)", "MeanVImportHalf2"),
	    c("Mean Fraction of Top k Features (Image 1)", "FracTopkHalf1"),
	    c("Mean RMSE", "RMSE")
	  )
	  setNames(label_code_pairs[, 2], label_code_pairs[, 1])
	})

	##############################################################################
	# Map a metric code to its display label
	##############################################################################
	getMetricLabel <- function(metric_value) {
	  # E.g. "AUTOC_rate_mean" -> "Mean AUTOC RATE"; a code with no entry in
	  # `metric_choices` is handed back unchanged.
	  is_match <- metric_choices == metric_value
	  labels <- names(metric_choices)[!is.na(is_match) & is_match]
	  if (length(labels) > 0) labels else metric_value
	}

	# UI Definition
	# Sidebar with the application / model / metric selectors and the
	# "compare to best" checkbox; main panel with the heatmap and an italic
	# context note underneath.
	ui <- local({
	  application_options <- unique(sm$application)

	  controls <- sidebarPanel(
	    selectInput("application", "Application",
	                choices = application_options,
	                selected = application_options[1]),
	    selectInput("model", "Model",
	                choices = unique(sm$optimizeImageRep),
	                selected = "clip-rsicd"),
	    # The named vector supplies both the visible labels and the values.
	    selectInput("metric", "Metric",
	                choices = metric_choices,
	                selected = "AUTOC_rate_std_ratio_mean"),
	    checkboxInput("compareToBest", "Compare to best single scale", value = FALSE)
	  )

	  display <- mainPanel(
	    plotOutput("heatmapPlot", height = "600px"),
	    div(style = "margin-top: 10px; font-style: italic;", uiOutput("contextNote"))
	  )

	  fluidPage(
	    titlePanel("Multiscale Heatmap Explorer"),
	    sidebarLayout(controls, display)
	  )
	})

	# Server Definition
	#
	# Filters `sm` by the selected application and model, averages the selected
	# metric per (MaxImageDimsLeft, MaxImageDimsRight) pair, interpolates those
	# means onto a grid, and renders the heatmap plus an explanatory note.
	#
	# Arguments:
	#   input, output  The Shiny input and output objects.
	server <- function(input, output) {
	  # Grid resolution shared by the optimal-point search and the drawn heatmap.
	  grid_n <- 50

	  # Function to determine whether to maximize or minimize the metric.
	  # The pattern is "std_mean" rather than a bare "std" so that the
	  # "*_std_ratio_mean" ratio metrics are still maximised.
	  get_better_direction <- function(metric) {
	    if (grepl("std_mean|RMSE", metric)) "min" else "max"
	  }

	  # Reactive data processing: rows for the chosen application and model.
	  # A missing right-hand dimension is filled with the left-hand one.
	  # Returns NULL when nothing matches.
	  filteredData <- reactive({
	    df <- sm %>%
	      filter(application == input$application,
	             optimizeImageRep == input$model) %>%
	      mutate(MaxImageDimsRight = ifelse(is.na(MaxImageDimsRight),
	                                        MaxImageDimsLeft,
	                                        MaxImageDimsRight))
	    if (nrow(df) == 0) return(NULL)
	    df
	  })

	  # Per-dimension-pair summary used by both the optimal-point search and the
	  # plot, so the two can never disagree. Returns NULL when there is too
	  # little data to interpolate; otherwise a list with
	  #   grouped    : tibble with mean_metric, improvement and z (the plotted
	  #                quantity), restricted to finite x, y and z
	  #   better_dir : "max" or "min" for the chosen metric
	  plot_inputs <- reactive({
	    data <- filteredData()
	    if (is.null(data)) return(NULL)

	    metric <- input$metric
	    shiny::validate(shiny::need(
	      metric %in% names(data),
	      paste0("Metric column '", metric, "' is not available in the data.")
	    ))

	    # .data[[metric]] looks the column up in the data only; get() could
	    # silently pick up an unrelated object of the same name.
	    grouped_data <- data %>%
	      group_by(MaxImageDimsLeft, MaxImageDimsRight) %>%
	      summarise(
	        mean_metric = mean(as.numeric(.data[[metric]]), na.rm = TRUE),
	        .groups = "drop"
	      )

	    better_dir <- get_better_direction(metric)

	    # Best value among the single-scale (left == right) combinations.
	    single_scale <- grouped_data %>%
	      filter(MaxImageDimsLeft == MaxImageDimsRight) %>%
	      pull(mean_metric)
	    single_scale <- single_scale[is.finite(single_scale)]
	    best_single_scale_metric <- if (length(single_scale) == 0) {
	      NA_real_
	    } else if (better_dir == "max") {
	      max(single_scale)
	    } else {
	      min(single_scale)
	    }

	    # Improvement is signed so that larger is always better.
	    grouped_data <- grouped_data %>%
	      mutate(improvement = if (better_dir == "max") {
	        mean_metric - best_single_scale_metric
	      } else {
	        best_single_scale_metric - mean_metric
	      })

	    # Select z based on checkbox
	    grouped_data$z <- if (input$compareToBest) {
	      grouped_data$improvement
	    } else {
	      grouped_data$mean_metric
	    }

	    # akima::interp rejects missing or infinite values.
	    grouped_data <- grouped_data %>%
	      filter(is.finite(z),
	             is.finite(MaxImageDimsLeft),
	             is.finite(MaxImageDimsRight))

	    # Check if interpolation is possible
	    if (length(unique(grouped_data$MaxImageDimsLeft)) < 2 ||
	        length(unique(grouped_data$MaxImageDimsRight)) < 2 ||
	        nrow(grouped_data) < 3) {
	      return(NULL)
	    }

	    list(grouped = grouped_data, better_dir = better_dir)
	  })

	  # Reactive expression to compute interpolated data and optimal point.
	  # Returns NULL when interpolation is not possible; otherwise a list with
	  # the interpolated grid `s_` and `optimal_point` (x, y, z of the best cell).
	  interpolated_data <- reactive({
	    inputs <- plot_inputs()
	    if (is.null(inputs)) return(NULL)

	    x <- inputs$grouped$MaxImageDimsLeft
	    y <- inputs$grouped$MaxImageDimsRight

	    # Compute interpolated grid
	    s_ <- tryCatch(
	      akima::interp(
	        x = x,
	        y = y,
	        z = inputs$grouped$z,
	        xo = seq(min(x), max(x), length.out = grid_n),
	        yo = seq(min(y), max(y), length.out = grid_n),
	        duplicate = "mean"
	      ),
	      error = function(e) {
	        warning("Interpolation failed: ", conditionMessage(e), call. = FALSE)
	        NULL
	      }
	    )
	    if (is.null(s_) || !any(is.finite(s_$z))) return(NULL)

	    # Find optimal point from interpolated grid. Improvement is always
	    # "larger is better", whatever the direction of the raw metric.
	    best_idx <- if (input$compareToBest || inputs$better_dir == "max") {
	      which.max(s_$z)
	    } else {
	      which.min(s_$z)
	    }
	    row_col <- arrayInd(best_idx, .dim = dim(s_$z))

	    list(
	      s_ = s_,
	      optimal_point = list(
	        x = s_$x[row_col[1, 1]],
	        y = s_$y[row_col[1, 2]],
	        z = s_$z[row_col[1, 1], row_col[1, 2]]
	      )
	    )
	  })

	  # Heatmap Output
	  output$heatmapPlot <- renderPlot({
	    interp_data <- interpolated_data()
	    if (is.null(interp_data)) {
	      plot.new()
	      text(0.5, 0.5, "Insufficient data for interpolation", cex = 1.5)
	      return(NULL)
	    }

	    grouped_data <- plot_inputs()$grouped

	    # Retrieve the label for the chosen metric:
	    chosen_metric_label <- getMetricLabel(input$metric)

	    main_title <- if (input$compareToBest) {
	      paste(input$application, "-", chosen_metric_label, "\n Improvement Over Best Single Scale")
	    } else {
	      paste(input$application, "-", chosen_metric_label)
	    }

	    z <- grouped_data$z

	    par(mar = c(5, 5, 5, 1))
	    customPalette <- colorRampPalette(c("blue", "white", "red"))(50)
	    heatMap(
	      x = grouped_data$MaxImageDimsLeft,
	      y = grouped_data$MaxImageDimsRight,
	      z = z,
	      N = grid_n,
	      main = main_title,
	      xlab = "Image Dimension 1",
	      ylab = "Image Dimension 2",
	      useLog = "xy",
	      myCol = customPalette,
	      cex.lab = 1.4,
	      zlim = range(z, na.rm = TRUE),
	      optimal_point = interp_data$optimal_point
	    )
	  })

	  # Contextual Note Output: one sentence describing the current view,
	  # followed by background text and a glossary shared by both views.
	  output$contextNote <- renderText({
	    SharedContextText <- c(
	      "The Peru RCT involves a multifaceted graduation program treatment to reduce poverty outcomes.",
	      "The Uganda RCT involves a cash grant program to stimulate human capital and living conditions among the poor.",
	      "For more information, see <a href='https://arxiv.org/abs/2411.02134' target='_blank'>https://arxiv.org/abs/2411.02134</a>",
	      "<div style='font-size: 10px; line-height: 1.5;'>",
	      "<b>Glossary:</b><br>",
	      "• <b>Model:</b> The neural-network backbone (e.g., clip-rsicd) transforming satellite images into numerical representations.<br>",
	      "• <b>Metric:</b> The criterion (e.g., RATE Ratio, RMSE) measuring performance or heterogeneity detection.<br>",
	      "• <b>Compare to best single-scale:</b> Toggle showing metric improvement relative to the best single-scale baseline.<br>",
	      "• <b>ImageDim1, ImageDim2:</b> Image sizes (e.g., 64×64, 128×128) for multi-scale analysis.<br>",
	      "• <b>RATE Ratio:</b> A t-statistic-like quantity indicating how much a data-model combination captures treatment-effect variation. Ratio of the RATE and its standard error. It can employ two weighting schemes (AUTOC and Qini).<br>",
	      "• <b>PC:</b> Principal Components; a compression step of neural representations.<br>",
	      "• <b>MeanDiff, MeanDiff_pc:</b> Gain in RATE Ratio from multi-scale vs. single-scale, with '_pc' for compressed data.<br>",
	      "• <b>RMSE:</b> Root Mean Squared Error, measuring prediction accuracy in simulations.<br>",
	      "</div>"
	    )

	    chosen_metric_label <- getMetricLabel(input$metric)

	    lead_sentence <- if (input$compareToBest) {
	      paste(
	        "This heatmap shows the improvement in",
	        paste0("'", chosen_metric_label, "'"),
	        "over the best single scale for",
	        input$application,
	        "using the", input$model, "model. The green star marks the optimal point."
	      )
	    } else {
	      paste(
	        "This heatmap displays",
	        paste0("'", chosen_metric_label, "'"),
	        "for", input$application,
	        "using the", input$model,
	        "model across different image dimension combinations. The green star marks the optimal point."
	      )
	    }

	    c(lead_sentence, SharedContextText)
	  })
	}

	# Run the Shiny App
	# Combines the `ui` and `server` objects defined above into a Shiny app
	# object.
	shinyApp(ui = ui, server = server)