############################################################################# # GUIhistogram.R - Enhanced version with input validation and better organization ############################################################################# # Load required libraries with error handling required_packages <- c("shiny", "ggplot2", "mixtools", "colourpicker", "tidyr", "dplyr") for (pkg in required_packages) { if (!requireNamespace(pkg, quietly = TRUE)) { install.packages(pkg) } library(pkg, character.only = TRUE) } # Global constants FONT_CHOICES <- c( "Arial" = "Arial", "Times New Roman" = "Times New Roman", "Helvetica" = "Helvetica", "Courier New" = "Courier New", "Georgia" = "Georgia" ) DEFAULT_COLORS <- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728") ############################################################################# # Helper Functions ############################################################################# # Input validation function for numerical inputs validate_numeric_input <- function(input_value, default_value, min_value = NULL, max_value = NULL) { # Check if input is empty, NA, or non-numeric if (is.null(input_value) || is.na(input_value) || !is.numeric(input_value)) { return(default_value) } # Apply min/max constraints if provided if (!is.null(min_value) && input_value < min_value) { return(min_value) } if (!is.null(max_value) && input_value > max_value) { return(max_value) } return(input_value) } # Process dataset with validation process_dataset <- function(text) { if (is.null(text) || text == "") { return(NULL) } # Split on newline, trim whitespace lines <- unlist(strsplit(text, "[\r\n]+")) lines <- trimws(lines) # Remove empty lines lines <- lines[nzchar(lines)] if (length(lines) == 0) { return(NULL) } # Convert to numeric, ignoring non-numeric lines numeric_vals <- suppressWarnings(as.numeric(lines)) numeric_vals <- numeric_vals[!is.na(numeric_vals)] if (length(numeric_vals) == 0) { return(NULL) } return(numeric_vals) } # Calculate breaks for histograms with validation calculate_breaks <- function(data, bin_size) { if (length(data) == 0) return(NULL) # Validate bin size bin_size <- validate_numeric_input(bin_size, 0.1, diff(range(data)), 1) range_data <- range(data, na.rm = TRUE) if (diff(range_data) < bin_size) { mid_point <- mean(range_data) return(c(mid_point - bin_size, mid_point, mid_point + bin_size)) } min_break <- floor(range_data[1] / bin_size) * bin_size max_break <- ceiling(range_data[2] / bin_size) * bin_size seq(min_break, max_break, by = bin_size) } # Gaussian fitting function with error handling fit_gaussians <- function(data, fit_type) { if (is.null(data) || length(data) < 3) return(NULL) fits <- list() if (fit_type == "single") { tryCatch({ fits$single <- list( means = mean(data), sds = sd(data), weights = 1 ) }, error = function(e) { fits$single <- NULL }) } else if (fit_type == "double") { tryCatch({ mixture <- normalmixEM(data, k = 2) fits$double <- list( means = mixture$mu, sds = mixture$sigma, weights = mixture$lambda ) }, error = function(e) { fits$double <- NULL }) } return(fits) } # Force the browser to open automatically in non-interactive sessions: options(shiny.launch.browser = TRUE) ui <- fluidPage( tags$head( tags$style(HTML(" /* Scrollbar styles */ .sidebar-content { height: calc(100vh - 80px); overflow-y: auto; padding-right: 15px; } .sidebar-content::-webkit-scrollbar { width: 8px; } .sidebar-content::-webkit-scrollbar-track { background: #f1f1f1; } .sidebar-content::-webkit-scrollbar-thumb { background: #888; border-radius: 4px; } .sidebar-content::-webkit-scrollbar-thumb:hover { background: #555; } /* Section styling */ .section-header { background: #f8f9fa; padding: 10px; margin: 15px -15px; border-left: 4px solid #007bff; font-weight: 500; color: #2c3e50; } /* Section divider */ .section-divider { border: none; height: 3px; background: linear-gradient(to right, #0056b3, #007bff); margin: 25px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1); } /* Input styling */ .form-control { border-radius: 4px; border: 1px solid #dee2e6; } /* Button styling */ .action-button { border-radius: 4px; transition: all 0.3s ease; width: 100%; margin: 5px 0; } .btn-create { background-color: #28a745; color: white; border: none; padding: 12px; font-weight: 500; box-shadow: 0 2px 4px rgba(0,0,0,0.1); } .btn-create:hover { background-color: #218838; box-shadow: 0 4px 8px rgba(0,0,0,0.1); } .btn-stats { background-color: #17a2b8; color: white; border: none; padding: 8px; } .btn-stats:hover { background-color: #138496; } .btn-reset { background-color: #6c757d; color: white; border: none; padding: 6px; } .btn-reset:hover { background-color: #5a6268; } /* Download button styling */ .btn-download { background-color: #007bff; color: white; border: none; padding: 8px; width: 100%; margin-top: 10px; } .btn-download:hover { background-color: #0056b3; } /* Checkbox and radio button styling */ .checkbox, .radio { margin: 10px 0; } ")) ), titlePanel("Multi-Dataset Histogram Comparison with Gaussian Fitting"), sidebarLayout( sidebarPanel( div(class = "sidebar-content", # Basic Settings Section h4(class = "section-header", "Basic Settings"), numericInput("num_datasets", "Number of Datasets (1-4):", value = 1, min = 1, max = 4), textInput("plot_title", "Plot Title:", value = "Histogram Comparison"), textInput("x_label", "X-axis Label:", value = "Value"), textInput("y_label", "Y-axis Label:", value = "Count"), hr(class = "section-divider"), # Visualization Settings h4(class = "section-header", "Visualization Settings"), numericInput("bin_size", "Bin Size:", min = 0.1, value = 1, step = 0.1), selectInput("font_family", "Font Family:", choices = FONT_CHOICES, selected = "Arial"), radioButtons("gaussian_fit", "Gaussian Fit:", choices = list("None" = "none", "Single Gaussian" = "single", "Two Gaussians" = "double")), hr(class = "section-divider"), # Grid and Axis Section h4(class = "section-header", "Grid and Axis Options"), checkboxInput("show_grid", "Show Grid Lines", value = TRUE), checkboxInput("bold_zero", "Bold Zero Lines", value = FALSE), checkboxInput("show_ticks", "Show Outer Tick Marks", value = FALSE), # Bar Border Section h4(class = "section-header", "Bar Border Options"), checkboxInput("show_bar_border", "Show Bar Borders", value = TRUE), numericInput("bar_border_size", "Bar Border Thickness:", value = 0.5, min = 0.1, max = 2, step = 0.1), hr(class = "section-divider"), # Font Settings Section h4(class = "section-header", "Font Settings"), numericInput("axis_label_size", "Axis Label Size:", value = 12, min = 6, max = 24), numericInput("tick_label_size", "Tick Label Size:", value = 10, min = 6, max = 20), hr(class = "section-divider"), # Axis Range Section h4(class = "section-header", "Axis Range Controls"), numericInput("x_min", "X-axis Min:", value = 0), numericInput("x_max", "X-axis Max:", value = 1), numericInput("y_min", "Y-axis Min:", value = 0), numericInput("y_max", "Y-axis Max:", value = 1), actionButton("reset_ranges", "Reset to Auto Range", class = "action-button btn-reset"), br(), br(), # Action Buttons Section h4(class = "section-header", "Actions"), actionButton("create", "Create Plot", class = "action-button btn-create"), actionButton("stats_btn", "Show Statistics", class = "action-button btn-stats"), hr(class = "section-divider"), # Export Section h4(class = "section-header", "Export Options"), selectInput("export_format", "Format:", choices = c("PNG" = "png", "JPEG" = "jpeg", "PDF" = "pdf", "TIFF" = "tiff")), numericInput("export_width", "Width (in):", value = 6, min = 1), numericInput("export_height", "Height (in):", value = 4, min = 1), numericInput("export_dpi", "DPI:", value = 300, min = 72), downloadButton("download_plot", "Download Plot", class = "btn-download") ) ), mainPanel( tabsetPanel( id = "dataset_tabs", tabPanel("Dataset 1", textAreaInput("data1", "Dataset 1 (one number per line):", rows = 8), colourInput("color1", "Color:", value = DEFAULT_COLORS[1]), textInput("label1", "Label:", value = "Dataset 1") ), tabPanel("Dataset 2", textAreaInput("data2", "Dataset 2 (one number per line):", rows = 8), colourInput("color2", "Color:", value = DEFAULT_COLORS[2]), textInput("label2", "Label:", value = "Dataset 2") ), tabPanel("Dataset 3", textAreaInput("data3", "Dataset 3 (one number per line):", rows = 8), colourInput("color3", "Color:", value = DEFAULT_COLORS[3]), textInput("label3", "Label:", value = "Dataset 3") ), tabPanel("Dataset 4", textAreaInput("data4", "Dataset 4 (one number per line):", rows = 8), colourInput("color4", "Color:", value = DEFAULT_COLORS[4]), textInput("label4", "Label:", value = "Dataset 4") ) ), plotOutput("histogram"), verbatimTextOutput("fit_params") ) ) ) server <- function(input, output, session) { ######################################################################## # Function to process a single dataset, ignoring empty/whitespace lines ######################################################################## process_dataset <- function(text) { # If no text, return NULL if (is.null(text) || text == "") { return(NULL) } # Split on newline, trim whitespace lines <- unlist(strsplit(text, "[\r\n]+")) lines <- trimws(lines) # Remove empty lines lines <- lines[nzchar(lines)] if (length(lines) == 0) { return(NULL) } # Convert to numeric, ignoring non-numeric lines numeric_vals <- suppressWarnings(as.numeric(lines)) # Keep only valid (non-NA) values numeric_vals <- numeric_vals[!is.na(numeric_vals)] if (length(numeric_vals) == 0) { return(NULL) } return(numeric_vals) } # Reactive expression to process all datasets once "Create Plot" is clicked datasets <- eventReactive(input$create, { result <- list() for (i in seq_len(input$num_datasets)) { data <- process_dataset(input[[paste0("data", i)]]) if (!is.null(data)) { result[[i]] <- list( data = data, color = input[[paste0("color", i)]], label = input[[paste0("label", i)]] ) } } return(result) }) # Calculate breaks for histograms calculate_breaks <- function(data, bin_size) { if (length(data) == 0) return(NULL) range_data <- range(data, na.rm = TRUE) if (diff(range_data) < bin_size) { mid_point <- mean(range_data) return(c(mid_point - bin_size, mid_point, mid_point + bin_size)) } min_break <- floor(range_data[1] / bin_size) * bin_size max_break <- ceiling(range_data[2] / bin_size) * bin_size seq(min_break, max_break, by = bin_size) } # Update axis ranges with modified auto-scaling after "Create Plot" observe({ req(input$create) all_data <- datasets() if (length(all_data) > 0) { all_values <- unlist(lapply(all_data, function(x) x$data)) if (length(all_values) > 0) { breaks_seq <- calculate_breaks(all_values, input$bin_size) if (!is.null(breaks_seq)) { hist_data <- hist(all_values, plot = FALSE, breaks = breaks_seq) y_max <- max(hist_data$counts) * 1.2 x_max_raw <- max(all_values) x_max_50 <- ceiling(x_max_raw / 50) * 50 # Round up to next 50 updateNumericInput(session, "x_min", value = 0) # Always start at 0 updateNumericInput(session, "x_max", value = x_max_50) updateNumericInput(session, "y_min", value = 0) updateNumericInput(session, "y_max", value = y_max) } } } }) # Reset ranges (modified to match new auto-scaling) observeEvent(input$reset_ranges, { all_data <- datasets() if (length(all_data) > 0) { all_values <- unlist(lapply(all_data, function(x) x$data)) if (length(all_values) > 0) { breaks_seq <- calculate_breaks(all_values, input$bin_size) if (!is.null(breaks_seq)) { hist_data <- hist(all_values, plot = FALSE, breaks = breaks_seq) y_max <- max(hist_data$counts) * 1.2 x_max_raw <- max(all_values) x_max_50 <- ceiling(x_max_raw / 50) * 50 updateNumericInput(session, "x_min", value = 0) updateNumericInput(session, "x_max", value = x_max_50) updateNumericInput(session, "y_min", value = 0) updateNumericInput(session, "y_max", value = y_max) } } } }) # Function to fit Gaussians fit_gaussians <- function(data) { if (is.null(data)) return(NULL) fits <- list() if (input$gaussian_fit == "single") { fits$single <- list( means = mean(data), sds = sd(data), weights = 1 ) } else if (input$gaussian_fit == "double") { tryCatch({ mixture <- normalmixEM(data, k = 2) fits$double <- list( means = mixture$mu, sds = mixture$sigma, weights = mixture$lambda ) }, error = function(e) { fits$double <- NULL }) } return(fits) } ############################################################################### # Reactive expression that creates the ggplot object so we can both display # it in the UI (renderPlot) and export the same plot via the download handler. ############################################################################### plot_reactive <- reactive({ req(datasets()) all_data <- datasets() if (length(all_data) == 0) return(NULL) p <- ggplot() + ggtitle(input$plot_title) + xlab(input$x_label) + ylab(input$y_label) + theme_minimal() + theme( text = element_text(family = input$font_family), plot.title = element_text(hjust = 0.5), panel.grid.major = element_line( color = if (input$show_grid) "gray90" else "transparent" ), panel.grid.minor = element_line( color = if (input$show_grid) "gray95" else "transparent" ), axis.title = element_text(size = input$axis_label_size), axis.text = element_text(size = input$tick_label_size), axis.ticks = element_line( color = if (input$show_ticks) "black" else "transparent" ), axis.ticks.length = unit(if (input$show_ticks) 3 else 0, "pt"), axis.line = element_blank(), panel.border = element_rect(color = "black", fill = NA) ) for (i in seq_along(all_data)) { dataset <- all_data[[i]] df <- data.frame(x = dataset$data, group = dataset$label) breaks_seq <- calculate_breaks(df$x, input$bin_size) if (!is.null(breaks_seq)) { p <- p + geom_histogram( data = df, aes(x = x, y = after_stat(count), fill = group), alpha = 0.5, breaks = breaks_seq, color = if(input$show_bar_border) "black" else NA, linewidth = if(input$show_bar_border) input$bar_border_size else 0 ) # Add Gaussian fits if requested if (input$gaussian_fit != "none") { fits <- fit_gaussians(dataset$data) x_range <- seq(min(dataset$data), max(dataset$data), length.out = 200) if (input$gaussian_fit == "single" && !is.null(fits$single)) { hist_data <- hist(dataset$data, breaks = breaks_seq, plot = FALSE) max_height <- max(hist_data$counts) raw_gaussian <- dnorm(x_range, mean = fits$single$means, sd = fits$single$sds) scale_factor <- max_height / max(raw_gaussian) y_vals <- raw_gaussian * scale_factor p <- p + geom_line( data = data.frame(x = x_range, y = y_vals, group = dataset$label), aes(x = x, y = y, color = group), size = 1 ) } else if (input$gaussian_fit == "double" && !is.null(fits$double)) { hist_data <- hist(dataset$data, breaks = breaks_seq, plot = FALSE) max_height <- max(hist_data$counts) raw_gaussian1 <- dnorm(x_range, mean = fits$double$means[1], sd = fits$double$sds[1]) raw_gaussian2 <- dnorm(x_range, mean = fits$double$means[2], sd = fits$double$sds[2]) mixture_raw <- fits$double$weights[1] * raw_gaussian1 + fits$double$weights[2] * raw_gaussian2 scale_factor <- max_height / max(mixture_raw) y_vals1 <- raw_gaussian1 * scale_factor * fits$double$weights[1] y_vals2 <- raw_gaussian2 * scale_factor * fits$double$weights[2] p <- p + geom_line( data = data.frame(x = x_range, y = y_vals1, group = dataset$label), aes(x = x, y = y, color = group), size = 1, linetype = "dashed" ) + geom_line( data = data.frame(x = x_range, y = y_vals2, group = dataset$label), aes(x = x, y = y, color = group), size = 1, linetype = "dashed" ) + geom_line( data = data.frame(x = x_range, y = y_vals1 + y_vals2, group = dataset$label), aes(x = x, y = y, color = group), size = 1 ) } } } } # Set fill/color scales colors <- sapply(all_data, function(x) x$color) p <- p + scale_fill_manual(values = colors) + scale_color_manual(values = colors) # Remove the gap at zero by using expand = c(0,0) p <- p + scale_x_continuous(limits = c(input$x_min, input$x_max), expand = c(0, 0)) + scale_y_continuous(limits = c(input$y_min, input$y_max), expand = c(0, 0)) # Add zero lines if requested if (input$bold_zero) { # If x=0 is within range, draw a bold vertical line if (input$x_min <= 0 && input$x_max >= 0) { p <- p + theme( axis.line.x = element_line(color = "black", linewidth = 0.8), axis.line.y = element_line(color = "black", linewidth = 0.8) ) + geom_vline(xintercept = 0, color = "black", linewidth = 0.8) } # If y=0 is within range, draw a bold horizontal line if (input$y_min <= 0 && input$y_max >= 0) { p <- p + geom_hline(yintercept = 0, color = "black", linewidth = 0.8) } } p }) # Render the plot in the UI output$histogram <- renderPlot({ plot_reactive() }) # Fit parameters output output$fit_params <- renderPrint({ all_data <- datasets() if (length(all_data) == 0) return(NULL) for (i in seq_along(all_data)) { dataset <- all_data[[i]] cat("\nFit Parameters for", dataset$label, "\n") cat("--------------------------------\n") fits <- fit_gaussians(dataset$data) if (input$gaussian_fit == "single" && !is.null(fits$single)) { cat("Single Gaussian Fit:\n") cat("Mean:", round(fits$single$means, 3), "\n") cat("Standard Deviation:", round(fits$single$sds, 3), "\n") } else if (input$gaussian_fit == "double" && !is.null(fits$double)) { cat("Two-Gaussian Mixture:\n") cat("Component 1:\n") cat(" Weight:", round(fits$double$weights[1], 3), "\n") cat(" Mean:", round(fits$double$means[1], 3), "\n") cat(" Standard Deviation:", round(fits$double$sds[1], 3), "\n") cat("Component 2:\n") cat(" Weight:", round(fits$double$weights[2], 3), "\n") cat(" Mean:", round(fits$double$means[2], 3), "\n") cat(" Standard Deviation:", round(fits$double$sds[2], 3), "\n") } cat("\n") } }) ######################################################################## # Stats logic for "Show Stats" button ######################################################################## compute_stats <- function() { data_list <- datasets() if (length(data_list) < 1) { return("No data to analyze.") } # Combine all datasets into one data frame: value + group factor df_combined <- data.frame(value = numeric(), group = factor()) for (i in seq_along(data_list)) { label <- data_list[[i]]$label vals <- data_list[[i]]$data temp_df <- data.frame(value = vals, group = factor(label)) df_combined <- rbind(df_combined, temp_df) } df_combined$group <- factor(df_combined$group) # Basic stats per group result <- "" group_levels <- levels(df_combined$group) for (grp in group_levels) { grp_data <- df_combined$value[df_combined$group == grp] grp_mean <- mean(grp_data) grp_sd <- sd(grp_data) grp_sem <- grp_sd / sqrt(length(grp_data)) result <- paste0( result, "Group: ", grp, "\n", " n = ", length(grp_data), "\n", " Mean = ", round(grp_mean, 3), "\n", " SD = ", round(grp_sd, 3), "\n", " SEM = ", round(grp_sem, 3), "\n\n" ) } # If only 1 group, no test if (length(group_levels) == 1) { result <- paste0(result, "Only 1 group: no statistical test.\n") return(result) } # If exactly 2 groups, do a t-test if (length(group_levels) == 2) { grp1 <- df_combined$value[df_combined$group == group_levels[1]] grp2 <- df_combined$value[df_combined$group == group_levels[2]] ttest_res <- t.test(grp1, grp2) result <- paste0( result, "T-test comparing ", group_levels[1], " and ", group_levels[2], ":\n", " p-value = ", signif(ttest_res$p.value, 5), "\n", " t-statistic = ", round(ttest_res$statistic, 3), "\n", " degrees of freedom = ", round(ttest_res$parameter, 2), "\n\n" ) return(result) } # If 3+ groups, do a one-way ANOVA aov_res <- aov(value ~ group, data = df_combined) aov_summary <- summary(aov_res) pval <- NA if (!is.null(aov_summary) && length(aov_summary) > 0) { pval <- aov_summary[[1]]$`Pr(>F)`[1] } result <- paste0( result, "One-way ANOVA:\n", " p-value = ", signif(pval, 5), "\n\n" ) return(result) } # On Show Stats button click, show a modal with results observeEvent(input$stats_btn, { stats_text <- compute_stats() showModal( modalDialog( title = "Statistical Analysis", size = "l", easyClose = TRUE, footer = modalButton("Close"), tagList( verbatimTextOutput("stats_modal_text") ) ) ) output$stats_modal_text <- renderText({ stats_text }) }) ######################################################################## # Download handler for exporting the plot ######################################################################## output$download_plot <- downloadHandler( filename = function() { # Example: "histogram.png" or "histogram.pdf" paste0("histogram.", input$export_format) }, content = function(file) { # Choose the correct graphics device based on format device_func <- switch(input$export_format, "png" = function(...){ png(..., bg = "white") }, "jpeg" = function(...){ jpeg(..., quality = 90, bg = "white") }, "pdf" = pdf, "tiff" = tiff ) device_func(file, width = input$export_width, # in inches height = input$export_height, # in inches units = "in", res = input$export_dpi) # DPI (for raster formats) # Print the same plot that is shown in the UI print(plot_reactive()) dev.off() } ) } # Run the application, forcing the browser to open shinyApp(ui = ui, server = server, options = list(launch.browser = TRUE))