SVD and Low-Rank Approximations

Jack Werner

March 8, 2019

Background

I wanted to get myself a little more familiar with singular-value decomposition, and specifically how it can be used to create low-rank approximations of matrices. In trying to wrap my head around the process, I found this American Mathematical Society article pretty helpful.

I wanted to take a look at three different examples: a ring, a box with added noise (like the AMS article), and noted historical ballplayer Babe Ruth. For each, I wanted to visualize the approximations both side-by-side and as a GIF. For the GIF, I also wanted to also overlay a plot of each singular value to get a feel for how much an image changes as the higher singular values are used one-by-one.

Here are functions for both of these plots (and a helper function to create an array of low-rank approximations).

Create functions for visualizing SVD

library(tidyverse)
library(pixmap)
library(gganimate)
library(transformr)

# Creates array where each matrix is a low-rank approximation or original
svd_approximation_array <- function(image_matrix) {
  image_svd <- svd(image_matrix)
  
  image_approximations <- array(0, dim = c(nrow(image_matrix), ncol(image_matrix), nrow(image_matrix)))
  
  for (i in 1:nrow(image_matrix)) {
    new_diag <- c(image_svd$d[1:i], rep(0, length(image_svd$d)))[1:length(image_svd$d)]
    image_approximations[,,i]<- image_svd$u %*% diag(new_diag) %*% t(image_svd$v)
  }
  
  return(image_approximations)
}

# View all (or given subset) of low-rank approximations
svd_approximation_multiplot <- function(image_matrix, 
                                        num_sv_list = 1:dim(image_matrix)[2], 
                                        plot_nrow = NULL, plot_ncol = NULL) {
  image_approximations <- svd_approximation_array(image_matrix = image_matrix)
  
  image_approximations_df <- data.frame(bw_val = as.vector(image_approximations),
                                       x_pos = 1:dim(image_approximations)[1],
                                       y_pos = rep(1:dim(image_approximations)[2], each = dim(image_approximations)[1]),
                                       num_sv_used = rep(1:dim(image_approximations)[3], 
                                                         each = prod(dim(image_approximations)[1:2]))) %>%
    mutate(num_sv_used_label = paste0(num_sv_used, ifelse(num_sv_used == 1, " SV", " SVs")))
  
  image_approximations_df$num_sv_used_label <- factor(image_approximations_df$num_sv_used_label, 
                                                     levels = unique(image_approximations_df$num_sv_used_label))
  
  
  p <- image_approximations_df %>% filter(num_sv_used %in% num_sv_list) %>%
    ggplot(aes(x = x_pos, y = y_pos, fill = bw_val)) + 
    facet_wrap(~num_sv_used_label, nrow = plot_nrow, ncol = plot_ncol) + 
    coord_fixed() + 
    geom_tile() +
    scale_fill_gradient(low = "black", high = "white") +
    theme(legend.position = "none",
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank())
  
  return(p)
}

# Create a GIF of all (or a given subset) of low-rank approximations
svd_approximation_gif <- function(image_matrix, 
                                  num_sv_list = 1:dim(image_matrix)[2]) {
  image_approximations <- svd_approximation_array(image_matrix = image_matrix)
  
  image_svd <- svd(image_matrix)
  
  image_approximations_df <- data.frame(bw_val = as.vector(image_approximations),
                                        x_pos = 1:dim(image_approximations)[1],
                                        y_pos = rep(1:dim(image_approximations)[2], each = dim(image_approximations)[1]),
                                        num_sv_used = rep(1:dim(image_approximations)[3], 
                                                          each = prod(dim(image_approximations)[1:2]))) %>%
    mutate(num_sv_used_label = paste0(num_sv_used, ifelse(num_sv_used == 1, " SV", " SVs")),
           x_pos = x_pos - min(x_pos),
           x_pos_scaled = x_pos/max(x_pos)*length(image_svd$d),
           y_pos = y_pos - min(y_pos),
           y_pos_scaled = y_pos/max(y_pos)*max(image_svd$d)) %>%
    filter(num_sv_used %in% num_sv_list)
  image_approximations_df$num_sv_used_label <- factor(image_approximations_df$num_sv_used_label, 
                                                      levels = unique(image_approximations_df$num_sv_used_label))
  
  singular_values <- data.frame(num_sv_used = rep(1:length(image_svd$d), length(image_svd$d)),
                                singular_value = rep(image_svd$d, length(image_svd$d)),
                                highlight_sv = rep(1:length(image_svd$d), each = length(image_svd$d)),
                                num_sv_used_label = rep(c("1 SV", paste0(2:length(image_svd$d), " SVs")), 
                                                        each = length(image_svd$d))) %>%
    mutate(highlight_one = ifelse(num_sv_used == highlight_sv,T, F),
           highlight_many = ifelse(num_sv_used <= highlight_sv, T, F),
           singular_value_number = num_sv_used) %>%
    filter(highlight_sv %in% num_sv_list)
  singular_values$num_sv_used_label <- factor(singular_values$num_sv_used_label, 
                                              levels = unique(singular_values$num_sv_used_label))
  
  image_animation <- ggplot(data = image_approximations_df, aes(x = x_pos_scaled, y = y_pos_scaled, fill = bw_val)) + 
    transition_states(num_sv_used_label, transition_length = 0) +
    geom_raster(hjust = 1, vjust = 1) +
    geom_point(data = singular_values, 
               aes(x = singular_value_number, y = singular_value, color = highlight_many, fill = NULL), size = 5) +
    scale_fill_gradient(low = "black", high = "white") +
    scale_color_manual(values = c("black", "red")) +
    scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) +
    labs(x = "Singular Values Used",
        y = "Singular Value") +
    coord_fixed(ratio = (length(image_svd$d)/max(image_approximations_df$x_pos))/
                  (max(image_svd$d)/max(image_approximations_df$y_pos))) +
    theme(legend.position = "none",
          panel.background = element_blank(),
          axis.line = element_line(),
          axis.title = element_text(size = 18),
          axis.text = element_text(size = 13)
          )
  
  return(image_animation)
}

Example 1: Ring

I wanted to start with an image with no noise. I chose a ring, since it’s a step up in complexity from a rectangle.

ring_matrix <- matrix(1, nrow = 30, ncol = 30)

for (i in 1:30) {
  for (j in 1:30) {
    r <- sqrt((i-15)^2 + (j-15)^2)
    if (r > 7 & r < 12) {
      ring_matrix[i, j] <- 0
    }
  }
}

image(ring_matrix, axes = F, col = grey(seq(0, 1, length = 256)))

svd_approximation_multiplot(ring_matrix, plot_ncol = 10)

ring_animation <- svd_approximation_gif(ring_matrix)

anim_save("ring_animation.gif", ring_animation)

Example 2: Box with Noise

Here I tried to replicate the AMS article’s example of a rectangle with added noise.

beta_shape1 <- 1
beta_shape2 <- 10
box_matrix <- matrix(1 - rbeta(15*25, beta_shape1, beta_shape2), ncol = 25, nrow = 15)

box_matrix[c(3:5, 11:13), 6:20] <- rbeta(15*6, beta_shape1, beta_shape2)
box_matrix[6:10, c(6:8, 18:20)] <- rbeta(6*5, beta_shape1, beta_shape2)

image(box_matrix, axes = F, col = grey(seq(0, 1, length = 256)))

svd_approximation_multiplot(box_matrix)

box_animation <- svd_approximation_gif(box_matrix)

anim_save("box_animation.gif", box_animation)

Example 3: Babe Ruth

Babe Ruth hit 714 home runs in his storied career. I wonder if his low-rank approximation would have quite as much pop?

pnm_data <- read.pnm(file = "RuthBabe.ppm")
babe_matrix <- getChannels(pnm_data)[,,1] %>% t() %>% .[,ncol(.):1]
image(babe_matrix, axes = F, col = grey(seq(0, 1, length = 256)))

svd_approximation_multiplot(babe_matrix, num_sv_list = c(1:10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60, 80, 100, 125, 150, 175, 200, 250, 298))

babe_animation <- svd_approximation_gif(babe_matrix, 
                                        num_sv_list = c(1:10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60, 
                                                        80, 100, 125, 150, 175, 200, 250, 298))

anim_save("babe_animation.gif", babe_animation)

Advertisements
SVD and Low-Rank Approximations

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s