SVD and Low-Rank Approximations

Background

I wanted to get myself a little more familiar with singular-value decomposition, and specifically how it can be used to create low-rank approximations of matrices. In trying to wrap my head around the process, I found this American Mathematical Society article pretty helpful.

I wanted to take a look at three different examples: a ring, a box with added noise (like the AMS article), and noted historical ballplayer Babe Ruth. For each, I wanted to visualize the approximations both side-by-side and as a GIF. For the GIF, I also wanted to also overlay a plot of each singular value to get a feel for how much an image changes as the higher singular values are used one-by-one.

Here are functions for both of these plots (and a helper function to create an array of low-rank approximations).

Create functions for visualizing SVD

``````library(tidyverse)
library(pixmap)
library(gganimate)
library(transformr)

# Creates array where each matrix is a low-rank approximation or original
svd_approximation_array <- function(image_matrix) {
image_svd <- svd(image_matrix)

image_approximations <- array(0, dim = c(nrow(image_matrix), ncol(image_matrix), nrow(image_matrix)))

for (i in 1:nrow(image_matrix)) {
new_diag <- c(image_svd\$d[1:i], rep(0, length(image_svd\$d)))[1:length(image_svd\$d)]
image_approximations[,,i]<- image_svd\$u %*% diag(new_diag) %*% t(image_svd\$v)
}

return(image_approximations)
}

# View all (or given subset) of low-rank approximations
svd_approximation_multiplot <- function(image_matrix,
num_sv_list = 1:dim(image_matrix),
plot_nrow = NULL, plot_ncol = NULL) {
image_approximations <- svd_approximation_array(image_matrix = image_matrix)

image_approximations_df <- data.frame(bw_val = as.vector(image_approximations),
x_pos = 1:dim(image_approximations),
y_pos = rep(1:dim(image_approximations), each = dim(image_approximations)),
num_sv_used = rep(1:dim(image_approximations),
each = prod(dim(image_approximations)[1:2]))) %>%
mutate(num_sv_used_label = paste0(num_sv_used, ifelse(num_sv_used == 1, " SV", " SVs")))

image_approximations_df\$num_sv_used_label <- factor(image_approximations_df\$num_sv_used_label,
levels = unique(image_approximations_df\$num_sv_used_label))

p <- image_approximations_df %>% filter(num_sv_used %in% num_sv_list) %>%
ggplot(aes(x = x_pos, y = y_pos, fill = bw_val)) +
facet_wrap(~num_sv_used_label, nrow = plot_nrow, ncol = plot_ncol) +
coord_fixed() +
geom_tile() +
scale_fill_gradient(low = "black", high = "white") +
theme(legend.position = "none",
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())

return(p)
}

# Create a GIF of all (or a given subset) of low-rank approximations
svd_approximation_gif <- function(image_matrix,
num_sv_list = 1:dim(image_matrix)) {
image_approximations <- svd_approximation_array(image_matrix = image_matrix)

image_svd <- svd(image_matrix)

image_approximations_df <- data.frame(bw_val = as.vector(image_approximations),
x_pos = 1:dim(image_approximations),
y_pos = rep(1:dim(image_approximations), each = dim(image_approximations)),
num_sv_used = rep(1:dim(image_approximations),
each = prod(dim(image_approximations)[1:2]))) %>%
mutate(num_sv_used_label = paste0(num_sv_used, ifelse(num_sv_used == 1, " SV", " SVs")),
x_pos = x_pos - min(x_pos),
x_pos_scaled = x_pos/max(x_pos)*length(image_svd\$d),
y_pos = y_pos - min(y_pos),
y_pos_scaled = y_pos/max(y_pos)*max(image_svd\$d)) %>%
filter(num_sv_used %in% num_sv_list)
image_approximations_df\$num_sv_used_label <- factor(image_approximations_df\$num_sv_used_label,
levels = unique(image_approximations_df\$num_sv_used_label))

singular_values <- data.frame(num_sv_used = rep(1:length(image_svd\$d), length(image_svd\$d)),
singular_value = rep(image_svd\$d, length(image_svd\$d)),
highlight_sv = rep(1:length(image_svd\$d), each = length(image_svd\$d)),
num_sv_used_label = rep(c("1 SV", paste0(2:length(image_svd\$d), " SVs")),
each = length(image_svd\$d))) %>%
mutate(highlight_one = ifelse(num_sv_used == highlight_sv,T, F),
highlight_many = ifelse(num_sv_used <= highlight_sv, T, F),
singular_value_number = num_sv_used) %>%
filter(highlight_sv %in% num_sv_list)
singular_values\$num_sv_used_label <- factor(singular_values\$num_sv_used_label,
levels = unique(singular_values\$num_sv_used_label))

image_animation <- ggplot(data = image_approximations_df, aes(x = x_pos_scaled, y = y_pos_scaled, fill = bw_val)) +
transition_states(num_sv_used_label, transition_length = 0) +
geom_raster(hjust = 1, vjust = 1) +
geom_point(data = singular_values,
aes(x = singular_value_number, y = singular_value, color = highlight_many, fill = NULL), size = 5) +
scale_fill_gradient(low = "black", high = "white") +
scale_color_manual(values = c("black", "red")) +
scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) +
labs(x = "Singular Values Used",
y = "Singular Value") +
coord_fixed(ratio = (length(image_svd\$d)/max(image_approximations_df\$x_pos))/
(max(image_svd\$d)/max(image_approximations_df\$y_pos))) +
theme(legend.position = "none",
panel.background = element_blank(),
axis.line = element_line(),
axis.title = element_text(size = 18),
axis.text = element_text(size = 13)
)

return(image_animation)
}``````

Example 1: Ring

I wanted to start with an image with no noise. I chose a ring, since it’s a step up in complexity from a rectangle.

``````ring_matrix <- matrix(1, nrow = 30, ncol = 30)

for (i in 1:30) {
for (j in 1:30) {
r <- sqrt((i-15)^2 + (j-15)^2)
if (r > 7 & r < 12) {
ring_matrix[i, j] <- 0
}
}
}

image(ring_matrix, axes = F, col = grey(seq(0, 1, length = 256)))`````` ``svd_approximation_multiplot(ring_matrix, plot_ncol = 10)`` ``````ring_animation <- svd_approximation_gif(ring_matrix)

anim_save("ring_animation.gif", ring_animation)`````` Example 2: Box with Noise

Here I tried to replicate the AMS article’s example of a rectangle with added noise.

``````beta_shape1 <- 1
beta_shape2 <- 10
box_matrix <- matrix(1 - rbeta(15*25, beta_shape1, beta_shape2), ncol = 25, nrow = 15)

box_matrix[c(3:5, 11:13), 6:20] <- rbeta(15*6, beta_shape1, beta_shape2)
box_matrix[6:10, c(6:8, 18:20)] <- rbeta(6*5, beta_shape1, beta_shape2)

image(box_matrix, axes = F, col = grey(seq(0, 1, length = 256)))`````` ``svd_approximation_multiplot(box_matrix)`` ``````box_animation <- svd_approximation_gif(box_matrix)

anim_save("box_animation.gif", box_animation)`````` Example 3: Babe Ruth

Babe Ruth hit 714 home runs in his storied career. I wonder if his low-rank approximation would have quite as much pop?

``````pnm_data <- read.pnm(file = "RuthBabe.ppm")
babe_matrix <- getChannels(pnm_data)[,,1] %>% t() %>% .[,ncol(.):1]
image(babe_matrix, axes = F, col = grey(seq(0, 1, length = 256)))`````` ``svd_approximation_multiplot(babe_matrix, num_sv_list = c(1:10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60, 80, 100, 125, 150, 175, 200, 250, 298))`` ``````babe_animation <- svd_approximation_gif(babe_matrix,
num_sv_list = c(1:10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60,
80, 100, 125, 150, 175, 200, 250, 298))

anim_save("babe_animation.gif", babe_animation)`````` 