# SVD and Low-Rank Approximations

## Background

I wanted to get myself a little more familiar with singular-value decomposition, and specifically how it can be used to create low-rank approximations of matrices. In trying to wrap my head around the process, I found this American Mathematical Society article pretty helpful.

I wanted to take a look at three different examples: a ring, a box with added noise (like the AMS article), and noted historical ballplayer Babe Ruth. For each, I wanted to visualize the approximations both side-by-side and as a GIF. For the GIF, I also wanted to also overlay a plot of each singular value to get a feel for how much an image changes as the higher singular values are used one-by-one.

Here are functions for both of these plots (and a helper function to create an array of low-rank approximations).

## Create functions for visualizing SVD

```
library(tidyverse)
library(pixmap)
library(gganimate)
library(transformr)
# Creates array where each matrix is a low-rank approximation or original
svd_approximation_array <- function(image_matrix) {
image_svd <- svd(image_matrix)
image_approximations <- array(0, dim = c(nrow(image_matrix), ncol(image_matrix), nrow(image_matrix)))
for (i in 1:nrow(image_matrix)) {
new_diag <- c(image_svd$d[1:i], rep(0, length(image_svd$d)))[1:length(image_svd$d)]
image_approximations[,,i]<- image_svd$u %*% diag(new_diag) %*% t(image_svd$v)
}
return(image_approximations)
}
# View all (or given subset) of low-rank approximations
svd_approximation_multiplot <- function(image_matrix,
num_sv_list = 1:dim(image_matrix)[2],
plot_nrow = NULL, plot_ncol = NULL) {
image_approximations <- svd_approximation_array(image_matrix = image_matrix)
image_approximations_df <- data.frame(bw_val = as.vector(image_approximations),
x_pos = 1:dim(image_approximations)[1],
y_pos = rep(1:dim(image_approximations)[2], each = dim(image_approximations)[1]),
num_sv_used = rep(1:dim(image_approximations)[3],
each = prod(dim(image_approximations)[1:2]))) %>%
mutate(num_sv_used_label = paste0(num_sv_used, ifelse(num_sv_used == 1, " SV", " SVs")))
image_approximations_df$num_sv_used_label <- factor(image_approximations_df$num_sv_used_label,
levels = unique(image_approximations_df$num_sv_used_label))
p <- image_approximations_df %>% filter(num_sv_used %in% num_sv_list) %>%
ggplot(aes(x = x_pos, y = y_pos, fill = bw_val)) +
facet_wrap(~num_sv_used_label, nrow = plot_nrow, ncol = plot_ncol) +
coord_fixed() +
geom_tile() +
scale_fill_gradient(low = "black", high = "white") +
theme(legend.position = "none",
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
return(p)
}
# Create a GIF of all (or a given subset) of low-rank approximations
svd_approximation_gif <- function(image_matrix,
num_sv_list = 1:dim(image_matrix)[2]) {
image_approximations <- svd_approximation_array(image_matrix = image_matrix)
image_svd <- svd(image_matrix)
image_approximations_df <- data.frame(bw_val = as.vector(image_approximations),
x_pos = 1:dim(image_approximations)[1],
y_pos = rep(1:dim(image_approximations)[2], each = dim(image_approximations)[1]),
num_sv_used = rep(1:dim(image_approximations)[3],
each = prod(dim(image_approximations)[1:2]))) %>%
mutate(num_sv_used_label = paste0(num_sv_used, ifelse(num_sv_used == 1, " SV", " SVs")),
x_pos = x_pos - min(x_pos),
x_pos_scaled = x_pos/max(x_pos)*length(image_svd$d),
y_pos = y_pos - min(y_pos),
y_pos_scaled = y_pos/max(y_pos)*max(image_svd$d)) %>%
filter(num_sv_used %in% num_sv_list)
image_approximations_df$num_sv_used_label <- factor(image_approximations_df$num_sv_used_label,
levels = unique(image_approximations_df$num_sv_used_label))
singular_values <- data.frame(num_sv_used = rep(1:length(image_svd$d), length(image_svd$d)),
singular_value = rep(image_svd$d, length(image_svd$d)),
highlight_sv = rep(1:length(image_svd$d), each = length(image_svd$d)),
num_sv_used_label = rep(c("1 SV", paste0(2:length(image_svd$d), " SVs")),
each = length(image_svd$d))) %>%
mutate(highlight_one = ifelse(num_sv_used == highlight_sv,T, F),
highlight_many = ifelse(num_sv_used <= highlight_sv, T, F),
singular_value_number = num_sv_used) %>%
filter(highlight_sv %in% num_sv_list)
singular_values$num_sv_used_label <- factor(singular_values$num_sv_used_label,
levels = unique(singular_values$num_sv_used_label))
image_animation <- ggplot(data = image_approximations_df, aes(x = x_pos_scaled, y = y_pos_scaled, fill = bw_val)) +
transition_states(num_sv_used_label, transition_length = 0) +
geom_raster(hjust = 1, vjust = 1) +
geom_point(data = singular_values,
aes(x = singular_value_number, y = singular_value, color = highlight_many, fill = NULL), size = 5) +
scale_fill_gradient(low = "black", high = "white") +
scale_color_manual(values = c("black", "red")) +
scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) +
labs(x = "Singular Values Used",
y = "Singular Value") +
coord_fixed(ratio = (length(image_svd$d)/max(image_approximations_df$x_pos))/
(max(image_svd$d)/max(image_approximations_df$y_pos))) +
theme(legend.position = "none",
panel.background = element_blank(),
axis.line = element_line(),
axis.title = element_text(size = 18),
axis.text = element_text(size = 13)
)
return(image_animation)
}
```

## Example 1: Ring

I wanted to start with an image with no noise. I chose a ring, since it’s a step up in complexity from a rectangle.

```
ring_matrix <- matrix(1, nrow = 30, ncol = 30)
for (i in 1:30) {
for (j in 1:30) {
r <- sqrt((i-15)^2 + (j-15)^2)
if (r > 7 & r < 12) {
ring_matrix[i, j] <- 0
}
}
}
image(ring_matrix, axes = F, col = grey(seq(0, 1, length = 256)))
```

`svd_approximation_multiplot(ring_matrix, plot_ncol = 10)`

```
ring_animation <- svd_approximation_gif(ring_matrix)
anim_save("ring_animation.gif", ring_animation)
```

## Example 2: Box with Noise

Here I tried to replicate the AMS article’s example of a rectangle with added noise.

```
beta_shape1 <- 1
beta_shape2 <- 10
box_matrix <- matrix(1 - rbeta(15*25, beta_shape1, beta_shape2), ncol = 25, nrow = 15)
box_matrix[c(3:5, 11:13), 6:20] <- rbeta(15*6, beta_shape1, beta_shape2)
box_matrix[6:10, c(6:8, 18:20)] <- rbeta(6*5, beta_shape1, beta_shape2)
image(box_matrix, axes = F, col = grey(seq(0, 1, length = 256)))
```

`svd_approximation_multiplot(box_matrix)`

```
box_animation <- svd_approximation_gif(box_matrix)
anim_save("box_animation.gif", box_animation)
```

## Example 3: Babe Ruth

Babe Ruth hit 714 home runs in his storied career. I wonder if his low-rank approximation would have quite as much pop?

```
pnm_data <- read.pnm(file = "RuthBabe.ppm")
babe_matrix <- getChannels(pnm_data)[,,1] %>% t() %>% .[,ncol(.):1]
image(babe_matrix, axes = F, col = grey(seq(0, 1, length = 256)))
```

`svd_approximation_multiplot(babe_matrix, num_sv_list = c(1:10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60, 80, 100, 125, 150, 175, 200, 250, 298))`

```
babe_animation <- svd_approximation_gif(babe_matrix,
num_sv_list = c(1:10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60,
80, 100, 125, 150, 175, 200, 250, 298))
anim_save("babe_animation.gif", babe_animation)
```