Day 3

tidyverse

|>

stringr

Author

Josef Fruehwald

Published

December 3, 2022

Source: https://adventofcode.com/2022/day/3

Part 1

Description:

Given a string of letters, we need to split it in half and then find the single letter that the two halves have in common. Each letter is assigned a score, and we need to sum those scores.

I think I’ll rely on stringr for this?

# Print the R version, platform, and attached/loaded packages for this render.
sessionInfo()

R version 4.2.1 (2022-06-23)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.3

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

loaded via a namespace (and not attached):
 [1] digest_0.6.30     jsonlite_1.8.3    magrittr_2.0.3    evaluate_0.17    
 [5] rlang_1.0.6       stringi_1.7.8     cli_3.4.1         renv_0.15.5      
 [9] rstudioapi_0.14   rmarkdown_2.17    tools_4.2.1       stringr_1.4.1    
[13] htmlwidgets_1.5.4 xfun_0.34         yaml_2.3.6        fastmap_1.1.0    
[17] compiler_4.2.1    htmltools_0.5.3   knitr_1.40

library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

# Load the puzzle input as a one-column tibble. The file is read without a
# header row, so readr names the single column `X1`.
input <- read_tsv(file = "2022-12-3_assets/input.txt", col_names = FALSE)

Rows: 300 Columns: 1
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Split every rucksack string (`X1`) into two equal-sized compartments and
# collect the items that occur in both halves.
compartment <- mutate(
  input,
  # total number of items in the rucksack
  n = nchar(X1),
  # one character per item; `simplify = TRUE` yields a 1 x n character matrix
  item_list = map(
    X1,
    \(rucksack) str_split(rucksack, pattern = "", simplify = TRUE)
  ),
  # items in positions 1 .. n/2
  compartment1 = map2(item_list, n, \(items, len) items[1:(len / 2)]),
  # items in positions n/2 + 1 .. n
  compartment2 = map2(item_list, n, \(items, len) items[((len / 2) + 1):len]),
  # first-half items that also appear in the second half (duplicates kept)
  shared = map2(compartment1, compartment2, \(a, b) a[a %in% b])
)

At this point, some of the values in shared are longer than 1. I need to check on those.

# Inspect a handful of rucksacks whose two compartments share more than one
# entry, to see what the extra entries look like.
compartment |>
  # `map_int()` returns an integer vector directly, so no `simplify()` is needed
  mutate(n_shared = map_int(shared, length)) |>
  filter(n_shared > 1) |>
  # `slice_sample()` replaces the superseded `sample_n()`; it returns every
  # matching row instead of erroring when fewer than five rows remain
  slice_sample(n = 5) |>
  pull(shared)

[[1]]
[1] "R" "R"

[[2]]
[1] "V" "V"

[[3]]
[1] "z" "z"

[[4]]
[1] "n" "n"

[[5]]
[1] "w" "w" "w"

I see, they’re repeated. I’ll uniquify and see if it’s all just length 1

# De-duplicate each row's shared items, then list any rucksack that does not
# end up with exactly one distinct shared item.
compartment |>
  mutate(
    unique_shared = map(shared, unique),
    n_unique = map_int(unique_shared, length)
  ) |>
  filter(n_unique != 1)

# A tibble: 0 × 8
# … with 8 variables: X1 <chr>, n <int>, item_list <list>, compartment1 <list>,
#   compartment2 <list>, shared <list>, unique_shared <list>, n_unique <int>

Ok, good to go.

# Reduce each rucksack's shared items to its single distinct letter.
# `map_chr()` guarantees a character column and fails loudly if any rucksack
# does not have exactly one distinct shared item, whereas `simplify()` would
# silently hand back a list column in that case.
shared_item_df <-
  compartment |>
  mutate(unique_shared = map_chr(shared, unique))

I’ll use named vectors again to get the scores.

# Priority lookup table: "a".."z" score 1..26 and "A".."Z" score 27..52.
item_score <- set_names(1:52, c(letters, LETTERS))

# Part 1 answer: look up each rucksack's shared item in `item_score` and total
# the priorities.
shared_item_df |>
  summarise(priority = sum(item_score[unique_shared]))

# A tibble: 1 × 1
  priority
     <int>
1     7763

Part 2

Task: find the common letter in each three-row sequence. I can start off with the compartment data frame from before.

# Label consecutive runs of three rows with a shared group id
# (1, 1, 1, 2, 2, 2, ...).
group_coding <- mutate(compartment, group = rep(1:(n() / 3), each = 3))

I think I’ll write an n-way %in%.

#' N-way `%in%`: keep the values common to every vector in a list
#'
#' @param x A list of vectors.
#' @return The elements of the last vector in `x` whose values occur in every
#'   other vector of `x` (duplicates are kept, in the last vector's order), or
#'   `NULL` when `x` is empty.
multi_in <- function(x) {
  # Nothing to intersect; `x[[1]]` would otherwise fail with
  # "subscript out of bounds".
  if (length(x) == 0) {
    return(NULL)
  }
  out <- x[[1]]
  # The loop deliberately includes the first element: filtering x[[1]] against
  # itself keeps every value, but it means the result is always produced by
  # subsetting, even for a single-element list.
  for (candidates in x) {
    out <- candidates[candidates %in% out]
  }
  out
}

# Sanity check: "a" is the only letter present in all three vectors.
multi_in(list(c("a", "b", "c", "d"), c("a", "x", "y", "c"), c("a", "x", "z")))

[1] "a"

# Part 2 answer: find the item common to all rows of each group, look up its
# priority, and total the priorities across groups.
group_coding |>
  group_by(group) |>
  summarise(
    shared = unique(multi_in(item_list)),
    priority = item_score[shared]
  ) |>
  summarise(priority = sum(priority))

# A tibble: 1 × 1
  priority
     <int>
1     2569

Just for fun

library(ggdark)
library(khroma)
library(showtext)

Loading required package: sysfonts

Loading required package: showtextdb

library(scales)


Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor

# Fetch the "Mountains of Christmas" Google font and register it under the
# family name "christmas", then switch on showtext so plots can use it.
font_add_google(name = "Mountains of Christmas", family = "christmas")
showtext_auto()

# Make a dark grey theme the default for every plot below, with plot titles
# set in the "christmas" font family at size 20.
theme_set(dark_theme_gray() + 
            theme(title = element_text(family = "christmas", size = 20)))

Inverted geom defaults of fill and color/colour.
To change them back, use invert_geom_defaults().

# Figure 1: bar chart of how many groups have a common item with each
# priority value.
group_coding |>
  group_by(group) |>
  summarise(
    shared = unique(multi_in(item_list)),
    priority = item_score[shared]
  ) |>
  # one row per distinct common item: group count, its priority, and the
  # summed priority
  group_by(shared) |>
  summarise(
    n = n(),
    group_priority = priority[1],
    total_priority = sum(priority)
  ) |>
  ggplot(mapping = aes(x = group_priority, y = n)) +
  geom_col(fill = "lightblue", color = "white") +
  labs(title = "count of priority groups")

Figure 1: the number of groups with a given priority

# Register knitr's `hook_pdfcrop` under the chunk option name `crop`.
knitr::knit_hooks$set(crop = knitr::hook_pdfcrop)

# Figure 2: the same per-item counts drawn as a single filled bar wrapped into
# polar coordinates, coloured by priority.
group_coding |>
  group_by(group) |>
  summarise(
    shared = unique(multi_in(item_list)),
    priority = item_score[shared]
  ) |>
  group_by(shared) |>
  summarise(
    n = n(),
    group_priority = priority[1],
    total_priority = sum(priority)
  ) |>
  arrange(group_priority) |>
  # the constant "x" puts every count into one bar before it is wrapped
  ggplot(mapping = aes(x = "x", y = n)) +
  geom_col(
    mapping = aes(fill = group_priority),
    color = "white",
    position = "fill"
  ) +
  scale_fill_oslo() +
  labs(
    title = "proportion of priority groups",
    x = NULL,
    y = NULL
  ) +
  coord_polar(theta = "y") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

Figure 2: the proportion of groups with a given priority