Appendix D — Code for Chapter 5

This chapter contains the code used for the analysis in chapter 5. The code is organized into three sections: R, Functions, and R Environment. The R section contains the code for the analysis, the Functions section contains the functions used in the analysis, and the R Environment section contains the R environment used to run the analysis.

R

library(tidyverse)
library(ggthemes)
library(here)
library(rio)
library(igraph)
library(ggnetwork)
library(vegan)
library(magick)
library(sf)

set.seed(1010)

source("analysis/Chap5functions.R")

sites <- rio::import(here("data/heurist_sites.csv")) %>%
  janitor::clean_names() %>%
  as_tibble() %>%
  rename(region = place_name)

points <- rio::import(here("data/heurist_projectilePoints.csv")) %>%
  janitor::clean_names() %>%
  as_tibble()

ceramics <- rio::import(here("data/heurist_ceramics.csv")) %>%
  janitor::clean_names() %>%
  as_tibble()

table_sites <- points %>%
  filter(point_type != "undetermined") %>%
  filter(site_link_h_id != "") %>%
  group_by(site_link_h_id) %>%
  count(name = "points") %>%
  ungroup() %>%
  filter(points > 4) %>%
  rename(site_h_id = site_link_h_id) %>%
  mutate(site_h_id = as.integer(site_h_id))

table_ceramics <- ceramics %>%
  filter(!decoration %in% c("Unpainted", "Unknown", "Undifferentiated")) %>%
  filter(site_link_h_id != "") %>%
  group_by(site_link_h_id) %>%
  summarize(ceramics = sum(count)) %>%
  ungroup() %>%
  filter(ceramics > 10) %>%
  rename(site_h_id = site_link_h_id) %>%
  mutate(site_h_id = as.integer(site_h_id))

data <- sites %>%
  select(site_h_id, site_name, long, lat, start_date, end_date, region) %>%
  filter(!is.na(long)) %>%
  inner_join(table_sites, by = "site_h_id") %>%
  inner_join(table_ceramics, by = "site_h_id") %>%
  filter(end_date > 1100) %>%
  mutate_at(vars(-site_name, -region), as.numeric) %>%
  mutate(site_name = reorder(factor(site_name), end_date))

library(ggnetwork)

ggplot(data) +
  geom_edges(aes(x = start_date, y = site_name, xend = end_date, yend = site_name, color = region, group = site_name),
    linewidth = 1.5,
    arrow = arrow(length = unit(0.75, "lines"), ends = "both", type = "open")
  ) +
  theme_gdocs() +
  xlab("Date AD") +
  ylab("")

data <- data %>%
  filter(end_date > 1200) %>%
  filter(start_date < 1250)

ggplot(data) +
  geom_edges(aes(x = start_date, y = site_name, xend = end_date, yend = site_name, color = region, group = site_name),
    linewidth = 1.5,
    arrow = arrow(length = unit(0.75, "lines"), ends = "both", type = "open")
  ) +
  theme_gdocs() +
  xlab("Date AD") +
  ylab("") +
  scale_x_continuous(limits = c(1125, 1375), breaks = seq(1150, 1350, 50), oob = scales::oob_keep)

fn <- "figures/siteOccupations.png"
# ggsave(fn, dpi = 450, width = 6.5, height = 4.5)
# image_read(fn) %>%
  # image_trim() %>%
  # image_write(fn)

north <- data %>%
  filter(lat > 34)

south <- data %>%
  filter(lat < 34)

analyze(data)
analyze(north, "north")
analyze(south, "south")

library(cowplot)
library(magick) # Optional: for better image scaling/format handling

# Load the images
img_A <- cowplot::ggdraw() + draw_image("figures/Chap5NetworkCorrelations_all.png")
img_B <- cowplot::ggdraw() + draw_image("figures/Chap5NetworkCorrelations_north.png")
img_C <- cowplot::ggdraw() + draw_image("figures/Chap5NetworkCorrelations_south.png")

# Combine with labels
combined_plot <- plot_grid(
  img_A, img_B, img_C,
  labels = c("A", "B", "C"),
  label_size = 16,
  ncol = 1, # vertical stacking
  align = "v"
)

# Display or save
print(combined_plot)

# To save
ggsave("figures/combined_network_correlations.png", combined_plot, height = 8, dpi = 450)


#### export session info  ####
# Get the R version
r_version <- paste0("R version=", R.version$major, R.version$minor, sep = ".")

# Get the loaded packages and their versions
loaded_packages <- sessionInfo()$otherPkgs
loaded_versions <- sapply(
  loaded_packages,
  function(x) paste(x$Package, x$Version, sep = "=")
)

# Write the R version and package versions to the file
writeLines(c(r_version, loaded_versions), con = "analysis/r_environment5.qmd")

Functions

# network functions
scale_to_1 <- function(mat) {
  rng <- range(mat, na.rm = TRUE)
  (mat - rng[1]) / (rng[2] - rng[1])
}

makeGraph <- function(d, nLinks = NULL, sf = FALSE) {
  # igraph
  m <- scale_to_1(d)
  g <- graph_from_adjacency_matrix(as.matrix(d),
    mode = "undirected",
    diag = F,
    weighted = T
  )
  n_df <- tibble(site_name = V(g)$name) %>%
    left_join(data, by = "site_name")
  V(g)$centrality <- eigen_centrality(g, weights = E(g)$weight)$vector
  V(g)$region <- n_df$region

  if (!is.null(nLinks)) {
    # calculate top n (with ties) edges and convert to igraph object
    # convert to dataframe
    n <- igraph::as_data_frame(g)
    n <- bind_rows(n, n %>% dplyr::rename(from = to, to = from)) %>%
      mutate_if(is.numeric, replace_na, 0)
    q <- quantile(n$weight, 0.5)
    n <- n %>% # sort data frame
      filter(weight > q) %>%
      arrange(from, desc(weight)) %>%
      # rank by site
      group_by(from) %>%
      mutate(rank = rank(-weight)) %>%
      ungroup()
    nSub <- n %>%
      group_by(from) %>%
      slice_max(n = nLinks, order_by = desc(rank)) %>%
      ungroup()

    if (sf) {
      nSub <- nSub %>%
        left_join(data %>% select(site_name, region, long, lat),
          by = c("from" = "site_name")
        ) %>%
        left_join(
          data %>% select(
            site_name,
            region,
            longend = long,
            latend = lat
          ),
          by = c("to" = "site_name")
        ) %>%
        return(nSub)
    } else {
      gRanks <- graph_from_data_frame(nSub, directed = F, vertices = n_df)
      gRanks <- gRanks %>% igraph::simplify()
      V(gRanks)$centrality <- eigen_centrality(gRanks, weights = E(gRanks)$weight)$vector
      return(gRanks)
    }
  } else {
    return(g)
  }
}


analyze <- function(data, label = "all") {
  ceramics <- ceramics %>%
    filter(!decoration %in% c("Unknown", "Undifferentiated")) %>%
    filter(site_link_h_id %in% data$site_h_id) %>%
    group_by(site_link_h_id) %>%
    filter(end_age > 1200) %>%
    filter(begin_age < 1300) %>%
    select(site_name, ceramic_type, count)

  points <- points %>%
    filter(point_type != "undetermined") %>%
    filter(site_link_h_id %in% data$site_h_id) %>%
    select(site_name, point_type)

  dists <- list()
  coords_sf <- data %>%
    st_as_sf(coords = c("long", "lat"), crs = 4326) %>%
    st_transform(26912)

  coords <- st_coordinates(coords_sf) %>%
    as.data.frame() %>%
    rename(UTMEAST = X, UTMNORTH = Y)

  d <- dist(coords[, c("UTMEAST", "UTMNORTH")]) %>% as.matrix()
  colnames(d) <- rownames(d) <- data$site_name
  # convert to scaled similarity (1 highest)
  d <- 1 - (d / max(d))
  dists$spatial <- d

  # ceramic network
  # convert to adjacency matrix
  m <- ceramics %>%
    select(ceramic_type, site_name, count) %>%
    mutate_at(vars(count), as.numeric) %>%
    group_by(site_name, ceramic_type) %>%
    summarize(count = sum(count)) %>%
    ungroup() %>%
    pivot_wider(names_from = ceramic_type, values_from = count) %>%
    column_to_rownames("site_name") %>%
    mutate_all(as.integer) %>%
    as.matrix()
  m <- m / rowSums(m, na.rm = T)
  # m[1,] %>% sum(na.rm = T)
  d <- vegdist(m, method = "manhattan", na.rm = T) %>% as.matrix()
  d <- 1 - (d / 2)
  dists$ceramics <- d

  # projectile point network
  # convert to adjacency matrix
  m <- points %>%
    select(point_type, site_name) %>%
    mutate(count = 1) %>%
    group_by(site_name, point_type) %>%
    summarize(count = sum(count)) %>%
    ungroup() %>%
    pivot_wider(names_from = point_type, values_from = count) %>%
    column_to_rownames("site_name") %>%
    mutate_all(as.integer) %>%
    as.matrix()
  m <- m / rowSums(m, na.rm = T)
  # m[1,] %>% sum(na.rm = T)
  d <- vegdist(m, method = "manhattan", na.rm = T) %>% as.matrix()
  d <- 1 - (d / 2)
  dists$points <- d

  g2List <- list()
  nms <- names(dists)
  for (n in nms) {
    print(n)
    g <- makeGraph(dists[[n]], nLinks = 3)
    g2List[[n]] <- g
  }


  netDF <- map2_df(g2List, names(g2List), function(g, n) {
    ggnetwork(g) %>% mutate(layer = n)
  }) %>%
    mutate(weight = weight / 2)

  ggplot(
    netDF,
    aes(
      x = x,
      y = y,
      xend = xend,
      yend = yend,
      color = region,
      label = name
    )
  ) +
    geom_edges(aes(linewidth = weight),
      color = "#404040",
      alpha = .75
    ) +
    scale_linewidth(range = c(.5, 2)) +
    geom_nodes(size = 5) +
    geom_nodetext_repel(color = "black", size = 2.5) +
    facet_wrap(~layer, ncol = 1) +
    theme_blank() +
    theme(
      panel.border = element_rect(fill = NA),
      strip.background = element_rect(fill = "#404040"),
      strip.text = element_text(size = 20, color = "white"),
      legend.position = "bottom",
      legend.text = element_text(size = 8)
    ) +
    guides(size = "none", color = guide_legend(ncol = 4, title = NULL))

  fn <- glue::glue("figures/Chap5MultilayerNetworks_{label}.png")
  ggsave(fn,
    dpi = 450,
    width = 6.5,
    height = 9
  )
  # browseURL(fn)

  g2List_sf <- list()
  nms <- names(dists)
  for (n in nms) {
    print(n)
    g <- makeGraph(dists[[n]], nLinks = 3, sf = T)
    g2List_sf[[n]] <- g
  }

  for (i in 1:length(g2List_sf)) {
    g2List_sf[[i]] <- g2List_sf[[i]] %>%
      mutate(
        layer = names(g2List_sf[i]),
        geometry = st_as_sfc(
          paste0("LINESTRING(", long, " ", lat, ",", longend, " ", latend, ")"),
          crs = 4326
        )
      ) %>%
      st_as_sf()
  }

  g2List_sf <- bind_rows(g2List_sf)

  # mapview::mapview(g2List_sf)

  try(st_write(g2List_sf,
    here(glue::glue("data/Chap5MultilayerNetworks_{label}.geojson")),
    delete_dsn = T
  ))

  nodes_sf <- data %>%
    select(site_name, region, long, lat) %>%
    st_as_sf(coords = c("long", "lat"), crs = 4326)

  try(st_write(nodes_sf,
    here(glue::glue("data/Chap5MultilayerNodes_{label}.geojson")),
    delete_dsn = T
  ))

  eigenDF <- map2_df(g2List, names(g2List), function(g, n) {
    tibble(
      id = V(g)$name,
      centrality = V(g)$centrality,
      name = n
    )
  }) %>%
    left_join(data %>% select(site_name, region), by = c("id" = "site_name"))

  eigenDF_plot <- eigenDF %>%
    pivot_wider(names_from = name, values_from = centrality) %>%
    mutate(id = reorder(
      factor(id),
      desc(ceramics)
    ))

  ggplot(eigenDF_plot, aes(
    x = ceramics,
    y = id,
    xend = points,
    yend = id
  )) +
    geom_nodes(
      data = eigenDF_plot,
      aes(x = points, y = id),
      color = "black",
      size = 7,
      alpha = .75
    ) +
    geom_nodes(
      data = eigenDF_plot,
      aes(x = ceramics, y = id),
      color = "tan",
      size = 7,
      alpha = .75
    ) +
    geom_edges(aes(color = region), linewidth = 2) +
    theme_gdocs() +
    theme(
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 8),
      plot.margin = margin(
        t = 0.1,
        r = 0.1,
        b = 0.01,
        l = 0.1,
        unit = "cm"
      ),
      legend.margin = margin(t = -30)
    ) +
    guides(color = guide_legend(
      title = element_blank(),
      position = "bottom",
      ncol = 4
    )) +
    xlab("") +
    ylab("")

  fn <- glue::glue("figures/Chap5Centrality_{label}.png")
  ggsave(fn,
    dpi = 450,
    width = 8.5,
    height = 5.5
  )

  image_read(fn) %>%
    image_trim() %>%
    image_write(fn)

  # browseURL(fn)

  library(sna)

  corr_result <- matrix(NA, nrow = length(g2List), ncol = length(g2List))
  for (i in 1:length(g2List)) {
    for (j in 1:length(g2List)) {
      if (i != j) {
        g1 <- g2List[[i]]
        g2 <- g2List[[j]]
        n1 <- igraph::as_adjacency_matrix(g1, attr = "weight") %>% as.matrix()
        n2 <- igraph::as_adjacency_matrix(g2, attr = "weight") %>% as.matrix()
        corr_result[i, j] <- sna::gcor(n1, n2, mode = "graph")
      }
    }
  }


  colnames(corr_result) <- rownames(corr_result) <- names(g2List)

  library(pheatmap)

  fn <- here(glue::glue("figures/Chap5NetworkCorrelations_{label}.png"))

  pheatmap(
    corr_result,
    cluster_rows = F,
    cluster_cols = F,
    fontsize_row = 10,
    fontsize_col = 10,
    cellwidth = 20,
    cellheight = 20,
    color = colorRampPalette(c("white", "orange"))(50),
    filename = fn,
    width = 6.5,
    height = 6.5,
    show_rownames = T,
    show_colnames = T,
    display_numbers = T,
    legend = F,
    angle_col = 45
  )

  image_read(fn) %>%
    image_trim() %>%
    image_write(fn)

  # multilayer communities

  combined <- make_empty_graph(directed = F)

  for (i in c("ceramics", "points")) {
    g <- g2List[[i]]
    combined <- union(combined, g)
    simplify(combined, remove.multiple = T, remove.loops = T)
  }

  comm <- cluster_louvain(combined, weights = E(combined)$weight)

  comm_df <- data.frame(
    site_name = V(combined)$name,
    comm = comm$membership
  ) %>%
    left_join(data %>% select(site_name, region, long, lat), by = "site_name") %>%
    distinct_all() %>%
    arrange(site_name) %>%
    st_as_sf(coords = c("long", "lat"), crs = 4326)

  try(st_write(comm_df, glue::glue("data/Chap5MultilayerCommunities_{label}.geojson"),
    delete_dsn = T
  ))
  print("completed")
}

R Environment

R version=44.3. sna=2.8 network=1.19.0 statnet.common=4.11.0 sf=1.0-19 magick=2.8.5 vegan=2.6-10 lattice=0.22-6 permute=0.9-7 ggnetwork=0.5.13 igraph=2.1.4 rio=1.2.3 here=1.0.1 ggthemes=5.1.0 lubridate=1.9.4 forcats=1.0.0 stringr=1.5.1 dplyr=1.1.4 purrr=1.0.4 readr=2.1.5 tidyr=1.3.1 tibble=3.2.1 ggplot2=3.5.1 tidyverse=2.0.0