Appendix D — Code for Chapter 5
This chapter contains the code used for the analysis in chapter 5. The code is organized into three sections: R, Functions, and R Environment. The R section contains the code for the analysis, the Functions section contains the functions used in the analysis, and the R Environment section contains the R environment used to run the analysis.
R
library(tidyverse)
library(ggthemes)
library(here)
library(rio)
library(igraph)
library(ggnetwork)
library(vegan)
library(magick)
library(sf)
set.seed(1010)
source("analysis/Chap5functions.R")
sites <- rio::import(here("data/heurist_sites.csv")) %>%
janitor::clean_names() %>%
as_tibble() %>%
rename(region = place_name)
points <- rio::import(here("data/heurist_projectilePoints.csv")) %>%
janitor::clean_names() %>%
as_tibble()
ceramics <- rio::import(here("data/heurist_ceramics.csv")) %>%
janitor::clean_names() %>%
as_tibble()
table_sites <- points %>%
filter(point_type != "undetermined") %>%
filter(site_link_h_id != "") %>%
group_by(site_link_h_id) %>%
count(name = "points") %>%
ungroup() %>%
filter(points > 4) %>%
rename(site_h_id = site_link_h_id) %>%
mutate(site_h_id = as.integer(site_h_id))
table_ceramics <- ceramics %>%
filter(!decoration %in% c("Unpainted", "Unknown", "Undifferentiated")) %>%
filter(site_link_h_id != "") %>%
group_by(site_link_h_id) %>%
summarize(ceramics = sum(count)) %>%
ungroup() %>%
filter(ceramics > 10) %>%
rename(site_h_id = site_link_h_id) %>%
mutate(site_h_id = as.integer(site_h_id))
data <- sites %>%
select(site_h_id, site_name, long, lat, start_date, end_date, region) %>%
filter(!is.na(long)) %>%
inner_join(table_sites, by = "site_h_id") %>%
inner_join(table_ceramics, by = "site_h_id") %>%
filter(end_date > 1100) %>%
mutate_at(vars(-site_name, -region), as.numeric) %>%
mutate(site_name = reorder(factor(site_name), end_date))
library(ggnetwork)
ggplot(data) +
geom_edges(aes(x = start_date, y = site_name, xend = end_date, yend = site_name, color = region, group = site_name),
linewidth = 1.5,
arrow = arrow(length = unit(0.75, "lines"), ends = "both", type = "open")
) +
theme_gdocs() +
xlab("Date AD") +
ylab("")
data <- data %>%
filter(end_date > 1200) %>%
filter(start_date < 1250)
ggplot(data) +
geom_edges(aes(x = start_date, y = site_name, xend = end_date, yend = site_name, color = region, group = site_name),
linewidth = 1.5,
arrow = arrow(length = unit(0.75, "lines"), ends = "both", type = "open")
) +
theme_gdocs() +
xlab("Date AD") +
ylab("") +
scale_x_continuous(limits = c(1125, 1375), breaks = seq(1150, 1350, 50), oob = scales::oob_keep)
fn <- "figures/siteOccupations.png"
# ggsave(fn, dpi = 450, width = 6.5, height = 4.5)
# image_read(fn) %>%
# image_trim() %>%
# image_write(fn)
north <- data %>%
filter(lat > 34)
south <- data %>%
filter(lat < 34)
analyze(data)
analyze(north, "north")
analyze(south, "south")
library(cowplot)
library(magick) # Optional: for better image scaling/format handling
# Load the images
img_A <- cowplot::ggdraw() + draw_image("figures/Chap5NetworkCorrelations_all.png")
img_B <- cowplot::ggdraw() + draw_image("figures/Chap5NetworkCorrelations_north.png")
img_C <- cowplot::ggdraw() + draw_image("figures/Chap5NetworkCorrelations_south.png")
# Combine with labels
combined_plot <- plot_grid(
img_A, img_B, img_C,
labels = c("A", "B", "C"),
label_size = 16,
ncol = 1, # vertical stacking
align = "v"
)
# Display or save
print(combined_plot)
# To save
ggsave("figures/combined_network_correlations.png", combined_plot, height = 8, dpi = 450)
#### export session info ####
# Get the R version
r_version <- paste0("R version=", R.version$major, R.version$minor, sep = ".")
# Get the loaded packages and their versions
loaded_packages <- sessionInfo()$otherPkgs
loaded_versions <- sapply(
loaded_packages,
function(x) paste(x$Package, x$Version, sep = "=")
)
# Write the R version and package versions to the file
writeLines(c(r_version, loaded_versions), con = "analysis/r_environment5.qmd")Functions
# network functions
scale_to_1 <- function(mat) {
rng <- range(mat, na.rm = TRUE)
(mat - rng[1]) / (rng[2] - rng[1])
}
makeGraph <- function(d, nLinks = NULL, sf = FALSE) {
# igraph
m <- scale_to_1(d)
g <- graph_from_adjacency_matrix(as.matrix(d),
mode = "undirected",
diag = F,
weighted = T
)
n_df <- tibble(site_name = V(g)$name) %>%
left_join(data, by = "site_name")
V(g)$centrality <- eigen_centrality(g, weights = E(g)$weight)$vector
V(g)$region <- n_df$region
if (!is.null(nLinks)) {
# calculate top n (with ties) edges and convert to igraph object
# convert to dataframe
n <- igraph::as_data_frame(g)
n <- bind_rows(n, n %>% dplyr::rename(from = to, to = from)) %>%
mutate_if(is.numeric, replace_na, 0)
q <- quantile(n$weight, 0.5)
n <- n %>% # sort data frame
filter(weight > q) %>%
arrange(from, desc(weight)) %>%
# rank by site
group_by(from) %>%
mutate(rank = rank(-weight)) %>%
ungroup()
nSub <- n %>%
group_by(from) %>%
slice_max(n = nLinks, order_by = desc(rank)) %>%
ungroup()
if (sf) {
nSub <- nSub %>%
left_join(data %>% select(site_name, region, long, lat),
by = c("from" = "site_name")
) %>%
left_join(
data %>% select(
site_name,
region,
longend = long,
latend = lat
),
by = c("to" = "site_name")
) %>%
return(nSub)
} else {
gRanks <- graph_from_data_frame(nSub, directed = F, vertices = n_df)
gRanks <- gRanks %>% igraph::simplify()
V(gRanks)$centrality <- eigen_centrality(gRanks, weights = E(gRanks)$weight)$vector
return(gRanks)
}
} else {
return(g)
}
}
analyze <- function(data, label = "all") {
ceramics <- ceramics %>%
filter(!decoration %in% c("Unknown", "Undifferentiated")) %>%
filter(site_link_h_id %in% data$site_h_id) %>%
group_by(site_link_h_id) %>%
filter(end_age > 1200) %>%
filter(begin_age < 1300) %>%
select(site_name, ceramic_type, count)
points <- points %>%
filter(point_type != "undetermined") %>%
filter(site_link_h_id %in% data$site_h_id) %>%
select(site_name, point_type)
dists <- list()
coords_sf <- data %>%
st_as_sf(coords = c("long", "lat"), crs = 4326) %>%
st_transform(26912)
coords <- st_coordinates(coords_sf) %>%
as.data.frame() %>%
rename(UTMEAST = X, UTMNORTH = Y)
d <- dist(coords[, c("UTMEAST", "UTMNORTH")]) %>% as.matrix()
colnames(d) <- rownames(d) <- data$site_name
# convert to scaled similarity (1 highest)
d <- 1 - (d / max(d))
dists$spatial <- d
# ceramic network
# convert to adjacency matrix
m <- ceramics %>%
select(ceramic_type, site_name, count) %>%
mutate_at(vars(count), as.numeric) %>%
group_by(site_name, ceramic_type) %>%
summarize(count = sum(count)) %>%
ungroup() %>%
pivot_wider(names_from = ceramic_type, values_from = count) %>%
column_to_rownames("site_name") %>%
mutate_all(as.integer) %>%
as.matrix()
m <- m / rowSums(m, na.rm = T)
# m[1,] %>% sum(na.rm = T)
d <- vegdist(m, method = "manhattan", na.rm = T) %>% as.matrix()
d <- 1 - (d / 2)
dists$ceramics <- d
# projectile point network
# convert to adjacency matrix
m <- points %>%
select(point_type, site_name) %>%
mutate(count = 1) %>%
group_by(site_name, point_type) %>%
summarize(count = sum(count)) %>%
ungroup() %>%
pivot_wider(names_from = point_type, values_from = count) %>%
column_to_rownames("site_name") %>%
mutate_all(as.integer) %>%
as.matrix()
m <- m / rowSums(m, na.rm = T)
# m[1,] %>% sum(na.rm = T)
d <- vegdist(m, method = "manhattan", na.rm = T) %>% as.matrix()
d <- 1 - (d / 2)
dists$points <- d
g2List <- list()
nms <- names(dists)
for (n in nms) {
print(n)
g <- makeGraph(dists[[n]], nLinks = 3)
g2List[[n]] <- g
}
netDF <- map2_df(g2List, names(g2List), function(g, n) {
ggnetwork(g) %>% mutate(layer = n)
}) %>%
mutate(weight = weight / 2)
ggplot(
netDF,
aes(
x = x,
y = y,
xend = xend,
yend = yend,
color = region,
label = name
)
) +
geom_edges(aes(linewidth = weight),
color = "#404040",
alpha = .75
) +
scale_linewidth(range = c(.5, 2)) +
geom_nodes(size = 5) +
geom_nodetext_repel(color = "black", size = 2.5) +
facet_wrap(~layer, ncol = 1) +
theme_blank() +
theme(
panel.border = element_rect(fill = NA),
strip.background = element_rect(fill = "#404040"),
strip.text = element_text(size = 20, color = "white"),
legend.position = "bottom",
legend.text = element_text(size = 8)
) +
guides(size = "none", color = guide_legend(ncol = 4, title = NULL))
fn <- glue::glue("figures/Chap5MultilayerNetworks_{label}.png")
ggsave(fn,
dpi = 450,
width = 6.5,
height = 9
)
# browseURL(fn)
g2List_sf <- list()
nms <- names(dists)
for (n in nms) {
print(n)
g <- makeGraph(dists[[n]], nLinks = 3, sf = T)
g2List_sf[[n]] <- g
}
for (i in 1:length(g2List_sf)) {
g2List_sf[[i]] <- g2List_sf[[i]] %>%
mutate(
layer = names(g2List_sf[i]),
geometry = st_as_sfc(
paste0("LINESTRING(", long, " ", lat, ",", longend, " ", latend, ")"),
crs = 4326
)
) %>%
st_as_sf()
}
g2List_sf <- bind_rows(g2List_sf)
# mapview::mapview(g2List_sf)
try(st_write(g2List_sf,
here(glue::glue("data/Chap5MultilayerNetworks_{label}.geojson")),
delete_dsn = T
))
nodes_sf <- data %>%
select(site_name, region, long, lat) %>%
st_as_sf(coords = c("long", "lat"), crs = 4326)
try(st_write(nodes_sf,
here(glue::glue("data/Chap5MultilayerNodes_{label}.geojson")),
delete_dsn = T
))
eigenDF <- map2_df(g2List, names(g2List), function(g, n) {
tibble(
id = V(g)$name,
centrality = V(g)$centrality,
name = n
)
}) %>%
left_join(data %>% select(site_name, region), by = c("id" = "site_name"))
eigenDF_plot <- eigenDF %>%
pivot_wider(names_from = name, values_from = centrality) %>%
mutate(id = reorder(
factor(id),
desc(ceramics)
))
ggplot(eigenDF_plot, aes(
x = ceramics,
y = id,
xend = points,
yend = id
)) +
geom_nodes(
data = eigenDF_plot,
aes(x = points, y = id),
color = "black",
size = 7,
alpha = .75
) +
geom_nodes(
data = eigenDF_plot,
aes(x = ceramics, y = id),
color = "tan",
size = 7,
alpha = .75
) +
geom_edges(aes(color = region), linewidth = 2) +
theme_gdocs() +
theme(
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 8),
plot.margin = margin(
t = 0.1,
r = 0.1,
b = 0.01,
l = 0.1,
unit = "cm"
),
legend.margin = margin(t = -30)
) +
guides(color = guide_legend(
title = element_blank(),
position = "bottom",
ncol = 4
)) +
xlab("") +
ylab("")
fn <- glue::glue("figures/Chap5Centrality_{label}.png")
ggsave(fn,
dpi = 450,
width = 8.5,
height = 5.5
)
image_read(fn) %>%
image_trim() %>%
image_write(fn)
# browseURL(fn)
library(sna)
corr_result <- matrix(NA, nrow = length(g2List), ncol = length(g2List))
for (i in 1:length(g2List)) {
for (j in 1:length(g2List)) {
if (i != j) {
g1 <- g2List[[i]]
g2 <- g2List[[j]]
n1 <- igraph::as_adjacency_matrix(g1, attr = "weight") %>% as.matrix()
n2 <- igraph::as_adjacency_matrix(g2, attr = "weight") %>% as.matrix()
corr_result[i, j] <- sna::gcor(n1, n2, mode = "graph")
}
}
}
colnames(corr_result) <- rownames(corr_result) <- names(g2List)
library(pheatmap)
fn <- here(glue::glue("figures/Chap5NetworkCorrelations_{label}.png"))
pheatmap(
corr_result,
cluster_rows = F,
cluster_cols = F,
fontsize_row = 10,
fontsize_col = 10,
cellwidth = 20,
cellheight = 20,
color = colorRampPalette(c("white", "orange"))(50),
filename = fn,
width = 6.5,
height = 6.5,
show_rownames = T,
show_colnames = T,
display_numbers = T,
legend = F,
angle_col = 45
)
image_read(fn) %>%
image_trim() %>%
image_write(fn)
# multilayer communities
combined <- make_empty_graph(directed = F)
for (i in c("ceramics", "points")) {
g <- g2List[[i]]
combined <- union(combined, g)
simplify(combined, remove.multiple = T, remove.loops = T)
}
comm <- cluster_louvain(combined, weights = E(combined)$weight)
comm_df <- data.frame(
site_name = V(combined)$name,
comm = comm$membership
) %>%
left_join(data %>% select(site_name, region, long, lat), by = "site_name") %>%
distinct_all() %>%
arrange(site_name) %>%
st_as_sf(coords = c("long", "lat"), crs = 4326)
try(st_write(comm_df, glue::glue("data/Chap5MultilayerCommunities_{label}.geojson"),
delete_dsn = T
))
print("completed")
}R Environment
R version=44.3. sna=2.8 network=1.19.0 statnet.common=4.11.0 sf=1.0-19 magick=2.8.5 vegan=2.6-10 lattice=0.22-6 permute=0.9-7 ggnetwork=0.5.13 igraph=2.1.4 rio=1.2.3 here=1.0.1 ggthemes=5.1.0 lubridate=1.9.4 forcats=1.0.0 stringr=1.5.1 dplyr=1.1.4 purrr=1.0.4 readr=2.1.5 tidyr=1.3.1 tibble=3.2.1 ggplot2=3.5.1 tidyverse=2.0.0