Monero-Black-Marble-Flood code and paper

This commit is contained in:
Rucknium 2024-03-27 21:12:26 +00:00
parent 13c9d7206f
commit 571ad0bd1f
12 changed files with 2052 additions and 0 deletions


@@ -0,0 +1,67 @@
n.workers <- min(floor(parallelly::availableCores()/2), 32L)
future::plan(future::multicore(workers = n.workers))
system.time({
block.data <- future.apply::future_lapply(sort(unlist(block.heights)), function(height) {
block.data <- xmr.rpc(url.rpc = paste0(url.rpc, "/json_rpc"),
method = "get_block",
params = list(height = height ),
keep.trying.rpc = TRUE)$result
as.data.frame(block.data$block_header)
})
})
block.data <- rbindlist(block.data)
block.data[, timestamp.POSIX := as.POSIXct(timestamp, origin = "1970-01-01")]
block.data[, block_weight.rolling.max := zoo::rollapply(block.data$block_weight, width = 30, FUN = max, fill = NA)]
png("rolling-max-block-weight.png", width = 500, height = 600)
ggplot(block.data[timestamp.POSIX >= as.POSIXct((start.spam.date - 5)), ], aes(x = timestamp.POSIX, y = block_weight.rolling.max / 1000)) +
geom_line() +
scale_y_continuous(limit = c(0, NA), expand = c(0, 0)) +
scale_x_datetime(date_breaks = "day", expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Monero empirical block weight (maximum peaks)",
subtitle = "To smooth the line, the 30 block (1 hr) rolling maximum is displayed") +
xlab(" Date github.com/Rucknium") +
ylab("Block weight in kB (bytes/10^3)") +
theme(plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15))
dev.off()
block.data[, block_weight.100.block.median := zoo::rollapply(block.data$block_weight, width = 101, FUN = median, fill = NA, align = "right")]
png("rolling-median-block-weight.png", width = 500, height = 600)
ggplot(block.data[timestamp.POSIX >= as.POSIXct((start.spam.date - 5)), ], aes(x = timestamp.POSIX, y = block_weight.100.block.median / 1000)) +
geom_line() +
scale_y_continuous(limit = c(0, max(block.data$block_weight.rolling.max / 1000, na.rm = TRUE)), expand = c(0, 0)) +
scale_x_datetime(date_breaks = "day", expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Monero empirical block weight",
subtitle = "100 block rolling median") +
xlab(" Date github.com/Rucknium") +
ylab("Block weight in kB (bytes/10^3)") +
theme(plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15))
dev.off()


@@ -0,0 +1,149 @@
mean.coinbase.tx.size <- output.index[!duplicated(tx_hash) & tx_num == 1, mean(tx_size_bytes)]
num.blocks.since.spam <- output.index[block_height >= start.spam.height, uniqueN(block_height)]
mean.size.1in.2out <- spam.results[[1]]$non.spam.fingerprint.tx[number_of_inputs == 1 & number_of_outputs == 2, mean(tx_size_bytes)]
mean.size.2in.2out <- spam.results[[1]]$non.spam.fingerprint.tx[number_of_inputs == 2 & number_of_outputs == 2, mean(tx_size_bytes)]
mean.size.16.ring.input <- mean.size.2in.2out - mean.size.1in.2out
mean.size.zero.ring.tx.size <- mean.size.1in.2out - mean.size.16.ring.input
# This "size zero ring" is so that the variable ring size can be added later
mean.size.16.ring.input <- mean.size.16.ring.input - 32
# Subtract key image bytes since there is just one key image per ring
mean.size.one.ring.member <- mean.size.16.ring.input / 16
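# Consistency check (a sketch using only the quantities computed above): the
# mean 1in/2out size should decompose exactly into the zero-ring base size,
# one 32-byte key image, and 16 ring members.
stopifnot(isTRUE(all.equal(
  mean.size.1in.2out,
  mean.size.zero.ring.tx.size + 32 + 16 * mean.size.one.ring.member)))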
sim.spam <- lapply(c(11, 16, 25, 40, 60), FUN = function(ring.size.sim) {
non.spam.fingerprint.tx.sim <- copy(spam.results[[1]]$non.spam.fingerprint.tx)
non.spam.fingerprint.tx.sim[, tx_weight_bytes.sim :=
tx_weight_bytes - mean.size.16.ring.input * number_of_inputs + number_of_inputs * mean.size.one.ring.member * ring.size.sim]
mean.non.spam.kb.per.block <- mean.coinbase.tx.size/1000 +
(sum(non.spam.fingerprint.tx.sim[block_height >= start.spam.height, tx_weight_bytes.sim])/num.blocks.since.spam)/1000
mean.non.spam.output.per.block <- nrow(non.spam.fingerprint[block_height >= start.spam.height, ])/num.blocks.since.spam
mean.effective.ring.size <- ring.size.sim
simulated.adversary.owned.outputs <- 0
mean.kb.per.block <- mean.non.spam.kb.per.block
block.size <- mean.non.spam.kb.per.block
mean.kb.per.block.data <- vector("numeric", 100000)
mean.effective.ring.size.data <- vector("numeric", 100000)
median.effective.ring.size.data <- vector("numeric", 100000)
simulated.adversary.owned.share.data <- vector("numeric", 100000)
i <- 0
# while (mean.effective.ring.size >= 2) {
while (mean.kb.per.block <= 3000) {
i <- i + 1
simulated.adversary.owned.share <- simulated.adversary.owned.outputs/(simulated.adversary.owned.outputs + mean.non.spam.output.per.block)
mean.effective.ring.size <- 1 + (ring.size.sim - 1) * (1 - simulated.adversary.owned.share)
mean.kb.per.block.data[i] <- mean.kb.per.block
mean.effective.ring.size.data[i] <- mean.effective.ring.size
median.effective.ring.size.data[i] <- 1 + qbinom(0.5, size = ring.size.sim - 1, prob = 1 - simulated.adversary.owned.share)
simulated.adversary.owned.share.data[i] <- simulated.adversary.owned.share
simulated.adversary.owned.outputs <- simulated.adversary.owned.outputs + 2
mean.kb.per.block <- mean.kb.per.block + mean.size.zero.ring.tx.size / 1000 + mean.size.one.ring.member * ring.size.sim / 1000
# Add adversary outputs at the end so the first iteration has zero adversary outputs
}
sim.spam <- data.table(mean.kb.per.block.data = mean.kb.per.block.data[seq_len(i)],
mean.effective.ring.size.data = mean.effective.ring.size.data[seq_len(i)],
median.effective.ring.size.data = median.effective.ring.size.data[seq_len(i)],
simulated.adversary.owned.share.data = simulated.adversary.owned.share.data[seq_len(i)],
ring.size.sim = ring.size.sim,
ring.size.sim.label = paste0(ring.size.sim, " (", round(mean.non.spam.kb.per.block), " kB non-spam)"))
sim.spam
})
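# Worked check of the effective ring size formula used inside the loop above
# (illustration only): with nominal ring size 16 and an adversary-owned output
# share of 0.70, the mean effective ring size is 1 + (16 - 1) * (1 - 0.70) = 5.5.
example.mean.effective.ring.size <- 1 + (16 - 1) * (1 - 0.70)
stopifnot(isTRUE(all.equal(example.mean.effective.ring.size, 5.5)))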
sim.spam <- rbindlist(sim.spam)
max.ring.size.sim <- max(sim.spam$ring.size.sim)
png("projected-effective-ring-size-non-log.png", width = 600, height = 600)
ggplot(sim.spam, aes(x = mean.kb.per.block.data, y = mean.effective.ring.size.data, colour = ring.size.sim.label)) +
geom_line() +
scale_y_continuous(breaks = seq(2, max.ring.size.sim, by = 2), limits = c(0, NA), expand = c(0, 0)) +
scale_x_continuous(breaks = seq(0, 10000, by = 500), limits = c(0, NA), expand = c(0, 0)) +
ggtitle("Long-term projected mean effective ring size") +
xlab(" Block weight in kilobytes (10^3 bytes) github.com/Rucknium") +
ylab("Effective ring size") +
labs(colour = "Ring size") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 2, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()
min.mean.kb.per.block.data <- ceiling(min(sim.spam$mean.kb.per.block.data))
png("projected-effective-ring-size-log-log.png", width = 600, height = 600)
ggplot(sim.spam, aes(x = mean.kb.per.block.data, y = mean.effective.ring.size.data, colour = ring.size.sim.label)) +
geom_line() +
scale_y_log10(breaks = c(1, seq(2, max.ring.size.sim, by = 2)), limits = c(1, NA), expand = c(0, 0) ) +
scale_x_log10(breaks = c(min.mean.kb.per.block.data, seq(0, 10000, by = 250)), guide = guide_axis(angle = 90), expand = c(0, 0)) +
ggtitle("Long-term projected mean effective ring size (log-log scale)") +
xlab(" Block weight in kilobytes (10^3 bytes) (log scale) github.com/Rucknium") +
ylab("Effective ring size (log scale)") +
labs(colour = "Ring size") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 2, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()
png("projected-ring-size-one.png", width = 600, height = 600)
ggplot(sim.spam, aes(x = mean.kb.per.block.data,
y = dbinom(0, size = ring.size.sim - 1, prob = 1 - simulated.adversary.owned.share.data),
colour = ring.size.sim.label)) +
geom_line() +
scale_y_continuous( limits = c(0, 1), expand = c(0, 0), labels = scales::label_percent()) +
scale_x_continuous(breaks = seq(0, 10000, by = 250), limits = c(0, NA), expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Long-term projected share of rings with effective ring size 1") +
xlab(" Block weight in kilobytes (10^3 bytes) (log scale) github.com/Rucknium") +
ylab("Share of rings") +
labs(colour = "Ring size") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 2, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()


@@ -0,0 +1,235 @@
CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE = 10
DIFFICULTY_TARGET_V2 = 120
DEFAULT_UNLOCK_TIME = CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE * DIFFICULTY_TARGET_V2
RECENT_SPEND_WINDOW = 15 * DIFFICULTY_TARGET_V2
SECONDS_IN_A_YEAR = 60 * 60 * 24 * 365
BLOCKS_IN_A_YEAR = SECONDS_IN_A_YEAR / DIFFICULTY_TARGET_V2
calculate_average_output_flow <- function(crod) {
# 1
num_blocks_to_consider_for_flow = min(c(length(crod), BLOCKS_IN_A_YEAR))
# 2
if (length(crod) > num_blocks_to_consider_for_flow) {
num_outputs_to_consider_for_flow = crod[length(crod)] - crod[ length(crod) - num_blocks_to_consider_for_flow ]
# R indexes from 1
} else {
num_outputs_to_consider_for_flow = crod[length(crod)] # R indexes from 1
}
# 3
average_output_flow = DIFFICULTY_TARGET_V2 * num_blocks_to_consider_for_flow / num_outputs_to_consider_for_flow
return(average_output_flow)
}
calculate_num_usable_rct_outputs <- function(crod) {
# 1
num_usable_crod_blocks = length(crod) - (CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE - 1)
# 2
num_usable_rct_outputs = crod[num_usable_crod_blocks] # R indexes from 1
return(num_usable_rct_outputs)
}
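# Usage sketch (illustration only), once `crod` is populated by the
# get_output_distribution call below:
# v <- calculate_average_output_flow(crod)    # mean seconds per new RingCT output
# z <- calculate_num_usable_rct_outputs(crod) # outputs old enough to be spendable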
GAMMA_SHAPE = 19.28
GAMMA_RATE = 1.61
# GAMMA_SCALE = 1 / GAMMA_RATE
G <- function(x) {
actuar::plgamma(x, shapelog = GAMMA_SHAPE, ratelog = GAMMA_RATE)
}
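# Usage sketch (illustration only): probability that a raw gamma draw of
# output age (in seconds) falls below one hour or one day, before the flow
# and spendable-age adjustments applied in G_star() below.
G.mass.one.hour <- G(60 * 60)
G.mass.one.day <- G(60 * 60 * 24)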
crod <- xmr.rpc(url.rpc = paste0(url.rpc, "/json_rpc"), method = "get_output_distribution",
params = list(amounts = list(0), from_height = 0, to_height = current.height, binary = FALSE, cumulative = TRUE))
start_height <- crod$result$distributions[[1]]$start_height
crod <- crod$result$distributions[[1]]$distribution
crod.full <- crod
spam.output_index <- list()
for (i in seq_along(spam.results)) {
spam.output_index[[i]] <- list(name = spam.types[[i]]$fingerprint.text,
output_index = spam.results[[i]]$spam.fingerprint$output_index)
}
n.workers <- min(floor(parallelly::availableCores()/2), 32L)
future::plan(future::multisession(workers = n.workers))
adversary.owned.dsa.mass <- future.apply::future_lapply((start.spam.height:current.height), function(ring.construction.height) {
crod <- crod.full[1:(ring.construction.height - start_height + 1)]
average_output_flow <- calculate_average_output_flow(crod)
num_usable_rct_outputs <- calculate_num_usable_rct_outputs(crod)
v <- average_output_flow
z <- num_usable_rct_outputs
G_star <- function(x) {
(0 <= x*v & x*v <= 1800) *
(G(x*v + 1200) - G(1200) +
( (x*v)/(1800) ) * G(1200)
)/G(z*v + 1200) +
(x*v > 1800) * G(x*v + 1200)/G(z*v + 1200)
}
usable.outputs <- 1:num_usable_rct_outputs
crod.reversed <- cumsum(abs(diff(rev(crod)))[-(1:9)])
# Remove the first 9 blocks before cumsum() since outputs in those blocks cannot be spent yet
crod.reversed <- c(0, crod.reversed)
y_0 <- crod.reversed[-length(crod.reversed)] + 1
y_1 <- crod.reversed[-1]
pmf.decoy.crod <- (G_star(y_1 + 1) - G_star(y_0)) / (y_1 + 1 - y_0)
pmf.decoy <- rep(pmf.decoy.crod, times = diff(crod.reversed))
pmf.decoy.reversed <- rev(pmf.decoy)
result <- list()
for (i in seq_along(spam.output_index)) {
estimated.adversary.owned.share <- sum(pmf.decoy.reversed[
spam.output_index[[i]]$output_index[ spam.output_index[[i]]$output_index <= length(pmf.decoy.reversed)] ])
result[[i]] <- data.table(ring.construction.height = ring.construction.height,
estimated.adversary.owned.share = estimated.adversary.owned.share,
type = spam.output_index[[i]]$name)
}
rbindlist(result)
})
adversary.owned.dsa.mass <- rbindlist(adversary.owned.dsa.mass)
adversary.owned.dsa.mass <- merge(adversary.owned.dsa.mass, block.data[, .(height, timestamp.POSIX)],
by.x = "ring.construction.height", by.y = "height")
setorder(adversary.owned.dsa.mass, timestamp.POSIX)
adversary.owned.dsa.mass[, effective.ring.size := 1 + (1 - estimated.adversary.owned.share) * 15]
png("empirical-effective-ring-size.png", width = 800, height = 800)
ggplot(adversary.owned.dsa.mass, aes(x = timestamp.POSIX, y = effective.ring.size, colour = type)) +
geom_line() +
scale_y_continuous(breaks = 1:16, limits = c(0, NA), expand = c(0, 0)) +
scale_x_datetime(date_breaks = "day", guide = guide_axis(angle = 90)) +
ggtitle("Estimated mean effective ring size") +
xlab(" Date github.com/Rucknium") +
ylab("Mean effective ring size") +
labs(colour = "Spam type") +
theme(legend.position = "top", legend.text = element_text(size = 15), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(override.aes = list(linewidth = 5)))
dev.off()
guess.prob <- function(effective.ring.size, nominal.ring.size) {
decoys <- nominal.ring.size - 1
sapply(effective.ring.size, FUN = function(x) {
weighted.mean(1/(1 + 0:decoys),
w = dbinom(0:decoys, size = decoys, prob = (x - 1)/decoys))
})
}
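# Usage sketch (illustration only): probability of correctly guessing the real
# spend by picking uniformly among the non-adversary ring members.
guess.prob(16, nominal.ring.size = 16)  # 1/16 when no outputs are adversary-owned
guess.prob(5.5, nominal.ring.size = 16) # at a mean effective ring size of 5.5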
adversary.owned.dsa.mass[, guess.prob := guess.prob(effective.ring.size, nominal.ring.size = 16)]
png("empirical-guessing-probability.png", width = 800, height = 800)
ggplot(adversary.owned.dsa.mass, aes(x = timestamp.POSIX, y = guess.prob, colour = type)) +
geom_line() +
scale_y_continuous( limits = c(0, NA), expand = c(0, 0), labels = scales::label_percent()) +
scale_x_datetime(date_breaks = "day", guide = guide_axis(angle = 90)) +
ggtitle("Estimated probability of correctly guessing the real spend") +
xlab(" Date github.com/Rucknium") +
ylab("Probability") +
labs(colour = "Spam type") +
theme(legend.position = "top", legend.text = element_text(size = 15), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(override.aes = list(linewidth = 5)))
dev.off()
adversary.owned.dsa.mass[, effective.ring.size.one := dbinom(0, size = 15, prob = 1 - estimated.adversary.owned.share)]
png("empirical-ring-size-one.png", width = 800, height = 800)
ggplot(adversary.owned.dsa.mass, aes(x = timestamp.POSIX, y = effective.ring.size.one, colour = type)) +
geom_line() +
scale_y_continuous( limits = c(0, NA), expand = c(0, 0), labels = scales::label_percent()) +
scale_x_datetime(date_breaks = "day", guide = guide_axis(angle = 90)) +
ggtitle("Estimated share of rings with effective ring size of one") +
xlab(" Date github.com/Rucknium") +
ylab("Share of rings") +
labs(colour = "Spam type") +
theme(legend.position = "top", legend.text = element_text(size = 15), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(override.aes = list(linewidth = 5)))
dev.off()
future::plan(future::sequential)
# Reset the plan to shut down worker R sessions and free their RAM


@@ -0,0 +1,96 @@
all.tx.volume <- rbind(spam.results[[1]]$spam.fingerprint.tx, spam.results[[1]]$non.spam.fingerprint.tx, fill = TRUE)
all.tx.volume[, fee_per_byte_nanoneros := floor((tx_fee/tx_size_bytes)/1000)]
all.tx.volume.fees <- all.tx.volume[number_of_outputs == 2 & (
fee_per_byte_nanoneros %between% c(18, 22) |
fee_per_byte_nanoneros %between% c(78, 82) |
fee_per_byte_nanoneros %between% c(315, 325) |
fee_per_byte_nanoneros %between% c(3000, 4100)
), ]
all.tx.volume.fees[, fee_per_byte_nanoneros.cut := cut(fee_per_byte_nanoneros,
breaks = c(0, 22, 82, 325, 4100), labels = c("20", "80", "320", "4000"))]
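# Note: the four narrow bands above (roughly 20, 80, 320, and 4000
# nanoneros/byte) are intended to match wallet2's four fee priority levels;
# transactions outside these bands are excluded from this tabulation.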
all.tx.volume.fees <- all.tx.volume.fees[, as.data.frame(prop.table(table(block_date, fee_per_byte_nanoneros.cut), margin = 1))]
all.tx.volume.fees$block_date <- as.Date(as.character(all.tx.volume.fees$block_date))
png("share-tx-in-fee-tier-all-txs.png", width = 500, height = 600)
ggplot(all.tx.volume.fees, aes(x = block_date,
y = Freq,
colour = factor(fee_per_byte_nanoneros.cut))) +
geom_line(linewidth = 1.5) +
geom_vline(xintercept = start.spam.date, linetype = 2) +
scale_y_continuous( limits = c(0, 1), expand = c(0, 0), labels = scales::label_percent()) +
scale_x_date(breaks = "3 day", expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Share of transactions by fee tier (all transactions)") +
xlab(" Date github.com/Rucknium") +
ylab("Share of transactions") +
labs(colour = "Fee tier (nanoneros/byte)") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 1, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()
non.spam.fingerprint.tx <- spam.results[[2]]$non.spam.fingerprint.tx
non.spam.fingerprint.tx[, fee_per_byte_nanoneros := floor((tx_fee/tx_size_bytes)/1000)]
non.spam.fingerprint.tx.fees <- non.spam.fingerprint.tx[number_of_outputs == 2 & (
fee_per_byte_nanoneros %between% c(18, 22) |
fee_per_byte_nanoneros %between% c(78, 82) |
fee_per_byte_nanoneros %between% c(315, 325) |
fee_per_byte_nanoneros %between% c(3000, 4100)
), ]
non.spam.fingerprint.tx.fees[, fee_per_byte_nanoneros.cut := cut(fee_per_byte_nanoneros,
breaks = c(0, 22, 82, 325, 4100), labels = c("20", "80", "320", "4000"))]
non.spam.fingerprint.tx.fees <- non.spam.fingerprint.tx.fees[, as.data.frame(prop.table(table(block_date, fee_per_byte_nanoneros.cut), margin = 1))]
non.spam.fingerprint.tx.fees$block_date <- as.Date(as.character(non.spam.fingerprint.tx.fees$block_date))
png("share-tx-in-fee-tier-spam-removed.png", width = 500, height = 600)
ggplot(non.spam.fingerprint.tx.fees, aes(x = block_date,
y = Freq,
colour = factor(fee_per_byte_nanoneros.cut))) +
geom_line(linewidth = 1.5) +
geom_vline(xintercept = start.spam.date, linetype = 2) +
scale_y_continuous( limits = c(0, 1), expand = c(0, 0), labels = scales::label_percent()) +
scale_x_date(breaks = "3 day", expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Share of transactions by fee tier (suspected spam removed)") +
xlab(" Date github.com/Rucknium") +
ylab("Share of transactions") +
labs(colour = "Fee tier (nanoneros/byte)") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 1, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()


@@ -0,0 +1,89 @@
mempool[, confirmation.latency := block_receive_time - receive_time]
mempool[, block_receive_time.hour := as.character(cut(as.POSIXct(block_receive_time), "hour"))]
mempool[nchar(block_receive_time.hour) == 10, block_receive_time.hour := paste0(block_receive_time.hour, " 00:00:00")]
mempool[, block_receive_time.hour := as.POSIXct(block_receive_time.hour)]
mempool.hourly <- mempool[, .(confirmation.latency = mean(confirmation.latency)), by = "block_receive_time.hour"]
png("mean-delay-first-confirmation.png", width = 500, height = 600)
ggplot(mempool.hourly[block_receive_time.hour >= as.POSIXct(start.spam.date - 3),], aes(x = block_receive_time.hour, y = confirmation.latency/60)) +
geom_line() +
geom_vline(xintercept = mempool[block_height == start.spam.height, block_receive_time_UTC[1]], linetype = 2) +
scale_y_continuous(breaks = seq(0, 600, by = 30), limits = c(0, NA), expand = c(0, 0)) +
scale_x_datetime(breaks = "day", expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Mean delay to first transaction confirmation") +
xlab(" Date github.com/Rucknium") +
ylab("Mean delay (minutes)") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 1, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()
hourly.max.confirmation.latency <- lapply(unique(mempool$block_receive_time.hour), FUN = function(hour.bin) {
mempool[, time.to.hour := hour.bin - receive_time]
leftover.txs <- mempool[time.to.hour > 0 & hour.bin < block_receive_time.hour, ]
# These are txs received before this hour bin and not confirmed until a later bin, i.e. still waiting in the mempool during this hour
if (nrow(leftover.txs) > 0) {
return(data.table(hour.bin = hour.bin,
confirmation.latency = leftover.txs[, max(as.numeric(time.to.hour))]))
} else {
return(data.table(hour.bin = hour.bin,
confirmation.latency = mempool[hour.bin == block_receive_time.hour, max(confirmation.latency)]))
}
})
hourly.max.confirmation.latency <- rbindlist(hourly.max.confirmation.latency)
png("max-delay-first-confirmation.png", width = 500, height = 600)
ggplot(hourly.max.confirmation.latency[hour.bin >= as.POSIXct(start.spam.date - 3),], aes(x = hour.bin, y = confirmation.latency/60^2)) +
geom_line() +
geom_vline(xintercept = mempool[block_height == start.spam.height, block_receive_time_UTC[1]], linetype = 2) +
scale_y_continuous(breaks = seq(0, 24*5, by = 3),limits = c(0, NA), expand = c(0, 0)) +
scale_x_datetime(breaks = "day", expand = c(0, 0), guide = guide_axis(angle = 90)) +
ggtitle("Maximum delay to first transaction confirmation") +
xlab(" Date github.com/Rucknium") +
ylab("Maximum delay (hours)") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 1, byrow = FALSE, override.aes = list(linewidth = 5)))
dev.off()
long.wait.txs <- mempool[confirmation.latency >= 3*60^2, ]
long.wait.txs[, summary(fee/weight)]
long.wait.txs <- merge(long.wait.txs, output.index[!duplicated(tx_hash), .(tx_hash, number_of_inputs, number_of_outputs)], by = "tx_hash")
long.wait.txs[, table(number_of_inputs)]
long.wait.txs[, table(number_of_outputs)]
long.wait.txs[number_of_inputs == 1, ]
output.index[number_of_inputs == 1 & number_of_outputs == 2, summary(tx_weight_bytes)]
output.index[number_of_inputs == 1 & number_of_outputs == 2, summary(tx_fee)]


@@ -0,0 +1,303 @@
library(data.table)
setwd("Monero-Black-Marble-Flood/pdf/images")
# Set location of where plot images should be saved
current.height <- 3114270
# current.height should be the most recent height that you want to collect data for
start.height <- 3077201
# First block of Feb 5, 2024 UTC
url.rpc <- "http://127.0.0.1:18081"
# Set the IP address and port of your node. Should usually be "http://127.0.0.1:18081"
stopifnot(!is.na(current.height))
block.heights <- start.height:current.height
# Modified from TownforgeR::tf_rpc_curl function
xmr.rpc <- function(
url.rpc = "http://127.0.0.1:18081/json_rpc",
method = "",
params = list(),
userpwd = "",
num.as.string = FALSE,
nonce.as.string = FALSE,
keep.trying.rpc = FALSE,
curl = RCurl::getCurlHandle(),
...
){
json.ret <- RJSONIO::toJSON(
list(
jsonrpc = "2.0",
id = "0",
method = method,
params = params
), digits = 50
)
rcp.ret <- tryCatch(RCurl::postForm(url.rpc,
.opts = list(
userpwd = userpwd,
postfields = json.ret,
httpheader = c('Content-Type' = 'application/json', Accept = 'application/json')
# https://stackoverflow.com/questions/19267261/timeout-while-reading-csv-file-from-url-in-r
),
curl = curl
), error = function(e) {NULL})
if (keep.trying.rpc && length(rcp.ret) == 0) {
while (length(rcp.ret) == 0) {
rcp.ret <- tryCatch(RCurl::postForm(url.rpc,
.opts = list(
userpwd = userpwd,
postfields = json.ret,
httpheader = c('Content-Type' = 'application/json', Accept = 'application/json')
# https://stackoverflow.com/questions/19267261/timeout-while-reading-csv-file-from-url-in-r
),
curl = curl
), error = function(e) {NULL})
}
}
if (is.null(rcp.ret)) {
stop("Cannot connect to monerod. Is monerod running?")
}
if (num.as.string) {
rcp.ret <- gsub("(: )([-0123456789.]+)([,\n\r])", "\\1\"\\2\"\\3", rcp.ret )
}
if (nonce.as.string & ! num.as.string) {
rcp.ret <- gsub("(\"nonce\": )([-0123456789.]+)([,\n\r])", "\\1\"\\2\"\\3", rcp.ret )
}
RJSONIO::fromJSON(rcp.ret, asText = TRUE) # , simplify = FALSE
}
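# Usage sketch (illustration only): fetch a single block header from a local
# monerod instance, as the data collection loop below does for every height.
# blk <- xmr.rpc(url.rpc = paste0(url.rpc, "/json_rpc"),
#   method = "get_block", params = list(height = start.height))$result
# blk$block_header$height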
system.time({
n.workers <- min(floor(parallelly::availableCores()/2), 32L)
future::plan(future::multisession(workers = n.workers))
options(future.globals.maxSize= 8000*1024^2)
set.seed(314)
# Randomize block heights to make processing time more uniform between parallel processes
block.heights <- split(block.heights, sample(cut(block.heights, n.workers)))
# First randomly assign heights to list elements (split() will sort them in ascending order within each list element)
block.heights <- lapply(block.heights, sample)
# Then order the heights randomly within each list element
block.heights <- unname(block.heights)
returned <- future.apply::future_lapply(block.heights, function(block.heights) {
handle <- RCurl::getCurlHandle()
return.data <- vector("list", length(block.heights))
for (height.iter in seq_along(block.heights)) {
height <- block.heights[height.iter]
block.data <- xmr.rpc(url.rpc = paste0(url.rpc, "/json_rpc"),
method = "get_block",
params = list(height = height ),
keep.trying.rpc = TRUE,
curl = handle)$result
txs.to.collect <- c(block.data$miner_tx_hash, block.data$tx_hashes)
rcp.ret <- tryCatch(RCurl::postForm(paste0(url.rpc, "/get_transactions"),
.opts = list(
postfields = paste0('{"txs_hashes":["', paste0(txs.to.collect, collapse = '","'), '"],"decode_as_json":true}'),
httpheader = c('Content-Type' = 'application/json', Accept = 'application/json')
),
curl = handle
), error = function(e) {NULL})
if (length(rcp.ret) == 0) {
while (length(rcp.ret) == 0) {
rcp.ret <- tryCatch(RCurl::postForm(paste0(url.rpc, "/get_transactions"),
.opts = list(
postfields = paste0('{"txs_hashes":["', paste0(txs.to.collect, collapse = '","'), '"],"decode_as_json":true}'),
httpheader = c('Content-Type' = 'application/json', Accept = 'application/json')
),
curl = handle
), error = function(e) {NULL})
}
}
rcp.ret <- RJSONIO::fromJSON(rcp.ret, asText = TRUE)
output.index.collected <- vector("list", length(txs.to.collect))
rings.collected <- vector("list", length(txs.to.collect) - 1)
for (i in seq_along(txs.to.collect)) {
tx.json <- tryCatch(
RJSONIO::fromJSON(rcp.ret$txs[[i]]$as_json, asText = TRUE),
error = function(e) {NULL} )
if (is.null(tx.json)) {
# stop()
cat(paste0("tx: ", i, " block: ", height, "\n"), file = "~/RingCT-problems.txt", append = TRUE)
next
}
output.amounts <- sapply(tx.json$vout, FUN = function(x) {x$amount})
tx_size_bytes <- ifelse(i == 1,
nchar(rcp.ret$txs[[i]]$pruned_as_hex) / 2,
nchar(rcp.ret$txs[[i]]$as_hex) / 2)
# Coinbase has special structure
# Reference:
# https://libera.monerologs.net/monero-dev/20221231
# https://github.com/monero-project/monero/pull/8691
# https://github.com/monero-project/monero/issues/8311
calc.tx.weight.clawback <- function(p) {
pow.of.two <- 2^(1:4)
pow.of.two.index <- findInterval(p, pow.of.two, left.open = TRUE) + 1
num_dummy_outs <- pow.of.two[pow.of.two.index] - p
transaction_clawback <- 0.8 * ( (23 * (p + num_dummy_outs)/2) * 32 - (2 * ceiling(log2(64 * p)) + 9) * 32 )
# Equation from page 63 of Zero to Monero 2.0
transaction_clawback
}
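# Example (illustration only): a 3-output transaction is padded to the next
# power of two (4 bulletproof outputs), so its weight is its raw size plus
# calc.tx.weight.clawback(3) bytes. 2-output transactions receive no clawback.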
if (length(tx.json$vout) == 2 && i > 1) {
# i > 1 means not the first tx, which is the coinbase tx
tx_weight_bytes <- tx_size_bytes
} else {
tx_weight_bytes <- tx_size_bytes + calc.tx.weight.clawback(length(tx.json$vout))
}
tx_fee <- ifelse(i == 1 || is.null(tx.json$rct_signatures), NA, tx.json$rct_signatures$txnFee)
# tx fee is NA for coinbase and non-RingCT transactions (no rct_signatures field)
is.mordinal <-
height >= 2838965 &&
length(tx.json$vout) == 2 &&
i > 1 && # not the first tx, which is the coinbase tx
length(tx.json$extra) > 44 &&
tx.json$extra[45] == 16
# With "&&", evaluates each expression sequentially until it is false (if ever). Then stops.
# If all are TRUE, then returns true.
is.mordinal.transfer <-
height >= 2838965 &&
length(tx.json$vout) == 2 &&
i > 1 && # not the first tx, which is the coinbase tx
length(tx.json$extra) > 44 &&
tx.json$extra[45] == 17
output.index.collected[[i]] <- data.table(
block_height = height,
block_timestamp = block.data$block_header$timestamp,
block_size = block.data$block_size,
block_reward = block.data$reward,
tx_num = i,
tx_hash = txs.to.collect[i],
tx_version = tx.json$version,
tx_fee = tx_fee,
tx_size_bytes = tx_size_bytes,
tx_weight_bytes = tx_weight_bytes,
number_of_inputs = length(tx.json$vin),
number_of_outputs = length(tx.json$vout),
output_num = seq_along(rcp.ret$txs[[i]]$output_indices),
output_index = rcp.ret$txs[[i]]$output_indices,
output_amount = output.amounts,
output_unlock_time = tx.json$unlock_time,
is_mordinal = is.mordinal,
is_mordinal_transfer = is.mordinal.transfer)
if (i == 1L) { next }
# Skip first tx since it is the coinbase and has no inputs
tx_hash <- txs.to.collect[i]
rings <- vector("list", length(tx.json$vin))
for (j in seq_along(tx.json$vin)) {
rings[[j]] <- data.table(
tx_hash = tx_hash,
input_num = j,
input_amount = tx.json$vin[[j]]$key$amount,
key_offset_num = seq_along(tx.json$vin[[j]]$key$key_offsets),
key_offsets = tx.json$vin[[j]]$key$key_offsets
)
}
rings.collected[[i-1]] <- rbindlist(rings)
}
output.index.collected <- data.table::rbindlist(output.index.collected)
rings.collected <- rbindlist(rings.collected)
return.data[[height.iter]] <- list(
output.index.collected = output.index.collected,
rings.collected = rings.collected)
}
return.data
} )
})
returned.temp <- vector("list", length(returned))
for (i in seq_along(returned)) {
returned.temp[[i]] <- list(
output.index.collected = rbindlist(lapply(returned[[i]],
FUN = function(y) { y$output.index.collected })),
rings.collected = rbindlist(lapply(returned[[i]],
FUN = function(y) { y$rings.collected }))
)
}
returned.temp <- list(
output.index.collected = rbindlist(lapply(returned.temp,
FUN = function(y) { y$output.index.collected })),
rings.collected = rbindlist(lapply(returned.temp,
FUN = function(y) { y$rings.collected }))
)
output.index <- returned.temp$output.index.collected
returned.temp$output.index.collected <- NULL
rm(returned.temp)
output.index[, output_amount_for_index := ifelse(tx_num == 1, 0, output_amount)]
output.index <- output.index[ !(tx_num == 1 & tx_version == 1), ]
# Remove coinbase outputs that are ineligible for use in a RingCT ring
# See https://libera.monerologs.net/monero-dev/20230323#c224570
output.index.date <- unique(output.index[, .(block_timestamp = block_timestamp)])
output.index.date[, block_date := as.Date(as.POSIXct(block_timestamp, origin = "1970-01-01"))]
output.index <- merge(output.index, output.index.date)
# speed improvement by splitting and then merging
gc()


@@ -0,0 +1,295 @@
start.spam.height <- 3097764 # 2024-03-04 15:21:24
start.spam.date <- as.Date("2024-03-04")
library(ggplot2)
output.index[, block_date.week.day := weekdays(block_date)]
spam.types <- list(list(
fingerprint.text = "1in/2out 20 nanoneros/byte",
fingerprint.crieria = substitute(
floor((tx_fee/tx_size_bytes)/1000) %between% c(18, 22) &
number_of_inputs == 1 &
number_of_outputs == 2)),
list(
fingerprint.text = "1in/2out 20 or 320 nanoneros/byte",
fingerprint.crieria = substitute(
floor((tx_fee/tx_size_bytes)/1000) %between% c(315, 325) &
number_of_inputs == 1 &
number_of_outputs == 2)))
spam.results <- list()
for (spam.type in seq_along(spam.types)) {
spam.fingerprint.all <- list()
spam.fingerprint.tx.all <- list()
for (spam.type.sub in 1:spam.type) {
pre.spam.level.week.day <- output.index[
# block_height < start.spam.height &
block_date < start.spam.date &
tx_num != 1 &
eval(spam.types[[spam.type.sub]]$fingerprint.crieria),
.(txs.rm.from.spam.set = round(uniqueN(tx_hash)/4)),
# NOTE: /4 assumes number of pre-spam weeks in data is 4.
by = "block_date.week.day"]
spam.fingerprint <- output.index[
block_height >= start.spam.height &
tx_num != 1 &
eval(spam.types[[spam.type.sub]]$fingerprint.crieria), ]
spam.fingerprint[, fingerprint := spam.types[[spam.type.sub]]$fingerprint.text]
spam.fingerprint.tx <- spam.fingerprint[!duplicated(tx_hash), ]
spam.fingerprint.tx <- merge(spam.fingerprint.tx,
pre.spam.level.week.day[, .(block_date.week.day, txs.rm.from.spam.set)], by = "block_date.week.day")
set.seed(314)
tx_hash.to.rm <- spam.fingerprint.tx[, .(tx_hash.to.rm = sample(tx_hash,
min(c(unique(txs.rm.from.spam.set), length(tx_hash))), replace = FALSE)), by = "block_date"]
spam.fingerprint.tx[, txs.rm.from.spam.set := NULL]
spam.fingerprint.tx <- spam.fingerprint.tx[ ! tx_hash %chin% tx_hash.to.rm$tx_hash.to.rm, ]
spam.fingerprint.all[[spam.type.sub]] <- spam.fingerprint
spam.fingerprint.tx.all[[spam.type.sub]] <- spam.fingerprint.tx
}
spam.fingerprint <- rbindlist(spam.fingerprint.all)
spam.fingerprint.tx <- rbindlist(spam.fingerprint.tx.all)
non.spam.fingerprint <- output.index[ tx_num != 1 &
(
block_height < start.spam.height |
(block_height >= start.spam.height &
! (tx_hash %chin% spam.fingerprint.tx$tx_hash))
), ]
non.spam.fingerprint.tx <- non.spam.fingerprint[!duplicated(tx_hash), ]
spam.results[[spam.type]] <- list(
spam.fingerprint = spam.fingerprint, spam.fingerprint.tx = spam.fingerprint.tx,
non.spam.fingerprint = non.spam.fingerprint, non.spam.fingerprint.tx = non.spam.fingerprint.tx
)
}
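# Quick inspection sketch (illustration only): number of suspected spam
# transactions under each fingerprint definition stored in spam.results.
sapply(spam.results, function(x) nrow(x$spam.fingerprint.tx))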
print(sum(spam.results[[1]]$spam.fingerprint.tx$tx_fee)/1e+12)
print(sum(spam.results[[1]]$spam.fingerprint.tx$tx_size_bytes) / 1000000000)
sum(spam.results[[1]]$spam.fingerprint.tx$tx_weight_bytes) / 1000000000
print(sum(spam.results[[2]]$spam.fingerprint.tx$tx_fee)/1e+12)
print(sum(spam.results[[2]]$spam.fingerprint.tx$tx_size_bytes) / 1000000000)
sum(spam.results[[2]]$spam.fingerprint.tx$tx_weight_bytes) / 1000000000
# Weight and size should be the same since all suspected spam txs have exactly 2 outputs (no bulletproof padding clawback)
all.tx.volume <- rbind(spam.results[[1]]$spam.fingerprint.tx, spam.results[[1]]$non.spam.fingerprint.tx, fill = TRUE)
all.tx.volume <- all.tx.volume[eval(spam.types[[1]]$fingerprint.crieria), ]
all.tx.volume.by.day <- all.tx.volume[, .(n.all.fingerprint.txs = .N), by = "block_date"]
setorder(all.tx.volume.by.day, block_date)
all.tx.volume.by.day <- all.tx.volume.by.day[-.N, ]
# Remove the most recent day because it doesn't have a full day of data
png("spam-fingerprint-tx-volume.png", width = 600, height = 600)
ggplot(all.tx.volume.by.day, aes(x = as.POSIXct(block_date), y = n.all.fingerprint.txs / 1000)) +
geom_line() +
scale_y_continuous(limit = c(0, NA), expand = c(0, 0)) +
scale_x_datetime(date_breaks = "3 day", guide = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Volume of Monero transactions with spam fingerprint",
subtitle = "1in/2out, 20 nanoneros/byte") +
xlab(" Date github.com/Rucknium") +
ylab("Number of transactions (thousands)") +
theme(plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15))
dev.off()
all.tx.volume <- rbind(spam.results[[1]]$spam.fingerprint.tx, spam.results[[1]]$non.spam.fingerprint.tx, fill = TRUE)
all.tx.volume[, type.in.out := paste0(number_of_inputs, "in/", number_of_outputs, "out")]
txs.type.in.out <- all.tx.volume[, .(n.type.in.out = .N), by = c("block_date", "type.in.out")]
txs.type.in.out.sum <- txs.type.in.out[, .(sum.n.type.in.out = sum(n.type.in.out)), by = "type.in.out"]
setorder(txs.type.in.out.sum, - sum.n.type.in.out)
most.common.tx.type <- txs.type.in.out.sum$type.in.out[1:8]
txs.type.in.out <- txs.type.in.out[type.in.out %in% most.common.tx.type, ]
txs.type.in.out <- txs.type.in.out[block_date != max(block_date), ]
# Remove the most recent date, which does not have a full day of data
setorder(txs.type.in.out, block_date, n.type.in.out)
png("in-out-tx-type-volume.png", width = 800, height = 800)
ggplot(txs.type.in.out, aes(x = block_date, y = n.type.in.out / 1000,
colour = factor(type.in.out, levels = rev(unique(type.in.out))))) +
geom_line(linewidth = 1.25) +
scale_y_log10() +
scale_x_date(expand = c(0, 0), date_breaks = "2 day", guide = guide_axis(angle = 90)) +
ggtitle("Transaction volume by number of inputs and outputs (log scale)") +
xlab(" Date github.com/Rucknium") +
ylab("Thousands of transactions (log scale)") +
labs(colour = "Type") +
theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(colour = guide_legend(nrow = 2, byrow = FALSE, override.aes = list(linewidth = 5))) +
scale_color_brewer(palette = "Accent")
dev.off()
all.output.volume <- rbind(spam.results[[1]]$spam.fingerprint, spam.results[[1]]$non.spam.fingerprint, fill = TRUE)
all.output.volume.by.day <- all.output.volume[, .(non.spam = sum(is.na(fingerprint)), spam = sum(!is.na(fingerprint))), by = "block_date"]
all.output.volume.by.day[, spam.share.outputs := spam/(non.spam + spam) ]
all.output.volume.by.day <- all.output.volume.by.day[-.N, ]
# Remove the most recent day because it doesn't have a full day of data
png("spam-share-outputs.png", width = 600, height = 600)
ggplot(all.output.volume.by.day[block_date >= start.spam.date, ], aes(x = as.POSIXct(block_date), y = spam.share.outputs)) +
geom_line() +
scale_y_continuous(limit = c(0, 1), expand = c(0, 0), labels = scales::label_percent()) +
scale_x_datetime(date_breaks = "day", guide = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Spam share of outputs") +
xlab(" Date github.com/Rucknium") +
ylab("Daily share of outputs owned by suspected spammer") +
theme(plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15))
dev.off()
mean.spam.share.outputs <- all.output.volume.by.day[block_date >= (start.spam.date + 1), mean(spam.share.outputs)]
# Skip the first day because suspected spam started in the middle of the day
binom.ring.size <- rbind(
data.table(x = 1:16, y = dbinom(0:15, size = 11, prob = 1 - 192/233),
Model = paste0("Ring size: 11, Share of adversary outputs: ", round(100*192/233), "% (Chervinski et al. 2021)")),
data.table(x = 1:16, y = dbinom(0:15, size = 16, prob = 1 - mean.spam.share.outputs),
Model = paste0("Ring size: 16, Share of adversary outputs: ", round(100*mean.spam.share.outputs), "% (Estimated March 2024)")))
# "prob = 1 - 192/233" because:
# Chervinski et al. (2021)
# "Scenario II analyzes the impact of an attack where the malicious actor creates
# transactions with 2 inputs and 2 outputs, generating 96 transactions and 192
# malicious outputs in each block for a total of 233 outputs per block when
# adding the 41 user generated outputs."
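# Note: 192/233 is approximately 0.82, i.e. in that scenario the adversary
# owns about 82 percent of all newly created outputs.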
print(binom.ring.size[, .(mean.eff.ring.size = sum(x*y)), by = "Model"])
png("effective-ring-size-binomial-pmf.png", width = 500, height = 600)
ggplot(binom.ring.size, aes(x = factor(x), y = y, fill = Model)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.8) +
geom_line(aes(x = factor(x), y = y, group = Model, colour = Model), linewidth = 1.25) +
scale_y_continuous(labels = scales::label_percent()) +
ggtitle("Long-term projected effective ring sizes, binomial assumption",
subtitle = "Probability mass function of binomial(nominal_ring_size, 1 - adversary_outputs_share)") +
xlab(" Effective ring size github.com/Rucknium") +
ylab("Share of rings") +
labs(colour = "Ring size") +
theme(legend.position = "top", legend.text = element_text(size = 13), legend.title = element_blank(),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 11.5),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(fill = guide_legend(nrow = 2), colour = waiver())
dev.off()
# Last row of Table IV of
# Chervinski, J. O., Kreutz, D., & Yu, J. 2021, Analysis of transaction flooding attacks against Monero.
# Paper presented at 2021 IEEE International Conference on Blockchain and Cryptocurrency (ICBC).
Chervinski.2021 <- c(
14.4701,
30.8318,
29.5862,
16.8408,
6.315,
1.6359,
0.2803,
0.0366,
0.0031,
0.0002,
0
)
Chervinski.ring.size <- rbind(
data.table(x = 1:16, y = dbinom(0:15, size = 11, prob = 1 - 192/233),
Model = paste0("Binomial assumption (n = 11, p = ", round(192/233, 2), ")")),
data.table(x = 1:16, y = c(Chervinski.2021/100, rep(0, 16 - length(Chervinski.2021))),
Model = "12 month spamming, with chain reaction analysis (Chervinski et al. 2021)"))
Chervinski.ring.size <- Chervinski.ring.size[x <= 11, ]
print(Chervinski.ring.size[, .(mean.eff.ring.size = sum(x*y)), by = "Model"])
png("chervinski-chain-reaction.png", width = 500, height = 600)
ggplot(Chervinski.ring.size, aes(x = factor(x), y = y, fill = Model)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.8) +
scale_y_continuous(labels = scales::label_percent()) +
ggtitle("Long-term effective ring sizes, binomial and chain reaction",
subtitle = "Probability mass function of binomial(nominal_ring_size, 1 - adversary_outputs_share)") +
xlab(" Effective ring size github.com/Rucknium") +
ylab("Share of rings") +
labs(colour = "Ring size") +
theme(legend.position = "top", legend.text = element_text(size = 13), legend.title = element_blank(),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 11.5),
axis.text = element_text(size = 15),
axis.title.x = element_text(size = 15, margin = margin(t = 10)),
axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
guides(fill = guide_legend(nrow = 2), colour = waiver())
dev.off()

Two binary image files updated (not shown): 72 KiB → 73 KiB and 46 KiB → 47 KiB.


@@ -0,0 +1,127 @@
@article{Aguado2010,
author = {Aguado, J. and Cid, C. and Saiz, E. and Cerrato, Y.},
title = {Hyperbolic decay of the Dst Index during the recovery phase of intense geomagnetic storms},
journal = {Journal of Geophysical Research: Space Physics},
volume = {115},
number = {A7},
pages = {},
keywords = {Dst index, recovery phase, magnetosphere},
doi = {https://doi.org/10.1029/2009JA014658},
url = {https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2009JA014658},
eprint = {https://agupubs.onlinelibrary.wiley.com/doi/pdf/10.1029/2009JA014658},
abstract = {What one commonly considers for reproducing the recovery phase of magnetosphere, as seen by the Dst index, is exponential function. However, the magnetosphere recovers faster in the first hours than in the late recovery phase. The early steepness followed by the late smoothness in the magnetospheric response is a feature that leads to the proposal of a hyperbolic decay function to reproduce the recovery phase instead of the exponential function. A superposed epoch analysis of recovery phases of intense storms from 1963 to 2003 was performed, categorizing the storms by their intensity into five subsets. The hyperbolic decay function reproduces experimental data better than what the exponential function does for any subset of storms, which indicates a nonlinear coupling between dDst/dt and Dst. Moreover, this kind of mathematical function, where the degree of reduction of the Dst index depends on time, allows for explaining different lifetimes of the physical mechanisms involved in the recovery phase and provides new insights for the modeling of the Dst index.},
year = {2010}
}
@misc{Noether2014,
title = {A Note on Chain Reactions in Traceability in CryptoNote 2.0},
number = {1},
year = {2014},
URL = {https://www.getmonero.org/resources/research-lab/pubs/MRL-0001.pdf},
booktitle = {Monero Research Lab},
author = {Noether, Surae and Noether, Sarang and Mackenzie, Adam},
abstract = {This research bulletin describes a plausible attack on a ring-signature based anonymity system. We use as motivation the cryptocurrency protocol CryptoNote 2.0 ostensibly published by Nicolas van Saberhagen in 2012. It has been previously demonstrated that the untraceability obscuring a one-time key pair can be dependent upon the untraceability of all of the keys used in composing that ring signature. This allows for the possibility of chain reactions in traceability between ring signatures, causing a critical loss in untraceability across the whole network if parameters are poorly chosen and if an attacker owns a sufficient percentage of the network. The signatures are still one-time, however, and any such attack will still not necessarily violate the anonymity of users. However, such an attack could plausibly weaken the resistance CryptoNote demonstrates against blockchain analysis. This research bulletin has not undergone peer review, and reflects only the results of internal investigation.},
howpublished = {Research Bulletin},
}
@inproceedings{Chervinski2021,
title = {Analysis of transaction flooding attacks against Monero},
DOI = {10.1109/ICBC51069.2021.9461084},
year = {2021},
URL = {https://ieeexplore.ieee.org/abstract/document/9461084},
booktitle = {2021 IEEE International Conference on Blockchain and Cryptocurrency (ICBC)},
pages = {1--8},
author = {Chervinski, Ot{\'a}vio Jo{\~a}o and Kreutz, Diego and Yu, Jiangshan},
abstract = {Monero was one of the first cryptocurrencies to address the problem of providing privacy-preserving digital asset trading. Currently, it has a market capitalization of over 2.5 billion US dollars and is among the 15 most valuable cryptocurrencies. This digital currency aims to protect users' identities and hide transaction information by using obfuscation mechanisms such as stealth addresses and ring signatures. However, in spite of the efforts to protect Monero's users' privacy, researchers have found ways to identify true payment keys within a ring signature in the past, making attacks against transaction privacy feasible. Since then, the system has received updates and adopted improved measures to provide privacy. This work presents an analysis on how an attacker can take advantage of the system's current settings to conduct both a high-profile transaction flooding attack and a stealthier version. Our results show that after flooding the network for 12 months, the attacker can identify the true spend of 46.24{\%} of newly created transaction inputs by conducting the strongest attack and 14.47{\%} by using the low-profile strategy.},
}
@misc{Krawiec-Thayer2021,
title = "Fingerprinting a flood: forensic statistical analysis of the mid-2021 Monero transaction volume anomaly",
year = "2021",
URL = "https://mitchellpkt.medium.com/fingerprinting-a-flood-forensic-statistical-analysis-of-the-mid-2021-monero-transaction-volume-a19cbf41ce60",
author = "Krawiec-Thayer, Mitchell P. and Neptune and Rucknium and Jberman and Carrington",
note = "Available at https://mitchellpkt.medium.com/fingerprinting-a-flood-forensic-statistical-analysis-of-the-mid-2021-monero-transaction-volume-a19cbf41ce60"
}
@misc{Rucknium2023a,
title = "Closed-form Expression of Monero's wallet2 Decoy Selection Algorithm",
year = "2023",
URL = "https://github.com/Rucknium/misc-research/tree/main//Monero-Decoy-Selection-Closed-Form/pdf",
author = "Rucknium",
note = "Available at https://github.com/Rucknium/misc-research/tree/main//Monero-Decoy-Selection-Closed-Form/pdf"
}
@article{Ronge2021,
title = "Foundations of Ring Sampling",
DOI = "doi:10.2478/popets-2021-0047",
volume = "2021",
number = "3",
year = "2021",
URL = "https://doi.org/10.2478/popets-2021-0047",
journal = "Proceedings on Privacy Enhancing Technologies",
pages = "265--288",
author = "Ronge, Viktoria and Egger, Christoph and Lai, Russell W. F. and Schr{\"o}der, Dominique and Yin, Hoover H. F.",
abstract = "A ring signature scheme allows the signer to sign on behalf of an ad hoc set of users, called a ring. The verifier can be convinced that a ring member signs, but cannot point to the exact signer. Ring signatures have become increasingly important today with their deployment in anonymous cryptocurrencies. Conventionally, it is implicitly assumed that all ring members are equally likely to be the signer. This assumption is generally false in reality, leading to various practical and devastating deanonymizing attacks in Monero, one of the largest anonymous cryptocurrencies. These attacks highlight the unsatisfactory situation that how a ring should be chosen is poorly understood.We propose an analytical model of ring samplers towards a deeper understanding of them through systematic studies. Our model helps to describe how anonymous a ring sampler is with respect to a given signer distribution as an information-theoretic measure. We show that this measure is robust ? it only varies slightly when the signer distribution varies slightly. We then analyze three natural samplers ? uniform, mimicking, and partitioning ? under our model with respect to a family of signer distributions modeled after empirical Bitcoin data. We hope that our work paves the way towards researching ring samplers from a theoretical point of view.",
}
@article{Egger2022,
title = "On Defeating Graph Analysis of Anonymous Transactions",
volume = "2022",
number = "3",
year = "2022",
URL = "https://petsymposium.org/2022/files/papers/issue3/popets-2022-0085.pdf",
journal = "Proceedings on Privacy Enhancing Technologies",
author = "Egger, Christoph and Lai, Russell W. F. and Ronge, Viktoria and Woo, Ivy K. Y. and Yin, Hoover H. F.",
abstract = "In a ring-signature-based anonymous cryptocurrency, signers of a transaction are hidden among a set of potential signers, called a ring, whose size is much smaller than the number of all users. The ring-membership relations specified by the sets of transactions thus induce bipartite transaction graphs, whose distribution is in turn induced by the ring sampler underlying the cryptocurrency.Since efficient graph analysis could be performed on transaction graphs to potentially deanonymise signers, it is crucial to understand the resistance of (the transaction graphs induced by) a ring sampler against graph analysis. Of particular interest is the class of partitioning ring samplers. Although previous works showed that they provide almost optimal local anonymity, their resistance against global, e.g. graph-based, attacks were unclear.In this work, we analyse transaction graphs induced by partitioning ring samplers. Specifically, we show (partly analytically and partly empirically) that, somewhat surprisingly, by setting the ring size to be at least logarithmic in the number of users, a graph-analysing adversary is no better than the one that performs random guessing in deanonymisation up to constant factor of 2.",
}
@inproceedings{Yu2019a,
title = "New Empirical Traceability Analysis of CryptoNote-Style Blockchains",
ISBN = "978-3-030-32101-7",
year = "2019",
URL = "https://link.springer.com/chapter/10.1007/978-3-030-32101-7_9",
booktitle = "Financial Cryptography and Data Security",
pages = "133--149",
author = "Yu, Zuoxia and Au, Man Ho and Yu, Jiangshan and Yang, Rupeng and Xu, Qiuliang and Lau, Wang Fat",
editor = "Goldberg, Ian and Moore, Tyler",
abstract = "The cascade effect attacks (PETS' 18) on the untraceability of Monero are circumvented by two approaches. The first one is to increase the minimum ring size of each input, from 3 (version 0.9.0) to 7 in the latest update (version 0.12.0). The second approach is introducing the ring confidential transactions with enhanced privacy guarantee. However, so far, no formal analysis has been conducted on the level of anonymity provided by the new countermeasures in Monero. In addition, since Monero is only an example of leading CryptoNote-style blockchains, the actual privacy guarantee provided by other similar blockchains in the wild remains unknown.",
publisher = "Springer International Publishing"
}
@inproceedings{Vijayakumaran2023,
title = "Analysis of CryptoNote Transaction Graphs using the Dulmage-Mendelsohn Decomposition",
ISBN = "978-3-95977-303-4",
series = "Leibniz International Proceedings in Informatics (LIPIcs)",
volume = "282",
year = "2023",
URL = "https://aftconf.github.io/aft23/program.html",
booktitle = "5th Conference on Advances in Financial Technologies (AFT 2023)",
author = "Vijayakumaran, Saravanan",
editor = "Bonneau, Joseph and Weinberg, Matthew S.",
abstract = "CryptoNote blockchains like Monero represent the largest public deployments of linkable ring signatures. Beginning with the work of Kumar et al. (ESORICS 2017) and M{\"o}ser et al. (PoPETs 2018), several techniques have been proposed to trace CryptoNote transactions, i.e. identify the actual signing key, by using the transaction history. Yu et al. (FC 2019) introduced the closed set attack for undeniable traceability and proved that it is optimal by showing that it has the same performance as the brute-force attack. However, they could only implement an approximation of the closed set attack due to its exponential time complexity. In this paper, we show that the Dulmage-Mendelsohn (DM) decomposition of bipartite graphs gives a polynomial-time implementation of the closed set attack. Our contribution includes open source implementations of the DM decomposition and the clustering algorithm (the approximation to the closed set attack proposed by Yu et al). Using these implementations, we evaluate the empirical performance of these methods on the Monero dataset in two ways -- firstly using data only from the main Monero chain and secondly using data from four hard forks of Monero in addition to the main Monero chain. We have released the scripts used to perform the empirical analysis along with step-by-step instructions.",
publisher = "Schloss Dagstuhl -- Leibniz-Zentrum f{{"}u}r Informatik"
}
@misc{Sharma2022,
title = "On the Anonymity of Peer-To-Peer Network Anonymity Schemes Used by Cryptocurrencies",
DOI = "10.48550/ARXIV.2201.11860",
year = "2022",
URL = "https://arxiv.org/abs/2201.11860",
author = "Sharma, Piyush Kumar and Gosain, Devashish and Diaz, Claudia",
abstract = "Cryptocurrency systems can be subject to deanonimization attacks by exploiting the network-level communication on their peer-to-peer network. Adversaries who control a set of colluding node(s) within the peer-to-peer network can observe transactions being exchanged and infer the parties involved. Thus, various network anonymity schemes have been proposed to mitigate this problem, with some solutions providing theoretical anonymity guarantees.In this work, we model such peer-to-peer network anonymity solutions and evaluate their anonymity guarantees. To do so, we propose a novel framework that uses Bayesian inference to obtain the probability distributions linking transactions to their possible originators. We characterize transaction anonymity with those distributions, using entropy as metric of adversarial uncertainty on the originator's identity. In particular, we model Dandelion, Dandelion++ and Lightning Network. We study different configurations and demonstrate that none of them offers acceptable anonymity to their users. For instance, our analysis reveals that in the widely deployed Lightning Network, with 1{\%} strategically chosen colluding nodes the adversary can uniquely determine the originator for about 50{\%} of the total transactions in the network. In Dandelion, an adversary that controls 15{\%} of the nodes has on average uncertainty among only 8 possible originators. Moreover, we observe that due to the way Dandelion and Dandelion++ are designed, increasing the network size does not correspond to an increase in the anonymity set of potential originators. Alarmingly, our longitudinal analysis of Lightning Network reveals rather an inverse trend -- with the growth of the network the overall anonymity decreases.",
publisher = "arXiv",
howpublished = "",
keywords = "Cryptography and Security (cs.CR),FOS: Computer and information sciences"
}

View file

@ -0,0 +1,691 @@
\documentclass[usletter,11pt,english,openany]{article}
\usepackage{float}
%Primary packages
\usepackage{fancyvrb}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[pdftex]{graphicx}
% Useful packages:
% Advanced mathematical formulas and symbols
% -------------------------------------
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{bm}
% Footnotes
% -------------------------------------
\usepackage[stable,splitrule]{footmisc}
% Color management package
% -------------------------------------
\usepackage[usenames,dvipsnames]{xcolor}
% Control line spacing
% -------------------------------------
% putting this between footmisc and hyperref seemed to fix broken footnote links
\usepackage{setspace}
\AtBeginDocument{\let~=\nobreakspace}
\spacing{1.4}
\usepackage{lineno}
\linenumbers
\usepackage[bookmarks=true]{hyperref}
\hypersetup{colorlinks=false}
\usepackage{orcidlink}
\usepackage{booktabs}
\usepackage{caption}
\usepackage{longtable}
\usepackage[T1]{fontenc}
\usepackage{geometry}
\geometry{verbose,tmargin=2cm,bmargin=2cm,lmargin=2cm,rmargin=2cm}
\usepackage{array}
\usepackage{url}
\usepackage{multirow}
\usepackage{stackrel}
\usepackage{rotating}
% https://tex.stackexchange.com/questions/151241/remove-metadata-of-pdf-generated-by-latex
\hypersetup{
bookmarks=true, % show bookmarks bar?
unicode=false, % non-Latin characters in Acrobat's bookmarks
pdftoolbar=true, % show Acrobat's toolbar?
pdfmenubar=true, % show Acrobat's menu?
pdffitwindow=false, % window fit to page when opened
% pdfstartview={FitW}, % fits the width of the page to the window
pdftitle={Monero Black Marble Flood}, % title
pdfauthor={Rucknium}, % author
pdfsubject={}, % subject of the document
pdfcreator={Rucknium}, % creator of the document
pdfproducer={}, % producer of the document
pdfkeywords={}, % list of keywords
pdfnewwindow=true, % links in new window
colorlinks=false, % false: boxed links; true: colored links
linkcolor=red, % color of internal links
citecolor=green, % color of links to bibliography
filecolor=magenta, % color of file links
urlcolor=cyan % color of external links
}
\begin{document}
\title{March 2024 Suspected Black Marble Flooding Against Monero:
Privacy, User Experience, and Countermeasures\\\vspace{.3cm}
\large Draft v0.2\vspace{-.715cm}}
\author{Rucknium\orcidlink{https://orcid.org/0000-0001-5999-8950} }
\date{March 27, 2024}
\maketitle
\begin{abstract}
On March 4, 2024, aggregate Monero transaction volume suddenly almost
tripled. This note analyzes the effect of the large number of transactions,
assuming that the transaction volume is an attempted black marble
flooding attack by an adversary. According to my estimates, mean effective
ring size has decreased from 16 to 5.5 if the black marble flooding
hypothesis is correct. At current transaction volumes, the suspected
spam transactions probably cannot be used for large-scale ``chain
reaction'' analysis to eliminate all ring members except for the
real spend. Effects of increasing Monero's ring size above 16 are
analyzed.
\end{abstract}
\section{March 4, 2024: Sudden transaction volume }
\begin{figure}[H]
\caption{Volume of Monero transactions with spam fingerprint}
\label{fig-spam-tx-volume}
\centering{}\includegraphics[scale=0.5]{images/spam-fingerprint-tx-volume}
\end{figure}
On March 4, 2024 at approximately block height 3097764 (15:21:24 UTC),
the number of 1input/2output minimum fee (20 nanoneros/byte) transactions
sent to the Monero network rapidly increased. Figure \ref{fig-spam-tx-volume}
shows daily volume of this type of transaction increasing from about
15,000 to over 100,000.
The large volume of these transactions was enough to entirely fill
the 300 kB Monero blocks mined about every two minutes. Monero's dynamic
block size algorithm activated. The 100 block rolling median block
size slowly increased to adjust for the larger number of transactions
that miners could pack in blocks. Figure \ref{fig-empirical-block-weight}
shows the adjustment. The high transaction volume raised the 100 block
median gradually for a period of time. Then the transaction volume decreased
just enough to allow the 100 block median to reset to a lower level,
and the process would restart. Block sizes have usually remained
between 300 kB and 400 kB. Occasionally, high-fee transactions would
allow miners to get more total revenue by giving up some of the 0.6
XMR/block tail emission and including more transactions in a block.
The ``maximum peaks'' plot shows this phenomenon.
\begin{figure}[H]
\caption{Monero empirical block weight}
\label{fig-empirical-block-weight}
\centering{}\includegraphics[scale=0.5]{images/rolling-median-block-weight}\includegraphics[scale=0.5]{images/rolling-max-block-weight}
\end{figure}
The sudden transaction volume rise may originate from a single entity.
The motive may be spamming transactions to bloat the blockchain size,
increase transaction confirmation times for real users, perform a
network stress test, or execute a black marble flooding attack to
reduce the privacy of Monero users. I will focus most of my analysis
on the last possibility.
\section{Literature review}
The very first research bulletin released by the Monero Research Lab
described black marble transaction flooding. \cite{Noether2014} points
out that the ring signature privacy model requires rings to contain
transaction outputs that could be plausible real spends. If a
single entity owns a large share of outputs (spent or not), it can
use its knowledge to rule out ring members in other users' transactions
that cannot be the real spend. Since the entity knows that it did
not spend the output(s) in a particular ring, the effective ring
size that protects other users' privacy can be reduced --- even to
an effective ring size of 1 when the entity knows the real spend with
certainty. Rings with known real spends can be leveraged to determine
the real spend in other rings in a ``chain reaction'' attack.
\cite{Noether2014} gave the name ``black marble'' to the outputs
owned by an anti-privacy adversary since they modeled the problem
using a marble draw problem with a hypergeometric distribution. When
a specific number of marbles are drawn \textit{without} replacement
from an urn containing a specific number of white and black marbles,
the hypergeometric distribution describes the probability of drawing
a specific number of black marbles. In my modeling I use the binomial
distribution, which is the same as the hypergeometric except marbles
are drawn \textit{with} replacement. The binomial distribution makes
more sense now ten years after \cite{Noether2014} was written. The
total number of RingCT outputs on the blockchain that can be included
in a ring is over 90 million. The hypergeometric distribution converges
to the binomial distribution as the total number of marbles increases
to infinity. Moreover, Monero's current decoy selection algorithm
does not select all outputs with equal probability. More recent outputs
are selected with much higher probability. The hypergeometric distribution
cannot be used when individual marbles have unequal probability of
being selected.
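To illustrate this convergence in the equal-probability case, the
following R sketch (with purely illustrative numbers, not estimates
from the data) compares the hypergeometric and binomial probabilities
of drawing black marbles when 75 percent of the marbles in the urn
are black, first for a small urn and then for an urn on the scale
of the current RingCT output set:
\begin{verbatim}
# Illustrative only: hypergeometric vs. binomial probability of drawing
# x black marbles in 15 draws when 75% of marbles in the urn are black.
draws <- 15
p.black <- 0.75
x <- 0:draws

for (n.marbles in c(100, 90e6)) {
  n.black <- round(p.black * n.marbles)
  hyper <- dhyper(x, m = n.black, n = n.marbles - n.black, k = draws)
  binom <- dbinom(x, size = draws, prob = p.black)
  cat("Urn size:", n.marbles,
    " max abs. difference:", max(abs(hyper - binom)), "\n")
}
\end{verbatim}
The maximum difference between the two probability mass functions
shrinks toward zero as the urn grows, which is the sense in which
the binomial approximation is reasonable at the current scale of the
output set.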
\cite{Chervinski2021} simulates a realistic black marble flood attack.
They consider two scenarios. The adversary could create 2input/16output
transactions to maximize the number of black marble outputs per block
or the adversary could create 2input/2output transactions to make
the attack less obvious. The paper uses Monero transaction data from
2020 to set the estimated number of real outputs and kB per block
at 41 outputs and 51 kB respectively. The nominal ring size at this
time was 11. The researchers simulated filling the remaining 249 kB
of the 300 kB block with black marble transactions. A ``chain reaction''
algorithm was used to boost the effectiveness of the attack. In the
2in/2out scenario, the real spend could be deduced (effective ring
size 1) in 11\% of rings after one month of spamming black marbles.
Later I will compare the results of this simulation with the current
suspected spam incident.
\cite{Krawiec-Thayer2021} analyze a suspected spam incident in July-August
2021. Transactions' inputs, outputs, fees, and ring member ages were
plotted to evaluate evidence that a single entity created the spam.
The analysis concluded, ``All signs point towards a single entity.
While transaction homogeneity is a strong clue, a the {[}sic{]} input
consumption patterns are more conclusive. In the case of organic growth
due to independent entities, we would expect the typically semi-correlated
trends across different input counts, and no correlation between independent
users\textquoteright{} wallets. During the anomaly, we instead observed
an extremely atypical spike in 1--2 input txns with no appreciable
increase in 4+ input transactions.''
TODO: A few papers like \cite{Ronge2021,Egger2022} discuss black
marble attacks too.
\section{Black marble theory}
The binomial distribution describes the probability of drawing $x$
number of ``successful'' items when drawing a total of $n$ items
when the probability of a successful draw is $p$. It can be used
to model the number of transaction outputs selected by the decoy selection
algorithm that are not controlled by a suspected adversary.
The probability mass function of the binomial distribution with $n\in\{0,1,2,\ldots\}$
number of draws and $p\in[0,1]$ probability of success is
\begin{equation}
f(x,n,p)=\binom{n}{x}p^{x}\left(1-p\right)^{n-x}\textrm{, where }\binom{n}{x}=\frac{n!}{x!(n-x)!}
\end{equation}
The expected value (the theoretical mean) of a random variable with
a binomial distribution is $np$.
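For example, if each of 15 decoy draws independently selects an output
owned by a real user with probability $p=0.25$, the expected number
of real-user decoys in a ring is $15\cdot0.25=3.75$.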
Monero's standard decoy selection algorithm programmed in \texttt{wallet2}
does not select outputs with equal probability. The probability of
selecting each output depends on the age of the output. Specifics
are in \cite{Rucknium2023a}. The probability of a single draw selecting
an output that is not owned by the adversary, $p_{r}$, is equal to
the share of the probability mass function occupied by those outputs:
$p_{r}=\sum_{i\in R}g(i)$, where $R$ is the set of outputs owned
by real users and $g(x)$ is the probability mass function of the
decoy selection algorithm.
\subsection{Spam assumptions\label{subsec:spam-assumptions}}
There is some set of criteria that identifies suspected spam. The
early March 2024 suspected spam transactions: 1) have one input; 2)
have two outputs; 3) pay the minimum 20 nanoneros per byte transaction
fee. The normal volume of these transactions produced by real users
must be estimated. The volume in excess of the normal volume is assumed
to be spam. I followed this procedure (an illustrative R sketch appears after the list):
\begin{enumerate}
\item Compute the mean number of daily transactions that fit the suspected
spam criteria for the four weeks that preceded the suspected spam
incident. A separate mean was calculated for each day of the week
(Monday, Tuesday,...) because Monero transaction volumes have weekly
cycles. These volume means are denoted $v_{r,m},v_{r,t},v_{r,w},\ldots$
for the days of the week.
\item For each day of the suspected spam interval, sum the number of transactions
that fit the suspected spam criteria. Subtract the amounts found in
step (1) from this sum, matching on the day of the week. This provides
the estimated number of spam transactions for each day: $v_{s,1},v_{s,2},v_{s,3},\ldots$
\item For each day of the suspected spam interval, randomly select $v_{s,t}$
transactions from the set of transactions that fit the suspected spam
criteria, without replacement. This randomly selected set is assumed
to be the true spam transactions.
\item During the period of time of the spam incident, compute the expected
probability $p_{r}$ that one output drawn from the \texttt{wallet2}
decoy distribution will select an output owned by a real user (instead
of the adversary) when the wallet constructs a ring at the point in
time when the blockchain tip is at height $h$. The closed-form formula
of the \texttt{wallet2} decoy distribution is in \cite{Rucknium2023a}.
\item The expected effective ring size of each ring constructed at block
height $h$ is $1+15\cdot p_{r}$. The coefficient on $p_{r}$ is
the number of decoys.
\end{enumerate}
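The following R sketch illustrates steps (1) and (2). It assumes a
\texttt{data.table} named \texttt{txs} with one row per transaction
fitting the suspected spam criteria, a \texttt{date} column of class
\texttt{Date}, and a \texttt{start.spam.date} object of class \texttt{Date};
the object names are illustrative and may differ from the code released
with this note:
\begin{verbatim}
library(data.table)
# txs: one row per tx that fits the suspected spam criteria.
daily <- txs[, .(n.txs = .N), by = date]
daily[, weekday := weekdays(date)]

# Step (1): day-of-week baseline from the four pre-spam weeks.
baseline <- daily[date >= start.spam.date - 28 & date < start.spam.date,
  .(baseline.n = mean(n.txs)), by = weekday]

# Step (2): daily spam estimate = observed volume minus matched baseline.
spam.est <- merge(daily[date >= start.spam.date], baseline, by = "weekday")
spam.est[, est.spam.n := pmax(n.txs - baseline.n, 0)]
setorder(spam.est, date)
\end{verbatim}
Steps (3) through (5) then operate on the estimated daily spam counts
in \texttt{est.spam.n}.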
Figure \ref{fig-estimated-mean-effective-ring-size} shows the results
of this methodology. The mean effective ring size settled at about
5.5 by the fifth day of the large transaction volume. On March 12
and 13 there was a large increase in the number of 1input/2output
transactions that paid 320 nanoneros/byte (the third fee tier). This
could have been the spammer switching fee level temporarily or a service
that uses Monero increasing fees to avoid delays. I used the same
method to estimate the spam volume of these 320 nanoneros/byte suspected
spam. The 1in/2out 320 nanoneros/byte transactions displaced some
of the 1in/2out 20 nanoneros/byte transactions because miners preferred
to put transactions with higher fees into blocks. Other graphs and
analysis will consider only the 1in/2out 20 nanoneros/byte transactions
as spam unless indicated otherwise.
\begin{figure}[H]
\caption{Estimated mean effective ring size}
\label{fig-estimated-mean-effective-ring-size}
\centering{}\includegraphics[scale=0.5]{images/empirical-effective-ring-size}
\end{figure}
Figure \ref{fig-spam-share-outputs} shows the daily share of outputs
on the blockchain that are owned by the suspected spammer. The mean
share of outputs since the suspected spam started is about 75 percent.
\begin{figure}[H]
\caption{Spam share of outputs}
\label{fig-spam-share-outputs}
\centering{}\includegraphics[scale=0.5]{images/spam-share-outputs}
\end{figure}
\subsection{Long term projection scenarios at different ring sizes}
Fix the number of outputs owned by real users at $r$. The analysis
will let the number $s$ of outputs owned by the adversary vary. The
share of outputs owned by real users is
\begin{equation}
p_{r}=\dfrac{r}{r+s}\label{eq:p_r-fixed-real}
\end{equation}
Expression \ref{eq:p_r-fixed-real} can be written as $p_{r}=\frac{1}{r}\cdot\dfrac{r}{1+\tfrac{1}{r}s}$,
which is the formula for hyperbolic decay with the additional $\frac{1}{r}$
coefficient at the beginning of the expression \cite{Aguado2010}.
Let $n$ be the nominal ring size (16 in Monero version 0.18). The
number of decoys chosen by the decoy selection algorithm is $n-1$.
The mean effective ring size for a real user's ring is one (the real
spend) plus the ring's expected number of decoys owned by other real
users.
\begin{equation}
\mathrm{E}\left[n_{e}\right]=1+\left(n-1\right)\cdot\dfrac{r}{r+s}\label{eq:expectation-n_e}
\end{equation}
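For illustration, plugging in the roughly 75 percent adversary output
share estimated in Section \ref{subsec:spam-assumptions} gives $\frac{r}{r+s}\approx0.25$,
so the long-run expectation at the current nominal ring size of 16
would be $\mathrm{E}\left[n_{e}\right]\approx1+15\cdot0.25=4.75$, somewhat
below the 5.5 empirical estimate of Figure \ref{fig-estimated-mean-effective-ring-size}.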
The empirical analysis of Section \ref{subsec:spam-assumptions} considered
the fact that the \texttt{wallet2} decoy selection algorithm draws
a small number of decoys from the pre-spam era. Now we will assume
that the spam incident has continued for a very long time and all
but a negligible number of decoys are selected from the spam era.
We will hold constant the non-spam transactions and vary the number
of spam transactions and the ring size. Figures \ref{fig-projected-effective-ring-size-non-log},
\ref{fig-projected-effective-ring-size-log-log}, and \ref{fig-projected-share-ring-size-one}
show the results of the simulations.
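The projections follow directly from equation \ref{eq:expectation-n_e}.
A minimal R sketch of this type of projection is below; the parameter
grid is illustrative and does not necessarily match the grid used to
produce the figures:
\begin{verbatim}
# Long-term projection: mean effective ring size as a function of the
# number of adversary outputs s, holding real outputs r fixed.
r <- 100000                      # illustrative number of real outputs
s <- seq(0, 20 * r, by = r / 10) # adversary outputs: 0 to 20x real outputs
ring.sizes <- c(16, 30, 60, 128) # illustrative nominal ring sizes

projection <- sapply(ring.sizes, function(n) 1 + (n - 1) * r / (r + s))
colnames(projection) <- paste0("ring size ", ring.sizes)

matplot(s / r, projection, type = "l", lty = 1,
  xlab = "Adversary outputs as a multiple of real users' outputs",
  ylab = "Mean effective ring size")
legend("topright", legend = colnames(projection), col = 1:4, lty = 1)
\end{verbatim}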
\begin{figure}[H]
\caption{Long-term projected mean effective ring size}
\label{fig-projected-effective-ring-size-non-log}
\centering{}\includegraphics[scale=0.5]{images/projected-effective-ring-size-non-log}
\end{figure}
\begin{figure}[H]
\caption{Long-term projected mean effective ring size (log-log scale)}
\label{fig-projected-effective-ring-size-log-log}
\centering{}\includegraphics[scale=0.5]{images/projected-effective-ring-size-log-log}
\end{figure}
\begin{figure}[H]
\caption{Long-term projected share of rings with effective ring size 1}
\label{fig-projected-share-ring-size-one}
\centering{}\includegraphics[scale=0.5]{images/projected-ring-size-one}
\end{figure}
\subsection{Guessing the real spend using a black marble flooder's simple classifier}
The adversary carrying out a black marble flooding attack could use
a simple classifier to try to guess the real spend: Let $n$ be nominal
ring size and $n_{s}$ be the number of outputs in a given ring that
are owned by the attacker. $n_{s}$ is a random variable because decoy
selection is a random process. The adversary can eliminate $n_{s}$
of the $n$ ring members as possible real spends. The attacker guesses
randomly with uniform probability that the $i$th ring member of the
$n-n_{s}$ remaining ring members is the real spend. The probability
of correctly guessing the real spend is $\frac{1}{n-n_{s}}$. If the
adversary owns all ring members except for one ring member, which
must be the real spend, the probability of correctly guessing the
real spend is 100\%. If the adversary owns all except two ring members,
the probability of correctly guessing is 50\%. And so forth.
The mean effective ring size is $\mathrm{E}\left[n_{e}\right]$ from
\ref{eq:expectation-n_e}. Does this mean that the mean probability
of correctly guessing the real spend is $\frac{1}{\mathrm{E}\left[n_{e}\right]}$?
No. The $h(x)=\frac{1}{x}$ function is strictly convex. By Jensen's
inequality, $\mathrm{E}\left[\frac{1}{n_{e}}\right]>\frac{1}{\mathrm{E}\left[n_{e}\right]}$.
The mean probability of correctly guessing the real spend is
\begin{equation}
\mathrm{E}\left[\frac{1}{n_{e}}\right]=\sum_{i=1}^{n}\dfrac{1}{i}\cdot f\left(i-1,\,n-1,\,\tfrac{\mathrm{E}\left[n_{e}\right]-1}{n-1}\right)
\end{equation}
$\frac{1}{i}$ is the probability of correctly guessing the real spend
when the effective ring size is $i$. $f$ is the probability mass
function of the binomial distribution. It calculates the probability
of the decoy selection algorithm selecting $i-1$ decoys that are
owned by real users. The total number of decoys to select is $n-1$
(that is the argument in the second position of $f$). The probability
of selecting a decoy owned by a real user is $\frac{\mathrm{E}\left[n_{e}\right]-1}{n-1}=\frac{r}{r+s}$.
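A minimal R sketch of this calculation, using an illustrative real-user
output share of 25 percent rather than the block-by-block empirical
estimates:
\begin{verbatim}
# Probability of correctly guessing the real spend with the simple
# classifier, compared with the naive 1/E[n_e] figure (Jensen's inequality).
n <- 16        # nominal ring size
p.r <- 0.25    # illustrative share of outputs owned by real users
i <- 1:n       # possible effective ring sizes

# P(effective ring size = i): i - 1 of the n - 1 decoys belong to real users.
pmf <- dbinom(i - 1, size = n - 1, prob = p.r)

guess.prob <- sum((1 / i) * pmf)      # E[1/n_e]
naive <- 1 / (1 + (n - 1) * p.r)      # 1/E[n_e]
c(guess.prob = guess.prob, naive = naive)
\end{verbatim}
The computed \texttt{guess.prob} exceeds \texttt{naive}, as Jensen's
inequality requires.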
\begin{figure}[H]
\caption{Estimated probability of correctly guessing the real spend}
\label{fig-prob-guessing-real-spend}
\centering{}\includegraphics[scale=0.5]{images/empirical-guessing-probability}
\end{figure}
The probability of a given ring having all adversary-owned ring members
except for the real spend is $f\left(0,n-1,\frac{\mathrm{E}\left[n_{e}\right]-1}{n-1}\right)$.
Figure \ref{fig-share-ring-size-one} plots the estimated share
of rings with effective ring size one.
\begin{figure}[H]
\caption{Estimated share of rings with effective ring size of one}
\label{fig-share-ring-size-one}
\centering{}\includegraphics[scale=0.5]{images/empirical-ring-size-one}
\end{figure}
\section{Chain reaction graph attacks}
The effective ring size can be reduced further by applying a process
of elimination to related rings. This technique is called a ``chain
reaction'' or a ``graph analysis attack''. Say that the effective
ring size in transaction $A$ is reduced to two because of a black
marble attack. One of the remaining two ring members is an output
in transaction $B$. If the output in transaction $B$ is known to
be spent in transaction $C$ because the effective ring size of transaction
$C$ was one, then that output can be ruled out as a plausible real
spend in transaction $A$. Therefore, the adversary can reduce the
effective ring size of transaction $A$ to one.
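The following toy R sketch implements only this simple process-of-elimination
rule on three hypothetical rings; it is not the closed set attack or
the DM decomposition discussed below:
\begin{verbatim}
# Toy chain reaction: each ring is a set of candidate outputs. Whenever a
# ring has a single candidate left, mark that output as spent and remove
# it as a candidate from every other ring. Repeat until nothing changes.
rings <- list(A = c("out1", "out2"), B = c("out2", "out3"), C = c("out3"))
spent <- character(0)

repeat {
  newly.spent <- setdiff(unlist(rings[lengths(rings) == 1]), spent)
  if (length(newly.spent) == 0) break
  spent <- union(spent, newly.spent)
  rings <- lapply(rings, function(ring) {
    if (length(ring) > 1) setdiff(ring, spent) else ring
  })
}
rings  # ring A collapses to "out1" after out3 and then out2 are resolved
\end{verbatim}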
Theorem 1 of \cite{Yu2019a} says that a ``closed set'' attack is
as effective as exhaustively checking all subsets of outputs. The
brute force attack is infeasible since its complexity is $O\left(2^{m}\right)$,
where $m$ is the total number of RingCT outputs on the blockchain.
\cite{Yu2019a} implements a heuristic algorithm to execute the closed
set attack that is almost as effective as the brute force method.
\cite{Vijayakumaran2023} proves that the Dulmage-Mendelsohn (DM)
decomposition gives the same results as the brute force closed set
attack, but the algorithm renders a result in polynomial time. The
open source implementation of the DM decomposition in \cite{Vijayakumaran2023}
processes 37 million RingCT rings in about four hours.
In practice, how much further can chain reaction attacks reduce the
effective ring size when combined with a black marble attack? \cite{Egger2022}
suggest some closed-form formulas to compute the vulnerability of
different ring sizes to chain reaction attacks. However, \cite{Egger2022}
assume that decoys are selected by a partitioning process instead
of Monero's actual mimicking decoy selection algorithm. It is not
clear how relevant the findings of \cite{Egger2022} are for Monero's
mainnet. Monte Carlo simulations would be a better way to evaluate
the risk of chain reactions.
\cite{Chervinski2021} carries out a simulation using the old ring
size of 11. In the 2input/2output spam scenario, 82\% of outputs are
black marbles. Assuming only the binomial distribution, i.e. no chain
reaction analysis, Figure \ref{fig-effective-ring-size-binomial-pmf}
compares the theoretical long-term distribution of effective ring
sizes in the \cite{Chervinski2021} scenario and the March 2024 suspected
spam on Monero's mainnet. The share of rings with effective ring size
1 in the \cite{Chervinski2021} scenario is 11.9 percent, but the
share is only 0.8 percent with the suspected March 2024 spam. The
mean effective ring sizes of the \cite{Chervinski2021} scenario without
chain reaction and the March 2024 spam estimate are 2.9 and 5.2, respectively.
\begin{figure}[H]
\caption{Probability mass function of long-term effective ring sizes}
\label{fig-effective-ring-size-binomial-pmf}
\centering{}\includegraphics[scale=0.5]{images/effective-ring-size-binomial-pmf}\includegraphics[scale=0.5]{images/chervinski-chain-reaction}
\end{figure}
\cite{Chervinski2021} executes chain reaction analysis to increase
the effectiveness of the attack. The second plot in Figure \ref{fig-effective-ring-size-binomial-pmf}
compares the long term effective ring size achieved by \cite{Chervinski2021}
when leveraging chain reaction analysis and the effective ring size
when only the binomial distribution is assumed. \cite{Chervinski2021}
increases the share of rings with effective ring size one from 11.9
to 14.5 percent. Mean effective ring size decreases from 2.94 to 2.76.
This is a modest gain in attack effectiveness, but \cite{Chervinski2021}
appears to be using a suboptimal chain reaction algorithm instead
of the closed set attack.
The actual risk from chain reaction analysis in the suspected March
2024 flooding is a gap in our knowledge. \cite{Vijayakumaran2023}
provides an open source implementation of the DM decomposition in
Rust and excellent documentation.\footnote{\url{https://github.com/avras/cryptonote-analysis}\\
\url{https://www.respectedsir.com/cna}} A Monte Carlo simulation applying the DM decomposition to the March
2024 black marble estimates should be written.
\section{Countermeasures}
See \url{https://github.com/monero-project/research-lab/issues/119}
TODO
\section{Estimated cost to suspected spammer}
When the 1in/2out 20 nanoneros/byte spam definition is used, the total
fees paid by the spam transactions over the 23 days of spam were 61.5
XMR. The sum total of the transaction sizes of the spam transactions
was 3.08 GB.
When the 1in/2out 20 or 320 nanoneros/byte spam definition is used,
the total fees paid by the spam transactions over the 23 days of spam
were 81.3 XMR. The sum total of the transaction sizes of the spam transactions
was 3.12 GB.
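As a rough consistency check, the minimum fee of 20 nanoneros per
byte is $20\times10^{-9}$ XMR per byte, or about 20 XMR per GB, and
$3.08\,\textrm{GB}\times20\,\textrm{XMR/GB}\approx61.5$ XMR.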
\section{Transaction confirmation delay}
Monero's transaction propagation rules are different from BTC's rules
for good reasons, but two of the rules can make transactions seem
like they are ``stuck'' when the txpool (mempool) is congested.
First, Monero does not have replace-by-fee (RBF). When a Monero node
sees that a transaction attempts to spend an output that is already
spent by another transaction in the txpool, the node does not send
the transaction to other nodes because it is an attempt to double
spend the output. (Monero nodes do not know the real spend in the
ring, but double spends can be detected by comparing the key images
of ring signatures in different transactions.) Monero users cannot
increase the fee of a transaction that they already sent to a node
because the transaction with the higher fee would be considered a
double spend. BTC has RBF that allows a transaction to replace a transaction
in the mempool that spends the same output if the replacement transaction
pays a higher fee. One of RBF's downsides is that merchants cannot
safely accept zero-confirmation transactions because a malicious customer
can replace the transaction in the mempool with a higher-fee transaction
that spends the output back to themselves. Without RBF, Monero users
must wait for their low-fee transaction to confirm on the blockchain.
They cannot choose to raise their ``bid'' for block space even if
they were willing to pay more. They have to get it right the first
time. Fee prediction is especially important for Monero users when
the txpool is congested because of the lack of RBF, but very little
Monero-specific fee prediction research has been done.
Unlike BTC, Monero also does not have child-pays-for-parent (CPFP),
which allows users to chain multiple transactions together while they
are still in the mempool. With CPFP, users can spend the output of
the unconfirmed parent transaction and attach a higher fee to the
child transaction. Miners have an incentive to include the parent
transaction in the block because the child transaction is only valid
if the parent transaction is also mined in a block. Monero transaction
outputs cannot be spent in the same block that they are confirmed
in. Actually, Monero users need to wait at least ten blocks to spend
new transaction outputs because benign or malicious blockchain reorganizations
can invalidate ring signatures.\footnote{``Eliminating the 10-block-lock'' \url{https://github.com/monero-project/research-lab/issues/95}}
Monero's transaction propagation rules can create long delays for
users who pay the same minimum fee that the suspected spammer pays.
When users pay the same fee as the spam, their transactions are put
in a ``queue'' with other transactions at the same fee per byte
level. Their transactions are confirmed in first-in/first-out order
because the \texttt{get\_block\_template} RPC call to \texttt{monerod}
arranges transactions that way.\footnote{\url{https://github.com/monero-project/monero/blob/9bf06ea75de4a71e3ad634e66a5e09d0ce021b67/src/cryptonote_core/tx_pool.cpp\#L1596}}
Most miners use \texttt{get\_block\_template} to construct blocks,
but P2Pool orders transactions randomly after they have been sorted
by fee per byte.\footnote{\url{https://github.com/SChernykh/p2pool/blob/dd17372ec0f64545311af40b976e6274f625ddd8/src/block_template.cpp\#L194}}
The first plot in Figure \ref{fig-delay-tx-confirmation} shows the
mean delay of transaction confirmation in each hour. The plot shows
the mean time that elapsed between when the transaction entered the
txpool and when it was confirmed in a block. Each hour's value in
the line plot is computed from transactions that were confirmed in
blocks in that hour. This data is based on txpool archive data actively
collected from a few nodes.\footnote{\url{https://github.com/Rucknium/misc-research/tree/main/Monero-Mempool-Archive}}
The mean includes transactions with and without the spam fingerprint.
Usually mean confirmation time was less than 30 minutes, but sometimes
confirmations of the average transaction were delayed by over two
hours.
\begin{figure}[H]
\caption{Delay to first transaction confirmation}
\label{fig-delay-tx-confirmation}
\centering{}\includegraphics[scale=0.5]{images/mean-delay-first-confirmation}\includegraphics[scale=0.5]{images/max-delay-first-confirmation}
\end{figure}
The second plot in Figure \ref{fig-delay-tx-confirmation} shows the
\textit{maximum} waiting time for a transaction to be confirmed. The
value of the line at each hour is the longest time that a transaction
waited to be confirmed in one of the blocks mined in that hour or the
amount of time that a transaction was still waiting to be confirmed
at the end of the hour (whichever is greater). There were a handful
of transactions that paid fees below the 20 nanoneros/byte tier that
the spam was paying. These transactions did not move forward in the
queue when the spam transactions were confirmed. Instead, they had
to wait until the txpool completely emptied. Exactly 100 transactions
waited longer than three hours. They paid between 19465 and 19998
piconeros per byte. Most of the transactions appeared to have set
fees slightly lower than 20 nanoneros per byte because they had an
unusual number of inputs. 92 of them had four or more inputs. The
remaining eight of them had just one input. Those eight may have been
constructed by a nonstandard wallet.
\section{Real user fee behavior}
During the suspected spam, users must pay more than the minimum fee
to put their transactions at the front of the confirmation queue.
If users pay more than the minimum fee, usually their transactions
would be confirmed in the next mined block. Monero's standard fee
levels are 20, 80, 320, and 4000 nanoneros per byte. Users are not
required to pay one of these fee levels, but all wallets that are
based on \texttt{wallet2} do not allow users to choose custom fees
outside of the four standard levels because of the privacy risk of
unusual transactions.\footnote{\url{https://github.com/Rucknium/misc-research/tree/main/Monero-Nonstandard-Fees}}
The ``auto'' fee level of the Monero GUI and CLI wallets is supposed
to automatically change the fee of a transaction from the lowest tier
(20 nanoneros/byte) to the second tier (80 nanoneros/byte) when the
txpool is congested. Unfortunately, a bug prevented the automatic
adjustment. On March 9, 2024 the Monero Core Team released the 0.18.3.2
version of Monero and the GUI/CLI wallet that fixed the bug.\footnote{``Monero 0.18.3.2 'Fluorine Fermi' released'' \url{https://www.getmonero.org/2024/03/09/monero-0.18.3.2-released.html}
``wallet2: adjust fee during backlog, fix set priority'' \url{https://github.com/monero-project/monero/pull/9220}} Users are not required to upgrade to the latest wallet version, so
probably many users still use the version that is not automatically
adjusting fees.
The first plot of Figure \ref{fig-share-tx-by-fee-tier} shows the
share of transactions paying each of the four fee tiers. Any transactions
that do not pay in the standard ranges $\left\{ \left[18,22\right],\left[72,82\right],\left[315,325\right],\left[3000,4100\right]\right\} $
were not included in the plot. The 320 nanoneros/byte tier is interesting.
About 10 percent of transactions paid 320 nanoneros/byte until February
17, 2024. The date could have something to do with Monero being delisted
from Binance on February 20, 2024.\footnote{\url{https://decrypt.co/218194/binance-finalizes-monero-delisting}}
Then on March 12-13, 2024 there was a burst of 320 nanonero/byte transactions.
The 0.18.3.2 GUI/CLI wallet release could not explain the burst since
the auto fee adjustment would only increase fees from 20 to 80 nanoneros/byte.
The burst of 320 nanoneros/byte transactions must have come either
from a centralized service paying that fee tier or from the suspected spammer.
The second plot of Figure \ref{fig-share-tx-by-fee-tier} shows the
same data with the suspected spam transactions eliminated: both the
20 and 320 nanoneros/byte transactions with the spam fingerprint were
removed. There is a modest increase in 80 nanonero/byte transactions
after the spam started.
\begin{figure}[H]
\caption{Share of transactions by fee tier}
\label{fig-share-tx-by-fee-tier}
\centering{}\includegraphics[scale=0.5]{images/share-tx-in-fee-tier-all-txs}\includegraphics[scale=0.5]{images/share-tx-in-fee-tier-spam-removed}
\end{figure}
The mempool archive data suggest that merchants using zero-confirmation
delivery were still safe during the spam incident. Once submitted
to the network, transactions did not drop out of the mempool. They
just took longer to confirm. There were only two transaction IDs in
the mempool of one of the mempool archive nodes that did not confirm
during the spam period. Both occurred on March 8 when the mempool
was very congested. The two ``disappearing transactions'' could
happen if someone submits a transaction to an overloaded public RPC
node, the transaction does not propagate well, and then the user
reconstructs the transaction with another node. The first transaction
will not confirm because it is a double spend. Seeing a transaction
in the mempool that never confirms happens sometimes during normal
transaction volumes, too. Single transactions like that appeared on
February 14, 17, and 23 and March 1 in the mempool archive data.
\section{Evidence for and against the spam hypothesis}
Is the March 4, 2024 transaction volume a result of many real users
starting to use Monero more, or is it spam created by a single entity?
\cite{Krawiec-Thayer2021} analyzed the July/August 2021 sudden rise
in transaction volume. We concluded that it was likely spam. Our evidence
was: 1) There was a sharp increase of 1in/2out and 2in/1out transactions,
but the volume of other transaction types did not increase, 2) All
the suspected spam paid minimum fees, 3) The distribution of ring
members became much younger, suggesting that the spammer was rapidly
re-spending outputs as quickly as possible.
Available time has not permitted a full run of the \cite{Krawiec-Thayer2021}
analysis on the March 2024 suspected spam data. It is easy to do a
quick check of transaction volume by input/output type. Figure \ref{fig-in-out-tx-type-volume}
plots the eight most common in/out transaction types on a log scale.
Only the volume of 1in/2out transactions increased on March 4, supporting
the spam hypothesis.
\begin{figure}[H]
\caption{Transaction volume by number of inputs and outputs (log scale)}
\label{fig-in-out-tx-type-volume}
\centering{}\includegraphics[scale=0.5]{images/in-out-tx-type-volume}
\end{figure}
More can be done to generate evidence for or against the spam hypothesis.
\cite{Krawiec-Thayer2021} analyzed the age of all ring members. Using
the OSPEAD techniques, the distribution of the age of the real spends
can be estimated.\footnote{\url{https://github.com/Rucknium/OSPEAD}}
The Monero node network can be actively crawled to see if the spam
transactions originate from one node. Dandelion++ can defeat attempts
to discover the origin of most transactions because the signal of the
real transaction is covered by the Dandelion++ noise. When the signal
is huge like the spam, some statistical analysis could overcome the
Dandelion++ protection. Investigatory nodes could use \texttt{set\_log
net.p2p.msg:INFO} to view which neighboring nodes the suspected spam
is coming from. Then the investigatory node could crawl the network
in the direction of the highest incoming volume. The techniques of
\cite{Sharma2022}, which are most useful at extremely high transaction
volumes such as this spam incident, could also be applied.
\bibliographystyle{apalike-ejor}
\bibliography{monero-black-marble-flood}
\end{document}