# Suspected-spam fingerprint analysis.
#
# Expects `output.index` to already exist as a data.table (it is modified by
# reference below). Columns read in this section: block_height, block_date,
# tx_num, tx_hash, tx_fee, tx_size_bytes, tx_weight_bytes, number_of_inputs,
# number_of_outputs.
# NOTE(review): rows are presumably one per transaction output (transactions
# are obtained below by de-duplicating on tx_hash) -- confirm against the code
# that builds output.index.

# Suspected spam window. Heights are used for row-level filtering, dates for
# daily aggregation.
start.spam.height <- 3097764 # 2024-03-04 15:21:24
start.spam.date <- as.Date("2024-03-04")

end.spam.height <- 3114046 # 2024-03-27 06:30:37 UTC
end.spam.date <- as.Date("2024-03-27")

# Number of pre-spam weeks assumed to be present in output.index. The pre-spam
# count of fingerprint-matching transactions per weekday is divided by this to
# get a per-day baseline of "normal" transactions that match the fingerprint.
n.pre.spam.weeks <- 4

library(ggplot2)

# Weekday name of each block date; used to match spam-period days with the
# pre-spam baseline for the same day of the week.
output.index[, block_date.week.day := weekdays(block_date)]

# Each spam type has a human-readable label and an unevaluated filter
# expression (evaluated later inside output.index[...]).
spam.types <- list(
  list(
    fingerprint.text = "1in/2out 20 nanoneros/byte",
    fingerprint.crieria = substitute(
      floor((tx_fee/tx_size_bytes)/1000) %between% c(18, 22) &
        number_of_inputs == 1 &
        number_of_outputs == 2)),
  list(
    fingerprint.text = "1in/2out 20 or 320 nanoneros/byte",
    fingerprint.crieria = substitute(
      floor((tx_fee/tx_size_bytes)/1000) %between% c(315, 325) &
        number_of_inputs == 1 &
        number_of_outputs == 2)))

spam.results <- list()

# Definitions are cumulative: spam.results[[k]] combines fingerprints 1..k.
for (spam.type in seq_along(spam.types)) {

  spam.fingerprint.all <- list()
  spam.fingerprint.tx.all <- list()

  for (spam.type.sub in seq_len(spam.type)) {

    # Baseline: per weekday, the average number of pre-spam transactions per
    # day that match this fingerprint.
    # NOTE(review): tx_num != 1 presumably excludes the coinbase transaction
    # of each block -- confirm.
    pre.spam.level.week.day <- output.index[
      # block_height < start.spam.height &
      block_date < start.spam.date &
        tx_num != 1 &
        eval(spam.types[[spam.type.sub]]$fingerprint.crieria),
      .(txs.rm.from.spam.set = round(uniqueN(tx_hash)/n.pre.spam.weeks)),
      by = "block_date.week.day"]

    # All rows inside the spam window that match the fingerprint.
    spam.fingerprint <- output.index[
      block_height %between% c(start.spam.height, end.spam.height) &
        tx_num != 1 &
        eval(spam.types[[spam.type.sub]]$fingerprint.crieria), ]

    spam.fingerprint[, fingerprint := spam.types[[spam.type.sub]]$fingerprint.text]

    # One row per transaction.
    spam.fingerprint.tx <- spam.fingerprint[!duplicated(tx_hash), ]

    # Left join so that transactions on a weekday with no pre-spam baseline
    # row are kept (with nothing to remove) instead of being silently dropped
    # from the spam set by an inner join.
    spam.fingerprint.tx <- merge(spam.fingerprint.tx,
      pre.spam.level.week.day[, .(block_date.week.day, txs.rm.from.spam.set)],
      by = "block_date.week.day", all.x = TRUE)
    spam.fingerprint.tx[, txs.rm.from.spam.set :=
      ifelse(is.na(txs.rm.from.spam.set), 0, txs.rm.from.spam.set)]

    # Fixed seed so the random removal below is reproducible.
    set.seed(314)

    # For each day, randomly pick the baseline number of transactions (capped
    # at the number available that day) to take out of the spam set.
    tx_hash.to.rm <- spam.fingerprint.tx[,
      .(tx_hash.to.rm = sample(tx_hash,
        min(c(unique(txs.rm.from.spam.set), length(tx_hash))),
        replace = FALSE)),
      by = "block_date"]

    spam.fingerprint.tx[, txs.rm.from.spam.set := NULL]

    spam.fingerprint.tx <- spam.fingerprint.tx[
      ! tx_hash %chin% tx_hash.to.rm$tx_hash.to.rm, ]

    spam.fingerprint.all[[spam.type.sub]] <- spam.fingerprint
    spam.fingerprint.tx.all[[spam.type.sub]] <- spam.fingerprint.tx

  }

  spam.fingerprint <- rbindlist(spam.fingerprint.all)
  spam.fingerprint.tx <- rbindlist(spam.fingerprint.tx.all)

  # Everything that is not in the (baseline-adjusted) spam transaction set:
  # all rows outside the spam window, plus rows inside it whose transaction
  # was not kept as spam.
  non.spam.fingerprint <- output.index[ tx_num != 1 &
    (
      (! block_height %between% c(start.spam.height, end.spam.height)) |
        (block_height %between% c(start.spam.height, end.spam.height) &
          ! (tx_hash %chin% spam.fingerprint.tx$tx_hash))
    ), ]

  non.spam.fingerprint.tx <- non.spam.fingerprint[!duplicated(tx_hash), ]

  spam.results[[spam.type]] <- list(
    spam.fingerprint = spam.fingerprint,
    spam.fingerprint.tx = spam.fingerprint.tx,
    non.spam.fingerprint = non.spam.fingerprint,
    non.spam.fingerprint.tx = non.spam.fingerprint.tx
  )

}

# Totals over the suspected spam transactions for each cumulative definition.
# Every value is print()ed explicitly so it is also shown when the script is
# run through source(), which does not auto-print top-level values.
print(sum(spam.results[[1]]$spam.fingerprint.tx$tx_fee)/1e+12)
print(sum(spam.results[[1]]$spam.fingerprint.tx$tx_size_bytes) / 1000000000)
print(sum(spam.results[[1]]$spam.fingerprint.tx$tx_weight_bytes) / 1000000000)

print(sum(spam.results[[2]]$spam.fingerprint.tx$tx_fee)/1e+12)
print(sum(spam.results[[2]]$spam.fingerprint.tx$tx_size_bytes) / 1000000000)
print(sum(spam.results[[2]]$spam.fingerprint.tx$tx_weight_bytes) / 1000000000)
# Weight and size should be the same since all suspected spam is 2 outputs


# ---- Plot: daily volume of transactions matching the first fingerprint ----

all.tx.volume <- rbind(spam.results[[1]]$spam.fingerprint.tx,
  spam.results[[1]]$non.spam.fingerprint.tx, fill = TRUE)

all.tx.volume <- all.tx.volume[eval(spam.types[[1]]$fingerprint.crieria), ]

all.tx.volume.by.day <- all.tx.volume[, .(n.all.fingerprint.txs = .N),
  by = "block_date"]

setorder(all.tx.volume.by.day, block_date)

all.tx.volume.by.day <- all.tx.volume.by.day[-.N, ]
# Remove most recent day because it doesn't have full day of data

png("spam-fingerprint-tx-volume.png", width = 600, height = 600)

# Explicit print(): ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen under source().
print(
  ggplot(all.tx.volume.by.day,
    aes(x = as.POSIXct(block_date), y = n.all.fingerprint.txs / 1000)) +
    geom_line() +
    scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
    scale_x_datetime(date_breaks = "3 day", guide = guide_axis(angle = 90)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Volume of Monero transactions with spam fingerprint",
      subtitle = "1in/2out, 20 nanoneros/byte") +
    xlab(" Date github.com/Rucknium") +
    ylab("Number of transactions (thousands)") +
    theme(plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15),
      strip.text = element_text(size = 15))
)

dev.off()


# ---- Plot: daily volume by input/output count, most common types ----

all.tx.volume <- rbind(spam.results[[1]]$spam.fingerprint.tx,
  spam.results[[1]]$non.spam.fingerprint.tx, fill = TRUE)

all.tx.volume[, type.in.out := paste0(number_of_inputs, "in/",
  number_of_outputs, "out")]

txs.type.in.out <- all.tx.volume[, .(n.type.in.out = .N),
  by = c("block_date", "type.in.out")]

txs.type.in.out.sum <- txs.type.in.out[,
  .(sum.n.type.in.out = sum(n.type.in.out)), by = "type.in.out"]
setorder(txs.type.in.out.sum, - sum.n.type.in.out)

# Keep the (up to) 8 types with the largest total count.
most.common.tx.type <- head(txs.type.in.out.sum$type.in.out, 8)

txs.type.in.out <- txs.type.in.out[type.in.out %in% most.common.tx.type, ]

txs.type.in.out <- txs.type.in.out[block_date != max(block_date), ]
# Remove most recent date that does not have full day of data

# Row order determines the legend order via unique(type.in.out) below.
setorder(txs.type.in.out, block_date, n.type.in.out)

png("in-out-tx-type-volume.png", width = 800, height = 800)

print(
  ggplot(txs.type.in.out, aes(x = block_date, y = n.type.in.out / 1000,
    colour = factor(type.in.out, levels = rev(unique(type.in.out))))) +
    geom_line(linewidth = 1.25) +
    scale_y_log10() +
    scale_x_date(expand = c(0, 0), date_breaks = "2 day",
      guide = guide_axis(angle = 90)) +
    ggtitle("Transaction volume by number of inputs and outputs (log scale)") +
    xlab(" Date github.com/Rucknium") +
    ylab("Thousands of transactions (log scale)") +
    labs(colour = "Type") +
    theme(legend.position = "top",
      legend.text = element_text(size = 12),
      legend.title = element_text(size = 15),
      plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15),
      strip.text = element_text(size = 15)) +
    guides(colour = guide_legend(nrow = 2, byrow = FALSE,
      override.aes = list(linewidth = 5))) +
    scale_color_brewer(palette = "Accent")
)

dev.off()


# ---- Plot: daily share of rows attributed to the suspected spammer ----

all.output.volume <- rbind(spam.results[[1]]$spam.fingerprint,
  spam.results[[1]]$non.spam.fingerprint, fill = TRUE)

# `fingerprint` is only set on spam.fingerprint rows; after rbind(fill = TRUE)
# it is NA on the non-spam rows.
all.output.volume.by.day <- all.output.volume[,
  .(non.spam = sum(is.na(fingerprint)), spam = sum(!is.na(fingerprint))),
  by = "block_date"]

all.output.volume.by.day[, spam.share.outputs := spam/(non.spam + spam) ]

# Sort by date first: grouped results come back in first-appearance order,
# and the spam-window rows were bound first, so without sorting the last row
# is not necessarily the most recent day.
setorder(all.output.volume.by.day, block_date)

all.output.volume.by.day <- all.output.volume.by.day[-.N, ]
# Remove most recent day because it doesn't have full day of data

png("spam-share-outputs.png", width = 600, height = 600)

print(
  ggplot(all.output.volume.by.day[
    block_date %between% c(start.spam.date, end.spam.date), ],
    aes(x = as.POSIXct(block_date), y = spam.share.outputs)) +
    geom_line() +
    scale_y_continuous(limits = c(0, 1), expand = c(0, 0),
      labels = scales::label_percent()) +
    scale_x_datetime(date_breaks = "day", guide = guide_axis(angle = 90)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Spam share of outputs") +
    xlab(" Date github.com/Rucknium") +
    ylab("Daily share of outputs owned by suspected spammer") +
    theme(plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15),
      strip.text = element_text(size = 15))
)

dev.off()

mean.spam.share.outputs <- all.output.volume.by.day[
  block_date %between% c(start.spam.date + 1, end.spam.date - 1),
  mean(spam.share.outputs)]
# Skip the first and last days because suspected spam started in the middle of the days


# ---- Plot: binomial model of effective ring size ----

# NOTE(review): x = 1:16 is paired with dbinom(0:15, size = <nominal ring
# size>, ...), i.e. x = successes + 1. Confirm whether `size` is meant to be
# the nominal ring size or the number of decoys (nominal ring size - 1); with
# size = 16 the outcome 16 (x = 17) is not included in the table.
binom.ring.size <- rbind(
  data.table(x = 1:16,
    y = dbinom(0:15, size = 11, prob = 1 - 192/233),
    Model = paste0("Ring size: 11, Share of adversary outputs: ",
      round(100*192/233), "% (Chervinski et al. 2021)")),
  data.table(x = 1:16,
    y = dbinom(0:15, size = 16, prob = 1 - mean.spam.share.outputs),
    Model = paste0("Ring size: 16, Share of adversary outputs: ",
      round(100*mean.spam.share.outputs), "% (Estimated March 2024)")))
# "prob = 1 - 192/233" because:
# Chervinski et al. (2021)
# "Scenario II analyzes the impact of an attack where the malicious actor creates
# transactions with 2 inputs and 2 outputs, generating 96 transactions and 192
# malicious outputs in each block for a total of 233 outputs per block when
# adding the 41 user generated outputs."

print(binom.ring.size[, .(mean.eff.ring.size = sum(x*y)), by = "Model"])

png("effective-ring-size-binomial-pmf.png", width = 500, height = 600)

print(
  ggplot(binom.ring.size, aes(x = factor(x), y = y, fill = Model)) +
    geom_bar(stat = "identity", position = position_dodge(), width = 0.8) +
    geom_line(aes(x = factor(x), y = y, group = Model, colour = Model),
      linewidth = 1.25) +
    scale_y_continuous(labels = scales::label_percent()) +
    ggtitle("Long-term projected effective ring sizes, binomial assumption",
      subtitle = "Probability mass function of binomial(nominal_ring_size, 1 - adversary_outputs_share)") +
    xlab(" Effective ring size github.com/Rucknium") +
    ylab("Share of rings") +
    labs(colour = "Ring size") +
    theme(legend.position = "top",
      legend.text = element_text(size = 13),
      legend.title = element_blank(),
      plot.title = element_text(size = 16),
      plot.subtitle = element_text(size = 11.5),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15),
      strip.text = element_text(size = 15)) +
    guides(fill = guide_legend(nrow = 2), colour = waiver())
)

dev.off()


# ---- Plot: binomial model vs. chain reaction analysis ----

# Last row of Table IV of
# Chervinski, J. O., Kreutz, D., & Yu, J. 2021, Analysis of transaction flooding attacks against Monero.
# Paper presented at 2021 IEEE International Conference on Blockchain and Cryptocurrency (ICBC).
# Values are percentages; they are divided by 100 below.
Chervinski.2021 <- c(
  14.4701, 30.8318, 29.5862, 16.8408, 6.315, 1.6359, 0.2803, 0.0366,
  0.0031, 0.0002, 0
)

Chervinski.ring.size <- rbind(
  data.table(x = 1:16,
    y = dbinom(0:15, size = 11, prob = 1 - 192/233),
    Model = paste0("Binomial assumption (n = 11, p = ",
      round(192/233, 2), ")")),
  # Pad the 11 tabulated values with zeros so both tables have 16 rows.
  data.table(x = 1:16,
    y = c(Chervinski.2021/100, rep(0, 16 - length(Chervinski.2021))),
    Model = "12 month spamming, with chain reaction analysis (Chervinski et al. 2021)"))

Chervinski.ring.size <- Chervinski.ring.size[x <= 11, ]

print(Chervinski.ring.size[, .(mean.eff.ring.size = sum(x*y)), by = "Model"])

png("chervinski-chain-reaction.png", width = 500, height = 600)

print(
  ggplot(Chervinski.ring.size, aes(x = factor(x), y = y, fill = Model)) +
    geom_bar(stat = "identity", position = position_dodge(), width = 0.8) +
    scale_y_continuous(labels = scales::label_percent()) +
    ggtitle("Long-term effective ring sizes, binomial and chain reaction",
      subtitle = "Probability mass function of binomial(nominal_ring_size, 1 - adversary_outputs_share)") +
    xlab(" Effective ring size github.com/Rucknium") +
    ylab("Share of rings") +
    labs(colour = "Ring size") +
    theme(legend.position = "top",
      legend.text = element_text(size = 13),
      legend.title = element_blank(),
      plot.title = element_text(size = 16),
      plot.subtitle = element_text(size = 11.5),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15),
      strip.text = element_text(size = 15)) +
    guides(fill = guide_legend(nrow = 2), colour = waiver())
)

dev.off()