mirror of
https://github.com/Rucknium/misc-research.git
synced 2025-01-18 08:14:31 +00:00
302 lines
12 KiB
R
302 lines
12 KiB
R
|
|
|
|
# Suspected spam window expressed as block heights.
# Trailing comments give the timestamp recorded for each height; the start
# timestamp has no stated time zone (presumably UTC like the end one - confirm).
start.spam.height <- 3097764 # 2024-03-04 15:21:24
end.spam.height <- 3114046 # 2024-03-27 06:30:37 UTC

# The same window expressed as calendar dates (first and last affected day).
start.spam.date <- as.Date("2024-03-04")
end.spam.date <- as.Date("2024-03-27")
|
|
|
|
|
|
|
|
library(ggplot2)
|
|
|
|
# Add, by reference, the weekday name of every row's block_date to output.index.
# The column is used further down as the grouping / merge key for the
# per-weekday pre-spam baseline. output.index is not created in this script and
# must already exist as a data.table with a block_date column.
# weekdays() returns locale-dependent names, so the whole analysis should run
# in a single locale.
output.index[, block_date.week.day := weekdays(block_date)]
|
|
|
|
|
|
# Definitions of the suspected spam fingerprints.
#
# Each element is a list with:
#   fingerprint.text    - human-readable label attached to matching rows.
#   fingerprint.crieria - unevaluated logical expression that is later eval()'d
#                         against the columns tx_fee, tx_size_bytes,
#                         number_of_inputs and number_of_outputs.
#                         The element name is misspelled ("crieria") but is kept
#                         because the rest of the script reads it by this name.
#
# quote() is used instead of substitute(): both return the expression
# unevaluated at top level, but substitute() would replace symbols with local
# values if this script were ever sourced into a non-global environment.
#
# The second label reads "20 or 320" although its own criteria only match the
# ~320 band: the classification loop accumulates fingerprints 1..k, so result
# number 2 is the union of the ~20 band and the ~320 band.
spam.types <- list(
  list(
    fingerprint.text = "1in/2out 20 nanoneros/byte",
    fingerprint.crieria = quote(
      floor((tx_fee/tx_size_bytes)/1000) %between% c(18, 22) &
        number_of_inputs == 1 &
        number_of_outputs == 2)),
  list(
    fingerprint.text = "1in/2out 20 or 320 nanoneros/byte",
    fingerprint.crieria = quote(
      floor((tx_fee/tx_size_bytes)/1000) %between% c(315, 325) &
        number_of_inputs == 1 &
        number_of_outputs == 2)))
|
|
|
|
|
|
# Classify transactions/outputs into "suspected spam" and "non-spam".
#
# For every fingerprint k in spam.types, spam.results[[k]] holds four tables
# built from the union of fingerprints 1..k:
#   spam.fingerprint        - output-level rows classified as spam
#   spam.fingerprint.tx     - one row per spam transaction
#   non.spam.fingerprint    - all remaining non-first-in-block output rows
#   non.spam.fingerprint.tx - one row per remaining transaction
#
# Method, per fingerprint:
#   1. Count pre-spam transactions matching the fingerprint per weekday and
#      average over the pre-spam weeks (the "normal" volume).
#   2. Take every matching transaction inside the spam block-height window.
#   3. For each day, randomly move the weekday's normal volume of transactions
#      back to the non-spam set (fixed seed, so the draw is reproducible).
#
# Changes relative to the previous version of this block:
#   * Each fingerprint is processed once and accumulated, instead of
#     re-processing fingerprints 1..k for every k (same result, less work).
#   * The baseline merge keeps spam transactions whose weekday has no pre-spam
#     baseline row (baseline treated as 0) instead of silently dropping them.
#   * The output-level spam table is restricted to the transactions that stay
#     in the spam set, so an output is never in both the spam and the non-spam
#     table (previously the sampled-out transactions' outputs were in both).

# Number of pre-spam weeks over which the weekday baseline is averaged.
# NOTE: assumes the number of pre-spam weeks in output.index is 4.
n.pre.spam.weeks <- 4

spam.results <- list()

# Per-fingerprint tables; element k is appended in iteration k.
spam.fingerprint.all <- list()
spam.fingerprint.tx.all <- list()

for (spam.type in seq_along(spam.types)) {

  # Normal per-weekday transaction volume before the spam started.
  # The cut is by calendar date (not block height), so the partial first spam
  # day is excluded. tx_num != 1 skips the first transaction of each block
  # (presumably the coinbase transaction - confirm).
  pre.spam.level.week.day <- output.index[
    block_date < start.spam.date &
      tx_num != 1 &
      eval(spam.types[[spam.type]]$fingerprint.crieria),
    .(txs.rm.from.spam.set = round(uniqueN(tx_hash) / n.pre.spam.weeks)),
    by = "block_date.week.day"]

  # All output rows matching the fingerprint inside the spam window.
  spam.fingerprint.type <- output.index[
    block_height %between% c(start.spam.height, end.spam.height) &
      tx_num != 1 &
      eval(spam.types[[spam.type]]$fingerprint.crieria), ]

  spam.fingerprint.type[, fingerprint := spam.types[[spam.type]]$fingerprint.text]

  # One row per transaction.
  spam.fingerprint.tx.type <- spam.fingerprint.type[!duplicated(tx_hash), ]

  # Attach the weekday baseline. all.x = TRUE keeps transactions on weekdays
  # without a baseline row; their baseline is set to 0 (nothing to remove).
  spam.fingerprint.tx.type <- merge(spam.fingerprint.tx.type,
    pre.spam.level.week.day[, .(block_date.week.day, txs.rm.from.spam.set)],
    by = "block_date.week.day", all.x = TRUE)
  spam.fingerprint.tx.type[is.na(txs.rm.from.spam.set), txs.rm.from.spam.set := 0]

  # Fixed seed: the random choice of "normal" transactions is reproducible.
  set.seed(314)

  # Per day, draw as many transactions as the weekday baseline says are normal,
  # capped at the number of transactions available that day.
  tx_hash.to.rm <- spam.fingerprint.tx.type[,
    .(tx_hash.to.rm = sample(tx_hash,
      min(c(unique(txs.rm.from.spam.set), .N)), replace = FALSE)),
    by = "block_date"]

  spam.fingerprint.tx.type[, txs.rm.from.spam.set := NULL]
  spam.fingerprint.tx.type <- spam.fingerprint.tx.type[
    ! tx_hash %chin% tx_hash.to.rm$tx_hash.to.rm, ]

  # Keep the output-level table consistent with the transaction-level table.
  spam.fingerprint.type <- spam.fingerprint.type[
    tx_hash %chin% spam.fingerprint.tx.type$tx_hash, ]

  spam.fingerprint.all[[spam.type]] <- spam.fingerprint.type
  spam.fingerprint.tx.all[[spam.type]] <- spam.fingerprint.tx.type

  # Union of fingerprints 1..spam.type.
  spam.fingerprint <- rbindlist(spam.fingerprint.all)
  spam.fingerprint.tx <- rbindlist(spam.fingerprint.tx.all)

  # Everything else: rows outside the spam window, plus rows inside the window
  # whose transaction is not in the spam set.
  non.spam.fingerprint <- output.index[ tx_num != 1 &
      (
        (! block_height %between% c(start.spam.height, end.spam.height)) |
          (block_height %between% c(start.spam.height, end.spam.height) &
            ! (tx_hash %chin% spam.fingerprint.tx$tx_hash))
      ), ]

  non.spam.fingerprint.tx <- non.spam.fingerprint[!duplicated(tx_hash), ]

  spam.results[[spam.type]] <- list(
    spam.fingerprint = spam.fingerprint, spam.fingerprint.tx = spam.fingerprint.tx,
    non.spam.fingerprint = non.spam.fingerprint, non.spam.fingerprint.tx = non.spam.fingerprint.tx
  )

}
|
|
|
|
|
|
# Headline totals for the suspected spam transactions of each result set:
#   1. total fees, tx_fee / 1e12 (presumably piconero -> XMR - confirm)
#   2. total size in units of 10^9 bytes
#   3. total weight in units of 10^9 bytes
# All three values are print()-ed explicitly. The weight totals used to be bare
# expressions, which show nothing when the script is run through source().
for (result.index in seq_along(spam.results)) {
  spam.txs <- spam.results[[result.index]]$spam.fingerprint.tx
  print(sum(spam.txs$tx_fee)/1e+12)
  print(sum(spam.txs$tx_size_bytes) / 1000000000)
  print(sum(spam.txs$tx_weight_bytes) / 1000000000)
}
# Weight and size should be the same since all suspected spam is 2 outputs
|
|
|
|
|
|
|
|
# Daily number of transactions (spam and non-spam together) that match the
# first fingerprint.

# Stack the spam and non-spam transaction tables of the first result set;
# columns present in only one of them are filled with NA.
all.tx.volume <- rbindlist(
  list(spam.results[[1]]$spam.fingerprint.tx,
    spam.results[[1]]$non.spam.fingerprint.tx),
  use.names = TRUE, fill = TRUE)

# Keep only transactions with the first fingerprint, then count per day.
all.tx.volume <- all.tx.volume[eval(spam.types[[1]]$fingerprint.crieria), ]
all.tx.volume.by.day <- all.tx.volume[, list(n.all.fingerprint.txs = .N), by = block_date]

# Sort chronologically so that the last row is the most recent day ...
setorderv(all.tx.volume.by.day, "block_date")
# ... and remove most recent day because it doesn't have full day of data
all.tx.volume.by.day <- head(all.tx.volume.by.day, -1L)
|
|
|
|
# Line chart of the daily number of transactions carrying the first spam
# fingerprint, written to spam-fingerprint-tx-volume.png.
png("spam-fingerprint-tx-volume.png", width = 600, height = 600)

# print() is required: a ggplot object is only drawn when printed, and
# auto-printing does not happen under source() or inside a function, which
# would leave an empty PNG.
# `limits` is spelled out in full instead of relying on partial matching.
print(
  ggplot(all.tx.volume.by.day, aes(x = as.POSIXct(block_date), y = n.all.fingerprint.txs / 1000)) +
    geom_line() +
    scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
    scale_x_datetime(date_breaks = "3 day", guide = guide_axis(angle = 90)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Volume of Monero transactions with spam fingerprint",
      subtitle = "1in/2out, 20 nanoneros/byte") +
    xlab(" Date github.com/Rucknium") +
    ylab("Number of transactions (thousands)") +
    theme(plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15), strip.text = element_text(size = 15))
)

dev.off()
|
|
|
|
|
|
|
|
|
|
# Daily transaction counts by "<inputs>in/<outputs>out" type, restricted to the
# eight types with the highest total count.

# All transactions (spam + non-spam) of the first result set.
all.tx.volume <- rbindlist(
  list(spam.results[[1]]$spam.fingerprint.tx,
    spam.results[[1]]$non.spam.fingerprint.tx),
  use.names = TRUE, fill = TRUE)

# Label every transaction with its input/output shape, e.g. "1in/2out".
all.tx.volume[, type.in.out := paste0(number_of_inputs, "in/", number_of_outputs, "out")]

# Count per day and type.
txs.type.in.out <- all.tx.volume[, list(n.type.in.out = .N), by = list(block_date, type.in.out)]

# Rank the types by their total count over the whole data set.
txs.type.in.out.sum <- txs.type.in.out[, list(sum.n.type.in.out = sum(n.type.in.out)), by = type.in.out]
txs.type.in.out.sum <- txs.type.in.out.sum[order(-sum.n.type.in.out)]

# The eight most frequent types.
most.common.tx.type <- txs.type.in.out.sum$type.in.out[seq_len(8)]

txs.type.in.out <- txs.type.in.out[type.in.out %in% most.common.tx.type, ]

# Remove most recent date that does not have full day of data
# (the maximum is taken after the type filter above, as a separate step).
txs.type.in.out <- txs.type.in.out[block_date != max(block_date), ]

# Row order matters: the plot derives its legend order from the order in which
# the types first appear in this table.
setorderv(txs.type.in.out, c("block_date", "n.type.in.out"))
|
|
|
|
|
|
# Log-scale line chart of daily transaction counts per input/output type,
# written to in-out-tx-type-volume.png.
png("in-out-tx-type-volume.png", width = 800, height = 800)

# print() is required: a ggplot object is only drawn when printed, and
# auto-printing does not happen under source() or inside a function, which
# would leave an empty PNG.
# The colour factor takes its levels from the reversed order of first
# appearance of each type in txs.type.in.out.
print(
  ggplot(txs.type.in.out, aes(x = block_date, y = n.type.in.out / 1000,
    colour = factor(type.in.out, levels = rev(unique(type.in.out))))) +
    geom_line(linewidth = 1.25) +
    scale_y_log10() +
    scale_x_date(expand = c(0, 0), date_breaks = "2 day", guide = guide_axis(angle = 90)) +
    ggtitle("Transaction volume by number of inputs and outputs (log scale)") +
    xlab(" Date github.com/Rucknium") +
    ylab("Thousands of transactions (log scale)") +
    labs(colour = "Type") +
    theme(legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 15),
      plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
    guides(colour = guide_legend(nrow = 2, byrow = FALSE, override.aes = list(linewidth = 5))) +
    scale_color_brewer(palette = "Accent")
)

dev.off()
|
|
|
|
|
|
|
|
|
|
|
|
# Daily share of outputs that belong to suspected spam transactions.

# Stack spam and non-spam output rows of the first result set. Only the spam
# rows carry a `fingerprint` label; fill = TRUE gives the non-spam rows NA
# there (assuming output.index itself has no `fingerprint` column), which is
# what separates the two groups below.
all.output.volume <- rbind(spam.results[[1]]$spam.fingerprint, spam.results[[1]]$non.spam.fingerprint, fill = TRUE)

all.output.volume.by.day <- all.output.volume[, .(non.spam = sum(is.na(fingerprint)), spam = sum(!is.na(fingerprint))), by = "block_date"]

all.output.volume.by.day[, spam.share.outputs := spam/(non.spam + spam) ]

# Grouping returns days in order of first appearance (spam-window days first,
# because the spam rows are stacked on top), so sort chronologically before
# dropping the last row. Without the sort an arbitrary day was dropped.
setorder(all.output.volume.by.day, block_date)

all.output.volume.by.day <- all.output.volume.by.day[-.N, ]
# Remove most recent day because it doesn't have full day of data
|
|
|
|
# Line chart of the daily spam share of outputs during the spam window,
# written to spam-share-outputs.png.
png("spam-share-outputs.png", width = 600, height = 600)

# print() is required: a ggplot object is only drawn when printed, and
# auto-printing does not happen under source() or inside a function, which
# would leave an empty PNG.
# `limits` is spelled out in full instead of relying on partial matching.
print(
  ggplot(all.output.volume.by.day[block_date %between% c(start.spam.date, end.spam.date), ], aes(x = as.POSIXct(block_date), y = spam.share.outputs)) +
    geom_line() +
    scale_y_continuous(limits = c(0, 1), expand = c(0, 0), labels = scales::label_percent()) +
    scale_x_datetime(date_breaks = "day", guide = guide_axis(angle = 90)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Spam share of outputs") +
    xlab(" Date github.com/Rucknium") +
    ylab("Daily share of outputs owned by suspected spammer") +
    theme(plot.title = element_text(size = 20),
      plot.subtitle = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15), strip.text = element_text(size = 15))
)

dev.off()
|
|
|
|
|
|
|
|
# Mean daily spam share of outputs over the full spam days.
# Skip the first and last days because suspected spam started in the middle of the days
mean.spam.share.outputs <- all.output.volume.by.day[
  block_date >= start.spam.date + 1 & block_date <= end.spam.date - 1,
  mean(spam.share.outputs)]

# Adversary share of outputs in Chervinski et al. (2021), Scenario II:
# "Scenario II analyzes the impact of an attack where the malicious actor creates
# transactions with 2 inputs and 2 outputs, generating 96 transactions and 192
# malicious outputs in each block for a total of 233 outputs per block when
# adding the 41 user generated outputs."
chervinski.adversary.share <- 192/233

# Binomial model of the effective ring size: x = 1 + number of ring members
# that are not adversary outputs, each being non-adversary with probability
# 1 - adversary share.
# NOTE(review): `size` is the nominal ring size (11 / 16). If the real spend is
# meant to be the "+ 1" in x, the remaining draws would be ring size - 1
# decoys (size = 10 / 15) - confirm the intended model.
binom.ring.size <- rbindlist(list(
  data.table(
    x = 1:16,
    y = dbinom(0:15, size = 11, prob = 1 - chervinski.adversary.share),
    Model = paste0("Ring size: 11, Share of adversary outputs: ",
      round(100 * chervinski.adversary.share), "% (Chervinski et al. 2021)")),
  data.table(
    x = 1:16,
    y = dbinom(0:15, size = 16, prob = 1 - mean.spam.share.outputs),
    Model = paste0("Ring size: 16, Share of adversary outputs: ",
      round(100 * mean.spam.share.outputs), "% (Estimated March 2024)"))))

# Mean effective ring size implied by each model.
print(binom.ring.size[, list(mean.eff.ring.size = sum(x * y)), by = Model])
|
|
|
|
# Bar + line chart of the two binomial effective-ring-size distributions,
# written to effective-ring-size-binomial-pmf.png.
png("effective-ring-size-binomial-pmf.png", width = 500, height = 600)

# print() is required: a ggplot object is only drawn when printed, and
# auto-printing does not happen under source() or inside a function, which
# would leave an empty PNG.
print(
  ggplot(binom.ring.size, aes(x = factor(x), y = y, fill = Model)) +
    geom_bar(stat = "identity", position = position_dodge(), width = 0.8) +
    geom_line(aes(x = factor(x), y = y, group = Model, colour = Model), linewidth = 1.25) +
    scale_y_continuous(labels = scales::label_percent()) +
    ggtitle("Long-term projected effective ring sizes, binomial assumption",
      subtitle = "Probability mass function of binomial(nominal_ring_size, 1 - adversary_outputs_share)") +
    xlab(" Effective ring size github.com/Rucknium") +
    ylab("Share of rings") +
    labs(colour = "Ring size") +
    theme(legend.position = "top", legend.text = element_text(size = 13), legend.title = element_blank(),
      plot.title = element_text(size = 16),
      plot.subtitle = element_text(size = 11.5),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
    guides(fill = guide_legend(nrow = 2), colour = waiver())
)

dev.off()
|
|
|
|
|
|
|
|
# Effective ring size distribution (in percent, for effective ring sizes
# 1, 2, ..., 11) taken from the last row of Table IV of
# Chervinski, J. O., Kreutz, D., & Yu, J. 2021, Analysis of transaction flooding attacks against Monero.
# Paper presented at 2021 IEEE International Conference on Blockchain and Cryptocurrency (ICBC).
Chervinski.2021 <- c(
  14.4701, 30.8318, 29.5862, 16.8408, 6.315, 1.6359,
  0.2803, 0.0366, 0.0031, 0.0002, 0
)

# Effective ring sizes covered by the comparison (1..11).
chervinski.ring.sizes <- seq_along(Chervinski.2021)

# Binomial model next to the published chain-reaction figures, one row per
# model and effective ring size.
# NOTE(review): the legend shows p = round(192/233, 2), the adversary share,
# while dbinom() is called with prob = 1 - 192/233 - confirm the label.
# NOTE(review): the first published value (14.47%) is close to (192/233)^10,
# i.e. dbinom(0, size = 10, prob = 1 - 192/233), which suggests the paper's
# numbers correspond to 10 decoys rather than size = 11 - confirm.
Chervinski.ring.size <- rbind(
  data.table(
    x = chervinski.ring.sizes,
    y = dbinom(chervinski.ring.sizes - 1L, size = 11, prob = 1 - 192/233),
    Model = paste0("Binomial assumption (n = 11, p = ", round(192/233, 2), ")")),
  data.table(
    x = chervinski.ring.sizes,
    y = Chervinski.2021/100,
    Model = "12 month spamming, with chain reaction analysis (Chervinski et al. 2021)"))

# Mean effective ring size implied by each model.
print(Chervinski.ring.size[, list(mean.eff.ring.size = sum(x * y)), by = Model])
|
|
|
|
|
|
# Bar chart comparing the binomial model with the chain-reaction figures of
# Chervinski et al. (2021), written to chervinski-chain-reaction.png.
png("chervinski-chain-reaction.png", width = 500, height = 600)

# print() is required: a ggplot object is only drawn when printed, and
# auto-printing does not happen under source() or inside a function, which
# would leave an empty PNG.
print(
  ggplot(Chervinski.ring.size, aes(x = factor(x), y = y, fill = Model)) +
    geom_bar(stat = "identity", position = position_dodge(), width = 0.8) +
    scale_y_continuous(labels = scales::label_percent()) +
    ggtitle("Long-term effective ring sizes, binomial and chain reaction",
      subtitle = "Probability mass function of binomial(nominal_ring_size, 1 - adversary_outputs_share)") +
    xlab(" Effective ring size github.com/Rucknium") +
    ylab("Share of rings") +
    labs(colour = "Ring size") +
    theme(legend.position = "top", legend.text = element_text(size = 13), legend.title = element_blank(),
      plot.title = element_text(size = 16),
      plot.subtitle = element_text(size = 11.5),
      axis.text = element_text(size = 15),
      axis.title.x = element_text(size = 15, margin = margin(t = 10)),
      axis.title.y = element_text(size = 15), strip.text = element_text(size = 15)) +
    guides(fill = guide_legend(nrow = 2), colour = waiver())
)

dev.off()
|
|
|
|
|
|
|