mirror of
https://github.com/Rucknium/misc-research.git
synced 2025-01-31 06:15:51 +00:00
Add General-Blockchin-Age-of-Spent-Outputs
This commit is contained in:
parent
4d32a4b60a
commit
27abd63290
4 changed files with 390 additions and 0 deletions
103
General-Blockchain-Age-of-Spent-Outputs/construct-edgelist.R
Normal file
103
General-Blockchain-Age-of-Spent-Outputs/construct-edgelist.R
Normal file
|
@ -0,0 +1,103 @@
|
|||
# install.packages("data.table")
|
||||
# install.packages("RSQLite")
|
||||
# install.packages("DBI")
|
||||
|
||||
library(data.table)
|
||||
library(RSQLite)
|
||||
library(DBI)
|
||||
|
||||
data.dir <- ""
|
||||
# Input data directory here, with trailing "/"
|
||||
|
||||
source("https://gist.githubusercontent.com/jeffwong/5925000/raw/bf02ed0dd2963169a91664be02fb18e45c4d1e20/sqlitewritetable.R")
|
||||
# From https://gist.github.com/jeffwong/5925000
|
||||
# Modifies RSQLite's sqliteWriteTable function so as to reject duplicates
|
||||
|
||||
con <- DBI::dbConnect(RSQLite::SQLite(), paste0(data.dir, "tx-graph-node-indices.db"))
|
||||
|
||||
DBI::dbExecute(con, "CREATE TABLE nodes (
|
||||
node TEXT,
|
||||
node_index INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
unique(node)
|
||||
)")
|
||||
|
||||
|
||||
DBI::dbWriteTable(con, "edgelist",
|
||||
data.frame(origin = character(0), destination = character(0), value = numeric(0),
|
||||
block_height = integer(0), stringsAsFactors = FALSE))
|
||||
|
||||
tx.graph.files <- list.files(paste0(data.dir, "tx_graphs/"))
|
||||
tx.graph.files <- tx.graph.files[grepl("^tx_graph.+rds$", tx.graph.files)]
|
||||
tx.graph.files <- sort(tx.graph.files)
|
||||
|
||||
tx.graph.indexed <- vector("list", length(tx.graph.files))
|
||||
names(tx.graph.indexed) <- tx.graph.files
|
||||
|
||||
for (file.iter in tx.graph.files) {
|
||||
|
||||
tx.graph.chunk <- readRDS(paste0(data.dir, "tx_graphs/", file.iter))
|
||||
|
||||
tx.graph.chunk <-
|
||||
rbind(
|
||||
data.table(origin = paste0(tx.graph.chunk$incoming$origin.txid, "-",
|
||||
formatC(tx.graph.chunk$incoming$origin.position, width = 4, format = "f", flag = "0", digits = 0)),
|
||||
destination = tx.graph.chunk$incoming$txid,
|
||||
value = NA_real_,
|
||||
block_height = as.integer(tx.graph.chunk$incoming$block_height), stringsAsFactors = FALSE),
|
||||
data.table(origin = tx.graph.chunk$outgoing$txid,
|
||||
destination = paste0(tx.graph.chunk$outgoing$txid, "-",
|
||||
formatC(tx.graph.chunk$outgoing$position, width = 4, format = "f", flag = "0", digits = 0)),
|
||||
value = tx.graph.chunk$outgoing$value,
|
||||
block_height = as.integer(tx.graph.chunk$outgoing$block_height), stringsAsFactors = FALSE)
|
||||
)
|
||||
|
||||
DBI::dbWriteTable(con, "edgelist",
|
||||
tx.graph.chunk, append = TRUE)
|
||||
|
||||
cat(file.iter, base::date(), "\n")
|
||||
|
||||
if (nrow(tx.graph.chunk) == 0) {next}
|
||||
|
||||
new.nodes <- unique(c(tx.graph.chunk$origin, tx.graph.chunk$destination))
|
||||
|
||||
nodes.to.insert <- data.frame(node = new.nodes, node_index = NA, stringsAsFactors = FALSE)
|
||||
|
||||
mysqliteWriteTable(con, "nodes",
|
||||
nodes.to.insert, append = TRUE, row.names = FALSE, ignore = TRUE)
|
||||
|
||||
cat(nrow(nodes.to.insert), "Nodes written\n")
|
||||
|
||||
}
|
||||
|
||||
|
||||
DBI::dbWriteTable(con, "edgelist_intermediate_1",
|
||||
data.frame(origin = character(0), destination = character(0),
|
||||
value = numeric(0), block_height = integer(0),
|
||||
node_index = integer(0), stringsAsFactors = FALSE), overwrite = TRUE)
|
||||
|
||||
base::date()
|
||||
DBI::dbExecute(con, "INSERT INTO edgelist_intermediate_1 SELECT
|
||||
origin, destination, value, block_height, node_index FROM
|
||||
edgelist JOIN nodes ON edgelist.origin = nodes.node")
|
||||
base::date()
|
||||
|
||||
|
||||
DBI::dbExecute(con,
|
||||
"ALTER TABLE edgelist_intermediate_1 RENAME COLUMN node_index TO origin_index")
|
||||
|
||||
|
||||
DBI::dbWriteTable(con, "edgelist_intermediate_2",
|
||||
data.frame(origin = character(0), destination = character(0),
|
||||
origin_index = integer(0), node_index = integer(0),
|
||||
value = numeric(0), block_height = integer(0), stringsAsFactors = FALSE))
|
||||
|
||||
base::date()
|
||||
DBI::dbExecute(con, "INSERT INTO edgelist_intermediate_2 SELECT
|
||||
origin, destination, origin_index, node_index, value, block_height FROM
|
||||
edgelist_intermediate_1 JOIN nodes ON edgelist_intermediate_1.destination = nodes.node")
|
||||
base::date()
|
||||
|
||||
DBI::dbExecute(con,
|
||||
"ALTER TABLE edgelist_intermediate_2 RENAME COLUMN node_index TO destination_index")
|
||||
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
library(data.table)
|
||||
library(RSQLite)
|
||||
library(DBI)
|
||||
# NOTE: Also need lubridate package installed, but not loading it due to
|
||||
# it masking functions
|
||||
|
||||
data.dir <- ""
|
||||
# Input data directory here, with trailing "/"
|
||||
|
||||
|
||||
con <- DBI::dbConnect(RSQLite::SQLite(), paste0(data.dir, "tx-graph-node-indices.db"))
|
||||
|
||||
master.edgelist <- DBI::dbGetQuery(con,
|
||||
"SELECT origin_index, destination_index,block_height,value FROM edgelist_intermediate_2")
|
||||
|
||||
master.edgelist.output.created <- master.edgelist[
|
||||
(! is.na(master.edgelist$value)) & master.edgelist$value > 0 , c("destination_index", "block_height")]
|
||||
colnames(master.edgelist.output.created) <- c("output_index", "output.created.block_height")
|
||||
setDT(master.edgelist.output.created)
|
||||
|
||||
master.edgelist.output.spent <- master.edgelist[, c("origin_index", "block_height")]
|
||||
colnames(master.edgelist.output.spent) <- c("output_index", "output.spent.block_height")
|
||||
setDT(master.edgelist.output.spent)
|
||||
|
||||
# Only include positive _value_s for output created, since that's the value of the created output
|
||||
# Then, below do an "inner merge" to get the proper outputs on the spending side
|
||||
|
||||
rm(master.edgelist)
|
||||
master.edgelist.output.spent <- merge(master.edgelist.output.created, master.edgelist.output.spent)
|
||||
rm(master.edgelist.output.created)
|
||||
|
||||
|
||||
|
||||
block.times <- readRDS(paste0(data.dir, "block_times.rds"))
|
||||
colnames(block.times) <- c("output.created.block_height", "output.created.block_time")
|
||||
setDT(block.times)
|
||||
|
||||
master.edgelist.output.spent <- merge(master.edgelist.output.spent, block.times, by = "output.created.block_height")
|
||||
|
||||
colnames(block.times) <- c("output.spent.block_height", "output.spent.block_time")
|
||||
|
||||
master.edgelist.output.spent <- merge(master.edgelist.output.spent, block.times, by = "output.spent.block_height")
|
||||
|
||||
# TODO: Explore phenomenon of out-of-order block timestamps and decide what to do about them
|
||||
|
||||
master.edgelist.output.spent[, output.spend.age := output.spent.block_time - output.created.block_time]
|
||||
|
||||
# These reduce RAM usage if desired:
|
||||
# master.edgelist.output.spent[, output.created.block_time := NULL]
|
||||
# master.edgelist.output.spent[, output.created.block_height := NULL]
|
||||
# master.edgelist.output.spent[, output_index := NULL]
|
||||
|
||||
|
||||
output.spent.block_time.intermediate <- unique(master.edgelist.output.spent[, .(output.spent.block_time)])
|
||||
|
||||
output.spent.block_time.intermediate[, output.spent.block_time.week :=
|
||||
lubridate::isoweek(as.POSIXct(output.spent.block_time, origin = "1970-01-01", tz = "UTC"))]
|
||||
|
||||
output.spent.block_time.intermediate[, output.spent.block_time.isoweekyear :=
|
||||
lubridate::isoyear(as.POSIXct(output.spent.block_time, origin = "1970-01-01", tz = "UTC"))]
|
||||
|
||||
output.spent.block_time.intermediate[,
|
||||
output.spent.block_time.week := factor(paste0(output.spent.block_time.isoweekyear, "-",
|
||||
formatC(output.spent.block_time.week, width = 2, flag = "0")))]
|
||||
|
||||
master.edgelist.output.spent <- merge(master.edgelist.output.spent,
|
||||
output.spent.block_time.intermediate[, .(output.spent.block_time, output.spent.block_time.week)], by = "output.spent.block_time")
|
||||
|
||||
saveRDS(master.edgelist.output.spent, paste0(data.dir, "master_edgelist_output_spent.rds"))
|
||||
|
||||
|
||||
|
162
General-Blockchain-Age-of-Spent-Outputs/extract-tx-graphs.R
Normal file
162
General-Blockchain-Age-of-Spent-Outputs/extract-tx-graphs.R
Normal file
|
@ -0,0 +1,162 @@
|
|||
# install.packages("rbch")
|
||||
# install.packages("data.table")
|
||||
# install.packages("future.apply")
|
||||
|
||||
library(rbch)
|
||||
library(data.table)
|
||||
library(future.apply)
|
||||
|
||||
is.dogecoin <- FALSE
|
||||
|
||||
blockchain.conf.file <- ""
|
||||
# Input filepath for your {blockchain}.conf file
|
||||
|
||||
data.dir <- ""
|
||||
# Input data directory here, with trailing "/"
|
||||
|
||||
current.block.height <- NA_integer_
|
||||
# current.block.height <- rbch::getblockchaininfo(bch.config)@result$blocks
|
||||
|
||||
n.threads <- min(c(6, parallelly::availableCores()))
|
||||
# Recommended no more than 6 threads since all threads query the single blockchain daemon process.
|
||||
|
||||
dir.create(paste0(data.dir, "tx_graphs"))
|
||||
|
||||
blockchain.config <- rbch::conrpc(blockchain.conf.file)
|
||||
rpcport <- readLines(blockchain.conf.file)
|
||||
rpcport <- rpcport[grepl("rpcport", rpcport) ]
|
||||
if (length(rpcport) > 0) {
|
||||
blockchain.config@url <- paste0("http://127.0.0.1:", gsub("[^0-9]", "", rpcport))
|
||||
}
|
||||
|
||||
cut.seq <- seq(20, current.block.height, by = 20)
|
||||
cut.seq <- c(-1, cut.seq, current.block.height)
|
||||
|
||||
heights.to.process <- 0:current.block.height
|
||||
heights.to.process <- split(heights.to.process,
|
||||
cut(heights.to.process, cut.seq))
|
||||
|
||||
future::plan(future::multiprocess(workers = n.threads))
|
||||
|
||||
|
||||
for (height.set in heights.to.process) {
|
||||
|
||||
extracted.txs <- future.apply::future_lapply(height.set, function(iter.block.height) {
|
||||
|
||||
if (iter.block.height %% 1000 == 0) {
|
||||
cat(iter.block.height, base::date(), "\n")
|
||||
}
|
||||
|
||||
block.hash <- rbch::getblockhash(blockchain.config, iter.block.height)
|
||||
if( ! is.dogecoin) {
|
||||
block.data <- rbch::getblock(bch.config, blockhash = block.hash@result, verbosity = "l2")
|
||||
# Argument verbose = 2 gives full transaction data
|
||||
} else {
|
||||
block.data <- jsonlite::fromJSON(paste0("http://localhost:22555/rest/block/", block.hash@result, ".json"), simplifyVector = FALSE)
|
||||
}
|
||||
#raw.txs.ls <- block.data@result$tx
|
||||
raw.txs.ls <- block.data$tx
|
||||
|
||||
coinbase.tx <- raw.txs.ls[[1]]
|
||||
|
||||
value <- vector("numeric", length(coinbase.tx$vout) )
|
||||
|
||||
for (j in seq_along(coinbase.tx$vout)) {
|
||||
value[j] <- coinbase.tx$vout[[j]]$value
|
||||
}
|
||||
|
||||
outgoing.coinbase <- data.table(txid = coinbase.tx$txid,
|
||||
position = seq_along(coinbase.tx$vout), value = value, block_height = iter.block.height, stringsAsFactors = FALSE)
|
||||
|
||||
coinbase.return.value <- list(incoming =
|
||||
data.table(txid = character(0), origin.txid = character(0),
|
||||
origin.position = numeric(0), block_height = integer(0), stringsAsFactors = FALSE),
|
||||
outgoing = outgoing.coinbase)
|
||||
|
||||
if ( length(raw.txs.ls) < 2) {
|
||||
return(list(coinbase.return.value))
|
||||
}
|
||||
# No incoming txs for coinbase-only
|
||||
|
||||
# Results of this lapply below are returned
|
||||
|
||||
return.value <- lapply(2:length(raw.txs.ls), function(iter) {
|
||||
# Start at 2 since the first tx is the coinbase tx
|
||||
|
||||
latest.tx <- raw.txs.ls[[iter]]
|
||||
|
||||
# addresses <- vector("character", length(latest.tx$vout) )
|
||||
value <- vector("numeric", length(latest.tx$vout) )
|
||||
|
||||
for (j in seq_along(latest.tx$vout)) {
|
||||
extracted.address <- latest.tx$vout[[j]]$scriptPubKey$addresses
|
||||
if (length(extracted.address) > 1) {
|
||||
extracted.address <- list(paste0(sort(unlist(extracted.address)), collapse = "|"))
|
||||
# sort() so that the address order is always the same
|
||||
}
|
||||
|
||||
stopifnot(length(extracted.address[[1]]) <= 1)
|
||||
if (length(extracted.address) == 0) {next}
|
||||
|
||||
# addresses[j] <- extracted.address[[1]]
|
||||
value[j] <- latest.tx$vout[[j]]$value
|
||||
}
|
||||
|
||||
outgoing <- data.table(txid = latest.tx$txid,
|
||||
# address = addresses,
|
||||
position = seq_along(latest.tx$vout), value = value, block_height = iter.block.height, stringsAsFactors = FALSE)
|
||||
|
||||
origin.txid <- vector("character", length(latest.tx$vin) )
|
||||
origin.position <- vector("numeric", length(latest.tx$vin) )
|
||||
|
||||
for (j in seq_along(latest.tx$vin)) {
|
||||
extracted.address <- latest.tx$vin[[j]]$txid
|
||||
stopifnot(length(extracted.address) <= 1)
|
||||
stopifnot(length(extracted.address[[1]]) <= 1)
|
||||
if (length(extracted.address) == 0) {next}
|
||||
|
||||
origin.txid[j] <- latest.tx$vin[[j]]$txid
|
||||
origin.position[j] <- latest.tx$vin[[j]]$vout + 1
|
||||
}
|
||||
|
||||
incoming <- data.table(txid = latest.tx$txid, origin.txid = origin.txid,
|
||||
origin.position = origin.position, block_height = iter.block.height, stringsAsFactors = FALSE)
|
||||
|
||||
list(incoming = incoming, outgoing = outgoing)
|
||||
|
||||
})
|
||||
|
||||
return.value[[length(return.value) + 1]] <- coinbase.return.value
|
||||
# Note that this means that coinbase txs are now "last in the block"
|
||||
|
||||
return.value
|
||||
|
||||
})
|
||||
|
||||
|
||||
print(object.size(extracted.txs), units = "Mb")
|
||||
|
||||
extracted.txs <- unlist(extracted.txs, recursive = FALSE)
|
||||
|
||||
incoming <- data.table::rbindlist(lapply(extracted.txs, function(x) {
|
||||
x[[1]]
|
||||
})
|
||||
)
|
||||
|
||||
outgoing <- data.table::rbindlist(lapply(extracted.txs, function(x) {
|
||||
x[[2]]
|
||||
})
|
||||
)
|
||||
|
||||
rm(extracted.txs)
|
||||
|
||||
saveRDS(list(incoming = incoming, outgoing = outgoing),
|
||||
file = paste0(data.dir, "tx_graphs/tx_graph_height_",
|
||||
paste0(formatC(range(height.set), width = 7, flag = "0"), collapse = "_to_"), ".rds"),
|
||||
compress = FALSE)
|
||||
|
||||
rm(incoming)
|
||||
rm(outgoing)
|
||||
|
||||
}
|
||||
|
52
General-Blockchain-Age-of-Spent-Outputs/get-block-times.R
Normal file
52
General-Blockchain-Age-of-Spent-Outputs/get-block-times.R
Normal file
|
@ -0,0 +1,52 @@
|
|||
library(data.table)
|
||||
library(rbch)
|
||||
library(future.apply)
|
||||
|
||||
is.dogecoin <- FALSE
|
||||
|
||||
blockchain.conf.file <- ""
|
||||
# Input filepath for your {blockchain}.conf file
|
||||
|
||||
data.dir <- ""
|
||||
# Input data directory here, with trailing "/"
|
||||
|
||||
current.block.height <- NA_integer_
|
||||
# current.block.height <- rbch::getblockchaininfo(bch.config)@result$blocks
|
||||
|
||||
n.threads <- min(c(6, parallelly::availableCores()))
|
||||
# Recommended no more than 6 threads since all threads query the single blockchain daemon process.
|
||||
|
||||
blockchain.config <- rbch::conrpc(blockchain.conf.file)
|
||||
|
||||
|
||||
getblock.doge <- function(con, blockhash, verbosity = TRUE) {
|
||||
bh <- as.character(blockhash)
|
||||
pl <- unname(list(blockhash = bh, verbosity = verbosity))
|
||||
rpcpost(con, "getblock", pl)
|
||||
}
|
||||
|
||||
|
||||
future::plan(future::multiprocess(workers = n.threads))
|
||||
|
||||
block.times <- future.apply::future_lapply(0:current.block.height, function(iter.block.height) {
|
||||
|
||||
if (iter.block.height %% 1000 == 0) {
|
||||
cat(iter.block.height, base::date(), "\n")
|
||||
}
|
||||
|
||||
block.hash <- rbch::getblockhash(blockchain.config, iter.block.height)
|
||||
|
||||
if ( ! is.dogecoin) {
|
||||
block.data <- rbch::getblock(blockchain.config, blockhash = block.hash@result, verbosity = "l1")
|
||||
} else {
|
||||
block.data <- getblock.doge(blockchain.config, blockhash = block.hash@result, verbosity = TRUE)
|
||||
}
|
||||
data.frame(block_height = iter.block.height, block_time = block.data@result$time)
|
||||
})
|
||||
|
||||
block.times <- data.table::rbindlist(block.times)
|
||||
|
||||
saveRDS(block.times, file = paste0(data.dir, "block_times.rds"))
|
||||
|
||||
|
||||
|
Loading…
Reference in a new issue