Don't write transaction graph nodes from post-fork blocks, reducing run time

This commit is contained in:
Rucknium 2022-04-15 13:47:24 +00:00
parent e2998b266a
commit 222f62b905
2 changed files with 13 additions and 4 deletions

View file

@ -9,6 +9,8 @@ library(DBI)
data.dir <- "" data.dir <- ""
# Input data directory here, with trailing "/" # Input data directory here, with trailing "/"
last.pre.fork.block <- 478558
source("https://gist.githubusercontent.com/jeffwong/5925000/raw/bf02ed0dd2963169a91664be02fb18e45c4d1e20/sqlitewritetable.R") source("https://gist.githubusercontent.com/jeffwong/5925000/raw/bf02ed0dd2963169a91664be02fb18e45c4d1e20/sqlitewritetable.R")
# From https://gist.github.com/jeffwong/5925000 # From https://gist.github.com/jeffwong/5925000
# Modifies RSQLite's sqliteWriteTable function so as to reject duplicates # Modifies RSQLite's sqliteWriteTable function so as to reject duplicates
@ -28,7 +30,7 @@ DBI::dbWriteTable(con, "edgelist",
tx.graph.files <- list.files(paste0(data.dir, "tx_graphs/")) tx.graph.files <- list.files(paste0(data.dir, "tx_graphs/"))
tx.graph.files <- tx.graph.files[grepl("^tx_graph.+rds$", tx.graph.files)] tx.graph.files <- tx.graph.files[grepl("^tx_graph.+rds$", tx.graph.files)]
tx.graph.files <- sort(tx.graph.files)
tx.graph.indexed <- vector("list", length(tx.graph.files)) tx.graph.indexed <- vector("list", length(tx.graph.files))
names(tx.graph.indexed) <- tx.graph.files names(tx.graph.indexed) <- tx.graph.files
@ -54,6 +56,12 @@ for (file.iter in tx.graph.files) {
DBI::dbWriteTable(con, "edgelist", DBI::dbWriteTable(con, "edgelist",
tx.graph.chunk, append = TRUE) tx.graph.chunk, append = TRUE)
tx.graph.chunk <- tx.graph.chunk[block_height <= last.pre.fork.block, ]
cat(file.iter, base::date(), "\n")
if (nrow(tx.graph.chunk) == 0) {next}
new.nodes <- unique(c(tx.graph.chunk$origin, tx.graph.chunk$destination)) new.nodes <- unique(c(tx.graph.chunk$origin, tx.graph.chunk$destination))
nodes.to.insert <- data.frame(node = new.nodes, node_index = NA, stringsAsFactors = FALSE) nodes.to.insert <- data.frame(node = new.nodes, node_index = NA, stringsAsFactors = FALSE)
@ -61,7 +69,7 @@ for (file.iter in tx.graph.files) {
mysqliteWriteTable(con, "nodes", mysqliteWriteTable(con, "nodes",
nodes.to.insert, append = TRUE, row.names = FALSE, ignore = TRUE) nodes.to.insert, append = TRUE, row.names = FALSE, ignore = TRUE)
cat(file.iter, base::date(), "\n") cat(nrow(nodes.to.insert), "Nodes written\n")
} }
@ -76,6 +84,7 @@ DBI::dbExecute(con, "INSERT INTO edgelist_intermediate_1 SELECT
origin, destination, value, block_height, node_index FROM origin, destination, value, block_height, node_index FROM
edgelist JOIN nodes ON edgelist.origin = nodes.node") edgelist JOIN nodes ON edgelist.origin = nodes.node")
base::date() base::date()
# JOIN is an INNER JOIN, so edgelist rows whose origin has no match in the nodes table (nodes seen only after the fork) are not included
DBI::dbExecute(con, DBI::dbExecute(con,

View file

@ -20,9 +20,9 @@ dir.create(paste0(data.dir, "tx_graphs"))
bch.config <- rbch::conrpc(bitcoin.conf.file) bch.config <- rbch::conrpc(bitcoin.conf.file)
# current.block.height <- rbch::getblockchaininfo(bch.config)@result$blocks # current.block.height <- rbch::getblockchaininfo(bch.config)@result$blocks
# current.block.height <- 733867 current.block.height <- 733867
# 733867 is for BCH # 733867 is for BCH
current.block.height <- 729896 # current.block.height <- 729896
# 729896 is for BTC # 729896 is for BTC
cut.seq <- seq(20, current.block.height, by = 20) cut.seq <- seq(20, current.block.height, by = 20)