From b73fd573f5b5d302dd358b74db0fc242beee0131 Mon Sep 17 00:00:00 2001 From: Rucknium Date: Sat, 26 Feb 2022 19:59:27 +0000 Subject: [PATCH] Create determine-descendants.R --- .../R/determine-descendants.R | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 CashFusion-Descendant-Analysis/R/determine-descendants.R diff --git a/CashFusion-Descendant-Analysis/R/determine-descendants.R b/CashFusion-Descendant-Analysis/R/determine-descendants.R new file mode 100644 index 0000000..2c1afe5 --- /dev/null +++ b/CashFusion-Descendant-Analysis/R/determine-descendants.R @@ -0,0 +1,93 @@ +# install.packages("data.table") +# install.packages("RSQLite") +# install.packages("DBI") +# install.packages("igraph") + +library(data.table) +library(RSQLite) +library(DBI) +library(igraph) + +data.dir <- "" +# Input data directory here, with trailing "/" + +fusions.df <- readRDS("https://github.com/Rucknium/CashFusionStats/raw/main/data/fusions_df.rds") +# Get the list of transactions that are the fusion transactions + + +con <- DBI::dbConnect(RSQLite::SQLite(), paste0(data.dir, "tx-graph-node-indices.db")) + +master.edgelist <- DBI::dbGetQuery(con, + "SELECT origin_index, destination_index FROM edgelist_intermediate_2") + + +master.edgelist <- as.matrix(master.edgelist) + +bch.graph <- igraph::graph_from_edgelist(master.edgelist) + + +fusioned.nodes <- DBI::dbGetQuery(con, paste0('SELECT * FROM nodes WHERE node IN ("', + paste0(fusions.df$txid, collapse = '", "'), '")')) + +fusioned.nodes <- base::intersect(unique(c(master.edgelist)), fusioned.nodes$node_index) + +utxo.set <- setdiff(master.edgelist[, 2], master.edgelist[, 1]) + +fusioned.nodes <- sort(fusioned.nodes, decreasing = FALSE) + +fusioned.nodes.to.process <- split(fusioned.nodes, + cut(fusioned.nodes, fusioned.nodes[seq(4, length(fusioned.nodes) - 4, by = 4)])) + +fusioned.nodes.to.process <- rev(fusioned.nodes.to.process) + + +touched.UTXO <- c() + +counter.i <- 1 + +rm(master.edgelist) +gc() + + +for ( i in fusioned.nodes.to.process[counter.i:length(fusioned.nodes.to.process)]) { + + to.set <- setdiff(utxo.set, touched.UTXO) + if (counter.i %% 10 == 0 ) { + save(i, counter.i, touched.UTXO, + file = paste0(data.dir, "touched-UTXO-intermediate-", + counter.i, ".Rdata"), compress = FALSE) + } + + bch.paths <- distances(bch.graph, + v = i, + to = to.set, mode = "out") + + touched.UTXO <- c(touched.UTXO, to.set[colSums(is.finite(bch.paths)) > 0 ] ) + + cat(base::date(), " | ", + round(100 * length(touched.UTXO) / length(utxo.set), 3), "% | ", + counter.i, + " of ", length(fusioned.nodes.to.process), "\n") + + counter.i <- counter.i + 1 + +} + +save(i, counter.i, touched.UTXO, + file = paste0(data.dir, "touched-UTXO-intermediate-", + "COMPLETE", ".Rdata"), compress = FALSE) + + +# How to restart the analysis if interrupted: + +# Run this script up until the "gc()". +# Note that you should be using the version of fusions_df.rds that you used +# in the initial script run. + +# Find the most recent save state, i.e. highest value of +# touched-UTXO-intermediate-xxxx.Rdata +# Then load it: +# load(paste0(data.dir, "touched-UTXO-intermediate-xxxx.Rdata")) + +# Then start the for loop +