# install.packages("data.table")
# install.packages("RSQLite")
# install.packages("DBI")

library(data.table)
library(RSQLite)
library(DBI)

# ---- Configuration and setup ----

# Input data directory. Set it here, keeping the trailing "/": every path
# below is assembled with paste0(data.dir, ...). The empty string makes all
# paths relative to the current working directory.
data.dir <- ""

# Block-height cut-off used by the ingest loop: only rows with
# block_height <= last.pre.fork.block feed the node index.
last.pre.fork.block <- 478558

# Helper script from https://gist.github.com/jeffwong/5925000 (URL pinned to
# one gist revision). It modifies RSQLite's sqliteWriteTable function so as
# to reject duplicates; presumably this is where mysqliteWriteTable(), used
# further down, comes from -- TODO confirm against the gist.
sqlite.write.table.url <- "https://gist.githubusercontent.com/jeffwong/5925000/raw/bf02ed0dd2963169a91664be02fb18e45c4d1e20/sqlitewritetable.R"
source(sqlite.write.table.url)

# Connection to the SQLite database file that stores the node index and the
# edge list tables.
node.db.path <- paste0(data.dir, "tx-graph-node-indices.db")
con <- DBI::dbConnect(RSQLite::SQLite(), node.db.path)

# ---- Table creation ----

# nodes: maps each node string to an integer node_index. node_index is an
# autoincrementing primary key, and unique(node) makes a repeated node
# string violate a constraint rather than get a second index.
create.nodes.sql <- "CREATE TABLE nodes (
node TEXT,
node_index INTEGER PRIMARY KEY AUTOINCREMENT,
unique(node)
)"
DBI::dbExecute(con, create.nodes.sql)

# edgelist: created empty from a zero-row data frame so that the column
# names and types are fixed before the ingest loop appends to it.
empty.edgelist <- data.frame(
  origin = character(0),
  destination = character(0),
  value = numeric(0),
  block_height = integer(0),
  stringsAsFactors = FALSE
)
DBI::dbWriteTable(con, "edgelist", empty.edgelist)

# ---- Input file discovery ----

# Names of the chunk files in <data.dir>tx_graphs/: keep only names that
# start with "tx_graph" and end in "rds", in sorted order so the chunks are
# always processed in the same sequence.
tx.graph.files <- sort(
  grep("^tx_graph.+rds$", list.files(paste0(data.dir, "tx_graphs/")),
    value = TRUE)
)

# Empty list with one NULL slot per chunk file, named by file name.
tx.graph.indexed <- stats::setNames(
  vector("list", length(tx.graph.files)),
  tx.graph.files
)

# Builds node labels of the form "<txid>-<position>", with the position
# formatted by formatC() to a zero-padded width of 4 (e.g. "abc-0003").
#
# txid:     vector of transaction ids.
# position: vector of positions, same length as txid.
# Returns a character vector of the same length as txid. For zero-length
# input it returns character(0); a bare paste0() would return the single
# string "-" instead, because paste0() treats zero-length arguments as "".
make.output.id <- function(txid, position) {
  if (length(txid) == 0L) {
    return(character(0))
  }
  paste0(txid, "-",
    formatC(position, width = 4, format = "f", flag = "0", digits = 0))
}

# Ingest the chunk files one by one: append every edge to the edgelist
# table, then register the nodes of rows at or below last.pre.fork.block
# in the nodes table.
for (file.iter in tx.graph.files) {
  
  tx.graph.chunk <- readRDS(paste0(data.dir, "tx_graphs/", file.iter))
  
  # Stack the two parts of the chunk into one edge list:
  #   incoming: origin = "<origin.txid>-<origin.position>", destination = txid,
  #             value is NA
  #   outgoing: origin = txid, destination = "<txid>-<position>",
  #             value taken from the chunk
  # The NA value column is built with rep() so that it has the same length
  # as the other columns; with a scalar NA an empty `incoming` part would
  # produce one NA-filled row instead of zero rows.
  tx.graph.chunk <-
    rbind(
      data.table(
        origin = make.output.id(tx.graph.chunk$incoming$origin.txid,
          tx.graph.chunk$incoming$origin.position),
        destination = tx.graph.chunk$incoming$txid, 
        value = rep(NA_real_, length(tx.graph.chunk$incoming$txid)),
        block_height = as.integer(tx.graph.chunk$incoming$block_height),
        stringsAsFactors = FALSE),
      data.table(
        origin = tx.graph.chunk$outgoing$txid,
        destination = make.output.id(tx.graph.chunk$outgoing$txid,
          tx.graph.chunk$outgoing$position),
        value = tx.graph.chunk$outgoing$value,
        block_height = as.integer(tx.graph.chunk$outgoing$block_height),
        stringsAsFactors = FALSE)
    )
  
  # All edges go into the edgelist table, regardless of block height.
  DBI::dbWriteTable(con, "edgelist", 
    tx.graph.chunk, append = TRUE)
  
  # Only rows up to and including last.pre.fork.block contribute nodes.
  tx.graph.chunk <- tx.graph.chunk[block_height <= last.pre.fork.block, ]
  
  # Progress line: file name and current time.
  cat(file.iter, base::date(), "\n")
  
  # Nothing to register for this chunk.
  if (nrow(tx.graph.chunk) == 0) {next}
  
  new.nodes <- unique(c(tx.graph.chunk$origin, tx.graph.chunk$destination))
  
  # node_index is left NA so that SQLite assigns the autoincrement value.
  nodes.to.insert <- data.frame(node = new.nodes, node_index = NA, stringsAsFactors = FALSE)
  
  # ignore = TRUE: presumably nodes that already exist in the table are
  # skipped rather than raising a constraint error -- TODO confirm against
  # the sourced gist.
  mysqliteWriteTable(con, "nodes", 
    nodes.to.insert, append = TRUE, row.names = FALSE, ignore = TRUE)
  
  # This is the number of distinct nodes submitted from this chunk; nodes
  # already present from earlier chunks are included in the count.
  cat(nrow(nodes.to.insert), "Nodes written\n")
  
}


# ---- Stage 1: attach the index of each edge's origin node ----

# Empty staging table with the edgelist columns plus node_index.
# overwrite = TRUE replaces a table of the same name if one exists.
intermediate.1.schema <- data.frame(
  origin = character(0),
  destination = character(0),
  value = numeric(0),
  block_height = integer(0),
  node_index = integer(0),
  stringsAsFactors = FALSE
)
DBI::dbWriteTable(con, "edgelist_intermediate_1", intermediate.1.schema,
  overwrite = TRUE)

# Fill the staging table by matching edgelist.origin against nodes.node.
# JOIN is an INNER JOIN, so edges whose origin has no row in nodes are
# dropped; nodes only holds nodes from rows at or below
# last.pre.fork.block, so post-fork nodes are not included.
fill.intermediate.1.sql <- "INSERT INTO edgelist_intermediate_1 SELECT 
  origin, destination, value, block_height, node_index FROM
  edgelist JOIN nodes ON edgelist.origin = nodes.node"

# The date() calls before and after bracket the join with timestamps.
base::date()
DBI::dbExecute(con, fill.intermediate.1.sql)
base::date()

# The joined node_index belongs to the origin node; name it accordingly.
rename.origin.index.sql <-
  "ALTER TABLE edgelist_intermediate_1 RENAME COLUMN node_index TO origin_index"
DBI::dbExecute(con, rename.origin.index.sql)


# ---- Stage 2: attach the index of each edge's destination node ----

# Empty staging table: stage-1 columns plus a node_index column that will
# receive the destination's index.
intermediate.2.schema <- data.frame(
  origin = character(0),
  destination = character(0),
  origin_index = integer(0),
  node_index = integer(0),
  value = numeric(0),
  block_height = integer(0),
  stringsAsFactors = FALSE
)
DBI::dbWriteTable(con, "edgelist_intermediate_2", intermediate.2.schema)

# Fill it by matching edgelist_intermediate_1.destination against
# nodes.node. The SELECT lists its columns in the same order as the table
# definition above, since the INSERT names no target columns.
fill.intermediate.2.sql <- "INSERT INTO edgelist_intermediate_2 SELECT 
  origin, destination, origin_index, node_index, value, block_height FROM
  edgelist_intermediate_1 JOIN nodes ON edgelist_intermediate_1.destination = nodes.node"

# The date() calls before and after bracket the join with timestamps.
base::date()
DBI::dbExecute(con, fill.intermediate.2.sql)
base::date()

# The joined node_index belongs to the destination node; name it accordingly.
rename.destination.index.sql <-
  "ALTER TABLE edgelist_intermediate_2 RENAME COLUMN node_index TO destination_index"
DBI::dbExecute(con, rename.destination.index.sql)

# NOTE(review): no DBI::dbDisconnect(con) is visible in this part of the
# script -- confirm the connection is closed once it is no longer needed.