remove duplicate historical trade stats after fuzzing
Some checks are pending
CI / build (macos-13) (push) Waiting to run
CI / build (ubuntu-latest) (push) Waiting to run
CI / build (windows-latest) (push) Waiting to run
Codacy Coverage Reporter / Publish coverage (push) Waiting to run
CodeQL / Analyze (java) (push) Waiting to run

This commit is contained in:
woodser 2024-07-28 09:20:50 -04:00 committed by GitHub
parent 75b96e83da
commit c63cf2f0a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 41 additions and 22 deletions

View file

@ -69,6 +69,8 @@ public final class TradeStatistics3 implements ProcessOncePersistableNetworkPayl
@JsonExclude
private transient static final ZoneId ZONE_ID = ZoneId.systemDefault();
private static final double FUZZ_AMOUNT_PCT = 0.05;
private static final int FUZZ_DATE_HOURS = 24;
public static TradeStatistics3 from(Trade trade,
@Nullable String referralId,
@ -102,8 +104,7 @@ public final class TradeStatistics3 implements ProcessOncePersistableNetworkPayl
long originalTimestamp = trade.getTakeOfferDate().getTime();
long exactAmount = trade.getAmount().longValueExact();
Random random = new Random(originalTimestamp); // pseudo random generator seeded from take offer datestamp
long adjustedAmount = (long) random.nextDouble(
exactAmount * 0.95, exactAmount * 1.05);
long adjustedAmount = (long) random.nextDouble(exactAmount * (1.0 - FUZZ_AMOUNT_PCT), exactAmount * (1 + FUZZ_AMOUNT_PCT));
log.debug("trade {} fuzzed trade amount for tradeStatistics is {}", trade.getShortId(), adjustedAmount);
return adjustedAmount;
}
@ -111,8 +112,7 @@ public final class TradeStatistics3 implements ProcessOncePersistableNetworkPayl
private static long fuzzTradeDateReproducibly(Trade trade) { // randomize completed trade info #1099
long originalTimestamp = trade.getTakeOfferDate().getTime();
Random random = new Random(originalTimestamp); // pseudo random generator seeded from take offer datestamp
long adjustedTimestamp = random.nextLong(
originalTimestamp-TimeUnit.HOURS.toMillis(24), originalTimestamp);
long adjustedTimestamp = random.nextLong(originalTimestamp - TimeUnit.HOURS.toMillis(FUZZ_DATE_HOURS), originalTimestamp);
log.debug("trade {} fuzzed trade datestamp for tradeStatistics is {}", trade.getShortId(), new Date(adjustedTimestamp));
return adjustedTimestamp;
}

View file

@ -110,35 +110,54 @@ public class TradeStatisticsManager {
maybeDumpStatistics();
}
private void deduplicateEarlyTradeStatistics(Set<TradeStatistics3> set) {
private void deduplicateEarlyTradeStatistics(Set<TradeStatistics3> tradeStats) {
// collect trades before May 31, 2024
Set<TradeStatistics3> tradesBeforeMay31_24 = set.stream()
.filter(e -> e.getDate().toInstant().isBefore(Instant.parse("2024-05-31T00:00:00Z")))
// collect trades before August 7, 2024
Set<TradeStatistics3> earlyTrades = tradeStats.stream()
.filter(e -> e.getDate().toInstant().isBefore(Instant.parse("2024-08-07T00:00:00Z")))
.collect(Collectors.toSet());
// collect duplicated trades
Set<TradeStatistics3> duplicated = new HashSet<TradeStatistics3>();
Set<TradeStatistics3> deduplicated = new HashSet<TradeStatistics3>();
for (TradeStatistics3 tradeStatistics : tradesBeforeMay31_24) {
if (hasLenientDuplicate(tradeStatistics, deduplicated)) duplicated.add(tradeStatistics);
else deduplicated.add(tradeStatistics);
Set<TradeStatistics3> duplicates = new HashSet<TradeStatistics3>();
Set<TradeStatistics3> deduplicates = new HashSet<TradeStatistics3>();
Set<TradeStatistics3> usedAsDuplicate = new HashSet<TradeStatistics3>();
for (TradeStatistics3 tradeStatistic : earlyTrades) {
TradeStatistics3 fuzzyDuplicate = findFuzzyDuplicate(tradeStatistic, deduplicates, usedAsDuplicate);
if (fuzzyDuplicate == null) deduplicates.add(tradeStatistic);
else {
duplicates.add(tradeStatistic);
usedAsDuplicate.add(fuzzyDuplicate);
}
}
// remove duplicated trades
set.removeAll(duplicated);
tradeStats.removeAll(duplicates);
}
private boolean hasLenientDuplicate(TradeStatistics3 tradeStatistics, Set<TradeStatistics3> set) {
return set.stream().anyMatch(e -> isLenientDuplicate(tradeStatistics, e));
/**
 * Searches {@code set} for a statistic that is a fuzzy duplicate of the given one.
 * Candidates contained in {@code excluded} are skipped.
 *
 * @param tradeStatistics the statistic to find a counterpart for
 * @param set the candidates to search
 * @param excluded candidates that must not be returned
 * @return the first matching candidate in the set's iteration order, or {@code null} if there is none
 */
private TradeStatistics3 findFuzzyDuplicate(TradeStatistics3 tradeStatistics, Set<TradeStatistics3> set, Set<TradeStatistics3> excluded) {
    for (TradeStatistics3 candidate : set) {
        if (excluded.contains(candidate)) continue;
        if (isFuzzyDuplicate(tradeStatistics, candidate)) return candidate;
    }
    return null;
}
private boolean isLenientDuplicate(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
boolean isWithin2Minutes = Math.abs(tradeStatistics1.getDate().getTime() - tradeStatistics2.getDate().getTime()) < 120000;
return isWithin2Minutes &&
tradeStatistics1.getCurrency().equals(tradeStatistics2.getCurrency()) &&
tradeStatistics1.getAmount() == tradeStatistics2.getAmount() &&
tradeStatistics1.getPrice() == tradeStatistics2.getPrice();
/**
 * Decides whether two statistics are fuzzy duplicates of each other.
 * They must agree on payment method id, currency and price, and additionally
 * satisfy at least one of the V1 or V2 duplicate checks.
 *
 * @param tradeStatistics1 the first statistic
 * @param tradeStatistics2 the second statistic
 * @return {@code true} if the two statistics are considered duplicates
 */
private boolean isFuzzyDuplicate(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
    // compared in the same order as before: payment method, then currency, then price
    boolean sameTradeTerms = tradeStatistics1.getPaymentMethodId().equals(tradeStatistics2.getPaymentMethodId())
            && tradeStatistics1.getCurrency().equals(tradeStatistics2.getCurrency())
            && tradeStatistics1.getPrice() == tradeStatistics2.getPrice();
    if (!sameTradeTerms) return false;
    return isFuzzyDuplicateV1(tradeStatistics1, tradeStatistics2) || isFuzzyDuplicateV2(tradeStatistics1, tradeStatistics2);
}
// a bug caused all peers to publish the same trade with similar timestamps
/**
 * First-generation duplicate check: the two statistics are dated at most
 * two minutes apart (inclusive).
 *
 * @param tradeStatistics1 the first statistic
 * @param tradeStatistics2 the second statistic
 * @return {@code true} if the dates differ by no more than two minutes
 */
private boolean isFuzzyDuplicateV1(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
    long millisApart = Math.abs(tradeStatistics1.getDate().getTime() - tradeStatistics2.getDate().getTime());
    return millisApart <= TimeUnit.MINUTES.toMillis(2);
}
// bug caused sellers to re-publish their trades with randomized amounts
private static final double FUZZ_AMOUNT_PCT = 0.05;
private static final int FUZZ_DATE_HOURS = 24;
/**
 * Second-generation duplicate check: the two statistics are dated at most
 * {@code FUZZ_DATE_HOURS} hours apart and their amounts differ by at most
 * {@code FUZZ_AMOUNT_PCT} of the larger amount.
 *
 * <p>The tolerance is based on the larger of the two amounts so the result does not
 * depend on argument order. The caller cannot tell which statistic carries the
 * unfuzzed amount, and basing the tolerance on the first argument alone made
 * borderline pairs match in one order but not in the other.
 *
 * @param tradeStatistics1 the first statistic
 * @param tradeStatistics2 the second statistic
 * @return {@code true} if both the date and the amount are within the fuzzing bounds
 */
private boolean isFuzzyDuplicateV2(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
    boolean isWithinFuzzedHours = Math.abs(tradeStatistics1.getDate().getTime() - tradeStatistics2.getDate().getTime()) <= TimeUnit.HOURS.toMillis(FUZZ_DATE_HOURS);
    // symmetric tolerance: equivalent to accepting the pair if it is within bounds relative to either amount
    double amountTolerance = FUZZ_AMOUNT_PCT * Math.max(tradeStatistics1.getAmount(), tradeStatistics2.getAmount());
    boolean isWithinFuzzedAmount = Math.abs(tradeStatistics1.getAmount() - tradeStatistics2.getAmount()) <= amountTolerance;
    return isWithinFuzzedHours && isWithinFuzzedAmount;
}
public ObservableSet<TradeStatistics3> getObservableTradeStatisticsSet() {