remove duplicate historical trade stats after fuzzing

This commit is contained in:
woodser 2024-07-28 09:20:50 -04:00 committed by GitHub
parent 75b96e83da
commit c63cf2f0a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 41 additions and 22 deletions

View file

@ -69,6 +69,8 @@ public final class TradeStatistics3 implements ProcessOncePersistableNetworkPayl
@JsonExclude @JsonExclude
private transient static final ZoneId ZONE_ID = ZoneId.systemDefault(); private transient static final ZoneId ZONE_ID = ZoneId.systemDefault();
// Fraction of the exact trade amount by which the published amount may deviate (in either direction) when fuzzed.
private static final double FUZZ_AMOUNT_PCT = 0.05;
// Maximum number of hours the published trade date may be moved back from the take-offer time when fuzzed.
private static final int FUZZ_DATE_HOURS = 24;
public static TradeStatistics3 from(Trade trade, public static TradeStatistics3 from(Trade trade,
@Nullable String referralId, @Nullable String referralId,
@ -102,8 +104,7 @@ public final class TradeStatistics3 implements ProcessOncePersistableNetworkPayl
long originalTimestamp = trade.getTakeOfferDate().getTime(); long originalTimestamp = trade.getTakeOfferDate().getTime();
long exactAmount = trade.getAmount().longValueExact(); long exactAmount = trade.getAmount().longValueExact();
Random random = new Random(originalTimestamp); // pseudo random generator seeded from take offer datestamp Random random = new Random(originalTimestamp); // pseudo random generator seeded from take offer datestamp
long adjustedAmount = (long) random.nextDouble( long adjustedAmount = (long) random.nextDouble(exactAmount * (1.0 - FUZZ_AMOUNT_PCT), exactAmount * (1 + FUZZ_AMOUNT_PCT));
exactAmount * 0.95, exactAmount * 1.05);
log.debug("trade {} fuzzed trade amount for tradeStatistics is {}", trade.getShortId(), adjustedAmount); log.debug("trade {} fuzzed trade amount for tradeStatistics is {}", trade.getShortId(), adjustedAmount);
return adjustedAmount; return adjustedAmount;
} }
@ -111,8 +112,7 @@ public final class TradeStatistics3 implements ProcessOncePersistableNetworkPayl
private static long fuzzTradeDateReproducibly(Trade trade) { // randomize completed trade info #1099 private static long fuzzTradeDateReproducibly(Trade trade) { // randomize completed trade info #1099
long originalTimestamp = trade.getTakeOfferDate().getTime(); long originalTimestamp = trade.getTakeOfferDate().getTime();
Random random = new Random(originalTimestamp); // pseudo random generator seeded from take offer datestamp Random random = new Random(originalTimestamp); // pseudo random generator seeded from take offer datestamp
long adjustedTimestamp = random.nextLong( long adjustedTimestamp = random.nextLong(originalTimestamp - TimeUnit.HOURS.toMillis(FUZZ_DATE_HOURS), originalTimestamp);
originalTimestamp-TimeUnit.HOURS.toMillis(24), originalTimestamp);
log.debug("trade {} fuzzed trade datestamp for tradeStatistics is {}", trade.getShortId(), new Date(adjustedTimestamp)); log.debug("trade {} fuzzed trade datestamp for tradeStatistics is {}", trade.getShortId(), new Date(adjustedTimestamp));
return adjustedTimestamp; return adjustedTimestamp;
} }

View file

@ -110,35 +110,54 @@ public class TradeStatisticsManager {
maybeDumpStatistics(); maybeDumpStatistics();
} }
private void deduplicateEarlyTradeStatistics(Set<TradeStatistics3> set) { private void deduplicateEarlyTradeStatistics(Set<TradeStatistics3> tradeStats) {
// collect trades before May 31, 2024 // collect trades before August 7, 2024
Set<TradeStatistics3> tradesBeforeMay31_24 = set.stream() Set<TradeStatistics3> earlyTrades = tradeStats.stream()
.filter(e -> e.getDate().toInstant().isBefore(Instant.parse("2024-05-31T00:00:00Z"))) .filter(e -> e.getDate().toInstant().isBefore(Instant.parse("2024-08-07T00:00:00Z")))
.collect(Collectors.toSet()); .collect(Collectors.toSet());
// collect duplicated trades // collect duplicated trades
Set<TradeStatistics3> duplicated = new HashSet<TradeStatistics3>(); Set<TradeStatistics3> duplicates = new HashSet<TradeStatistics3>();
Set<TradeStatistics3> deduplicated = new HashSet<TradeStatistics3>(); Set<TradeStatistics3> deduplicates = new HashSet<TradeStatistics3>();
for (TradeStatistics3 tradeStatistics : tradesBeforeMay31_24) { Set<TradeStatistics3> usedAsDuplicate = new HashSet<TradeStatistics3>();
if (hasLenientDuplicate(tradeStatistics, deduplicated)) duplicated.add(tradeStatistics); for (TradeStatistics3 tradeStatistic : earlyTrades) {
else deduplicated.add(tradeStatistics); TradeStatistics3 fuzzyDuplicate = findFuzzyDuplicate(tradeStatistic, deduplicates, usedAsDuplicate);
if (fuzzyDuplicate == null) deduplicates.add(tradeStatistic);
else {
duplicates.add(tradeStatistic);
usedAsDuplicate.add(fuzzyDuplicate);
}
} }
// remove duplicated trades // remove duplicated trades
set.removeAll(duplicated); tradeStats.removeAll(duplicates);
} }
private boolean hasLenientDuplicate(TradeStatistics3 tradeStatistics, Set<TradeStatistics3> set) { private TradeStatistics3 findFuzzyDuplicate(TradeStatistics3 tradeStatistics, Set<TradeStatistics3> set, Set<TradeStatistics3> excluded) {
return set.stream().anyMatch(e -> isLenientDuplicate(tradeStatistics, e)); return set.stream().filter(e -> !excluded.contains(e)).filter(e -> isFuzzyDuplicate(tradeStatistics, e)).findFirst().orElse(null);
} }
private boolean isLenientDuplicate(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) { private boolean isFuzzyDuplicate(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
boolean isWithin2Minutes = Math.abs(tradeStatistics1.getDate().getTime() - tradeStatistics2.getDate().getTime()) < 120000; if (!tradeStatistics1.getPaymentMethodId().equals(tradeStatistics2.getPaymentMethodId())) return false;
return isWithin2Minutes && if (!tradeStatistics1.getCurrency().equals(tradeStatistics2.getCurrency())) return false;
tradeStatistics1.getCurrency().equals(tradeStatistics2.getCurrency()) && if (tradeStatistics1.getPrice() != tradeStatistics2.getPrice()) return false;
tradeStatistics1.getAmount() == tradeStatistics2.getAmount() && return isFuzzyDuplicateV1(tradeStatistics1, tradeStatistics2) || isFuzzyDuplicateV2(tradeStatistics1, tradeStatistics2);
tradeStatistics1.getPrice() == tradeStatistics2.getPrice(); }
// bug caused all peers to publish same trade with similar timestamps
/**
 * V1 duplicate check: the two trade dates are at most two minutes apart.
 *
 * NOTE(review): amounts are not compared here - confirm that is intended.
 *
 * @param tradeStatistics1 the first trade statistics
 * @param tradeStatistics2 the second trade statistics
 * @return true if the dates differ by no more than two minutes
 */
private boolean isFuzzyDuplicateV1(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
    long millisApart = Math.abs(tradeStatistics1.getDate().getTime() - tradeStatistics2.getDate().getTime());
    return millisApart <= TimeUnit.MINUTES.toMillis(2);
}
// bug caused sellers to re-publish their trades with randomized amounts
private static final double FUZZ_AMOUNT_PCT = 0.05;
private static final int FUZZ_DATE_HOURS = 24;
private boolean isFuzzyDuplicateV2(TradeStatistics3 tradeStatistics1, TradeStatistics3 tradeStatistics2) {
boolean isWithinFuzzedHours = Math.abs(tradeStatistics1.getDate().getTime() - tradeStatistics2.getDate().getTime()) <= TimeUnit.HOURS.toMillis(FUZZ_DATE_HOURS);
boolean isWithinFuzzedAmount = Math.abs(tradeStatistics1.getAmount() - tradeStatistics2.getAmount()) <= FUZZ_AMOUNT_PCT * tradeStatistics1.getAmount();
return isWithinFuzzedHours && isWithinFuzzedAmount;
} }
public ObservableSet<TradeStatistics3> getObservableTradeStatisticsSet() { public ObservableSet<TradeStatistics3> getObservableTradeStatisticsSet() {