From 7dd15914d03bd4ac1a5e4c6459070fea6e6eb3e5 Mon Sep 17 00:00:00 2001 From: Konstantin Ullrich Date: Mon, 24 Jun 2024 18:38:32 +0200 Subject: [PATCH] Normalize text to fix french (#1504) * Normalize text to fix french * Normalize text to fix french * Fix French? * Fix French? * Fix French? * Polyseed v0.0.5 --- cw_bitcoin/lib/bitcoin_mnemonic.dart | 119 +----------------- cw_bitcoin/pubspec.lock | 2 +- cw_bitcoin/pubspec.yaml | 1 - cw_core/lib/utils/text_normalizer.dart | 117 +++++++++++++++++ cw_core/pubspec.lock | 8 ++ cw_core/pubspec.yaml | 1 + cw_monero/pubspec.yaml | 2 +- lib/core/seed_validator.dart | 15 ++- lib/src/widgets/seed_widget.dart | 12 +- .../validable_annotated_editable_text.dart | 9 +- pubspec_base.yaml | 4 +- 11 files changed, 153 insertions(+), 137 deletions(-) create mode 100644 cw_core/lib/utils/text_normalizer.dart diff --git a/cw_bitcoin/lib/bitcoin_mnemonic.dart b/cw_bitcoin/lib/bitcoin_mnemonic.dart index 4a01d6ddc..905aece28 100644 --- a/cw_bitcoin/lib/bitcoin_mnemonic.dart +++ b/cw_bitcoin/lib/bitcoin_mnemonic.dart @@ -2,9 +2,9 @@ import 'dart:convert'; import 'dart:math'; import 'dart:typed_data'; import 'package:crypto/crypto.dart'; -import 'package:unorm_dart/unorm_dart.dart' as unorm; import 'package:cryptography/cryptography.dart' as cryptography; import 'package:cw_core/sec_random_native.dart'; +import 'package:cw_core/utils/text_normalizer.dart'; const segwit = '100'; final wordlist = englishWordlist; @@ -137,121 +137,6 @@ bool validateMnemonic(String mnemonic, {String prefix = segwit}) { } } -final COMBININGCODEPOINTS = combiningcodepoints(); - -List combiningcodepoints() { - final source = '300:34e|350:36f|483:487|591:5bd|5bf|5c1|5c2|5c4|5c5|5c7|610:61a|64b:65f|670|' + - '6d6:6dc|6df:6e4|6e7|6e8|6ea:6ed|711|730:74a|7eb:7f3|816:819|81b:823|825:827|' + - '829:82d|859:85b|8d4:8e1|8e3:8ff|93c|94d|951:954|9bc|9cd|a3c|a4d|abc|acd|b3c|' + - 'b4d|bcd|c4d|c55|c56|cbc|ccd|d4d|dca|e38:e3a|e48:e4b|eb8|eb9|ec8:ecb|f18|f19|' + - 'f35|f37|f39|f71|f72|f74|f7a:f7d|f80|f82:f84|f86|f87|fc6|1037|1039|103a|108d|' + - '135d:135f|1714|1734|17d2|17dd|18a9|1939:193b|1a17|1a18|1a60|1a75:1a7c|1a7f|' + - '1ab0:1abd|1b34|1b44|1b6b:1b73|1baa|1bab|1be6|1bf2|1bf3|1c37|1cd0:1cd2|' + - '1cd4:1ce0|1ce2:1ce8|1ced|1cf4|1cf8|1cf9|1dc0:1df5|1dfb:1dff|20d0:20dc|20e1|' + - '20e5:20f0|2cef:2cf1|2d7f|2de0:2dff|302a:302f|3099|309a|a66f|a674:a67d|a69e|' + - 'a69f|a6f0|a6f1|a806|a8c4|a8e0:a8f1|a92b:a92d|a953|a9b3|a9c0|aab0|aab2:aab4|' + - 'aab7|aab8|aabe|aabf|aac1|aaf6|abed|fb1e|fe20:fe2f|101fd|102e0|10376:1037a|' + - '10a0d|10a0f|10a38:10a3a|10a3f|10ae5|10ae6|11046|1107f|110b9|110ba|11100:11102|' + - '11133|11134|11173|111c0|111ca|11235|11236|112e9|112ea|1133c|1134d|11366:1136c|' + - '11370:11374|11442|11446|114c2|114c3|115bf|115c0|1163f|116b6|116b7|1172b|11c3f|' + - '16af0:16af4|16b30:16b36|1bc9e|1d165:1d169|1d16d:1d172|1d17b:1d182|1d185:1d18b|' + - '1d1aa:1d1ad|1d242:1d244|1e000:1e006|1e008:1e018|1e01b:1e021|1e023|1e024|' + - '1e026:1e02a|1e8d0:1e8d6|1e944:1e94a'; - - return source.split('|').map((e) { - if (e.contains(':')) { - return e.split(':').map((hex) => int.parse(hex, radix: 16)); - } - - return int.parse(e, radix: 16); - }).fold([], (List acc, element) { - if (element is List) { - for (var i = element[0] as int; i <= (element[1] as int); i++) {} - } else if (element is int) { - acc.add(element); - } - - return acc; - }).toList(); -} - -String removeCombiningCharacters(String source) { - return source - .split('') - .where((char) => !COMBININGCODEPOINTS.contains(char.codeUnits.first)) - .join(''); -} - -bool isCJK(String char) { - final n = char.codeUnitAt(0); - - for (var x in CJKINTERVALS) { - final imin = x[0] as num; - final imax = x[1] as num; - - if (n >= imin && n <= imax) return true; - } - - return false; -} - -String removeCJKSpaces(String source) { - final splitted = source.split(''); - final filtered = []; - - for (var i = 0; i < splitted.length; i++) { - final char = splitted[i]; - final isSpace = char.trim() == ''; - final prevIsCJK = i != 0 && isCJK(splitted[i - 1]); - final nextIsCJK = i != splitted.length - 1 && isCJK(splitted[i + 1]); - - if (!(isSpace && prevIsCJK && nextIsCJK)) { - filtered.add(char); - } - } - - return filtered.join(''); -} - -String normalizeText(String source) { - final res = - removeCombiningCharacters(unorm.nfkd(source).toLowerCase()).trim().split('/\s+/').join(' '); - - return removeCJKSpaces(res); -} - -const CJKINTERVALS = [ - [0x4e00, 0x9fff, 'CJK Unified Ideographs'], - [0x3400, 0x4dbf, 'CJK Unified Ideographs Extension A'], - [0x20000, 0x2a6df, 'CJK Unified Ideographs Extension B'], - [0x2a700, 0x2b73f, 'CJK Unified Ideographs Extension C'], - [0x2b740, 0x2b81f, 'CJK Unified Ideographs Extension D'], - [0xf900, 0xfaff, 'CJK Compatibility Ideographs'], - [0x2f800, 0x2fa1d, 'CJK Compatibility Ideographs Supplement'], - [0x3190, 0x319f, 'Kanbun'], - [0x2e80, 0x2eff, 'CJK Radicals Supplement'], - [0x2f00, 0x2fdf, 'CJK Radicals'], - [0x31c0, 0x31ef, 'CJK Strokes'], - [0x2ff0, 0x2fff, 'Ideographic Description Characters'], - [0xe0100, 0xe01ef, 'Variation Selectors Supplement'], - [0x3100, 0x312f, 'Bopomofo'], - [0x31a0, 0x31bf, 'Bopomofo Extended'], - [0xff00, 0xffef, 'Halfwidth and Fullwidth Forms'], - [0x3040, 0x309f, 'Hiragana'], - [0x30a0, 0x30ff, 'Katakana'], - [0x31f0, 0x31ff, 'Katakana Phonetic Extensions'], - [0x1b000, 0x1b0ff, 'Kana Supplement'], - [0xac00, 0xd7af, 'Hangul Syllables'], - [0x1100, 0x11ff, 'Hangul Jamo'], - [0xa960, 0xa97f, 'Hangul Jamo Extended A'], - [0xd7b0, 0xd7ff, 'Hangul Jamo Extended B'], - [0x3130, 0x318f, 'Hangul Compatibility Jamo'], - [0xa4d0, 0xa4ff, 'Lisu'], - [0x16f00, 0x16f9f, 'Miao'], - [0xa000, 0xa48f, 'Yi Syllables'], - [0xa490, 0xa4cf, 'Yi Radicals'], -]; - final englishWordlist = [ 'abandon', 'ability', @@ -2301,4 +2186,4 @@ final englishWordlist = [ 'zero', 'zone', 'zoo' -]; \ No newline at end of file +]; diff --git a/cw_bitcoin/pubspec.lock b/cw_bitcoin/pubspec.lock index 997ed9452..ffc224e93 100644 --- a/cw_bitcoin/pubspec.lock +++ b/cw_bitcoin/pubspec.lock @@ -863,7 +863,7 @@ packages: source: hosted version: "1.3.2" unorm_dart: - dependency: "direct main" + dependency: transitive description: name: unorm_dart sha256: "5b35bff83fce4d76467641438f9e867dc9bcfdb8c1694854f230579d68cd8f4b" diff --git a/cw_bitcoin/pubspec.yaml b/cw_bitcoin/pubspec.yaml index 40f3c6e29..66c5729e8 100644 --- a/cw_bitcoin/pubspec.yaml +++ b/cw_bitcoin/pubspec.yaml @@ -28,7 +28,6 @@ dependencies: url: https://github.com/cake-tech/bitbox-flutter.git ref: Add-Support-For-OP-Return-data rxdart: ^0.27.5 - unorm_dart: ^0.2.0 cryptography: ^2.0.5 bitcoin_base: git: diff --git a/cw_core/lib/utils/text_normalizer.dart b/cw_core/lib/utils/text_normalizer.dart new file mode 100644 index 000000000..5aeb5fd21 --- /dev/null +++ b/cw_core/lib/utils/text_normalizer.dart @@ -0,0 +1,117 @@ +import 'package:unorm_dart/unorm_dart.dart' as unorm; + +const CJKINTERVALS = [ + [0x4e00, 0x9fff, 'CJK Unified Ideographs'], + [0x3400, 0x4dbf, 'CJK Unified Ideographs Extension A'], + [0x20000, 0x2a6df, 'CJK Unified Ideographs Extension B'], + [0x2a700, 0x2b73f, 'CJK Unified Ideographs Extension C'], + [0x2b740, 0x2b81f, 'CJK Unified Ideographs Extension D'], + [0xf900, 0xfaff, 'CJK Compatibility Ideographs'], + [0x2f800, 0x2fa1d, 'CJK Compatibility Ideographs Supplement'], + [0x3190, 0x319f, 'Kanbun'], + [0x2e80, 0x2eff, 'CJK Radicals Supplement'], + [0x2f00, 0x2fdf, 'CJK Radicals'], + [0x31c0, 0x31ef, 'CJK Strokes'], + [0x2ff0, 0x2fff, 'Ideographic Description Characters'], + [0xe0100, 0xe01ef, 'Variation Selectors Supplement'], + [0x3100, 0x312f, 'Bopomofo'], + [0x31a0, 0x31bf, 'Bopomofo Extended'], + [0xff00, 0xffef, 'Halfwidth and Fullwidth Forms'], + [0x3040, 0x309f, 'Hiragana'], + [0x30a0, 0x30ff, 'Katakana'], + [0x31f0, 0x31ff, 'Katakana Phonetic Extensions'], + [0x1b000, 0x1b0ff, 'Kana Supplement'], + [0xac00, 0xd7af, 'Hangul Syllables'], + [0x1100, 0x11ff, 'Hangul Jamo'], + [0xa960, 0xa97f, 'Hangul Jamo Extended A'], + [0xd7b0, 0xd7ff, 'Hangul Jamo Extended B'], + [0x3130, 0x318f, 'Hangul Compatibility Jamo'], + [0xa4d0, 0xa4ff, 'Lisu'], + [0x16f00, 0x16f9f, 'Miao'], + [0xa000, 0xa48f, 'Yi Syllables'], + [0xa490, 0xa4cf, 'Yi Radicals'], +]; + +final COMBININGCODEPOINTS = combiningcodepoints(); + +List combiningcodepoints() { + final source = '300:34e|350:36f|483:487|591:5bd|5bf|5c1|5c2|5c4|5c5|5c7|610:61a|64b:65f|670|' + + '6d6:6dc|6df:6e4|6e7|6e8|6ea:6ed|711|730:74a|7eb:7f3|816:819|81b:823|825:827|' + + '829:82d|859:85b|8d4:8e1|8e3:8ff|93c|94d|951:954|9bc|9cd|a3c|a4d|abc|acd|b3c|' + + 'b4d|bcd|c4d|c55|c56|cbc|ccd|d4d|dca|e38:e3a|e48:e4b|eb8|eb9|ec8:ecb|f18|f19|' + + 'f35|f37|f39|f71|f72|f74|f7a:f7d|f80|f82:f84|f86|f87|fc6|1037|1039|103a|108d|' + + '135d:135f|1714|1734|17d2|17dd|18a9|1939:193b|1a17|1a18|1a60|1a75:1a7c|1a7f|' + + '1ab0:1abd|1b34|1b44|1b6b:1b73|1baa|1bab|1be6|1bf2|1bf3|1c37|1cd0:1cd2|' + + '1cd4:1ce0|1ce2:1ce8|1ced|1cf4|1cf8|1cf9|1dc0:1df5|1dfb:1dff|20d0:20dc|20e1|' + + '20e5:20f0|2cef:2cf1|2d7f|2de0:2dff|302a:302f|3099|309a|a66f|a674:a67d|a69e|' + + 'a69f|a6f0|a6f1|a806|a8c4|a8e0:a8f1|a92b:a92d|a953|a9b3|a9c0|aab0|aab2:aab4|' + + 'aab7|aab8|aabe|aabf|aac1|aaf6|abed|fb1e|fe20:fe2f|101fd|102e0|10376:1037a|' + + '10a0d|10a0f|10a38:10a3a|10a3f|10ae5|10ae6|11046|1107f|110b9|110ba|11100:11102|' + + '11133|11134|11173|111c0|111ca|11235|11236|112e9|112ea|1133c|1134d|11366:1136c|' + + '11370:11374|11442|11446|114c2|114c3|115bf|115c0|1163f|116b6|116b7|1172b|11c3f|' + + '16af0:16af4|16b30:16b36|1bc9e|1d165:1d169|1d16d:1d172|1d17b:1d182|1d185:1d18b|' + + '1d1aa:1d1ad|1d242:1d244|1e000:1e006|1e008:1e018|1e01b:1e021|1e023|1e024|' + + '1e026:1e02a|1e8d0:1e8d6|1e944:1e94a'; + + return source.split('|').map((e) { + if (e.contains(':')) { + return e.split(':').map((hex) => int.parse(hex, radix: 16)); + } + + return int.parse(e, radix: 16); + }).fold([], (List acc, element) { + if (element is List) { + for (var i = element[0] as int; i <= (element[1] as int); i++) {} + } else if (element is int) { + acc.add(element); + } + + return acc; + }).toList(); +} + +String _removeCombiningCharacters(String source) { + return source + .split('') + .where((char) => !COMBININGCODEPOINTS.contains(char.codeUnits.first)) + .join(''); +} + +String _removeCJKSpaces(String source) { + final splitted = source.split(''); + final filtered = []; + + for (var i = 0; i < splitted.length; i++) { + final char = splitted[i]; + final isSpace = char.trim() == ''; + final prevIsCJK = i != 0 && _isCJK(splitted[i - 1]); + final nextIsCJK = i != splitted.length - 1 && _isCJK(splitted[i + 1]); + + if (!(isSpace && prevIsCJK && nextIsCJK)) { + filtered.add(char); + } + } + + return filtered.join(''); +} + +bool _isCJK(String char) { + final n = char.codeUnitAt(0); + + for (var x in CJKINTERVALS) { + final imin = x[0] as num; + final imax = x[1] as num; + + if (n >= imin && n <= imax) return true; + } + + return false; +} + +/// This method normalize text which transforms Unicode text into an equivalent decomposed form, allowing for easier sorting and searching of text. +String normalizeText(String source) { + final res = + _removeCombiningCharacters(unorm.nfkd(source).toLowerCase()).trim().split('/\s+/').join(' '); + + return _removeCJKSpaces(res); +} diff --git a/cw_core/pubspec.lock b/cw_core/pubspec.lock index abfdbfc58..88fddae09 100644 --- a/cw_core/pubspec.lock +++ b/cw_core/pubspec.lock @@ -656,6 +656,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.3.2" + unorm_dart: + dependency: "direct main" + description: + name: unorm_dart + sha256: "5b35bff83fce4d76467641438f9e867dc9bcfdb8c1694854f230579d68cd8f4b" + url: "https://pub.dev" + source: hosted + version: "0.2.0" vector_math: dependency: transitive description: diff --git a/cw_core/pubspec.yaml b/cw_core/pubspec.yaml index 51d671dc7..0513b122c 100644 --- a/cw_core/pubspec.yaml +++ b/cw_core/pubspec.yaml @@ -20,6 +20,7 @@ dependencies: intl: ^0.18.0 encrypt: ^5.0.1 socks5_proxy: ^1.0.4 + unorm_dart: ^0.3.0 # tor: # git: # url: https://github.com/cake-tech/tor.git diff --git a/cw_monero/pubspec.yaml b/cw_monero/pubspec.yaml index c49a541ab..56f5d2fa6 100644 --- a/cw_monero/pubspec.yaml +++ b/cw_monero/pubspec.yaml @@ -19,7 +19,7 @@ dependencies: flutter_mobx: ^2.0.6+1 intl: ^0.18.0 encrypt: ^5.0.1 - polyseed: ^0.0.2 + polyseed: ^0.0.5 cw_core: path: ../cw_core diff --git a/lib/core/seed_validator.dart b/lib/core/seed_validator.dart index 3e3445757..9fb839ea2 100644 --- a/lib/core/seed_validator.dart +++ b/lib/core/seed_validator.dart @@ -1,15 +1,15 @@ import 'package:cake_wallet/bitcoin/bitcoin.dart'; -import 'package:cake_wallet/ethereum/ethereum.dart'; -import 'package:cake_wallet/haven/haven.dart'; import 'package:cake_wallet/core/validator.dart'; import 'package:cake_wallet/entities/mnemonic_item.dart'; +import 'package:cake_wallet/ethereum/ethereum.dart'; +import 'package:cake_wallet/haven/haven.dart'; +import 'package:cake_wallet/monero/monero.dart'; +import 'package:cake_wallet/nano/nano.dart'; import 'package:cake_wallet/polygon/polygon.dart'; import 'package:cake_wallet/solana/solana.dart'; import 'package:cake_wallet/tron/tron.dart'; -import 'package:cw_core/wallet_type.dart'; -import 'package:cake_wallet/monero/monero.dart'; -import 'package:cake_wallet/nano/nano.dart'; import 'package:cake_wallet/utils/language_list.dart'; +import 'package:cw_core/wallet_type.dart'; class SeedValidator extends Validator { SeedValidator({required this.type, required this.language}) @@ -41,13 +41,16 @@ class SeedValidator extends Validator { return polygon!.getPolygonWordList(language); case WalletType.solana: return solana!.getSolanaWordList(language); - case WalletType.tron: + case WalletType.tron: return tron!.getTronWordList(language); default: return []; } } + static bool needsNormalization(String language) => + ["POLYSEED_French", "POLYSEED_Spanish"].contains(language); + static List getBitcoinWordList(String language) { assert(language.toLowerCase() == LanguageList.english.toLowerCase()); return bitcoin!.getWordList(); diff --git a/lib/src/widgets/seed_widget.dart b/lib/src/widgets/seed_widget.dart index bf9a85b32..d71208bb2 100644 --- a/lib/src/widgets/seed_widget.dart +++ b/lib/src/widgets/seed_widget.dart @@ -1,11 +1,11 @@ +import 'package:cake_wallet/core/seed_validator.dart'; import 'package:cake_wallet/generated/i18n.dart'; -import 'package:cake_wallet/themes/extensions/cake_text_theme.dart'; -import 'package:cw_core/wallet_type.dart'; import 'package:cake_wallet/src/widgets/validable_annotated_editable_text.dart'; +import 'package:cake_wallet/themes/extensions/cake_text_theme.dart'; +import 'package:cake_wallet/themes/extensions/send_page_theme.dart'; +import 'package:cw_core/wallet_type.dart'; import 'package:flutter/material.dart'; import 'package:flutter/services.dart'; -import 'package:cake_wallet/core/seed_validator.dart'; -import 'package:cake_wallet/themes/extensions/send_page_theme.dart'; class SeedWidget extends StatefulWidget { SeedWidget({ @@ -23,7 +23,6 @@ class SeedWidget extends StatefulWidget { } class SeedWidgetState extends State { - SeedWidgetState(String language, this.type) : controller = TextEditingController(), focusNode = FocusNode(), @@ -46,6 +45,7 @@ class SeedWidgetState extends State { final FocusNode focusNode; final WalletType type; List words; + bool normalizeSeed = false; bool _showPlaceholder; String get text => controller.text; @@ -60,6 +60,7 @@ class SeedWidgetState extends State { void changeSeedLanguage(String language) { setState(() { words = SeedValidator.getWordList(type: type, language: language); + normalizeSeed = SeedValidator.needsNormalization(language); }); } @@ -97,6 +98,7 @@ class SeedWidgetState extends State { focusNode: focusNode, controller: controller, words: words, + normalizeSeed: normalizeSeed, textStyle: TextStyle( color: Theme.of(context).extension()!.titleColor, backgroundColor: Colors.transparent, diff --git a/lib/src/widgets/validable_annotated_editable_text.dart b/lib/src/widgets/validable_annotated_editable_text.dart index 134eb16a8..a7777961d 100644 --- a/lib/src/widgets/validable_annotated_editable_text.dart +++ b/lib/src/widgets/validable_annotated_editable_text.dart @@ -1,6 +1,6 @@ +import 'package:cw_core/utils/text_normalizer.dart'; import 'package:flutter/material.dart'; - extension Compare on Comparable { bool operator <=(T other) => compareTo(other) <= 0; bool operator >=(T other) => compareTo(other) >= 0; @@ -39,6 +39,7 @@ class ValidatableAnnotatedEditableText extends EditableText { required this.validStyle, required this.invalidStyle, required this.words, + this.normalizeSeed = false, TextStyle textStyle = const TextStyle( color: Colors.black, backgroundColor: Colors.transparent, @@ -74,6 +75,7 @@ class ValidatableAnnotatedEditableText extends EditableText { showSelectionHandles: true, showCursor: true); + final bool normalizeSeed; final List words; final TextStyle validStyle; final TextStyle invalidStyle; @@ -137,7 +139,8 @@ class ValidatableAnnotatedEditableTextState extends EditableTextState { return result; } - bool validate(String source) => widget.words.indexOf(source) >= 0; + bool validate(String source) => + widget.words.indexOf(widget.normalizeSeed ? normalizeText(source) : source) >= 0; List range(String pattern, String source) { final result = []; @@ -173,4 +176,4 @@ class ValidatableAnnotatedEditableTextState extends EditableTextState { return TextSpan(style: widget.style, text: text); } -} \ No newline at end of file +} diff --git a/pubspec_base.yaml b/pubspec_base.yaml index 47407f833..e00527d9f 100644 --- a/pubspec_base.yaml +++ b/pubspec_base.yaml @@ -60,8 +60,6 @@ dependencies: git: url: https://github.com/cake-tech/flutter_file_picker.git ref: master - unorm_dart: ^0.2.0 - # check unorm_dart for usage and for replace permission_handler: ^10.0.0 device_display_brightness: git: @@ -100,7 +98,7 @@ dependencies: # ref: main socks5_proxy: ^1.0.4 flutter_svg: ^2.0.9 - polyseed: ^0.0.4 + polyseed: ^0.0.5 nostr_tools: ^1.0.9 solana: ^0.30.1 bitcoin_base: