mirror of
https://github.com/monero-project/monero.git
synced 2025-01-10 04:44:59 +00:00
epee: support unicode in parsed strings
This commit is contained in:
parent
310c26824d
commit
eeca5ca0c8
3 changed files with 78 additions and 22 deletions
|
@ -31,6 +31,9 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <boost/utility/string_ref.hpp>
|
#include <boost/utility/string_ref.hpp>
|
||||||
|
|
||||||
|
#undef MONERO_DEFAULT_LOG_CATEGORY
|
||||||
|
#define MONERO_DEFAULT_LOG_CATEGORY "serialization"
|
||||||
|
|
||||||
namespace epee
|
namespace epee
|
||||||
{
|
{
|
||||||
namespace misc_utils
|
namespace misc_utils
|
||||||
|
@ -62,6 +65,26 @@ namespace misc_utils
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Hex-digit decoding table, indexed by byte value (0..255).
// '0'-'9' decode to 0-9, 'A'-'F' and 'a'-'f' decode to 10-15.
// Every other byte holds the sentinel 0xff, which callers compare against
// to reject input that is not a hexadecimal digit.
static const constexpr unsigned char isx[256] =
{
  /* 0x00-0x0f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x10-0x1f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x20-0x2f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x30-0x3f: '0'-'9' */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x40-0x4f: 'A'-'F' */ 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x50-0x5f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x60-0x6f: 'a'-'f' */ 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x70-0x7f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x80-0x8f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x90-0x9f          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xa0-0xaf          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xb0-0xbf          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xc0-0xcf          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xd0-0xdf          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xe0-0xef          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xf0-0xff          */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
|
||||||
|
|
||||||
inline bool isspace(char c)
|
inline bool isspace(char c)
|
||||||
{
|
{
|
||||||
return lut[(uint8_t)c] & 8;
|
return lut[(uint8_t)c] & 8;
|
||||||
|
@ -162,6 +185,42 @@ namespace misc_utils
|
||||||
val.push_back('\\');break;
|
val.push_back('\\');break;
|
||||||
case '/': //Slash character
|
case '/': //Slash character
|
||||||
val.push_back('/');break;
|
val.push_back('/');break;
|
||||||
|
case 'u': //Unicode code point
|
||||||
|
if (buf_end - it < 4)
|
||||||
|
{
|
||||||
|
ASSERT_MES_AND_THROW("Invalid Unicode escape sequence");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uint32_t dst = 0;
|
||||||
|
for (int i = 0; i < 4; ++i)
|
||||||
|
{
|
||||||
|
const unsigned char tmp = isx[(int)*++it];
|
||||||
|
CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding");
|
||||||
|
dst = dst << 4 | tmp;
|
||||||
|
}
|
||||||
|
// encode as UTF-8
|
||||||
|
if (dst <= 0x7f)
|
||||||
|
{
|
||||||
|
val.push_back(dst);
|
||||||
|
}
|
||||||
|
else if (dst <= 0x7ff)
|
||||||
|
{
|
||||||
|
val.push_back(0xc0 | (dst >> 6));
|
||||||
|
val.push_back(0x80 | (dst & 0x3f));
|
||||||
|
}
|
||||||
|
else if (dst <= 0xffff)
|
||||||
|
{
|
||||||
|
val.push_back(0xe0 | (dst >> 12));
|
||||||
|
val.push_back(0x80 | ((dst >> 6) & 0x3f));
|
||||||
|
val.push_back(0x80 | (dst & 0x3f));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ASSERT_MES_AND_THROW("Unicode code point is out or range");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
val.push_back(*it);
|
val.push_back(*it);
|
||||||
LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\"");
|
LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\"");
|
||||||
|
|
|
@ -59,26 +59,6 @@
|
||||||
#pragma comment (lib, "Rpcrt4.lib")
|
#pragma comment (lib, "Rpcrt4.lib")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const constexpr unsigned char isx[256] =
|
|
||||||
{
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace epee
|
namespace epee
|
||||||
{
|
{
|
||||||
namespace string_tools
|
namespace string_tools
|
||||||
|
@ -99,10 +79,10 @@ namespace string_tools
|
||||||
for(size_t i = 0; i < s.size(); i += 2)
|
for(size_t i = 0; i < s.size(); i += 2)
|
||||||
{
|
{
|
||||||
int tmp = *src++;
|
int tmp = *src++;
|
||||||
tmp = isx[tmp];
|
tmp = epee::misc_utils::parse::isx[tmp];
|
||||||
if (tmp == 0xff) return false;
|
if (tmp == 0xff) return false;
|
||||||
int t2 = *src++;
|
int t2 = *src++;
|
||||||
t2 = isx[t2];
|
t2 = epee::misc_utils::parse::isx[t2];
|
||||||
if (t2 == 0xff) return false;
|
if (t2 == 0xff) return false;
|
||||||
*dst++ = (tmp << 4) | t2;
|
*dst++ = (tmp << 4) | t2;
|
||||||
}
|
}
|
||||||
|
|
|
@ -946,3 +946,20 @@ TEST(parsing, number)
|
||||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||||
ASSERT_EQ(val, "+9.34e+03");
|
ASSERT_EQ(val, "+9.34e+03");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks \uXXXX escape handling in epee::misc_utils::parse::match_string:
// truncated or non-hex escapes must be rejected, and valid escapes must be
// decoded to the expected UTF-8 byte sequence for each encoded length
// (1, 2 and 3 bytes), including the boundaries between those lengths.
TEST(parsing, unicode)
{
  // One scenario: the quoted input handed to match_string, whether parsing
  // must succeed, and (only when it succeeds) the exact bytes expected.
  struct unicode_case
  {
    std::string input;
    bool ok;
    std::string expected;
  };

  const unicode_case cases[] = {
    // empty string and one-byte (ASCII range) code points
    {"\"\"", true, ""},
    {"\"\\u0000\"", true, std::string(1, '\0')},
    {"\"\\u0020\"", true, " "},
    {"\"\\u007f\"", true, "\x7f"},
    // fewer than four hex digits before the closing quote
    {"\"\\u1\"", false, ""},
    {"\"\\u12\"", false, ""},
    {"\"\\u123\"", false, ""},
    // characters that are not hex digits
    {"\"\\u12g4\"", false, ""},
    {"\"\\uzzzz\"", false, ""},
    // two-byte encodings, up to the 0x7ff boundary
    {"\"\\u0080\"", true, "\xc2\x80"},
    {"\"\\u00e9\"", true, "\xc3\xa9"},
    {"\"\\u07ff\"", true, "\xdf\xbf"},
    // three-byte encodings, from the 0x800 boundary to 0xffff
    {"\"\\u0800\"", true, "\xe0\xa0\x80"},
    {"\"\\u1234\"", true, "ሴ"},
    {"\"\\uffff\"", true, "\xef\xbf\xbf"},
    // hex digits are accepted in either case
    {"\"\\uabcd\"", true, "\xea\xaf\x8d"},
    {"\"\\uABCD\"", true, "\xea\xaf\x8d"},
    // escapes mixed with literal text, and several escapes in a row
    {"\"foo\\u1234bar\"", true, "fooሴbar"},
    {"\"\\u3042\\u307e\\u3084\\u304b\\u3059\"", true, "あまやかす"},
  };

  for (const unicode_case &c : cases)
  {
    std::string bs;
    std::string::const_iterator si = c.input.begin();
    const bool ok = epee::misc_utils::parse::match_string(si, c.input.end(), bs);
    ASSERT_EQ(ok, c.ok) << "input: " << c.input;
    if (c.ok)
    {
      ASSERT_EQ(bs, c.expected) << "input: " << c.input;
    }
  }
}
|
||||||
|
|
Loading…
Reference in a new issue