Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions lib/input/src/parse/lexer_impl.xch
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace CppClingo::Input::Parse {
WS = [\t\r\n ]*;
IDENTIFIER = [_']*[a-z]['A-Za-z0-9_]*;
VARIABLE = [_']*[A-Z]['A-Za-z0-9_]*;
STRING = "\"" ([^\\"\n\000]|"\\\""|"\\\\"|"\\n")* "\"";
UNICODE = "\\u{" [0-9A-Fa-f]+ "}";
STRING = "\"" ([^\\"\n\000]|"\\\""|"\\\\"|"\\n"|"\\t"|"\\r"|UNICODE)* "\"";
INCSTRING = "<" IDENTIFIER ">";
THEORYOP = [/!<=>+\-*\\?&@|:;~^.]+;
KEYWORD = "#" [a-zA-Z0-9_]*;
Expand All @@ -49,7 +50,7 @@ namespace CppClingo::Input::Parse {
SIGN = [-+ ];
GROUPING = [,_];
TYPE = [bcdoxXns];
FLIT = ([^\\"\n{}\000] | "\\\"" | "\\\\" | "\\n" | "{{" | "}}")*;
FLIT = ([^\\"\n{}\000] | "\\\"" | "\\\\" | "\\n" | "\\t" | "\\r" | UNICODE | "{{" | "}}")*;
FSPEC = ACCESSOR* CONVERSION? ([:] (DOT? ALIGN)? SIGN? [#]? POS? GROUPING? TYPE?)?;
*/

Expand Down
8 changes: 6 additions & 2 deletions lib/input/src/parse/term.cc
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ auto cont_fstr(ParserState &state) -> bool {
if (auto view = state.view().substr(0, state.view().size() - 1); !view.empty()) {
auto loc =
Location{state.cursor_pos(), Position(state.file(), state.token_line(), state.token_column() - 1)};
str.fields.emplace_back(std::in_place_type<FormatFieldLiteral>, loc, state.store().string(view));
auto &buf = state.buf(view.size());
Util::unquote(view, std::back_inserter(buf), true);
str.fields.emplace_back(std::in_place_type<FormatFieldLiteral>, loc, state.store().string(buf));
}
state.consume();
state.push(Prod::fstr_field);
Expand All @@ -214,7 +216,9 @@ auto cont_fstr(ParserState &state) -> bool {
if (auto view = state.view().substr(0, state.view().size() - 1); !view.empty()) {
auto loc =
Location{state.cursor_pos(), Position(state.file(), state.token_line(), state.token_column() - 1)};
str.fields.emplace_back(std::in_place_type<FormatFieldLiteral>, loc, state.store().string(view));
auto &buf = state.buf(view.size());
Util::unquote(view, std::back_inserter(buf), true);
str.fields.emplace_back(std::in_place_type<FormatFieldLiteral>, loc, state.store().string(buf));
}
auto fields = std::move(str.fields);
auto start = Position{state.file(), str.line, str.column};
Expand Down
15 changes: 15 additions & 0 deletions lib/input/tests/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,21 @@ TEST_CASE("parsev2") {
REQUIRE(parse("(a,;a)") == "(a,;a)");
REQUIRE(parse("f(;)") == "f(;)");
REQUIRE(parse("f(\"x\")") == "f(\"x\")");
REQUIRE(parse("f(\"\\n\")") == "f(\"\\n\")");
REQUIRE(parse("f(\"\\t\")") == "f(\"\\t\")");
REQUIRE(parse("f(\"\\r\")") == "f(\"\\r\")");
REQUIRE(parse("f(\"\\u{041}\")") == "f(\"A\")");
REQUIRE(parse("f(\"\\u{2665}\")") == "f(\"♥\")");
REQUIRE(parse("f(\"\\u{1F602}\")") == "f(\"😂\")");
REQUIRE(parse("f(\"\\u{FF}\")") == "f(\"\xC3\xBF\")");
REQUIRE(parse("f(\"\\u{1F600}\")") == "f(\"\xF0\x9F\x98\x80\")");
REQUIRE(parse("f(\"\\u{10FFFF}\")") == "f(\"\xF4\x8F\xBF\xBF\")");
REQUIRE(parse("f(\"hello\\u{00E9}\\nworld\")") == "f(\"helloé\\nworld\")");
REQUIRE(parse("f\"hello\\nworld\"") == "f\"hello\\nworld\"");
REQUIRE(parse("f\"hello\\tworld\"") == "f\"hello\\tworld\"");
REQUIRE(parse("f\"hello\\rworld\"") == "f\"hello\\rworld\"");
REQUIRE(parse("f\"{{hello}}\"") == "f\"{{hello}}\"");
REQUIRE(parse("f\"hello\\u{00E9}\\nworld\"") == "f\"helloé\\nworld\"");
REQUIRE(parse("a+b+c") == "a+b+c");
REQUIRE(parse("a*b+c") == "a*b+c");
REQUIRE(parse("a+b*c") == "a+b*c");
Expand Down
5 changes: 1 addition & 4 deletions lib/python-api/tests/test_write_aspif.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,7 @@ def test_buffer_inc(self):
ctl.parse_string("""{c}. #show d : a, c.""")
ctl.ground()
ctl.solve()
assert (
ctl.buffer
== """\
assert ctl.buffer == """\
asp 2 0 0 incremental
1 1 1 1 0 0
4 1 0 1 b
Expand All @@ -135,7 +133,6 @@ def test_buffer_inc(self):
4 0 2 1 c
0
"""
)

def test_rule(self):
"""
Expand Down
2 changes: 2 additions & 0 deletions lib/util/include/clingo/util/print.hh
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ class PrintQuoted {
out << "\\n";
} else if (c == '\t') {
out << "\\t";
} else if (c == '\r') {
out << "\\r";
} else if (c == '"') {
out << "\\\"";
} else {
Expand Down
107 changes: 76 additions & 31 deletions lib/util/include/clingo/util/string.hh
Original file line number Diff line number Diff line change
@@ -1,52 +1,94 @@
#pragma once

#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <string_view>

namespace CppClingo::Util {

void quote(std::string_view in, auto out) {
for (auto c : in) {
switch (c) {
case '\n': {
*out++ = '\\';
*out++ = 'n';
break;
}
case '\t': {
*out++ = '\\';
*out++ = 't';
break;
}
case '\\': {
*out++ = '\\';
*out++ = '\\';
break;
}
case '"': {
*out++ = '\\';
*out++ = '"';
break;
}
default: {
*out++ = c;
break;
namespace Detail {

// NOLINTBEGIN(readability-magic-numbers)

/// Convert a single hexadecimal digit to its numeric value.
///
/// @param c a character from `0-9`, `A-F`, or `a-f`
/// @return the digit's value in the range [0, 15]
/// @throws std::invalid_argument if `c` is not a hexadecimal digit
inline auto hex_val(char c) -> uint32_t {
    if (c >= '0' && c <= '9') {
        return static_cast<uint32_t>(c - '0');
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + static_cast<uint32_t>(c - 'A');
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + static_cast<uint32_t>(c - 'a');
    }
    // Anything that fell through the three ranges above is not a hex digit.
    throw std::invalid_argument("invalid hex character");
}

/// Write the UTF-8 encoding of a Unicode scalar value to an output iterator.
///
/// The iterator is taken by forwarding reference: when the caller passes an
/// lvalue, that iterator is advanced past the bytes written, so output
/// produced afterwards does not overwrite them. Temporaries such as
/// `std::back_inserter(buf)` are accepted as well.
///
/// @param cp  the code point to encode
/// @param out output iterator accepting `char`
/// @throws std::invalid_argument if `cp` is greater than U+10FFFF or lies in
///         the surrogate range U+D800..U+DFFF, which has no UTF-8 encoding
template <class Out> auto encode_utf8(uint32_t cp, Out &&out) -> void {
    bool const is_surrogate = cp >= 0xD800 && cp <= 0xDFFF;
    if (cp > 0x10FFFF || is_surrogate) {
        throw std::invalid_argument("invalid unicode code point");
    }
    if (cp <= 0x7F) {
        // one byte: 0xxxxxxx
        *out++ = static_cast<char>(cp);
    } else if (cp <= 0x7FF) {
        // two bytes: 110xxxxx 10xxxxxx
        *out++ = static_cast<char>(0xC0 | (cp >> 6));
        *out++ = static_cast<char>(0x80 | (cp & 0x3F));
    } else if (cp <= 0xFFFF) {
        // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        *out++ = static_cast<char>(0xE0 | (cp >> 12));
        *out++ = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
        *out++ = static_cast<char>(0x80 | (cp & 0x3F));
    } else {
        // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        *out++ = static_cast<char>(0xF0 | (cp >> 18));
        *out++ = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
        *out++ = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
        *out++ = static_cast<char>(0x80 | (cp & 0x3F));
    }
}

/// Parse the `{HEX}` part of a `\u{HEX}` escape and emit it as UTF-8.
///
/// @param it  iterator to the character that must be the opening `{`
/// @param ie  end of the input
/// @param out output iterator accepting `char`; an lvalue is advanced past the
///            bytes written
/// @return iterator to the closing `}` (not one past it), so that a caller
///         incrementing the iterator in its own loop continues right after
///         the escape
/// @throws std::runtime_error if the opening or closing brace is missing, no
///         digit is given, or more than six digits are given
/// @throws std::invalid_argument if a digit is not hexadecimal or the value is
///         not an encodable code point
template <class It, class End, class Out> auto parse_unicode_escape(It it, End ie, Out &&out) -> It {
    if (it == ie || *it != '{') {
        throw std::runtime_error("expected '{' at the beginning of unicode escape");
    }
    // Six hex digits are enough to express every value up to U+10FFFF.
    constexpr unsigned max_digits = 6;
    uint32_t cp = 0;
    unsigned digits = 0;
    for (++it; it != ie; ++it, ++digits) {
        if (*it == '}') {
            if (digits == 0) {
                throw std::runtime_error("expected at least one hex digit in unicode escape");
            }
            encode_utf8(cp, out);
            return it;
        }
        if (digits >= max_digits) {
            throw std::runtime_error("too many hex digits in unicode escape");
        }
        cp = (cp << 4) | hex_val(*it);
    }
    // Ran out of input before seeing the closing brace.
    throw std::runtime_error("expected '}' at the end of unicode escape");
}

// NOLINTEND(readability-magic-numbers)

} // namespace Detail

void unquote(std::string_view in, auto out, bool fstring = false) {
auto escape = '\0';
for (auto c : in) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
for (auto it = in.begin(), ie = in.end(); it != ie; ++it) {
char c = *it;
if (escape == '{' || escape == '}') {
if (c == escape) {
*out++ = escape;
} else {
assert(false);
throw std::runtime_error("expected brace");
}
escape = '\0';
} else if (escape == '\\') {
switch (c) {
case 'u': {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
it = Detail::parse_unicode_escape(it + 1, ie, out);
break;
}
case 'n': {
*out++ = '\n';
break;
Expand All @@ -55,6 +97,10 @@ void unquote(std::string_view in, auto out, bool fstring = false) {
*out++ = '\t';
break;
}
case 'r': {
*out++ = '\r';
break;
}
case '\\': {
*out++ = '\\';
break;
Expand All @@ -64,8 +110,7 @@ void unquote(std::string_view in, auto out, bool fstring = false) {
break;
}
default: {
assert(false);
break;
throw std::runtime_error("invalid escape sequence");
}
}
escape = '\0';
Expand Down
Loading