Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
### Updated

### Fixed
- Fixed `EnableBatchedInserts` silently falling back to individual execution when table or schema names contain special characters (e.g., hyphens) inside backtick-quoted identifiers. Added a warn log when the fallback occurs.
- Fixed primitive types within complex types (ARRAY, MAP, STRUCT) not being correctly parsed when Arrow serialization uses alternate formats: TIMESTAMP/TIMESTAMP_NTZ as epoch microseconds or component arrays, and BINARY as base64-encoded strings.
- Fixed `PARSE_SYNTAX_ERROR` for column names containing special characters (e.g., dots) when `EnableBatchedInserts` is enabled, by re-quoting column names with backticks in reconstructed multi-row INSERT statements.
- Fixed Volume ingestion for SEA mode, which was broken because the statement was being closed prematurely.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ private boolean canUseBatchedInsert() {
return true;
} catch (Exception e) {
// Not a valid INSERT statement suitable for batching
LOGGER.warn(
"EnableBatchedInserts is enabled but the INSERT statement could not be parsed for"
+ " batching, falling back to individual execution: {}",
e.getMessage());
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
public class InsertStatementParser {

// Pattern to extract table and columns from INSERT INTO table (col1, col2, ...) VALUES format
// Table name group matches dot-separated segments where each segment is either a
// backtick-quoted identifier (allowing any character inside) or an unquoted identifier (\w+).
private static final Pattern INSERT_DETAILS_PATTERN =
Pattern.compile(
"^\\s*INSERT\\s+INTO\\s+([\\w`\\.]+)\\s*\\(([^)]+)\\)\\s+VALUES\\s*\\(",
"^\\s*INSERT\\s+INTO\\s+((?:`[^`]+`|\\w+)(?:\\.(?:`[^`]+`|\\w+))*)\\s*\\(([^)]+)\\)\\s+VALUES\\s*\\(",
Copy link
Copy Markdown

@cliffsun-anaplan cliffsun-anaplan Apr 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I saw this PR got linked to the issue I raised - thanks for this, but I think there's an edge case that is missing here which is when the schema / table name itself contains backticks (which would have been escaped by another backtick), e.g. table-with-``-backticks. When I tested it initially I iterated through with Claude and ended up with the first capture group being ((?:`(?:[^`]|``)+`|\w+)(?:\.(?:`(?:[^`]|``)+`|\w+))*) which is very similar to what you have but just also is able to match on the double backticks inside of a quoted identifier.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out! Updated the regex to handle escaped backticks as well.

Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/** Represents the parsed components of an INSERT statement. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,4 +284,56 @@ private String generateLargeInsert(int columnCount) {

return "INSERT INTO large_table (" + columns + ") VALUES (" + values + ")";
}

/**
 * Checks that an INSERT whose three-part table name ends in a hyphenated, backtick-quoted segment
 * is parsed: the table name is returned exactly as written and the three columns come back in
 * order.
 */
@Test
void testParseInsertWithHyphenatedTableName() {
  final String expectedTable = "catalog.schema.`my-table`";
  InsertInfo parsed =
      InsertStatementParser.parseInsert(
          "INSERT INTO catalog.schema.`my-table` (id, name, value) VALUES (?, ?, ?)");

  // A null result would mean the statement was rejected by the parser.
  assertNotNull(parsed);
  assertEquals(expectedTable, parsed.getTableName());
  assertEquals(Arrays.asList("id", "name", "value"), parsed.getColumns());
}

/**
 * Checks that a single backtick-quoted table name containing a space is parsed, with the quoted
 * name preserved verbatim and both columns extracted.
 */
@Test
void testParseInsertWithSpacesInTableName() {
  final String expectedTable = "`my table`";
  InsertInfo parsed =
      InsertStatementParser.parseInsert("INSERT INTO `my table` (id, name) VALUES (?, ?)");

  // A null result would mean the statement was rejected by the parser.
  assertNotNull(parsed);
  assertEquals(expectedTable, parsed.getTableName());
  assertEquals(Arrays.asList("id", "name"), parsed.getColumns());
}

/**
 * Checks that a table name whose catalog, schema and table segments are all backtick-quoted and
 * hyphenated is parsed, with the full dotted name preserved verbatim and both columns extracted.
 */
@Test
void testParseInsertWithAllSegmentsQuoted() {
  final String expectedTable = "`my-catalog`.`my-schema`.`my-table`";
  InsertInfo parsed =
      InsertStatementParser.parseInsert(
          "INSERT INTO `my-catalog`.`my-schema`.`my-table` (id, name) VALUES (?, ?)");

  // A null result would mean the statement was rejected by the parser.
  assertNotNull(parsed);
  assertEquals(expectedTable, parsed.getTableName());
  assertEquals(Arrays.asList("id", "name"), parsed.getColumns());
}

/**
 * Checks that a table name mixing unquoted segments with a backtick-quoted, hyphenated middle
 * segment is parsed, with the dotted name preserved verbatim and both columns extracted.
 */
@Test
void testParseInsertWithMixedQuotedAndUnquotedSegments() {
  final String expectedTable = "catalog.`my-schema`.normal_table";
  InsertInfo parsed =
      InsertStatementParser.parseInsert(
          "INSERT INTO catalog.`my-schema`.normal_table (id, name) VALUES (?, ?)");

  // A null result would mean the statement was rejected by the parser.
  assertNotNull(parsed);
  assertEquals(expectedTable, parsed.getTableName());
  assertEquals(Arrays.asList("id", "name"), parsed.getColumns());
}

/**
 * Checks the multi-row statement generated for a hyphenated, backtick-quoted table name: for three
 * rows the expected SQL keeps the table name as written, wraps each column name in backticks and
 * contains three placeholder tuples.
 */
@Test
void testGenerateMultiRowInsertWithHyphenatedTableName() throws Exception {
  InsertInfo parsed =
      InsertStatementParser.parseInsert(
          "INSERT INTO catalog.schema.`my-table` (id, name, value) VALUES (?, ?, ?)");

  // Parsing must succeed before the multi-row statement can be built from it.
  assertNotNull(parsed);

  String generated = InsertStatementParser.generateMultiRowInsert(parsed, 3);
  assertEquals(
      "INSERT INTO catalog.schema.`my-table` (`id`, `name`, `value`) VALUES (?, ?, ?), (?, ?, ?), (?, ?, ?)",
      generated);
}
}
Loading