Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/column-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,14 @@ export class ColumnParser {

private maybeIdent(token: Token): boolean {
const ch = token.value[0];
const startChars = this.dialect === 'mssql' ? ['"', '['] : ['"', '`'];
let startChars: string[];
if (this.dialect === 'mssql') {
startChars = ['"', '['];
} else if (this.dialect === 'snowflake') {
startChars = ['"'];
} else {
startChars = ['"', '`'];
}
return token.type !== 'string' && (startChars.includes(ch) || /[a-zA-Z_]/.test(ch));
}
}
23 changes: 23 additions & 0 deletions src/defines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const DIALECTS = [
'psql',
'bigquery',
'dynamodb',
'snowflake',
'generic',
] as const;
export type Dialect = (typeof DIALECTS)[number];
Expand Down Expand Up @@ -54,6 +55,16 @@ export type StatementType =
| 'SHOW_KEYS'
| 'SHOW_INDEX'
| 'SHOW_TABLE'
| 'SHOW_WAREHOUSES'
| 'SHOW_USERS'
| 'SHOW_ROLES'
| 'SHOW_SCHEMAS'
| 'SHOW_STAGES'
| 'SHOW_INTEGRATIONS'
| 'SHOW_STREAMS'
| 'SHOW_TASKS'
| 'SHOW_PIPES'
| 'SHOW_SEQUENCES'
| 'SHOW_TABLES'
| 'SHOW_COLUMNS'
| 'DROP_DATABASE'
Expand All @@ -76,6 +87,18 @@ export type StatementType =
| 'COMMIT'
| 'ROLLBACK'
| 'ANON_BLOCK'
| 'MERGE'
| 'CALL'
| 'GRANT'
| 'REVOKE'
| 'EXPLAIN'
| 'DESCRIBE'
| 'USE'
| 'COPY'
| 'PUT'
| 'GET'
| 'LIST'
| 'REMOVE'
| 'UNKNOWN';

export type ExecutionType =
Expand Down
182 changes: 174 additions & 8 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,28 @@ export const EXECUTION_TYPES: Record<StatementType, ExecutionType> = {
BEGIN_TRANSACTION: 'TRANSACTION',
COMMIT: 'TRANSACTION',
ROLLBACK: 'TRANSACTION',
MERGE: 'MODIFICATION',
CALL: 'MODIFICATION',
GRANT: 'MODIFICATION',
REVOKE: 'MODIFICATION',
EXPLAIN: 'INFORMATION',
DESCRIBE: 'INFORMATION',
USE: 'INFORMATION',
COPY: 'MODIFICATION',
PUT: 'MODIFICATION',
GET: 'LISTING',
LIST: 'LISTING',
REMOVE: 'MODIFICATION',
SHOW_WAREHOUSES: 'LISTING',
SHOW_USERS: 'LISTING',
SHOW_ROLES: 'LISTING',
SHOW_SCHEMAS: 'LISTING',
SHOW_STAGES: 'LISTING',
SHOW_INTEGRATIONS: 'LISTING',
SHOW_STREAMS: 'LISTING',
SHOW_TASKS: 'LISTING',
SHOW_PIPES: 'LISTING',
SHOW_SEQUENCES: 'LISTING',
UNKNOWN: 'UNKNOWN',
ANON_BLOCK: 'ANON_BLOCK',
};
Expand All @@ -114,6 +136,7 @@ const blockOpeners: Record<Dialect, string[]> = {
oracle: ['DECLARE', 'BEGIN', 'CASE'],
bigquery: ['BEGIN', 'CASE', 'IF', 'LOOP', 'REPEAT', 'WHILE', 'FOR'],
dynamodb: [],
snowflake: ['DECLARE', 'BEGIN', 'CASE', 'LOOP', 'IF', 'WHILE', 'FOR', 'REPEAT'],
};

interface ParseOptions {
Expand Down Expand Up @@ -357,7 +380,7 @@ function createStatementParserByToken(
case 'CREATE':
return createCreateStatementParser(options);
case 'SHOW':
if (['mysql', 'generic'].includes(options.dialect)) {
if (['mysql', 'generic', 'snowflake'].includes(options.dialect)) {
return createShowStatementParser(options);
}
break;
Expand All @@ -374,7 +397,13 @@ function createStatementParserByToken(
case 'TRUNCATE':
return createTruncateStatementParser(options);
case 'BEGIN':
if (['bigquery', 'oracle'].includes(options.dialect) && nextToken.value !== 'TRANSACTION') {
if (
(['bigquery', 'oracle'].includes(options.dialect) &&
nextToken.value.toUpperCase() !== 'TRANSACTION') ||
(options.dialect === 'snowflake' &&
nextToken.value.toUpperCase() !== 'WORK' &&
nextToken.value.toUpperCase() !== 'TRANSACTION')
) {
return createBlockStatementParser(options);
}
return createBeginTransactionStatementParser(options);
Expand All @@ -388,10 +417,44 @@ function createStatementParserByToken(
case 'ROLLBACK':
return createRollbackStatementParser(options);
case 'DECLARE':
if (options.dialect === 'oracle') {
if (['oracle', 'snowflake'].includes(options.dialect)) {
return createBlockStatementParser(options);
}
break;
case 'MERGE':
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are any of these Snowflake specific? Looks like we dialect-gate some below, but not these presumably because they're widely supported?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure that PartiQL doesn't support a lot of these for example

return createMergeStatementParser(options);
case 'CALL':
return createCallStatementParser(options);
case 'GRANT':
return createGrantStatementParser(options);
case 'REVOKE':
return createRevokeStatementParser(options);
case 'EXPLAIN':
return createExplainStatementParser(options);
case 'DESCRIBE':
case 'DESC':
return createDescribeStatementParser(options);
case 'USE':
if (['mssql', 'mysql', 'snowflake'].includes(options.dialect)) {
return createUseStatementParser(options);
}
break;
case 'COPY':
return createCopyStatementParser(options);
case 'PUT':
if (options.dialect === 'snowflake') return createPutStatementParser(options);
break;
case 'GET':
if (options.dialect === 'snowflake') return createGetStatementParser(options);
break;
case 'LIST':
case 'LS':
if (options.dialect === 'snowflake') return createListStatementParser(options);
break;
case 'REMOVE':
case 'RM':
if (options.dialect === 'snowflake') return createRemoveStatementParser(options);
break;
default:
break;
}
Expand Down Expand Up @@ -437,7 +500,9 @@ function createBlockStatementParser(options: ParseOptions) {
preCanGoToNext: () => false,
validation: {
acceptTokens: [
...(options.dialect === 'oracle' ? [{ type: 'keyword', value: 'DECLARE' }] : []),
...(['oracle', 'snowflake'].includes(options.dialect)
? [{ type: 'keyword', value: 'DECLARE' }]
: []),
{ type: 'keyword', value: 'BEGIN' },
],
},
Expand Down Expand Up @@ -685,6 +750,80 @@ function createTruncateStatementParser(options: ParseOptions) {
return stateMachineStatementParser(statement, steps, options);
}

function createSingleKeywordStatementParser(
options: ParseOptions,
type: StatementType,
keywords: string[],
) {
const statement = createInitialStatement();

const steps: Step[] = [
{
preCanGoToNext: () => false,
validation: {
acceptTokens: keywords.map((value) => ({ type: 'keyword', value })),
},
add: (token) => {
statement.type = type;
if (statement.start < 0) {
statement.start = token.start;
}
},
postCanGoToNext: () => true,
},
];

return stateMachineStatementParser(statement, steps, options);
}

function createMergeStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'MERGE', ['MERGE']);
}

function createCallStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'CALL', ['CALL']);
}

function createGrantStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'GRANT', ['GRANT']);
}

function createRevokeStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'REVOKE', ['REVOKE']);
}

function createExplainStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'EXPLAIN', ['EXPLAIN']);
}

function createDescribeStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'DESCRIBE', ['DESCRIBE', 'DESC']);
}

function createUseStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'USE', ['USE']);
}

function createCopyStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'COPY', ['COPY']);
}

function createPutStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'PUT', ['PUT']);
}

function createGetStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'GET', ['GET']);
}

function createListStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'LIST', ['LIST', 'LS']);
}

function createRemoveStatementParser(options: ParseOptions) {
return createSingleKeywordStatementParser(options, 'REMOVE', ['REMOVE', 'RM']);
}

function createShowStatementParser(options: ParseOptions) {
const statement = createInitialStatement();

Expand Down Expand Up @@ -741,6 +880,16 @@ function createShowStatementParser(options: ParseOptions) {
{ type: 'keyword', value: 'TRIGGERS' },
{ type: 'keyword', value: 'VARIABLES' },
{ type: 'keyword', value: 'WARNINGS' },
{ type: 'keyword', value: 'WAREHOUSES' },
{ type: 'keyword', value: 'USERS' },
{ type: 'keyword', value: 'ROLES' },
{ type: 'keyword', value: 'SCHEMAS' },
{ type: 'keyword', value: 'STAGES' },
{ type: 'keyword', value: 'INTEGRATIONS' },
{ type: 'keyword', value: 'STREAMS' },
{ type: 'keyword', value: 'TASKS' },
{ type: 'keyword', value: 'PIPES' },
{ type: 'keyword', value: 'SEQUENCES' },
],
},
add: (token) => {
Expand Down Expand Up @@ -938,12 +1087,13 @@ function stateMachineStatementParser(
(token.value.toUpperCase() !== 'BEGIN' ||
(token.value.toUpperCase() === 'BEGIN' &&
nextToken.value.toUpperCase() !== 'TRANSACTION' &&
(dialect !== 'snowflake' || nextToken.value.toUpperCase() !== 'WORK') &&
(dialect !== 'sqlite' ||
(dialect === 'sqlite' &&
!['DEFERRED', 'IMMEDIATE', 'EXCLUSIVE'].includes(nextToken.value.toUpperCase())))))
) {
if (
dialect === 'oracle' &&
['oracle', 'snowflake'].includes(dialect) &&
lastBlockOpener?.value === 'DECLARE' &&
token.value.toUpperCase() === 'BEGIN'
) {
Expand Down Expand Up @@ -1001,13 +1151,22 @@ function stateMachineStatementParser(
}

if (
['psql', 'mssql', 'bigquery'].includes(dialect) &&
['psql', 'mssql', 'bigquery', 'snowflake'].includes(dialect) &&
token.value.toUpperCase() === 'MATERIALIZED'
) {
setPrevToken(token);
return;
}

let afterOrTokens: string[];
if (dialect === 'mssql') {
afterOrTokens = ['ALTER'];
} else if (dialect === 'snowflake') {
afterOrTokens = ['ALTER', 'REPLACE'];
} else {
afterOrTokens = ['REPLACE'];
}

// technically these dialects don't allow "OR REPLACE" or "OR ALTER" between all statement
// types, but we'll allow it for now.
// For "ALTER", we need to make sure we only catch it here if it directly follows "OR", so
Expand All @@ -1016,7 +1175,7 @@ function stateMachineStatementParser(
dialect !== 'sqlite' &&
(token.value.toUpperCase() === 'OR' ||
(prevNonWhitespaceToken?.value.toUpperCase() === 'OR' &&
token.value.toUpperCase() === (dialect === 'mssql' ? 'ALTER' : 'REPLACE')))
afterOrTokens.includes(token.value.toUpperCase())))
) {
setPrevToken(token);
return;
Expand All @@ -1026,7 +1185,9 @@ function stateMachineStatementParser(
if (
(dialect === 'psql' && ['TEMP', 'TEMPORARY'].includes(token.value.toUpperCase())) ||
(dialect === 'sqlite' &&
['TEMP', 'TEMPORARY', 'VIRTUAL'].includes(token.value.toUpperCase()))
['TEMP', 'TEMPORARY', 'VIRTUAL'].includes(token.value.toUpperCase())) ||
(dialect === 'snowflake' &&
['TEMP', 'TEMPORARY', 'TRANSIENT', 'VOLATILE'].includes(token.value.toUpperCase()))
) {
setPrevToken(token);
return;
Expand Down Expand Up @@ -1188,6 +1349,11 @@ export function defaultParamTypesFor(dialect: Dialect): ParamTypes {
return {
positional: true,
};
case 'snowflake':
return {
positional: true,
named: [':'],
};
default:
return {
positional: true,
Expand Down
34 changes: 33 additions & 1 deletion src/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,31 @@ const KEYWORDS = [
'TRIGGERS',
'VARIABLES',
'WARNINGS',
'MERGE',
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these keywords valid for every dialect? Should we be scoping these somehow?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like the addition of LIST even required a test change?

'CALL',
'GRANT',
'REVOKE',
'EXPLAIN',
'DESCRIBE',
'DESC',
'USE',
'COPY',
'PUT',
'GET',
'LIST',
'LS',
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

certainly feels like we might need to keep snowflake-specific keywords out of the global tokenizer list and scope them based on dialect

'REMOVE',
'RM',
'WAREHOUSES',
'USERS',
'ROLES',
'SCHEMAS',
'STAGES',
'INTEGRATIONS',
'STREAMS',
'TASKS',
'PIPES',
'SEQUENCES',
];

const INDIVIDUALS: Record<string, Token['type']> = {
Expand Down Expand Up @@ -520,7 +545,14 @@ function isDollarQuotedString(state: State): boolean {
}

function isQuotedIdentifier(ch: Char, dialect: Dialect): boolean {
const startQuoteChars: Char[] = dialect === 'mssql' ? ['"', '['] : ['"', '`'];
let startQuoteChars: Char[];
if (dialect === 'mssql') {
startQuoteChars = ['"', '['];
} else if (dialect === 'snowflake') {
startQuoteChars = ['"'];
} else {
startQuoteChars = ['"', '`'];
}
return startQuoteChars.includes(ch);
}

Expand Down
Loading
Loading