diff --git a/pkg/yqlib/doc/operators/headers/slice-array.md b/pkg/yqlib/doc/operators/headers/slice-array.md index 87307bd16f..448c38ea85 100644 --- a/pkg/yqlib/doc/operators/headers/slice-array.md +++ b/pkg/yqlib/doc/operators/headers/slice-array.md @@ -1,5 +1,5 @@ -# Slice/Splice Array +# Slice Array or String -The slice array operator takes an array as input and returns a subarray. Like the `jq` equivalent, `.[10:15]` will return an array of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array. +The slice operator works on both arrays and strings. Like the `jq` equivalent, `.[10:15]` will return a subarray (or substring) of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array or string. -You may leave out the first or second number, which will refer to the start or end of the array respectively. +You may leave out the first or second number, which will refer to the start or end of the array or string respectively. diff --git a/pkg/yqlib/doc/operators/slice-array.md b/pkg/yqlib/doc/operators/slice-array.md index 9b89210b3f..8daf340fe2 100644 --- a/pkg/yqlib/doc/operators/slice-array.md +++ b/pkg/yqlib/doc/operators/slice-array.md @@ -1,8 +1,8 @@ -# Slice/Splice Array +# Slice Array or String -The slice array operator takes an array as input and returns a subarray. Like the `jq` equivalent, `.[10:15]` will return an array of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array. +The slice operator works on both arrays and strings. Like the `jq` equivalent, `.[10:15]` will return a subarray (or substring) of length 5, starting from index 10 inclusive, up to index 15 exclusive. Negative numbers count backwards from the end of the array or string. -You may leave out the first or second number, which will refer to the start or end of the array respectively. +You may leave out the first or second number, which will refer to the start or end of the array or string respectively. ## Slicing arrays Given a sample.yml file of: @@ -103,3 +103,81 @@ will output - cow ``` +## Slicing strings +Given a sample.yml file of: +```yaml +country: Australia +``` +then +```bash +yq '.country[0:5]' sample.yml +``` +will output +```yaml +Austr +``` + +## Slicing strings - without the second number +Finishes at the end of the string + +Given a sample.yml file of: +```yaml +country: Australia +``` +then +```bash +yq '.country[5:]' sample.yml +``` +will output +```yaml +alia +``` + +## Slicing strings - without the first number +Starts from the start of the string + +Given a sample.yml file of: +```yaml +country: Australia +``` +then +```bash +yq '.country[:5]' sample.yml +``` +will output +```yaml +Austr +``` + +## Slicing strings - use negative numbers to count backwards from the end +Negative indices count from the end of the string + +Given a sample.yml file of: +```yaml +country: Australia +``` +then +```bash +yq '.country[-5:]' sample.yml +``` +will output +```yaml +ralia +``` + +## Slicing strings - Unicode +Indices are rune-based, so multi-byte characters are handled correctly + +Given a sample.yml file of: +```yaml +greeting: héllo +``` +then +```bash +yq '.greeting[1:3]' sample.yml +``` +will output +```yaml +él +``` + diff --git a/pkg/yqlib/lexer.go b/pkg/yqlib/lexer.go index 04212fcea8..12e63e3713 100644 --- a/pkg/yqlib/lexer.go +++ b/pkg/yqlib/lexer.go @@ -131,6 +131,11 @@ func handleToken(tokens []*token, index int, postProcessedTokens []*token) (toke log.Debugf("previous token is : traverseArrayOpType") // need to put the number 0 before this token, as that is implied postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")}) + } else if index >= 2 && tokens[index-1].TokenType == openCollect && + (tokens[index-2].TokenType == operationToken || tokens[index-2].TokenType == closeCollect || tokens[index-2].TokenType == closeCollectObject) { + log.Debugf("previous token is : openCollect following a traversal, implying 0 start") + // need to put the number 0 before this token, as that is implied + postProcessedTokens = append(postProcessedTokens, &token{TokenType: operationToken, Operation: createValueOperation(0, "0")}) } } diff --git a/pkg/yqlib/operator_slice.go b/pkg/yqlib/operator_slice.go index 363322d00e..1f3bfdd3cd 100644 --- a/pkg/yqlib/operator_slice.go +++ b/pkg/yqlib/operator_slice.go @@ -31,6 +31,24 @@ func clampSliceIndex(index, length int) int { return index } +func sliceStringNode(lhsNode *CandidateNode, firstNumber int, secondNumber int) *CandidateNode { + runes := []rune(lhsNode.Value) + length := len(runes) + + relativeFirstNumber := clampSliceIndex(firstNumber, length) + relativeSecondNumber := clampSliceIndex(secondNumber, length) + if relativeSecondNumber < relativeFirstNumber { + relativeSecondNumber = relativeFirstNumber + } + + log.Debugf("sliceStringNode: slice from %v to %v", relativeFirstNumber, relativeSecondNumber) + + slicedString := string(runes[relativeFirstNumber:relativeSecondNumber]) + replacement := lhsNode.CreateReplacement(ScalarNode, lhsNode.Tag, slicedString) + replacement.Style = lhsNode.Style + return replacement +} + func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *ExpressionNode) (Context, error) { log.Debug("slice array operator!") @@ -43,16 +61,21 @@ func sliceArrayOperator(d *dataTreeNavigator, context Context, expressionNode *E lhsNode := el.Value.(*CandidateNode) firstNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.LHS) - if err != nil { return Context{}, err } - relativeFirstNumber := clampSliceIndex(firstNumber, len(lhsNode.Content)) secondNumber, err := getSliceNumber(d, context, lhsNode, expressionNode.RHS) if err != nil { return Context{}, err } + + if lhsNode.Kind == ScalarNode && lhsNode.guessTagFromCustomType() == "!!str" { + results.PushBack(sliceStringNode(lhsNode, firstNumber, secondNumber)) + continue + } + + relativeFirstNumber := clampSliceIndex(firstNumber, len(lhsNode.Content)) relativeSecondNumber := clampSliceIndex(secondNumber, len(lhsNode.Content)) log.Debugf("calculateIndicesToTraverse: slice from %v to %v", relativeFirstNumber, relativeSecondNumber) diff --git a/pkg/yqlib/operator_slice_test.go b/pkg/yqlib/operator_slice_test.go index c06bf44eeb..46f691a8df 100644 --- a/pkg/yqlib/operator_slice_test.go +++ b/pkg/yqlib/operator_slice_test.go @@ -129,6 +129,84 @@ var sliceArrayScenarios = []expressionScenario{ "D0, P[], (!!seq)::[]\n", }, }, + { + description: "Slicing strings", + document: `country: Australia`, + expression: `.country[0:5]`, + expected: []string{ + "D0, P[country], (!!str)::Austr\n", + }, + }, + { + description: "Slicing strings - without the second number", + subdescription: "Finishes at the end of the string", + document: `country: Australia`, + expression: `.country[5:]`, + expected: []string{ + "D0, P[country], (!!str)::alia\n", + }, + }, + { + description: "Slicing strings - without the first number", + subdescription: "Starts from the start of the string", + document: `country: Australia`, + expression: `.country[:5]`, + expected: []string{ + "D0, P[country], (!!str)::Austr\n", + }, + }, + { + description: "Slicing strings - use negative numbers to count backwards from the end", + subdescription: "Negative indices count from the end of the string", + document: `country: Australia`, + expression: `.country[-5:]`, + expected: []string{ + "D0, P[country], (!!str)::ralia\n", + }, + }, + { + skipDoc: true, + document: `country: Australia`, + expression: `.country[1:-1]`, + expected: []string{ + "D0, P[country], (!!str)::ustrali\n", + }, + }, + { + skipDoc: true, + document: `country: Australia`, + expression: `.country[:]`, + expected: []string{ + "D0, P[country], (!!str)::Australia\n", + }, + }, + { + skipDoc: true, + description: "second index beyond string length clamps", + document: `country: Australia`, + expression: `.country[:100]`, + expected: []string{ + "D0, P[country], (!!str)::Australia\n", + }, + }, + { + skipDoc: true, + description: "first index beyond string length returns empty string", + document: `country: Australia`, + expression: `.country[100:]`, + expected: []string{ + "D0, P[country], (!!str)::\n", + }, + }, + { + description: "Slicing strings - Unicode", + subdescription: "Indices are rune-based, so multi-byte characters are handled correctly", + document: `greeting: héllo`, + expression: `.greeting[1:3]`, + expected: []string{ + "D0, P[greeting], (!!str)::él\n", + }, + }, } func TestSliceOperatorScenarios(t *testing.T) { diff --git a/pkg/yqlib/operator_traverse_path.go b/pkg/yqlib/operator_traverse_path.go index 5d152fc320..1ed0757246 100644 --- a/pkg/yqlib/operator_traverse_path.go +++ b/pkg/yqlib/operator_traverse_path.go @@ -99,7 +99,11 @@ func traverseArrayOperator(d *dataTreeNavigator, context Context, expressionNode log.Debugf("--traverseArrayOperator") if expressionNode.RHS != nil && expressionNode.RHS.RHS != nil && expressionNode.RHS.RHS.Operation.OperationType == createMapOpType { - return sliceArrayOperator(d, context, expressionNode.RHS.RHS) + lhsContext, err := d.GetMatchingNodes(context, expressionNode.LHS) + if err != nil { + return Context{}, err + } + return sliceArrayOperator(d, lhsContext, expressionNode.RHS.RHS) } lhs, err := d.GetMatchingNodes(context, expressionNode.LHS) diff --git a/project-words.txt b/project-words.txt index 86c85e01a8..8f2fcf9e16 100644 --- a/project-words.txt +++ b/project-words.txt @@ -298,4 +298,9 @@ subsubarray Ffile Fquery coverpkg -gsub \ No newline at end of file +gsub +ralia +Austr +ustrali +héllo +alia