diff --git a/CODEBASE_OVERVIEW.md b/CODEBASE_OVERVIEW.md index 9618bbf2b..3fef85e4d 100644 --- a/CODEBASE_OVERVIEW.md +++ b/CODEBASE_OVERVIEW.md @@ -195,13 +195,14 @@ graph LR; click opensearch-ruby href "https://rubygems.org/gems/opensearch-ruby" "Open on RubyGems.org" _blank; ``` -### Extensions (6 gems) +### Extensions (7 gems) These libraries extend ElasticGraph to provide optional but commonly needed functionality. * [elasticgraph-apollo](elasticgraph-apollo/README.md): Transforms an ElasticGraph project into an Apollo subgraph. * [elasticgraph-health_check](elasticgraph-health_check/README.md): Provides a health check for high availability ElasticGraph deployments. * [elasticgraph-json_ingestion](elasticgraph-json_ingestion/README.md): Pluggable JSON Schema ingestion serializer for ElasticGraph. +* [elasticgraph-protobuf](elasticgraph-protobuf/README.md): Generates Protocol Buffers schema artifacts from ElasticGraph schemas. * [elasticgraph-query_interceptor](elasticgraph-query_interceptor/README.md): Intercepts ElasticGraph datastore queries. * [elasticgraph-query_registry](elasticgraph-query_registry/README.md): Provides a source-controlled query registry for ElasticGraph applications. * [elasticgraph-warehouse](elasticgraph-warehouse/README.md): Extends ElasticGraph to support ingestion into a data warehouse. 
@@ -221,6 +222,7 @@ graph LR; elasticgraph-health_check["eg-health_check"]; elasticgraph-datastore_core["eg-datastore_core"]; elasticgraph-json_ingestion["eg-json_ingestion"]; + elasticgraph-protobuf["eg-protobuf"]; elasticgraph-query_interceptor["eg-query_interceptor"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; elasticgraph-query_registry["eg-query_registry"]; @@ -234,6 +236,7 @@ graph LR; elasticgraph-health_check --> elasticgraph-graphql; elasticgraph-health_check --> elasticgraph-support; elasticgraph-json_ingestion --> elasticgraph-support; + elasticgraph-protobuf --> elasticgraph-support; elasticgraph-query_interceptor --> elasticgraph-graphql; elasticgraph-query_interceptor --> elasticgraph-schema_artifacts; elasticgraph-query_registry --> elasticgraph-graphql; @@ -249,6 +252,7 @@ graph LR; class elasticgraph-health_check targetGemStyle; class elasticgraph-datastore_core otherEgGemStyle; class elasticgraph-json_ingestion targetGemStyle; + class elasticgraph-protobuf targetGemStyle; class elasticgraph-query_interceptor targetGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class elasticgraph-query_registry targetGemStyle; diff --git a/Gemfile.lock b/Gemfile.lock index b13f9085f..0e6d03854 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -163,6 +163,12 @@ PATH faraday-retry (~> 2.4) opensearch-ruby (~> 3.4) +PATH + remote: elasticgraph-protobuf + specs: + elasticgraph-protobuf (1.1.1.pre) + elasticgraph-support (= 1.1.1.pre) + PATH remote: elasticgraph-query_interceptor specs: @@ -707,6 +713,7 @@ DEPENDENCIES elasticgraph-lambda_support (= 1.1.1.pre)! elasticgraph-local (= 1.1.1.pre)! elasticgraph-opensearch (= 1.1.1.pre)! + elasticgraph-protobuf (= 1.1.1.pre)! elasticgraph-query_interceptor (= 1.1.1.pre)! elasticgraph-query_registry (= 1.1.1.pre)! elasticgraph-rack (= 1.1.1.pre)! 
@@ -799,6 +806,7 @@ CHECKSUMS elasticgraph-lambda_support (1.1.1.pre) elasticgraph-local (1.1.1.pre) elasticgraph-opensearch (1.1.1.pre) + elasticgraph-protobuf (1.1.1.pre) elasticgraph-query_interceptor (1.1.1.pre) elasticgraph-query_registry (1.1.1.pre) elasticgraph-rack (1.1.1.pre) diff --git a/config/docker_demo/Dockerfile b/config/docker_demo/Dockerfile index 2110740fd..eda165650 100644 --- a/config/docker_demo/Dockerfile +++ b/config/docker_demo/Dockerfile @@ -19,6 +19,7 @@ COPY elasticgraph-indexer elasticgraph-indexer/ COPY elasticgraph-json_ingestion elasticgraph-json_ingestion/ COPY elasticgraph-local elasticgraph-local/ COPY elasticgraph-opensearch elasticgraph-opensearch/ +COPY elasticgraph-protobuf elasticgraph-protobuf/ COPY elasticgraph-query_registry elasticgraph-query_registry/ COPY elasticgraph-rack elasticgraph-rack/ COPY elasticgraph-schema_artifacts elasticgraph-schema_artifacts/ diff --git a/elasticgraph-protobuf/.rspec b/elasticgraph-protobuf/.rspec new file mode 120000 index 000000000..67e6e21b3 --- /dev/null +++ b/elasticgraph-protobuf/.rspec @@ -0,0 +1 @@ +../spec_support/subdir_dot_rspec \ No newline at end of file diff --git a/elasticgraph-protobuf/.yardopts b/elasticgraph-protobuf/.yardopts new file mode 120000 index 000000000..e11a2057f --- /dev/null +++ b/elasticgraph-protobuf/.yardopts @@ -0,0 +1 @@ +../config/site/yardopts \ No newline at end of file diff --git a/elasticgraph-protobuf/Gemfile b/elasticgraph-protobuf/Gemfile new file mode 120000 index 000000000..26cb2ad91 --- /dev/null +++ b/elasticgraph-protobuf/Gemfile @@ -0,0 +1 @@ +../Gemfile \ No newline at end of file diff --git a/elasticgraph-protobuf/LICENSE.txt b/elasticgraph-protobuf/LICENSE.txt new file mode 100644 index 000000000..aa18b5db8 --- /dev/null +++ b/elasticgraph-protobuf/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 - 2026 Block, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/elasticgraph-protobuf/README.md b/elasticgraph-protobuf/README.md new file mode 100644 index 000000000..1c9614f30 --- /dev/null +++ b/elasticgraph-protobuf/README.md @@ -0,0 +1,162 @@ +# ElasticGraph::Protobuf + +An ElasticGraph extension that generates Protocol Buffers (`proto3`) schema artifacts from ElasticGraph schemas. 
+ +## Dependency Diagram + +```mermaid +graph LR; + classDef targetGemStyle fill:#FADBD8,stroke:#EC7063,color:#000,stroke-width:2px; + classDef otherEgGemStyle fill:#A9DFBF,stroke:#2ECC71,color:#000; + classDef externalGemStyle fill:#E0EFFF,stroke:#70A1D7,color:#2980B9; + elasticgraph-protobuf["elasticgraph-protobuf"]; + class elasticgraph-protobuf targetGemStyle; + elasticgraph-support["elasticgraph-support"]; + elasticgraph-protobuf --> elasticgraph-support; + class elasticgraph-support otherEgGemStyle; +``` + +## Usage + +First, add `elasticgraph-protobuf` to your `Gemfile`, alongside the other ElasticGraph gems: + +```diff +diff --git a/Gemfile b/Gemfile +index 4a5ef1e..5c16c2b 100644 +--- a/Gemfile ++++ b/Gemfile +@@ -8,6 +8,7 @@ gem "elasticgraph-query_registry", *elasticgraph_details + + # Can be elasticgraph-elasticsearch or elasticgraph-opensearch based on the datastore you want to use. + gem "elasticgraph-opensearch", *elasticgraph_details ++gem "elasticgraph-protobuf", *elasticgraph_details + + gem "httpx", "~> 1.3" + +``` + +Next, update your `Rakefile` so that `ElasticGraph::Protobuf::SchemaDefinition::APIExtension` is +included in the schema-definition extension modules: + +```diff +diff --git a/Rakefile b/Rakefile +index 2943335..26633c3 100644 +--- a/Rakefile ++++ b/Rakefile +@@ -1,5 +1,6 @@ + project_root = File.expand_path(__dir__) + ++require "elastic_graph/protobuf/schema_definition/api_extension" + require "elastic_graph/local/rake_tasks" + require "elastic_graph/query_registry/rake_tasks" + require "rspec/core/rake_task" +@@ -12,6 +13,8 @@ ElasticGraph::Local::RakeTasks.new( + local_config_yaml: settings_file, + path_to_schema: "#{project_root}/config/schema.rb" + ) do |tasks| ++ tasks.schema_definition_extension_modules << ElasticGraph::Protobuf::SchemaDefinition::APIExtension ++ + # Set this to true once you're beyond the prototyping stage. 
+ tasks.enforce_json_schema_version = false + +``` + +Then opt into proto generation from your schema definition: + +```ruby +# in config/schema/protobuf.rb + +ElasticGraph.define_schema do |schema| + schema.proto_schema_artifacts package_name: "myapp.events.v1" +end +``` + +After running `bundle exec rake schema_artifacts:dump`, ElasticGraph will generate: + +- `schema.proto` +- `proto_field_numbers.yaml` + +## Schema Definition Options + +### Custom Scalar Types + +Built-in ElasticGraph scalar types are automatically mapped to proto scalar types. +For custom scalar types, the generator infers proto scalar types from `json_schema type:` when it is one +of `string`, `boolean`, `number`, or `integer`. You can override inference with `proto_field`: + +```ruby +# in config/schema/money.rb + +ElasticGraph.define_schema do |schema| + schema.scalar_type "Money" do |t| + t.mapping type: "long" + t.json_schema type: "integer" + t.proto_field type: "int64" + end +end +``` + +### Sourcing Enum Values From Existing Protobuf Mappings + +If your project already maintains GraphQL-to-proto enum mappings (for example in tests), +you can reuse them for proto schema generation: + +```ruby +# in config/schema/proto_enum_mappings.rb + +ElasticGraph.define_schema do |schema| + schema.proto_enum_mappings( + SalesEg::ProtoEnumMappings::PROTO_ENUMS_BY_GRAPHQL_ENUM + ) if defined?(SalesEg::ProtoEnumMappings) +end +``` + +When a mapping exists for an enum, `elasticgraph-protobuf` uses the mapped proto enum(s) +as the source of enum values (respecting `exclusions`, `expected_extras`, and `name_transform`). + +### Stable Field Numbers + +`schema_artifacts:dump` automatically reads and writes `proto_field_numbers.yaml` +in the schema artifacts directory. Existing numbers stay fixed even if field order +changes, and new fields get the next available numbers. + +`schema.proto` always uses the public GraphQL field names. 
When a field uses a +different `name_in_index`, the sidecar YAML records that override next to the field number: + +```yaml +messages: + Widget: + fields: + id: 1 + display_name: + field_number: 2 + name_in_index: displayName +``` + +If a field is renamed with `field.renamed_from`, `elasticgraph-protobuf` reuses the +existing field number under the new public field name. + +## Type Mappings + +The generated `schema.proto` uses these built-in scalar mappings: + +| ElasticGraph Type | Protobuf Type | +|-------------------|------------| +| `Boolean` | `bool` | +| `Cursor` | `string` | +| `Date` | `string` | +| `DateTime` | `string` | +| `Float` | `double` | +| `ID` | `string` | +| `Int` | `int32` | +| `JsonSafeLong` | `int64` | +| `LocalTime` | `string` | +| `LongString` | `int64` | +| `String` | `string` | +| `TimeZone` | `string` | +| `Untyped` | `string` | + +Additionally: +- List types become `repeated` fields. +- Nested list types generate wrapper messages so the output remains valid `proto3`. +- Enum types generate `enum` definitions whose values are prefixed with the enum type name in `UPPER_SNAKE_CASE`, including a zero-valued `*_UNSPECIFIED` entry. diff --git a/elasticgraph-protobuf/elasticgraph-protobuf.gemspec b/elasticgraph-protobuf/elasticgraph-protobuf.gemspec new file mode 100644 index 000000000..68bd79eda --- /dev/null +++ b/elasticgraph-protobuf/elasticgraph-protobuf.gemspec @@ -0,0 +1,40 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require_relative "../elasticgraph-support/lib/elastic_graph/version" + +Gem::Specification.new do |spec| + spec.name = "elasticgraph-protobuf" + spec.version = ElasticGraph::VERSION + spec.authors = ["Josh Wilson", "Myron Marston", "Block Engineering"] + spec.email = ["joshuaw@squareup.com"] + spec.homepage = "https://block.github.io/elasticgraph/" + spec.license = "MIT" + spec.summary = "Generates Protocol Buffers schema artifacts from ElasticGraph schemas." + + spec.metadata = { + "bug_tracker_uri" => "https://github.com/block/elasticgraph/issues", + "changelog_uri" => "https://github.com/block/elasticgraph/releases/tag/v#{ElasticGraph::VERSION}", + "documentation_uri" => "https://block.github.io/elasticgraph/api-docs/v#{ElasticGraph::VERSION}/", + "homepage_uri" => "https://block.github.io/elasticgraph/", + "source_code_uri" => "https://github.com/block/elasticgraph/tree/v#{ElasticGraph::VERSION}/#{spec.name}", + "gem_category" => "extension" + } + + spec.files = Dir.chdir(File.expand_path(__dir__)) do + `git ls-files -z`.split("\x0").reject do |f| + (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features|sig)/|\.(?:git|travis|circleci)|appveyor)}) + end - [".rspec", "Gemfile", ".yardopts"] + end + + spec.required_ruby_version = [">= 3.4", "< 4.1"] + + spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION + + spec.add_development_dependency "elasticgraph-schema_definition", ElasticGraph::VERSION +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf.rb new file mode 100644 index 000000000..f87686aae --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf.rb @@ -0,0 +1,18 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +module ElasticGraph + # Namespace for Protocol Buffers schema artifact generation extensions. + module Protobuf + # The name of the generated Protocol Buffers schema file. + PROTO_SCHEMA_FILE = "schema.proto" + + # The name of the generated proto field-number mapping file. + PROTO_FIELD_NUMBERS_FILE = "proto_field_numbers.yaml" + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/api_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/api_extension.rb new file mode 100644 index 000000000..4c8cb46a0 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/api_extension.rb @@ -0,0 +1,108 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/errors" +require "elastic_graph/protobuf" +require "elastic_graph/protobuf/schema_definition/factory_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Module designed to be extended onto an API instance to enable proto artifact generation. + module APIExtension + # Maps built-in ElasticGraph scalar types to proto field types. 
+ PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE = { + "Boolean" => "bool", + "Cursor" => "string", + "Date" => "string", + "DateTime" => "string", + "Float" => "double", + "ID" => "string", + "Int" => "int32", + "JsonSafeLong" => "int64", + "LocalTime" => "string", + "LongString" => "int64", + "String" => "string", + "TimeZone" => "string", + "Untyped" => "string" + }.freeze + + def self.extended(api) + api.factory.extend FactoryExtension + + api.proto_schema_artifacts + + api.on_built_in_types do |type| + if type.is_a?(ScalarTypeExtension) + type.proto_field type: PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE.fetch(type.name) + end + end + end + + # Configures protobuf artifact generation behavior. + # + # @param package_name [String] proto package name to emit + # @return [void] + def proto_schema_artifacts(package_name: "elasticgraph") + if !package_name.is_a?(String) || package_name.empty? + raise Errors::SchemaError, "`package_name` must be a non-empty String" + end + + @proto_schema_package_name = package_name + nil + end + + # Registers mappings from GraphQL enum names to protobuf enum classes and transform options. + # This is intended to support reusing enum mappings already maintained by applications + # (for example in schema/proto consistency tests). + # + # @param proto_enums_by_graphql_enum [Hash] + # @return [void] + def proto_enum_mappings(proto_enums_by_graphql_enum) + @proto_enums_by_graphql_enum = proto_enums_by_graphql_enum + nil + end + + # Configures proto field-number mappings directly from a hash. + # Useful for tests and advanced use cases where mappings are sourced outside artifacts. 
+ # + # @param proto_field_number_mappings [Hash] + # @param enforce [Boolean] ignored; retained for compatibility with earlier prototypes and tests + # @return [void] + def configure_proto_field_number_mappings(proto_field_number_mappings, enforce: false) + unless [true, false].include?(enforce) + raise Errors::SchemaError, "`enforce` must be true or false" + end + + @proto_field_number_mappings = proto_field_number_mappings + nil + end + + # @private + def proto_schema_package_name + @proto_schema_package_name || "elasticgraph" + end + + # @private + def proto_enums_by_graphql_enum + @proto_enums_by_graphql_enum || {} + end + + # @private + def proto_field_number_mapping_file + Protobuf::PROTO_FIELD_NUMBERS_FILE + end + + # @private + def proto_field_number_mappings + @proto_field_number_mappings || {} + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/enum_type_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/enum_type_extension.rb new file mode 100644 index 000000000..ddae0b6e6 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/enum_type_extension.rb @@ -0,0 +1,25 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/identifier" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extends EnumType with proto field type conversion. + module EnumTypeExtension + # Returns the proto field type representation for this enum type. 
+ # + # @return [String] + def to_proto_field_type + Identifier.enum_name(name) + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/factory_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/factory_extension.rb new file mode 100644 index 000000000..118f4a8b1 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/factory_extension.rb @@ -0,0 +1,100 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/enum_type_extension" +require "elastic_graph/protobuf/schema_definition/object_interface_and_union_extension" +require "elastic_graph/protobuf/schema_definition/results_extension" +require "elastic_graph/protobuf/schema_definition/scalar_type_extension" +require "elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extension module applied to Factory to add proto support. + module FactoryExtension + # Creates a new enum type with proto extensions. + # + # @param name [String] enum type name + # @yield [ElasticGraph::SchemaDefinition::SchemaElements::EnumType] + # @return [ElasticGraph::SchemaDefinition::SchemaElements::EnumType] + def new_enum_type(name) + super(name) do |type| + type.extend EnumTypeExtension + yield type if block_given? + end + end + + # Creates a new interface type with proto extensions. + # + # @param name [String] interface type name + # @yield [ElasticGraph::SchemaDefinition::SchemaElements::InterfaceType] + # @return [ElasticGraph::SchemaDefinition::SchemaElements::InterfaceType] + def new_interface_type(name) + super(name) do |type| + type.extend ObjectInterfaceAndUnionExtension + yield type if block_given? 
+ end + end + + # Creates a new object type with proto extensions. + # + # @param name [String] object type name + # @yield [ElasticGraph::SchemaDefinition::SchemaElements::ObjectType] + # @return [ElasticGraph::SchemaDefinition::SchemaElements::ObjectType] + def new_object_type(name) + super(name) do |type| + type.extend ObjectInterfaceAndUnionExtension + yield type if block_given? + end + end + + # Creates a new scalar type with proto extensions. + # + # @param name [String] scalar type name + # @yield [ElasticGraph::SchemaDefinition::SchemaElements::ScalarType] + # @return [ElasticGraph::SchemaDefinition::SchemaElements::ScalarType] + def new_scalar_type(name) + super(name) do |type| + type.extend ScalarTypeExtension + yield type if block_given? + end + end + + # Creates a new union type with proto extensions. + # + # @param name [String] union type name + # @yield [ElasticGraph::SchemaDefinition::SchemaElements::UnionType] + # @return [ElasticGraph::SchemaDefinition::SchemaElements::UnionType] + def new_union_type(name) + super(name) do |type| + type.extend ObjectInterfaceAndUnionExtension + yield type if block_given? + end + end + + # Creates a new results object and extends it with proto generation APIs. + # + # @return [ElasticGraph::SchemaDefinition::Results] + def new_results + super.tap do |results| + results.extend ResultsExtension + end + end + + # Creates a new schema artifact manager and extends it with proto artifact support. + # + # @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] + def new_schema_artifact_manager(...) 
+ super.tap do |manager| + manager.extend SchemaArtifactManagerExtension + end + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/field_type_converter.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/field_type_converter.rb new file mode 100644 index 000000000..de4b83ea4 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/field_type_converter.rb @@ -0,0 +1,38 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/errors" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Converts non-list ElasticGraph field types to protobuf field types. + class FieldTypeConverter + # Converts a non-list schema field type reference to a protobuf field type name. + # + # @param field_type [ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] + # @return [String] + # @raise [Errors::SchemaError] when the type cannot be converted + def self.convert(field_type) + type = field_type.unwrap_non_null + + if type.list? + raise Errors::SchemaError, "FieldTypeConverter only supports non-list types, but got list type `#{field_type}`." + end + + resolved = type.resolved + unless resolved&.respond_to?(:to_proto_field_type) + raise Errors::SchemaError, "Type `#{type.unwrapped_name}` cannot be converted to proto. Add a `to_proto_field_type` extension for it." 
+ end + + resolved.to_proto_field_type + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/identifier.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/identifier.rb new file mode 100644 index 000000000..28d4e3d59 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/identifier.rb @@ -0,0 +1,74 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Helpers for rendering Protocol Buffers identifiers while avoiding keyword conflicts. + class Identifier + # Builds a protobuf package identifier and escapes reserved keywords in each segment. + # + # @param name [#to_s] + # @return [String] + def self.package_name(name) + name.to_s.split(".").map { |part| escape_keyword(part) }.join(".") + end + + # Builds a protobuf message identifier. + # + # @param name [#to_s] + # @return [String] + def self.message_name(name) + escape_keyword(name.to_s) + end + + # Builds a protobuf enum identifier. + # + # @param name [#to_s] + # @return [String] + def self.enum_name(name) + escape_keyword(name.to_s) + end + + # Builds a protobuf field identifier. + # + # @param name [#to_s] + # @return [String] + def self.field_name(name) + escape_keyword(name.to_s) + end + + # Builds a protobuf enum value identifier. + # + # @param name [#to_s] + # @return [String] + def self.enum_value_name(name) + escape_keyword(name.to_s) + end + + # Escapes protobuf reserved keywords by suffixing them with an underscore. 
+ # + # @param identifier [String] + # @return [String] + def self.escape_keyword(identifier) + return identifier unless PROTO_KEYWORDS.include?(identifier) + "#{identifier}_" + end + + # Reserved words in protobuf syntax that cannot be used as identifiers verbatim. + # + # @return [Set] + PROTO_KEYWORDS = ::Set[ + "bool", "bytes", "double", "enum", "false", "fixed32", "fixed64", "float", "import", "int32", "int64", "map", + "message", "oneof", "option", "package", "public", "repeated", "reserved", "rpc", "service", "sfixed32", "sfixed64", + "sint32", "sint64", "stream", "string", "syntax", "to", "true", "uint32", "uint64", "weak" + ].freeze + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/object_interface_and_union_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/object_interface_and_union_extension.rb new file mode 100644 index 000000000..b2de48751 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/object_interface_and_union_extension.rb @@ -0,0 +1,25 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/identifier" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extends object/interface/union types with proto field type conversion. + module ObjectInterfaceAndUnionExtension + # Returns the proto field type representation for this type. 
+ # + # @return [String] + def to_proto_field_type + Identifier.message_name(name) + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/results_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/results_extension.rb new file mode 100644 index 000000000..d86223068 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/results_extension.rb @@ -0,0 +1,70 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/schema" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extension module for Results that adds proto schema generation support. + module ResultsExtension + # Returns the generated proto schema. + # + # @return [String] complete `proto3` schema file contents + def proto_schema + @proto_schema ||= proto_schema_generator.to_proto + end + + # Returns proto field-number mappings suitable for artifact storage. + # + # @return [Hash] + def proto_field_number_mappings + # Ensure generation has occurred before reading mappings from the generator. 
+ _ = proto_schema + proto_schema_generator.field_number_mappings_for_artifact + end + + private + + def proto_schema_generator + @proto_schema_generator ||= build_proto_schema_generator + end + + def build_proto_schema_generator + package_name = + if state.api.respond_to?(:proto_schema_package_name) + state.api.proto_schema_package_name + else + "elasticgraph" + end + + proto_enums_by_graphql_enum = + if state.api.respond_to?(:proto_enums_by_graphql_enum) + state.api.proto_enums_by_graphql_enum + else + {} + end + + proto_field_number_mappings = + if state.api.respond_to?(:proto_field_number_mappings) + state.api.proto_field_number_mappings + else + {} + end + + Schema.new( + self, + package_name: package_name, + proto_enums_by_graphql_enum: proto_enums_by_graphql_enum, + proto_field_number_mappings: proto_field_number_mappings + ) + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/scalar_type_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/scalar_type_extension.rb new file mode 100644 index 000000000..23ff014f3 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/scalar_type_extension.rb @@ -0,0 +1,74 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/errors" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extends ScalarType with proto field type conversion. + module ScalarTypeExtension + # Fallback mapping from JSON schema scalar types to protobuf scalar field types. + # + # @return [Hash] + PROTO_FIELD_TYPE_BY_JSON_SCHEMA_TYPE = { + "boolean" => "bool", + "integer" => "int64", + "number" => "double", + "string" => "string" + }.freeze + + # Configured proto field type (e.g. string, int64, bool). 
+ # @dynamic proto_field_type + attr_reader :proto_field_type + + # Configures the proto field type for this scalar type. + # + # @param type [String] protobuf scalar type name + # @return [void] + def proto_field(type:) + @proto_field_type = type + end + + # Returns this scalar's proto field type. + # + # @return [String] + # @raise [Errors::SchemaError] when missing + def to_proto_field_type + proto_field_type || + infer_proto_field_type_from_json_schema || + raise(Errors::SchemaError, "Protobuf field type not configured for scalar type `#{name}`. " \ + 'To proceed, call `proto_field type: "TYPE"` on the scalar type definition.') + end + + private + + def infer_proto_field_type_from_json_schema + return nil unless respond_to?(:json_schema_options) + + types = + case (type = json_schema_options[:type]) + when String, Symbol + [type.to_s] + when Array + type.filter_map do |entry| + (entry.is_a?(String) || entry.is_a?(Symbol)) ? entry.to_s : nil + end + else + [] + end + + normalized_types = (types - ["null"]).uniq + return nil unless normalized_types.size == 1 + + PROTO_FIELD_TYPE_BY_JSON_SCHEMA_TYPE[normalized_types.first] + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema.rb new file mode 100644 index 000000000..f512b130c --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema.rb @@ -0,0 +1,673 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
module ElasticGraph
  module Protobuf
    # Protocol Buffers schema-generation support for ElasticGraph schema-definition results.
    module SchemaDefinition
      # Builds a `proto3` schema string from an ElasticGraph schema definition.
      #
      # Every indexed type, plus every type reachable through its indexing fields, is registered as
      # a proto message or enum. Field numbers are looked up in (and added to) a per-message
      # mapping so that they can be persisted and stay stable across schema changes.
      class Schema
        # Largest field number accepted by protobuf (2**29 - 1).
        MAX_FIELD_NUMBER = 536_870_911

        # Field numbers reserved for the Protocol Buffers implementation; `protoc` rejects them.
        RESERVED_FIELD_NUMBERS = (19_000..19_999)

        # Internal representation of a protobuf field definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] type
        #   @return [String]
        # @!attribute [r] field_number
        #   @return [Integer]
        # @!attribute [r] repeated
        #   @return [Boolean]
        # @!attribute [r] comment
        #   @return [String, nil]
        FieldDefinition = ::Data.define(:name, :type, :field_number, :repeated, :comment)

        # Internal representation of a protobuf message definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] fields
        #   @return [Array<FieldDefinition>]
        MessageDefinition = ::Data.define(:name, :fields)

        # Internal representation of a protobuf enum value definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] number
        #   @return [Integer]
        EnumValueDefinition = ::Data.define(:name, :number)

        # Internal representation of a protobuf enum definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] zero_value_name
        #   @return [String]
        # @!attribute [r] values
        #   @return [Array<EnumValueDefinition>]
        EnumDefinition = ::Data.define(:name, :zero_value_name, :values)

        # Internal representation of a stored field-number mapping.
        #
        # @!attribute [r] field_number
        #   @return [Integer]
        # @!attribute [r] name_in_index
        #   @return [String]
        FieldNumberMapping = ::Data.define(:field_number, :name_in_index)

        # Generates the full `proto3` schema text for indexed types.
        #
        # @param results [ElasticGraph::SchemaDefinition::Results]
        # @param package_name [String]
        # @param proto_enums_by_graphql_enum [Hash]
        # @param proto_field_number_mappings [Hash]
        # @return [String]
        def self.generate(
          results,
          package_name: "elasticgraph",
          proto_enums_by_graphql_enum: {},
          proto_field_number_mappings: {}
        )
          new(
            results,
            package_name: package_name,
            proto_enums_by_graphql_enum: proto_enums_by_graphql_enum,
            proto_field_number_mappings: proto_field_number_mappings
          ).to_proto
        end

        # @param results [ElasticGraph::SchemaDefinition::Results]
        # @param package_name [String] proto package name (passed through `Identifier.package_name`)
        # @param proto_enums_by_graphql_enum [Hash, nil] GraphQL enum name => proto enum mappings
        # @param proto_field_number_mappings [Hash, nil] previously persisted field numbers, either
        #   in artifact form (`{"messages" => {msg => {"fields" => {...}}}}`) or a bare
        #   `{msg => {field => number}}` hash
        # @raise [Errors::SchemaError] when the field-number mappings are malformed
        def initialize(
          results,
          package_name:,
          proto_enums_by_graphql_enum:,
          proto_field_number_mappings: {}
        )
          @results = results
          @package_name = Identifier.package_name(package_name)
          @proto_enums_by_graphql_enum = normalize_proto_enum_mappings(proto_enums_by_graphql_enum)
          @proto_field_number_mappings_by_message = normalize_proto_field_number_mappings(proto_field_number_mappings)
          @message_definitions_by_name = {}
          @enum_definitions_by_name = {}
          @generated_message_definitions_by_name = {}
          @wrapper_root_name_by_context = {}
          @type_name_by_message_name = {}
          @type_name_by_enum_name = {}
        end

        # Renders the schema as a valid `proto3` file.
        #
        # @return [String] the schema text, or an empty string when there are no indexed types
        # @raise [Errors::SchemaError] when a type, field or mapping cannot be represented
        def to_proto
          root_types = indexed_types
          return "" if root_types.empty?

          root_types.each { |type| register_type(type) }

          sections = [
            'syntax = "proto3";',
            "package #{@package_name};",
            render_definitions
          ]

          sections.join("\n\n") + "\n"
        end

        # Exposes normalized field-number mappings for writing to artifact YAML.
        #
        # Messages are sorted by name and fields by `[field_number, name]`. A field whose
        # `name_in_index` equals its public name is dumped as a bare integer; otherwise as a
        # hash with `field_number` and `name_in_index`.
        #
        # @return [Hash<String, Hash<String, Hash<String, Hash<String, Integer, Hash>>>>]
        def field_number_mappings_for_artifact
          messages = @proto_field_number_mappings_by_message
            .sort_by { |message_name, _| message_name }
            .to_h do |message_name, field_numbers|
              fields = field_numbers
                .sort_by { |field_name, mapping| [mapping.field_number, field_name] }
                .to_h do |field_name, mapping|
                  artifact_mapping =
                    if mapping.name_in_index == field_name
                      mapping.field_number
                    else
                      {
                        "field_number" => mapping.field_number,
                        "name_in_index" => mapping.name_in_index
                      }
                    end

                  [field_name, artifact_mapping]
                end

              [message_name, {"fields" => fields}]
            end

          {"messages" => messages}
        end

        private

        # Indexed root types: unique by name and sorted by name so output is deterministic.
        def indexed_types
          types = @results.schema_artifact_types

          types
            .filter_map { |type| (_ = type).index_def if type.respond_to?(:index_def) }
            .map(&:indexed_type)
            .uniq(&:name)
            .sort_by(&:name)
        end

        # Dispatches on the capabilities of `type`: enums and object-like types are registered,
        # scalars are only asked for their proto type (which raises if it is not configured).
        def register_type(type)
          if type.respond_to?(:values_by_name)
            register_enum(type)
          elsif type.respond_to?(:indexing_fields_by_name_in_index)
            register_message(type)
          elsif type.respond_to?(:to_proto_field_type)
            type.to_proto_field_type
          else
            raise Errors::SchemaError, "Type `#{type.respond_to?(:name) ? type.name : type.inspect}` cannot be converted to proto."
          end
        end

        # Registers the innermost (non-list, non-null) type referenced by `type_ref`.
        def register_type_ref(type_ref)
          _list_depth, base_type_ref = list_depth_and_base_type(type_ref)

          resolved = base_type_ref.resolved
          if resolved.nil?
            raise Errors::SchemaError, "Type `#{base_type_ref.unwrapped_name}` cannot be resolved for proto generation."
          end

          register_type(resolved)
        end

        # Builds and stores the message definition for an object-like type, registering the
        # types of its fields along the way.
        def register_message(type)
          message_name = Identifier.message_name(type.name)
          check_message_name_collision(message_name, type.name)
          return if @message_definitions_by_name.key?(message_name)

          # A wrapper generated earlier for a nested list may already own this name; rendering
          # both would emit two messages with the same name.
          if @generated_message_definitions_by_name.key?(message_name)
            raise Errors::SchemaError, "Type `#{type.name}` maps to proto message name `#{message_name}`, " \
              "which is already used by a generated nested-list wrapper message."
          end

          # Register a placeholder first so recursive type references do not recurse forever.
          @message_definitions_by_name[message_name] = MessageDefinition.new(name: message_name, fields: [])

          fields = type
            .indexing_fields_by_name_in_index
            .values
            .filter_map(&:to_indexing_field)
            .map do |field|
              field_name = Identifier.field_name(field.name)
              repeated, field_type = proto_field_type_for(
                field.type,
                context_message_name: message_name,
                context_field_name: field.name
              )
              field_number = field_number_for(
                message_name: message_name,
                type_name: type.name,
                public_field_name: field.name,
                name_in_index: field.name_in_index
              )

              # Only annotate fields whose proto name differs from the source name.
              comment =
                if field_name == field.name
                  nil
                else
                  "source name: #{field.name}"
                end

              FieldDefinition.new(
                name: field_name,
                type: field_type,
                field_number: field_number,
                repeated: repeated,
                comment: comment
              )
            end

          duplicate_names = fields.group_by(&:name).select { |_, defs| defs.size > 1 }
          if duplicate_names.any?
            duplicates = duplicate_names.keys.sort.join(", ")
            raise Errors::SchemaError, "Type `#{type.name}` maps to duplicate proto field names: #{duplicates}."
          end

          @message_definitions_by_name[message_name] = MessageDefinition.new(name: message_name, fields: fields)
        end

        # Builds and stores the enum definition for an enum type. Values are numbered from 1 in
        # order; 0 is kept for the generated `*_UNSPECIFIED` value.
        def register_enum(enum_type)
          enum_name = Identifier.enum_name(enum_type.name)
          check_enum_name_collision(enum_name, enum_type.name)
          return if @enum_definitions_by_name.key?(enum_name)

          values = enum_value_names_for(enum_type).each_with_index.map do |enum_value_name, i|
            EnumValueDefinition.new(
              name: proto_enum_value_name(enum_type.name, enum_value_name),
              number: i + 1
            )
          end

          duplicate_names = values.group_by(&:name).select { |_, defs| defs.size > 1 }
          if duplicate_names.any?
            duplicates = duplicate_names.keys.sort.join(", ")
            raise Errors::SchemaError, "Enum `#{enum_type.name}` maps to duplicate proto enum value names: #{duplicates}."
          end

          # Keep appending underscores until the zero value cannot clash with a real value.
          zero_value_name = proto_zero_enum_value_name(enum_type.name)
          while values.any? { |value| value.name == zero_value_name }
            zero_value_name = "#{zero_value_name}_"
          end

          @enum_definitions_by_name[enum_name] = EnumDefinition.new(
            name: enum_name,
            zero_value_name: zero_value_name,
            values: values
          )
        end

        # Value names for an enum: taken from the configured proto enum mappings when present
        # (all mappings must agree on the value set), otherwise from the GraphQL enum itself.
        def enum_value_names_for(enum_type)
          mapping_entries = @proto_enums_by_graphql_enum[enum_type.name]
          return enum_type.values_by_name.keys if mapping_entries.nil? || mapping_entries.empty?

          values_by_source = mapping_entries.map do |proto_type, options|
            enum_value_names_from_proto_mapping(enum_type_name: enum_type.name, proto_type: proto_type, options: options || {})
          end

          canonical_values = values_by_source.first
          canonical_set = canonical_values.uniq.sort

          values_by_source.drop(1).each do |source_values|
            next if source_values.uniq.sort == canonical_set

            raise Errors::SchemaError, "Protobuf enum mappings for `#{enum_type.name}` produce inconsistent value sets. " \
              "Ensure each mapped proto enum (with exclusions/expected_extras/name_transform) resolves to the same values."
          end

          canonical_values
        end

        # Reads value names from a proto enum class (anything responding to `.enums`) and applies
        # the `name_transform`, `exclusions` and `expected_extras` options. Unexpected errors are
        # re-raised as `Errors::SchemaError`.
        def enum_value_names_from_proto_mapping(enum_type_name:, proto_type:, options:)
          unless proto_type.respond_to?(:enums)
            raise Errors::SchemaError, "Protobuf enum mapping for `#{enum_type_name}` must map to a proto enum class with `.enums`, " \
              "but got: #{proto_type.inspect}."
          end

          name_transform = fetch_mapping_option(options, :name_transform, :itself.to_proc)
          exclusions = fetch_mapping_option(options, :exclusions, []).map(&:to_s)
          expected_extras = fetch_mapping_option(options, :expected_extras, []).map(&:to_s)

          mapped_values = proto_type.enums.map(&:name).map(&:to_s).map do |name|
            transformed = name_transform.call(name)
            transformed.to_s
          end

          (mapped_values - exclusions + expected_extras).uniq
        rescue Errors::SchemaError
          raise
        rescue => e
          raise Errors::SchemaError, "Failed loading proto enum mapping for `#{enum_type_name}` from `#{proto_type}`: #{e.message}"
        end

        # Returns the field number for a field: an existing mapping, a mapping migrated from a
        # renamed field, or the next free number. The result is recorded in the mapping table.
        #
        # @raise [Errors::SchemaError] when two fields of the message share a field number
        def field_number_for(message_name:, type_name:, public_field_name:, name_in_index:)
          mappings_for_message = @proto_field_number_mappings_by_message[message_name] ||= {}

          mapping =
            if mappings_for_message.key?(public_field_name)
              mappings_for_message.fetch(public_field_name)
            else
              migrate_renamed_field_mapping(
                mappings_for_message,
                type_name: type_name,
                public_field_name: public_field_name
              ) || begin
                next_field_number = next_available_field_number_for(mappings_for_message)
                FieldNumberMapping.new(field_number: next_field_number, name_in_index: name_in_index)
              end
            end

          # Keep the number, but refresh `name_in_index` if it changed since the mapping was stored.
          if mapping.name_in_index != name_in_index
            mapping = FieldNumberMapping.new(field_number: mapping.field_number, name_in_index: name_in_index)
          end

          mappings_for_message[public_field_name] = mapping
          field_number = mapping.field_number

          duplicate_field_name = mappings_for_message.find do |mapped_field_name, mapped_field_number|
            mapped_field_name != public_field_name && mapped_field_number.field_number == field_number
          end&.first

          if duplicate_field_name
            raise Errors::SchemaError, "Protobuf field-number mapping collision in message `#{message_name}`: " \
              "`#{duplicate_field_name}` and `#{public_field_name}` are both mapped to field number #{field_number}."
          end

          field_number
        end

        # Smallest positive field number that is neither used by the message already nor inside
        # protobuf's implementation-reserved range.
        #
        # @raise [Errors::SchemaError] when no valid field number is left
        def next_available_field_number_for(mappings_for_message)
          used_numbers = ::Set.new(mappings_for_message.values.map(&:field_number))
          candidate = 1
          candidate += 1 while used_numbers.include?(candidate) || RESERVED_FIELD_NUMBERS.cover?(candidate)

          if candidate > MAX_FIELD_NUMBER
            raise Errors::SchemaError, "No protobuf field numbers are left to assign (the maximum is #{MAX_FIELD_NUMBER})."
          end

          candidate
        end

        # If the field was renamed and one of its old names still has a mapping, removes that
        # mapping from the table and returns it so the field number is carried over.
        def migrate_renamed_field_mapping(mappings_for_message, type_name:, public_field_name:)
          old_field_names =
            renamed_public_field_names_by_type_name
              .fetch(type_name, {})
              .fetch(public_field_name, [])

          old_field_names.each do |old_field_name|
            return mappings_for_message.delete(old_field_name) if mappings_for_message.key?(old_field_name)
          end

          nil
        end

        # Converts a type reference into `[repeated, proto_type_name]`. Lists nested more than
        # one level deep are represented with generated wrapper messages.
        def proto_field_type_for(type_ref, context_message_name:, context_field_name:)
          list_depth, base_type_ref = list_depth_and_base_type(type_ref)
          register_type_ref(base_type_ref)

          base_type_name = FieldTypeConverter.convert(base_type_ref)

          if list_depth <= 1
            [list_depth == 1, base_type_name]
          else
            wrapper_type = register_nested_list_wrappers(
              context_message_name: context_message_name,
              context_field_name: context_field_name,
              list_depth: list_depth,
              base_type_name: base_type_name
            )

            [true, wrapper_type]
          end
        end

        # Strips non-null and list wrappers, returning `[number_of_list_layers, innermost_type_ref]`.
        def list_depth_and_base_type(type_ref)
          list_depth = 0
          current = type_ref.unwrap_non_null

          while current.list?
            list_depth += 1
            current = current.unwrap_list.unwrap_non_null
          end

          [list_depth, current]
        end

        # Creates one wrapper message per inner list level (each with a single repeated `values`
        # field) and returns the name of the outermost wrapper. Results are cached per context.
        def register_nested_list_wrappers(context_message_name:, context_field_name:, list_depth:, base_type_name:)
          context_key = [context_message_name, context_field_name, list_depth, base_type_name]
          existing_root = @wrapper_root_name_by_context[context_key]
          return existing_root if existing_root

          next_type_name = base_type_name
          root_wrapper_name = nil

          # Build from the innermost wrapper outwards so each level can reference the previous one.
          (list_depth - 1).downto(1) do |level|
            base_wrapper_name = "#{context_message_name}#{to_title_case(context_field_name)}ListLevel#{level}"
            wrapper_name = unique_generated_message_name(base_wrapper_name)

            field = FieldDefinition.new(
              name: "values",
              type: next_type_name,
              field_number: 1,
              repeated: true,
              comment: nil
            )

            @generated_message_definitions_by_name[wrapper_name] = MessageDefinition.new(
              name: wrapper_name,
              fields: [field]
            )

            next_type_name = wrapper_name
            root_wrapper_name = wrapper_name if level == 1
          end

          @wrapper_root_name_by_context[context_key] = root_wrapper_name
        end

        # Returns `base_name`, or `base_name2`, `base_name3`, ... — the first not yet taken.
        def unique_generated_message_name(base_name)
          index = 0

          loop do
            candidate_name =
              if index.zero?
                Identifier.message_name(base_name)
              else
                Identifier.message_name("#{base_name}#{index + 1}")
              end

            return candidate_name unless name_taken?(candidate_name)
            index += 1
          end
        end

        # Whether any registered message, generated wrapper or enum already uses `name`.
        def name_taken?(name)
          @message_definitions_by_name.key?(name) ||
            @generated_message_definitions_by_name.key?(name) ||
            @enum_definitions_by_name.key?(name)
        end

        # Renders all enums, then all messages, each group sorted by name.
        def render_definitions
          rendered_enums = all_enum_definitions.sort_by(&:name).map { |definition| render_enum(definition) }
          rendered_messages = all_message_definitions.sort_by(&:name).map { |definition| render_message(definition) }
          (rendered_enums + rendered_messages).join("\n\n")
        end

        # Proto enum value names are prefixed with the upper-snake-cased enum type name.
        def proto_enum_value_name(enum_type_name, enum_value_name)
          Identifier.enum_value_name("#{enum_value_prefix(enum_type_name)}_#{to_upper_snake_case(enum_value_name)}")
        end

        def proto_zero_enum_value_name(enum_type_name)
          "#{enum_value_prefix(enum_type_name)}_UNSPECIFIED"
        end

        def enum_value_prefix(enum_type_name)
          to_upper_snake_case(enum_type_name)
        end

        def render_enum(enum_definition)
          lines = [
            "enum #{enum_definition.name} {",
            " #{enum_definition.zero_value_name} = 0;"
          ]

          enum_definition.values.each do |value|
            lines << " #{value.name} = #{value.number};"
          end

          lines << "}"
          lines.join("\n")
        end

        def render_message(message_definition)
          lines = ["message #{message_definition.name} {"]

          if message_definition.fields.empty?
            lines << " // No indexed fields were defined for this type."
          else
            message_definition.fields.each do |field|
              repeated_modifier = field.repeated ? "repeated " : ""
              line = " #{repeated_modifier}#{field.type} #{field.name} = #{field.field_number};"
              line += " // #{field.comment}" if field.comment
              lines << line
            end
          end

          lines << "}"
          lines.join("\n")
        end

        def all_enum_definitions
          @enum_definitions_by_name.values
        end

        def all_message_definitions
          @message_definitions_by_name.values + @generated_message_definitions_by_name.values
        end

        # "line_items" / "lineItems" => "LineItems".
        def to_title_case(name)
          name
            .to_s
            .gsub(/([[:lower:]\d])([[:upper:]])/, "\\1_\\2")
            .split("_")
            .reject(&:empty?)
            .map(&:capitalize)
            .join
        end

        # "HTTPServerError" => "HTTP_SERVER_ERROR".
        def to_upper_snake_case(name)
          name
            .to_s
            .gsub(/([[:upper:]]+)([[:upper:]][[:lower:]])/, "\\1_\\2")
            .gsub(/([[:lower:]\d])([[:upper:]])/, "\\1_\\2")
            .upcase
        end

        # Remembers which type first claimed `message_name` and raises if a different type claims it.
        def check_message_name_collision(message_name, type_name)
          existing_type_name = @type_name_by_message_name.fetch(message_name, type_name)
          @type_name_by_message_name[message_name] = existing_type_name
          return if existing_type_name == type_name

          raise Errors::SchemaError, "Type names `#{existing_type_name}` and `#{type_name}` both map to the same proto message name `#{message_name}`."
        end

        # Remembers which type first claimed `enum_name` and raises if a different type claims it.
        def check_enum_name_collision(enum_name, type_name)
          existing_type_name = @type_name_by_enum_name.fetch(enum_name, type_name)
          @type_name_by_enum_name[enum_name] = existing_type_name
          return if existing_type_name == type_name

          raise Errors::SchemaError, "Type names `#{existing_type_name}` and `#{type_name}` both map to the same proto enum name `#{enum_name}`."
        end

        # Stringifies the GraphQL enum names used as keys; nil becomes an empty hash.
        def normalize_proto_enum_mappings(raw_mappings)
          return {} if raw_mappings.nil?

          raw_mappings.each_with_object({}) do |(graphql_enum_name, mappings), normalized|
            normalized[graphql_enum_name.to_s] = mappings
          end
        end

        # Converts raw field-number mappings into `{message_name => {field_name => FieldNumberMapping}}`.
        # The `messages` and `fields` wrapper keys are optional and may be Strings or Symbols.
        #
        # @raise [Errors::SchemaError] when the structure is malformed or a field number is not a
        #   valid protobuf field number
        def normalize_proto_field_number_mappings(raw_mappings)
          return {} if raw_mappings.nil?
          unless raw_mappings.is_a?(Hash)
            raise Errors::SchemaError, "Protobuf field-number mappings must be a Hash, got: #{raw_mappings.class}."
          end

          messages_hash =
            if raw_mappings.key?("messages")
              raw_mappings.fetch("messages")
            elsif raw_mappings.key?(:messages)
              raw_mappings.fetch(:messages)
            else
              raw_mappings
            end

          unless messages_hash.is_a?(Hash)
            raise Errors::SchemaError, "Protobuf field-number mappings must have a `messages` Hash."
          end

          messages_hash.each_with_object({}) do |(message_name, field_numbers), normalized|
            unless field_numbers.is_a?(Hash)
              raise Errors::SchemaError, "Field-number mapping for message `#{message_name}` must be a Hash."
            end

            normalized_fields =
              if field_numbers.key?("fields")
                field_numbers.fetch("fields")
              elsif field_numbers.key?(:fields)
                field_numbers.fetch(:fields)
              else
                field_numbers
              end

            unless normalized_fields.is_a?(Hash)
              raise Errors::SchemaError, "Field-number mapping for message `#{message_name}` must contain a `fields` Hash."
            end

            normalized_message_name = message_name.to_s
            normalized[normalized_message_name] = normalized_fields.each_with_object({}) do |(field_name, field_number_or_mapping), normalized_field_numbers|
              normalized_field_name = field_name.to_s
              normalized_field_number, normalized_name_in_index = normalize_field_number_mapping_entry(
                normalized_message_name,
                normalized_field_name,
                field_number_or_mapping
              )

              if normalized_field_number <= 0
                raise Errors::SchemaError, "Field-number mapping for `#{normalized_message_name}.#{normalized_field_name}` " \
                  "must be a positive integer, got: #{field_number_or_mapping.inspect}."
              end

              # Reject numbers `protoc` would refuse, so the problem surfaces here rather than
              # when the generated schema is compiled.
              if normalized_field_number > MAX_FIELD_NUMBER || RESERVED_FIELD_NUMBERS.cover?(normalized_field_number)
                raise Errors::SchemaError, "Field-number mapping for `#{normalized_message_name}.#{normalized_field_name}` " \
                  "must be at most #{MAX_FIELD_NUMBER} and outside the reserved range " \
                  "#{RESERVED_FIELD_NUMBERS.first}-#{RESERVED_FIELD_NUMBERS.last}, got: #{field_number_or_mapping.inspect}."
              end

              normalized_field_numbers[normalized_field_name] = FieldNumberMapping.new(
                field_number: normalized_field_number,
                name_in_index: normalized_name_in_index
              )
            rescue ArgumentError, TypeError
              # Raised by `Integer(...)` when the configured value is not integer-like.
              raise Errors::SchemaError, "Field-number mapping for `#{normalized_message_name}.#{normalized_field_name}` " \
                "must be an integer, got: #{field_number_or_mapping.inspect}."
            end
          end
        end

        # Normalizes one mapping entry (a bare number, or a hash with `field_number` and optional
        # `name_in_index`) into `[Integer field_number, String name_in_index]`.
        def normalize_field_number_mapping_entry(message_name, field_name, field_number_or_mapping)
          if field_number_or_mapping.is_a?(Hash)
            raw_field_number =
              if field_number_or_mapping.key?("field_number")
                field_number_or_mapping.fetch("field_number")
              elsif field_number_or_mapping.key?(:field_number)
                field_number_or_mapping.fetch(:field_number)
              else
                raise Errors::SchemaError, "Field-number mapping for `#{message_name}.#{field_name}` must include `field_number`."
              end

            raw_name_in_index =
              if field_number_or_mapping.key?("name_in_index")
                field_number_or_mapping.fetch("name_in_index")
              elsif field_number_or_mapping.key?(:name_in_index)
                field_number_or_mapping.fetch(:name_in_index)
              else
                field_name
              end

            unless raw_name_in_index.is_a?(String) || raw_name_in_index.is_a?(Symbol)
              raise Errors::SchemaError, "Field-number mapping for `#{message_name}.#{field_name}` " \
                "must use a String or Symbol `name_in_index`, got: #{raw_name_in_index.inspect}."
            end

            [Integer(raw_field_number), raw_name_in_index.to_s]
          else
            [Integer(field_number_or_mapping), field_name]
          end
        end

        # Inverts the schema-definition state's "old field name => renamed field" registry into
        # `{type_name => {current_field_name => [old_field_names]}}`. Memoized; empty when the
        # results object does not expose that state.
        def renamed_public_field_names_by_type_name
          @renamed_public_field_names_by_type_name ||= begin
            renamed_fields_by_type_name_and_old_field_name =
              if @results.respond_to?(:state) && @results.state.respond_to?(:renamed_fields_by_type_name_and_old_field_name)
                @results.state.renamed_fields_by_type_name_and_old_field_name
              else
                {}
              end

            renamed_fields_by_type_name_and_old_field_name.each_with_object({}) do |(type_name, old_to_new), mappings|
              mappings[type_name] = old_to_new.each_with_object(::Hash.new { |h, k| h[k] = [] }) do |(old_field_name, renamed_field), current_to_old|
                current_to_old[renamed_field.name] << old_field_name
              end
            end
          end
        end

        # Reads an option that may be keyed by Symbol or String, falling back to `default`.
        def fetch_mapping_option(options, key, default)
          if options.key?(key)
            options[key]
          elsif options.key?(key.to_s)
            options[key.to_s]
          else
            default
          end
        end
      end
    end
  end
end
+ module SchemaArtifactManagerExtension + private + + def artifacts_from_schema_def + results = schema_definition_results # : ElasticGraph::SchemaDefinition::Results & ResultsExtension + load_proto_field_number_mappings(results) + + base_artifacts = super + proto_schema = results.respond_to?(:proto_schema) ? results.proto_schema : "" + return base_artifacts if proto_schema.empty? + + base_artifacts + [ + new_yaml_artifact( + PROTO_FIELD_NUMBERS_FILE, + results.proto_field_number_mappings, + extra_comment_lines: [ + "This file reserves protobuf field numbers to keep them stable over time.", + "Do not renumber existing entries." + ] + ), + new_raw_artifact(PROTO_SCHEMA_FILE, proto_schema.chomp) + ] + end + + def load_proto_field_number_mappings(results) + api = results.state.api + return unless api.respond_to?(:configure_proto_field_number_mappings) + + full_path = ::File.join(@schema_artifacts_directory, PROTO_FIELD_NUMBERS_FILE) + loaded = ::File.exist?(full_path) ? ::YAML.safe_load_file(full_path, aliases: false) : {} + api.configure_proto_field_number_mappings(loaded || {}) + end + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf.rbs new file mode 100644 index 000000000..321ca4187 --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf.rbs @@ -0,0 +1,6 @@ +module ElasticGraph + module Protobuf + PROTO_SCHEMA_FILE: ::String + PROTO_FIELD_NUMBERS_FILE: ::String + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/api_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/api_extension.rbs new file mode 100644 index 000000000..d68bd6a9c --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/api_extension.rbs @@ -0,0 +1,21 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module APIExtension: ::ElasticGraph::SchemaDefinition::API + 
PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE: ::Hash[::String, ::String] + + def self.extended: (::ElasticGraph::SchemaDefinition::API) -> void + def proto_schema_artifacts: ( + ?package_name: ::String + ) -> void + def proto_enum_mappings: (untyped) -> void + def configure_proto_field_number_mappings: (untyped, ?enforce: bool) -> void + + def proto_schema_package_name: () -> ::String + def proto_enums_by_graphql_enum: () -> untyped + def proto_field_number_mapping_file: () -> ::String + def proto_field_number_mappings: () -> untyped + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/enum_type_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/enum_type_extension.rbs new file mode 100644 index 000000000..97e832063 --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/enum_type_extension.rbs @@ -0,0 +1,9 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module EnumTypeExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::EnumType + def to_proto_field_type: () -> ::String + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/factory_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/factory_extension.rbs new file mode 100644 index 000000000..178113baa --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/factory_extension.rbs @@ -0,0 +1,8 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module FactoryExtension: ::ElasticGraph::SchemaDefinition::Factory + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/field_type_converter.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/field_type_converter.rbs new file mode 100644 index 000000000..48a7f27cd --- /dev/null +++ 
b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/field_type_converter.rbs @@ -0,0 +1,9 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + class FieldTypeConverter + def self.convert: (::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference) -> ::String + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/identifier.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/identifier.rbs new file mode 100644 index 000000000..6f6dd8762 --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/identifier.rbs @@ -0,0 +1,19 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + class Identifier + PROTO_KEYWORDS: ::Set[::String] + + def self.package_name: (::String) -> ::String + def self.message_name: (::String) -> ::String + def self.enum_name: (::String) -> ::String + def self.field_name: (::String) -> ::String + def self.enum_value_name: (::String) -> ::String + + private + + def self.escape_keyword: (::String) -> ::String + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/object_interface_and_union_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/object_interface_and_union_extension.rbs new file mode 100644 index 000000000..08252826b --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/object_interface_and_union_extension.rbs @@ -0,0 +1,9 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module ObjectInterfaceAndUnionExtension : ::ElasticGraph::SchemaDefinition::SchemaElements::ObjectType + def to_proto_field_type: () -> ::String + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/results_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/results_extension.rbs new file mode 100644 index 
module ElasticGraph
  module Protobuf
    module SchemaDefinition
      module ScalarTypeExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType
        # The explicitly configured proto field type. This is nil until `proto_field` has been
        # called; `to_proto_field_type` relies on that to fall back to JSON-schema inference.
        attr_reader proto_field_type: ::String?

        # Configures the proto field type for this scalar type.
        def proto_field: (type: ::String) -> void

        # Resolved proto field type; raises a schema error when none can be determined.
        def to_proto_field_type: () -> ::String
      end
    end
  end
end
?proto_field_number_mappings: untyped + ) -> void + def to_proto: () -> ::String + def field_number_mappings_for_artifact: () -> untyped + + private + + def indexed_types: () -> ::Array[untyped] + def register_type: (untyped type) -> void + def register_type_ref: (::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference) -> void + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rbs new file mode 100644 index 000000000..9a1decf6d --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rbs @@ -0,0 +1,8 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaArtifactManagerExtension : ::ElasticGraph::SchemaDefinition::SchemaArtifactManager + end + end + end +end diff --git a/elasticgraph-protobuf/spec/integration/elastic_graph/protobuf/schema_definition/rake_tasks_spec.rb b/elasticgraph-protobuf/spec/integration/elastic_graph/protobuf/schema_definition/rake_tasks_spec.rb new file mode 100644 index 000000000..b81759edf --- /dev/null +++ b/elasticgraph-protobuf/spec/integration/elastic_graph/protobuf/schema_definition/rake_tasks_spec.rb @@ -0,0 +1,130 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
module ElasticGraph
  module Protobuf
    module SchemaDefinition
      # End-to-end coverage of `schema_artifacts:dump` with the protobuf extension enabled.
      RSpec.describe "Protobuf RakeTasks", :rake_task, :in_temp_dir do
        describe "schema_artifacts:dump" do
          it "dumps proto artifact when indexed types are defined" do
            write_proto_schema(table_defs: <<~EOS)
              s.object_type "Product" do |t|
                t.field "id", "ID"
                t.field "name", "String"
                t.index "products"
              end
            EOS

            expected_schema = a_string_including('syntax = "proto3";', "message Product", "string name = 2;")

            expect {
              rake_output = run_rake_with_proto("schema_artifacts:dump")
              expect(rake_output.lines).to include(a_string_including("Dumped", PROTO_SCHEMA_FILE))
            }.to change { read_artifact(PROTO_SCHEMA_FILE) }.from(nil).to(expected_schema)
          end

          it "idempotently dumps proto artifacts" do
            write_proto_schema(table_defs: <<~EOS)
              s.object_type "Product" do |t|
                t.field "id", "ID"
                t.index "products"
              end
            EOS

            # The first dump creates the artifacts; the second one must leave them untouched.
            run_rake_with_proto("schema_artifacts:dump", enforce_json_schema_version: false)

            expect {
              rake_output = run_rake_with_proto("schema_artifacts:dump")
              expect(rake_output.lines).to include(a_string_including("already up to date", PROTO_SCHEMA_FILE))
            }.to maintain { read_artifact(PROTO_SCHEMA_FILE) }
          end

          it "can persist and reuse proto field-number mappings from an artifact file" do
            write_proto_schema(table_defs: <<~EOS)
              s.object_type "Product" do |t|
                t.field "id", "ID"
                t.field "name", "String"
                t.index "products"
              end
            EOS

            run_rake_with_proto("schema_artifacts:dump", enforce_json_schema_version: false)

            expect(read_artifact(PROTO_FIELD_NUMBERS_FILE)).not_to be_nil
            expect(parsed_proto_field_numbers).to eq({
              "messages" => {
                "Product" => {
                  "fields" => {
                    "id" => 1,
                    "name" => 2
                  }
                }
              }
            })

            # Same fields in the opposite order: the persisted numbers must win over position.
            write_proto_schema(table_defs: <<~EOS)
              s.object_type "Product" do |t|
                t.field "name", "String"
                t.field "id", "ID"
                t.index "products"
              end
            EOS

            run_rake_with_proto("schema_artifacts:dump", enforce_json_schema_version: false)

            dumped_schema = read_artifact(PROTO_SCHEMA_FILE)
            expect(dumped_schema).to include("string name = 2;")
            expect(dumped_schema).to include("string id = 1;")
          end
        end

        private

        # Writes a `schema.rb` containing the given type definitions (and optional proto config).
        def write_proto_schema(table_defs:, proto_config: nil)
          ::File.write("schema.rb", <<~EOS)
            ElasticGraph.define_schema do |s|
              s.json_schema_version 1
              #{proto_config}

              #{table_defs}
            end
          EOS
        end

        # Runs the named rake task with the protobuf API extension loaded and returns its output.
        def run_rake_with_proto(*args, enforce_json_schema_version: true)
          task_options = {
            schema_element_name_form: :snake_case,
            index_document_sizes: false,
            path_to_schema: "schema.rb",
            schema_artifacts_directory: "config/schema/artifacts",
            enforce_json_schema_version: enforce_json_schema_version,
            extension_modules: [SchemaDefinition::APIExtension]
          }

          run_rake(*args) do |output|
            ElasticGraph::SchemaDefinition::RakeTasks.new(**task_options, output: output)
          end
        end

        # Contents of the named artifact, or nil when it has not been dumped.
        def read_artifact(name)
          artifact_path = File.join("config", "schema", "artifacts", name)
          return nil unless File.exist?(artifact_path)

          File.read(artifact_path)
        end

        def parsed_proto_field_numbers
          field_numbers_yaml = read_artifact(PROTO_FIELD_NUMBERS_FILE)
          ::YAML.safe_load(field_numbers_yaml)
        end
      end
    end
  end
end
+ +RSpec.configure do |config| + config.when_first_matching_example_defined(:proto_schema) do + require "support/proto_schema_support" + end +end diff --git a/elasticgraph-protobuf/spec/support/proto_schema_support.rb b/elasticgraph-protobuf/spec/support/proto_schema_support.rb new file mode 100644 index 000000000..26ddddf4c --- /dev/null +++ b/elasticgraph-protobuf/spec/support/proto_schema_support.rb @@ -0,0 +1,35 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/schema_definition/test_support" +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaSupport + include ElasticGraph::SchemaDefinition::TestSupport + + def define_proto_schema(**options, &block) + define_schema( + schema_element_name_form: :snake_case, + extension_modules: [SchemaDefinition::APIExtension], + **options, + &block + ) + end + + def proto_schema_from(results) + results.proto_schema + end + end + + RSpec.configure do |config| + config.include SchemaSupport, :proto_schema + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/api_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/api_extension_spec.rb new file mode 100644 index 000000000..fd3215279 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/api_extension_spec.rb @@ -0,0 +1,72 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe APIExtension do + def build_api(built_in_types: []) + factory = ::Object.new + + api = ::Object.new + api.define_singleton_method(:factory) { factory } + api.define_singleton_method(:on_built_in_types) do |&block| + built_in_types.each(&block) + end + + api.extend(APIExtension) + [api, factory] + end + + it "extends the factory, applies default artifact settings, and maps built-in scalars" do + scalar_type = ::Struct.new(:name) do + include ScalarTypeExtension + end.new("String") + + api, factory = build_api(built_in_types: [scalar_type, ::Object.new]) + + expect(factory).to be_a(FactoryExtension) + expect(scalar_type.to_proto_field_type).to eq("string") + expect(api.proto_schema_package_name).to eq("elasticgraph") + expect(api.proto_field_number_mapping_file).to eq(Protobuf::PROTO_FIELD_NUMBERS_FILE) + end + + it "stores proto artifact settings and mappings" do + api, = build_api + + api.proto_schema_artifacts(package_name: "sales.v1") + api.proto_enum_mappings("Status" => {::Object => {}}) + api.configure_proto_field_number_mappings({"messages" => {"Account" => {"id" => 1}}}, enforce: true) + + expect(api.proto_schema_package_name).to eq("sales.v1") + expect(api.proto_field_number_mapping_file).to eq(Protobuf::PROTO_FIELD_NUMBERS_FILE) + expect(api.proto_enums_by_graphql_enum).to eq("Status" => {::Object => {}}) + expect(api.proto_field_number_mappings).to eq("messages" => {"Account" => {"id" => 1}}) + end + + it "validates proto_schema_artifacts arguments" do + api, = build_api + + expect { + api.proto_schema_artifacts(package_name: "") + }.to raise_error(Errors::SchemaError, a_string_including("`package_name` must be a non-empty String")) + end + + it "validates configure_proto_field_number_mappings arguments" do + api, = build_api + + expect { + 
api.configure_proto_field_number_mappings({}, enforce: :yes) + }.to raise_error(Errors::SchemaError, a_string_including("`enforce` must be true or false")) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/factory_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/factory_extension_spec.rb new file mode 100644 index 000000000..391447e45 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/factory_extension_spec.rb @@ -0,0 +1,111 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/factory_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe FactoryExtension do + let(:factory_class) do + base_class = ::Class.new do + def new_enum_type(_name) + type = ::Object.new + yield type + type + end + + def new_interface_type(_name) + type = ::Object.new + yield type + type + end + + def new_object_type(_name) + type = ::Object.new + yield type + type + end + + def new_scalar_type(_name) + type = ::Object.new + yield type + type + end + + def new_union_type(_name) + type = ::Object.new + yield type + type + end + + def new_results + ::Object.new + end + + def new_schema_artifact_manager(*args, **kwargs) + @last_schema_artifact_manager_args = args + @last_schema_artifact_manager_kwargs = kwargs + ::Object.new + end + + attr_reader :last_schema_artifact_manager_args, :last_schema_artifact_manager_kwargs + end + + ::Class.new(base_class) do + prepend FactoryExtension + end + end + + it "extends enum types with enum conversion behavior" do + type = factory_class.new.new_enum_type("Status") + expect(type).to be_a(EnumTypeExtension) + end + + it "extends interface and union 
types with object conversion behavior" do + factory = factory_class.new + interface_from_block = nil + union_from_block = nil + + factory.new_interface_type("Node") { |type| interface_from_block = type } + expect(factory.new_interface_type("Node")).to be_a(ObjectInterfaceAndUnionExtension) + expect(interface_from_block).to be_a(ObjectInterfaceAndUnionExtension) + + factory.new_union_type("SearchResult") { |type| union_from_block = type } + expect(factory.new_union_type("SearchResult")).to be_a(ObjectInterfaceAndUnionExtension) + expect(union_from_block).to be_a(ObjectInterfaceAndUnionExtension) + end + + it "extends object and scalar types and yields to provided blocks" do + object_type = nil + scalar_type = nil + + factory = factory_class.new + factory.new_object_type("Account") { |type| object_type = type } + factory.new_scalar_type("Custom") { |type| scalar_type = type } + + expect(object_type).to be_a(ObjectInterfaceAndUnionExtension) + expect(scalar_type).to be_a(ScalarTypeExtension) + expect(factory.new_object_type("Account")).to be_a(ObjectInterfaceAndUnionExtension) + expect(factory.new_scalar_type("Custom")).to be_a(ScalarTypeExtension) + end + + it "extends results and schema artifact managers" do + factory = factory_class.new + + expect(factory.new_results).to be_a(ResultsExtension) + + manager = factory.new_schema_artifact_manager(:positional, key: "value") + expect(manager).to be_a(SchemaArtifactManagerExtension) + expect(factory.last_schema_artifact_manager_args).to eq([:positional]) + expect(factory.last_schema_artifact_manager_kwargs).to eq({key: "value"}) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/field_type_converter_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/field_type_converter_spec.rb new file mode 100644 index 000000000..0dfeb15dc --- /dev/null +++ 
b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/field_type_converter_spec.rb @@ -0,0 +1,52 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/field_type_converter" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe FieldTypeConverter do + it "converts non-list field types that expose to_proto_field_type" do + resolved_type = ::Object.new + resolved_type.define_singleton_method(:to_proto_field_type) { "bytes" } + + type_ref = ::Object.new + type_ref.define_singleton_method(:unwrap_non_null) { type_ref } + type_ref.define_singleton_method(:list?) { false } + type_ref.define_singleton_method(:resolved) { resolved_type } + type_ref.define_singleton_method(:unwrapped_name) { "Binary" } + + expect(FieldTypeConverter.convert(type_ref)).to eq("bytes") + end + + it "raises for list types" do + type_ref = ::Object.new + type_ref.define_singleton_method(:unwrap_non_null) { type_ref } + type_ref.define_singleton_method(:list?) { true } + + expect { + FieldTypeConverter.convert(type_ref) + }.to raise_error(Errors::SchemaError, a_string_including("only supports non-list types")) + end + + it "raises when the resolved type does not expose to_proto_field_type" do + type_ref = ::Object.new + type_ref.define_singleton_method(:unwrap_non_null) { type_ref } + type_ref.define_singleton_method(:list?) 
{ false } + type_ref.define_singleton_method(:resolved) { nil } + type_ref.define_singleton_method(:unwrapped_name) { "UnknownType" } + + expect { + FieldTypeConverter.convert(type_ref) + }.to raise_error(Errors::SchemaError, a_string_including("Type `UnknownType` cannot be converted to proto")) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/identifier_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/identifier_spec.rb new file mode 100644 index 000000000..fb87afe97 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/identifier_spec.rb @@ -0,0 +1,33 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/identifier" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe Identifier do + it "escapes reserved keywords" do + expect(Identifier.escape_keyword("package")).to eq("package_") + expect(Identifier.escape_keyword("custom")).to eq("custom") + end + + it "escapes package name segments independently" do + expect(Identifier.package_name("proto.package.v1")).to eq("proto.package_.v1") + end + + it "escapes message, enum, field, and enum value names" do + expect(Identifier.message_name("service")).to eq("service_") + expect(Identifier.enum_name("message")).to eq("message_") + expect(Identifier.field_name("string")).to eq("string_") + expect(Identifier.enum_value_name("stream")).to eq("stream_") + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/results_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/results_extension_spec.rb new file mode 100644 index 000000000..a2df8ed7c 
--- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/results_extension_spec.rb @@ -0,0 +1,63 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/results_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe ResultsExtension do + def build_results_with_api(api) + state = ::Struct.new(:api).new(api) + + ::Object.new.tap do |results| + results.extend(ResultsExtension) + results.define_singleton_method(:state) { state } + end + end + + it "builds schema generators from configured api values and memoizes proto_schema" do + api = ::Object.new + api.define_singleton_method(:proto_schema_package_name) { "sales.v1" } + api.define_singleton_method(:proto_enums_by_graphql_enum) { {"Status" => {}} } + api.define_singleton_method(:proto_field_number_mappings) { {"messages" => {"Account" => {"id" => 1}}} } + + generator = instance_double(Schema, to_proto: "syntax = \"proto3\";", field_number_mappings_for_artifact: {"messages" => {}}) + + results = build_results_with_api(api) + + expect(Schema).to receive(:new).with( + results, + package_name: "sales.v1", + proto_enums_by_graphql_enum: {"Status" => {}}, + proto_field_number_mappings: {"messages" => {"Account" => {"id" => 1}}} + ).and_return(generator) + + expect(results.proto_schema).to eq("syntax = \"proto3\";") + expect(results.proto_schema).to eq("syntax = \"proto3\";") + expect(results.proto_field_number_mappings).to eq({"messages" => {}}) + end + + it "falls back to defaults when api does not expose proto configuration methods" do + api = ::Object.new + generator = instance_double(Schema, to_proto: "", field_number_mappings_for_artifact: {"messages" => {}}) + results = build_results_with_api(api) + + expect(Schema).to 
receive(:new).with( + results, + package_name: "elasticgraph", + proto_enums_by_graphql_enum: {}, + proto_field_number_mappings: {} + ).and_return(generator) + + expect(results.proto_schema).to eq("") + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/scalar_type_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/scalar_type_extension_spec.rb new file mode 100644 index 000000000..04e6019c7 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/scalar_type_extension_spec.rb @@ -0,0 +1,89 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/scalar_type_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe ScalarTypeExtension do + let(:scalar_type_class) do + ::Class.new do + include ScalarTypeExtension + + attr_reader :name + + def initialize(name:, json_schema_options:) + @name = name + @json_schema_options = json_schema_options + end + + def json_schema_options + @json_schema_options + end + end + end + + it "returns an explicitly configured proto field type" do + scalar = scalar_type_class.new(name: "CustomScalar", json_schema_options: {}) + scalar.proto_field(type: "fixed64") + + expect(scalar.to_proto_field_type).to eq("fixed64") + end + + it "infers the proto field type from a string json_schema type" do + scalar = scalar_type_class.new(name: "EmailAddress", json_schema_options: {type: "string"}) + + expect(scalar.to_proto_field_type).to eq("string") + end + + it "infers the proto field type from a symbol json_schema type" do + scalar = scalar_type_class.new(name: "Count", json_schema_options: {type: :integer}) + + expect(scalar.to_proto_field_type).to 
eq("int64") + end + + it "infers the proto field type from an array json_schema type with null" do + scalar = scalar_type_class.new(name: "MaybeFloat", json_schema_options: {type: [:null, :number, 7]}) + + expect(scalar.to_proto_field_type).to eq("double") + end + + it "raises when json_schema type cannot be inferred" do + scalar = scalar_type_class.new(name: "Ambiguous", json_schema_options: {type: ["string", "integer"]}) + + expect { + scalar.to_proto_field_type + }.to raise_error(Errors::SchemaError, a_string_including("Protobuf field type not configured for scalar type `Ambiguous`")) + end + + it "raises when json_schema type is not a string, symbol, or array" do + scalar = scalar_type_class.new(name: "Unknown", json_schema_options: {type: 123}) + + expect { + scalar.to_proto_field_type + }.to raise_error(Errors::SchemaError, a_string_including("Protobuf field type not configured for scalar type `Unknown`")) + end + + it "raises when no json_schema_options are exposed" do + scalar = ::Class.new do + include ScalarTypeExtension + + def name + "WithoutJsonSchema" + end + end.new + + expect { + scalar.to_proto_field_type + }.to raise_error(Errors::SchemaError, a_string_including("Protobuf field type not configured for scalar type `WithoutJsonSchema`")) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension_spec.rb new file mode 100644 index 000000000..dd1fe2dc2 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension_spec.rb @@ -0,0 +1,78 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension" +require "tmpdir" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe SchemaArtifactManagerExtension do + def build_manager(results) + stateful_base = ::Class.new do + attr_reader :schema_definition_results + + def initialize(schema_definition_results) + @schema_definition_results = schema_definition_results + @schema_artifacts_directory = ::Dir.pwd + end + + private + + def artifacts_from_schema_def + [:base_artifact] + end + end + + ::Class.new(stateful_base) do + prepend SchemaArtifactManagerExtension + end.new(results) + end + + it "returns base artifacts when api cannot configure proto field-number mappings" do + api = ::Object.new + state = ::Struct.new(:api).new(api) + results = ::Object.new + results.define_singleton_method(:state) { state } + + manager = build_manager(results) + + expect(manager.send(:artifacts_from_schema_def)).to eq([:base_artifact]) + end + + it "loads mappings from file and forwards them to api configuration" do + configured = false + + mapping_dir = ::Dir.mktmpdir + mapping_file = ::File.join(mapping_dir, Protobuf::PROTO_FIELD_NUMBERS_FILE) + ::File.write(mapping_file, <<~YAML) + --- + messages: + Account: + id: 1 + YAML + + api = ::Object.new + api.define_singleton_method(:configure_proto_field_number_mappings) do |mappings, enforce: false| + configured = (mappings == {"messages" => {"Account" => {"id" => 1}}}) + end + + state = ::Struct.new(:api).new(api) + results = ::Object.new + results.define_singleton_method(:state) { state } + + manager = build_manager(results) + manager.instance_variable_set(:@schema_artifacts_directory, mapping_dir) + + expect(manager.send(:artifacts_from_schema_def)).to eq([:base_artifact]) + expect(configured).to eq(true) + end + end + end + end +end diff --git 
a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_edge_cases_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_edge_cases_spec.rb new file mode 100644 index 000000000..1f97da26a --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_edge_cases_spec.rb @@ -0,0 +1,658 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe Schema, :proto_schema do + it "returns an empty string when no indexed types are present" do + expect(build_schema_with_root_indexed_types.to_proto).to eq("") + end + + it "supports the .generate convenience API" do + results = define_proto_schema do |s| + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect(Schema.generate(results)).to eq(results.proto_schema) + end + + it "raises when a root indexed type cannot be converted to proto" do + bad_type = ::Object.new + bad_type.define_singleton_method(:name) { "BadType" } + + schema = build_schema_with_root_indexed_types(bad_type) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("Type `BadType` cannot be converted to proto")) + end + + it "uses inspect output for nameless types that cannot be converted" do + bad_type = ::Object.new + message = build_fake_message_type( + "Account", + "id" => build_fake_type_ref(resolved: build_fake_scalar_type("string"), unwrapped_name: "ID"), + "broken" => build_fake_type_ref(resolved: bad_type, unwrapped_name: "Broken") + ) + schema = build_schema_with_root_indexed_types(message) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, 
a_string_including(bad_type.inspect)) + end + + it "raises when a field type reference cannot be resolved" do + message = build_fake_message_type( + "BrokenMessage", + "broken_field" => build_fake_type_ref(resolved: nil, unwrapped_name: "MissingType") + ) + schema = build_schema_with_root_indexed_types(message) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("Type `MissingType` cannot be resolved")) + end + + it "raises when enum values map to duplicate proto value names" do + results = define_proto_schema do |s| + s.enum_type "Status" do |t| + t.values "option", "OPTION" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("duplicate proto enum value names")) + end + + it "uses a suffixed zero enum value when needed to avoid collisions" do + results = define_proto_schema do |s| + s.enum_type "Status" do |t| + t.values "UNSPECIFIED", "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("STATUS_UNSPECIFIED_ = 0;") + expect(generated).to include("STATUS_UNSPECIFIED = 1;") + end + + it "raises when a configured proto enum mapping source does not expose .enums" do + results = define_proto_schema do |s| + s.proto_enum_mappings("Status" => {::Object.new => {}}) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must map to a proto enum class with `.enums`")) + end + + it "wraps unexpected exceptions from enum mapping sources" do + proto_status = ::Class.new do + def self.enums + 
[::Data.define(:name).new(name: :ACTIVE)] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status => { + "name_transform" => ->(_name) { raise "boom" } + } + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("Failed loading proto enum mapping for `Status`")) + end + + it "supports string-key mapping options in proto_enum_mappings" do + proto_status = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :UNKNOWN_DO_NOT_USE), + ::Data.define(:name).new(name: :ACTIVE) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status => { + "exclusions" => [:UNKNOWN_DO_NOT_USE], + "expected_extras" => [:LEGACY] + } + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("STATUS_ACTIVE = 1;") + expect(generated).to include("STATUS_LEGACY = 2;") + end + + it "raises on field-number mapping collisions for a message" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 1, + "name" => 1 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("field-number mapping collision")) + end + + it "raises when two fields collapse to the same proto field name after keyword escaping" do + results = define_proto_schema do |s| + s.object_type "Account" do |t| + t.field "id", "ID" + t.field 
"string", "String" + t.field "string_", "String" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("duplicate proto field names")) + end + + it "generates unique nested-wrapper names when the base name is already taken" do + results = define_proto_schema do |s| + s.object_type "MatrixValuesListLevel1" do |t| + t.field "id", "ID" + end + + s.object_type "Matrix" do |t| + t.field "id", "ID" + t.field "already_taken", "MatrixValuesListLevel1" + t.field "values", "[[Float!]!]!" + t.index "matrices" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("message MatrixValuesListLevel12 {") + expect(generated).to include("repeated MatrixValuesListLevel12 values = 3;") + end + + it "renders a placeholder comment for indexed types with no fields" do + schema = build_schema_with_root_indexed_types(build_fake_message_type("EmptyType")) + expect(schema.to_proto).to include("// No indexed fields were defined for this type.") + end + + it "supports recursive message references without infinite recursion" do + node_type = build_fake_message_type( + "Node", + "id" => build_fake_type_ref(resolved: build_fake_scalar_type("string"), unwrapped_name: "ID"), + "parent" => build_fake_type_ref(resolved: nil, unwrapped_name: "Node") + ) + + node_type + .indexing_fields_by_name_in_index + .fetch("parent") + .to_indexing_field + .type + .define_singleton_method(:resolved) { node_type } + + schema = build_schema_with_root_indexed_types(node_type) + + expect(schema.to_proto).to include("Node parent = 2;") + end + + it "supports re-registering already-known enums from field references" do + status_enum = build_fake_enum_type("Status", values: ["ACTIVE"]) + account_type = build_fake_message_type( + "Account", + "status" => build_fake_type_ref(resolved: status_enum, unwrapped_name: "Status") + ) + + schema = build_schema_with_root_indexed_types(status_enum, account_type) + 
expect(schema.to_proto).to include("STATUS_ACTIVE = 1;") + end + + it "accepts multiple enum mapping sources when they resolve to the same values" do + proto_status_a = ::Class.new do + def self.enums + [::Data.define(:name).new(name: :ACTIVE)] + end + end + + proto_status_b = ::Class.new do + def self.enums + [::Data.define(:name).new(name: :ACTIVE)] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status_a => {}, + proto_status_b => {} + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("STATUS_ACTIVE = 1;") + end + + it "reuses nested wrapper types for repeated generation requests with the same context" do + schema = build_schema_with_root_indexed_types + + first = schema.send( + :register_nested_list_wrappers, + context_message_name: "Matrix", + context_field_name: "values", + list_depth: 2, + base_type_name: "double" + ) + second = schema.send( + :register_nested_list_wrappers, + context_message_name: "Matrix", + context_field_name: "values", + list_depth: 2, + base_type_name: "double" + ) + + expect(second).to eq(first) + end + + it "creates intermediate wrappers for deeply nested lists" do + results = define_proto_schema do |s| + s.object_type "Matrix" do |t| + t.field "id", "ID" + t.field "values", "[[[Float!]!]!]!" 
+ t.index "matrices" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("message MatrixValuesListLevel2 {") + expect(generated).to include("message MatrixValuesListLevel1 {") + end + + it "normalizes nil proto enum and field-number mappings to empty hashes" do + schema = Schema.new( + build_fake_results_with_root_types, + package_name: "elasticgraph", + proto_enums_by_graphql_enum: nil, + proto_field_number_mappings: nil + ) + + expect(schema.to_proto).to eq("") + expect(schema.field_number_mappings_for_artifact).to eq({"messages" => {}}) + end + + it "raises when type names collide after proto message escaping" do + first = build_fake_message_type("package") + second = build_fake_message_type("package_") + schema = build_schema_with_root_indexed_types(first, second) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("both map to the same proto message name")) + end + + it "raises when type names collide after proto enum escaping" do + first = build_fake_enum_type("option", values: ["ACTIVE"]) + second = build_fake_enum_type("option_", values: ["ACTIVE"]) + schema = build_schema_with_root_indexed_types(first, second) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("both map to the same proto enum name")) + end + + it "validates field-number mapping input type" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings("bad") + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be a Hash")) + end + + it "validates that `messages` is a hash in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => "bad"}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + 
proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must have a `messages` Hash")) + end + + it "accepts symbol `:messages` key in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + messages: { + "Account" => { + "id" => 7 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 7;") + end + + it "accepts symbol `:fields` and nested symbol keys in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + messages: { + "Account" => { + fields: { + "id" => { + field_number: 7, + name_in_index: :account_id + } + } + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID", name_in_index: "account_id" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 7;") + end + + it "validates per-message field-number mapping structure" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => "bad"}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be a Hash")) + end + + it "validates that nested `fields` is a hash in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => {"fields" => "bad"}}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must contain a `fields` Hash")) + end + + it "validates that mapped field numbers are positive integers" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => 
{"Account" => {"id" => 0}}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be a positive integer")) + end + + it "validates that mapped field numbers are integers" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => {"id" => "abc"}}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be an integer")) + end + + it "validates that structured mappings include `field_number`" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"id" => {"name_in_index" => "account_id"}}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID", name_in_index: "account_id" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must include `field_number`")) + end + + it "validates that structured mappings use a String or Symbol `name_in_index`" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"id" => {"field_number" => 7, "name_in_index" => 123}}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must use a String or Symbol `name_in_index`")) + end + + it "defaults structured mappings without `name_in_index` to the field name" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"display_name" => {"field_number" => 7}}}}} + ) + + s.object_type "Account" do |t| + t.field "id", 
"ID" + t.field "display_name", "String" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string display_name = 7;") + end + + it "allocates the next available field number when a renamed field has no old mapping entry" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"other_name" => 7}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "display_name", "String" do |f| + f.renamed_from "full_name" + end + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 1;") + expect(proto_schema_from(results)).to include("string display_name = 2;") + end + + private + + def build_schema_with_root_indexed_types(*indexed_types) + fake_results = build_fake_results_with_root_types(*indexed_types) + + Schema.new( + fake_results, + package_name: "elasticgraph", + proto_enums_by_graphql_enum: {}, + proto_field_number_mappings: {} + ) + end + + def build_fake_results_with_root_types(*indexed_types) + wrappers = indexed_types.map do |indexed_type| + ::Object.new.tap do |wrapper| + wrapper.define_singleton_method(:index_def) do + ::Struct.new(:indexed_type).new(indexed_type) + end + end + end + + fake_results = ::Object.new + fake_results.define_singleton_method(:schema_artifact_types) { wrappers } + fake_results + end + + def build_fake_scalar_type(proto_type_name) + scalar = ::Object.new + scalar.define_singleton_method(:to_proto_field_type) { proto_type_name } + scalar + end + + def build_fake_type_ref(resolved:, unwrapped_name:) + ref = ::Object.new + ref.define_singleton_method(:unwrap_non_null) { ref } + ref.define_singleton_method(:list?) 
{ false } + ref.define_singleton_method(:resolved) { resolved } + ref.define_singleton_method(:unwrapped_name) { unwrapped_name } + ref + end + + def build_fake_message_type(name, fields_by_name = {}) + type = ::Object.new + type.define_singleton_method(:name) { name } + type.define_singleton_method(:to_proto_field_type) { Identifier.message_name(name) } + type.define_singleton_method(:indexing_fields_by_name_in_index) do + fields_by_name.each_with_object({}) do |(field_name, type_ref), transformed| + indexing_field = ::Struct.new(:name, :name_in_index, :type).new(field_name, field_name, type_ref) + transformed[field_name] = ::Struct.new(:to_indexing_field).new(indexing_field) + end + end + type + end + + def build_fake_enum_type(name, values:) + type = ::Object.new + type.define_singleton_method(:name) { name } + type.define_singleton_method(:to_proto_field_type) { Identifier.enum_name(name) } + type.define_singleton_method(:values_by_name) { values.to_h { |v| [v, true] } } + type + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_spec.rb new file mode 100644 index 000000000..96d3ae5aa --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_spec.rb @@ -0,0 +1,436 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe Schema, :proto_schema do + it "generates a proto schema from indexed types" do + results = define_proto_schema do |s| + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Address" do |t| + t.field "street", "String" + t.field "city", "String" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.field "address", "Address" + t.field "tags", "[String!]!" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package elasticgraph; + + enum Status { + STATUS_UNSPECIFIED = 0; + STATUS_ACTIVE = 1; + STATUS_INACTIVE = 2; + } + + message Account { + string id = 1; + Status status = 2; + Address address = 3; + repeated string tags = 4; + } + + message Address { + string street = 1; + string city = 2; + } + PROTO + end + + it "generates wrapper messages for nested lists" do + results = define_proto_schema do |s| + s.object_type "Matrix" do |t| + t.field "id", "ID" + t.field "values", "[[Float!]!]!" 
+ t.index "matrices" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package elasticgraph; + + message Matrix { + string id = 1; + repeated MatrixValuesListLevel1 values = 2; + } + + message MatrixValuesListLevel1 { + repeated double values = 1; + } + PROTO + end + + it "uses custom proto scalar mappings" do + results = define_proto_schema do |s| + s.scalar_type "CustomTimestamp" do |t| + t.mapping type: "date" + t.json_schema type: "string", format: "date-time" + t.proto_field type: "int64" + end + + s.object_type "Event" do |t| + t.field "id", "ID" + t.field "occurred_at", "CustomTimestamp" + t.index "events" + end + end + + expect(proto_schema_from(results)).to include("int64 occurred_at = 2;") + end + + it "infers scalar mappings from json_schema type" do + results = define_proto_schema do |s| + s.scalar_type "EmailAddress" do |t| + t.mapping type: "keyword" + t.json_schema type: "string", format: "email" + end + + s.object_type "User" do |t| + t.field "id", "ID" + t.field "email", "EmailAddress" + t.index "users" + end + end + + expect(proto_schema_from(results)).to include("string email = 2;") + end + + it "prefers explicit proto_field over inferred mapping" do + results = define_proto_schema do |s| + s.scalar_type "UnixTimestamp" do |t| + t.mapping type: "long" + t.json_schema type: "integer" + t.proto_field type: "fixed64" + end + + s.object_type "Event" do |t| + t.field "id", "ID" + t.field "occurred_at", "UnixTimestamp" + t.index "events" + end + end + + expect(proto_schema_from(results)).to include("fixed64 occurred_at = 2;") + end + + it "can assign field numbers from configured mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 10, + "name" => 2 + } + } + }, + enforce: true + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + generated = 
proto_schema_from(results) + expect(generated).to include("string id = 10;") + expect(generated).to include("string name = 2;") + end + + it "assigns new field numbers after mapped values when mappings are partial" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 1 + } + } + }, + enforce: true + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string name = 2;") + end + + it "exposes generated field-number mappings as an artifact hash" do + results = define_proto_schema do |s| + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "name" => 2 + } + } + } + }) + end + + it "preserves reserved numbers for removed fields and allocates new numbers above them" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 1, + "legacyField" => 2 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("string id = 1;") + expect(generated).to include("string name = 3;") + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "legacyField" => 2, + "name" => 3 + } + } + } + }) + end + + it "uses public field names in schema.proto and stores name_in_index overrides in the mapping artifact" do + results = define_proto_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "display_name", "String", name_in_index: "displayName" + t.index "widgets" + end + end + + 
expect(proto_schema_from(results)).to include("string display_name = 2;") + expect(proto_schema_from(results)).not_to include("displayName") + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Widget" => { + "fields" => { + "id" => 1, + "display_name" => { + "field_number" => 2, + "name_in_index" => "displayName" + } + } + } + } + }) + end + + it "preserves a field number across a public field rename" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "fields" => { + "full_name" => 7 + } + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "display_name", "String" do |f| + f.renamed_from "full_name" + end + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 1;") + expect(proto_schema_from(results)).to include("string display_name = 7;") + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "display_name" => 7 + } + } + } + }) + end + + it "raises an error when a custom scalar does not configure proto_field" do + results = define_proto_schema do |s| + s.scalar_type "UnconfiguredScalar" do |t| + t.mapping type: "keyword" + t.json_schema type: "object" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "value", "UnconfiguredScalar" + t.index "widgets" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including( + "Protobuf field type not configured for scalar type `UnconfiguredScalar`.", + "call `proto_field type:" + )) + end + + it "prefixes enum values and escapes proto keywords in generated identifiers" do + results = define_proto_schema do |s| + s.enum_type "Command" do |t| + t.values "option", "stream" + end + + s.object_type "Request" do |t| + t.field "id", "ID" + t.field "package", "String" + t.field "command", "Command" + t.index "requests" + end + end + 
+ expect(proto_schema_from(results)).to include("COMMAND_OPTION = 1;") + expect(proto_schema_from(results)).to include("COMMAND_STREAM = 2;") + expect(proto_schema_from(results)).to include("string package_ = 2; // source name: package") + end + + it "can source enum values from configured proto enum mappings" do + proto_status = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :UNKNOWN_DO_NOT_USE), + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :INACTIVE) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status => { + exclusions: [:UNKNOWN_DO_NOT_USE], + expected_extras: [:LEGACY] + } + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE", "OBSOLETE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("STATUS_ACTIVE = 1;") + expect(generated).to include("STATUS_INACTIVE = 2;") + expect(generated).to include("STATUS_LEGACY = 3;") + expect(generated).not_to include("OBSOLETE") + end + + it "raises when mapped proto enum sources produce inconsistent values" do + proto_status_a = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :INACTIVE) + ] + end + end + + proto_status_b = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :PENDING) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status_a => {}, + proto_status_b => {} + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including( + "Protobuf 
enum mappings for `Status` produce inconsistent value sets" + )) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf_spec.rb new file mode 100644 index 000000000..b93e372d5 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf_spec.rb @@ -0,0 +1,21 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf" + +module ElasticGraph + RSpec.describe Protobuf do + it "defines the PROTO_SCHEMA_FILE constant" do + expect(Protobuf::PROTO_SCHEMA_FILE).to eq("schema.proto") + end + + it "defines the PROTO_FIELD_NUMBERS_FILE constant" do + expect(Protobuf::PROTO_FIELD_NUMBERS_FILE).to eq("proto_field_numbers.yaml") + end + end +end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 70ccb30a7..a471312e3 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -44,6 +44,15 @@ def runtime_metadata @runtime_metadata ||= build_runtime_metadata end + # Returns the fully customized GraphQL type list used by artifact generators. + # + # Built-in and derived-type customization callbacks have already been applied to these types. 
+ # + # @return [Array] + def schema_artifact_types + all_types + end + # @private STATIC_SCRIPT_REPO = Scripting::FileSystemRepository.new(::File.join(__dir__.to_s, "scripting", "scripts")) diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index 212249d9b..df1cc0c7a 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -9,6 +9,7 @@ module ElasticGraph include Support::_MemoizableDataClass def derived_indexing_type_names: () -> ::Set[::String] + def schema_artifact_types: () -> ::Array[SchemaElements::graphQLType] @graphql_schema_string: ::String? @datastore_config: ::Hash[::String, untyped] diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb index 632d04ed2..0c400fa71 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb @@ -58,6 +58,25 @@ module SchemaDefinition expect(api.results).to respond_to(:available_json_schema_versions) end + it "exposes schema_artifact_types that includes user-defined and built-in types" do + api = API.new(schema_elements, true) + + api.as_active_instance do + ElasticGraph.define_schema do |schema| + schema.json_schema_version 1 + schema.object_type("Widget") do |t| + t.field "id", "ID" + end + end + end + + types = api.results.schema_artifact_types + type_names = types.map(&:name) + + expect(type_names).to include("Widget") + expect(type_names).to include("Int") + end + it "allows the factory to build an object type even when no 
block is provided" do api = API.new(schema_elements, true) diff --git a/elasticgraph-support/README.md b/elasticgraph-support/README.md index 8464cbc88..ebf1c6a2e 100644 --- a/elasticgraph-support/README.md +++ b/elasticgraph-support/README.md @@ -51,6 +51,9 @@ graph LR; elasticgraph-opensearch["elasticgraph-opensearch"]; elasticgraph-opensearch --> elasticgraph-support; class elasticgraph-opensearch otherEgGemStyle; + elasticgraph-protobuf["elasticgraph-protobuf"]; + elasticgraph-protobuf --> elasticgraph-support; + class elasticgraph-protobuf otherEgGemStyle; elasticgraph-query_registry["elasticgraph-query_registry"]; elasticgraph-query_registry --> elasticgraph-support; class elasticgraph-query_registry otherEgGemStyle;