diff --git a/AGENTS.md b/AGENTS.md index 942b3a657..60db61df9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -85,9 +85,10 @@ All gems follow the pattern: `elasticgraph-[name]/` containing: - `elasticgraph-elasticsearch`: Elasticsearch client wrapper - `elasticgraph-opensearch`: OpenSearch client wrapper -**Extensions** (5 gems): Optional functionality +**Extensions** (6 gems): Optional functionality - `elasticgraph-apollo`: Apollo Federation support - `elasticgraph-health_check`: Health checks +- `elasticgraph-json_ingestion`: JSON Schema ingestion serializer - `elasticgraph-query_interceptor`: Query interception - `elasticgraph-query_registry`: Source-controlled query registry - `elasticgraph-warehouse`: Data warehouse ingestion diff --git a/CODEBASE_OVERVIEW.md b/CODEBASE_OVERVIEW.md index d6e62ebfd..9618bbf2b 100644 --- a/CODEBASE_OVERVIEW.md +++ b/CODEBASE_OVERVIEW.md @@ -111,6 +111,7 @@ graph LR; rackup["rackup"]; rake["rake"]; webrick["webrick"]; + elasticgraph-json_ingestion["eg-json_ingestion"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; graphql["graphql"]; elasticgraph --> elasticgraph-support; @@ -125,6 +126,7 @@ graph LR; elasticgraph-local --> webrick; elasticgraph-schema_definition --> elasticgraph-graphql; elasticgraph-schema_definition --> elasticgraph-indexer; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; elasticgraph-schema_definition --> elasticgraph-schema_artifacts; elasticgraph-schema_definition --> elasticgraph-support; elasticgraph-schema_definition --> graphql; @@ -141,6 +143,7 @@ graph LR; class rackup externalGemCatStyle; class rake externalGemCatStyle; class webrick externalGemCatStyle; + class elasticgraph-json_ingestion otherEgGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class graphql externalGemCatStyle; click thor href "https://rubygems.org/gems/thor" "Open on RubyGems.org" _blank; @@ -192,12 +195,13 @@ graph LR; click opensearch-ruby href "https://rubygems.org/gems/opensearch-ruby" "Open on 
RubyGems.org" _blank; ``` -### Extensions (5 gems) +### Extensions (6 gems) These libraries extend ElasticGraph to provide optional but commonly needed functionality. * [elasticgraph-apollo](elasticgraph-apollo/README.md): Transforms an ElasticGraph project into an Apollo subgraph. * [elasticgraph-health_check](elasticgraph-health_check/README.md): Provides a health check for high availability ElasticGraph deployments. +* [elasticgraph-json_ingestion](elasticgraph-json_ingestion/README.md): Pluggable JSON Schema ingestion serializer for ElasticGraph. * [elasticgraph-query_interceptor](elasticgraph-query_interceptor/README.md): Intercepts ElasticGraph datastore queries. * [elasticgraph-query_registry](elasticgraph-query_registry/README.md): Provides a source-controlled query registry for ElasticGraph applications. * [elasticgraph-warehouse](elasticgraph-warehouse/README.md): Extends ElasticGraph to support ingestion into a data warehouse. @@ -216,6 +220,7 @@ graph LR; apollo-federation["apollo-federation"]; elasticgraph-health_check["eg-health_check"]; elasticgraph-datastore_core["eg-datastore_core"]; + elasticgraph-json_ingestion["eg-json_ingestion"]; elasticgraph-query_interceptor["eg-query_interceptor"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; elasticgraph-query_registry["eg-query_registry"]; @@ -228,6 +233,7 @@ graph LR; elasticgraph-health_check --> elasticgraph-datastore_core; elasticgraph-health_check --> elasticgraph-graphql; elasticgraph-health_check --> elasticgraph-support; + elasticgraph-json_ingestion --> elasticgraph-support; elasticgraph-query_interceptor --> elasticgraph-graphql; elasticgraph-query_interceptor --> elasticgraph-schema_artifacts; elasticgraph-query_registry --> elasticgraph-graphql; @@ -242,6 +248,7 @@ graph LR; class apollo-federation externalGemCatStyle; class elasticgraph-health_check targetGemStyle; class elasticgraph-datastore_core otherEgGemStyle; + class elasticgraph-json_ingestion targetGemStyle; class 
elasticgraph-query_interceptor targetGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class elasticgraph-query_registry targetGemStyle; diff --git a/Gemfile.lock b/Gemfile.lock index 88c017452..b13f9085f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -128,6 +128,12 @@ PATH elasticgraph-support (= 1.1.1.pre) hashdiff (~> 1.2, >= 1.2.1) +PATH + remote: elasticgraph-json_ingestion + specs: + elasticgraph-json_ingestion (1.1.1.pre) + elasticgraph-support (= 1.1.1.pre) + PATH remote: elasticgraph-lambda_support specs: @@ -192,6 +198,7 @@ PATH elasticgraph-schema_definition (1.1.1.pre) elasticgraph-graphql (= 1.1.1.pre) elasticgraph-indexer (= 1.1.1.pre) + elasticgraph-json_ingestion (= 1.1.1.pre) elasticgraph-schema_artifacts (= 1.1.1.pre) elasticgraph-support (= 1.1.1.pre) graphql (~> 2.5.22) @@ -696,6 +703,7 @@ DEPENDENCIES elasticgraph-indexer (= 1.1.1.pre)! elasticgraph-indexer_autoscaler_lambda (= 1.1.1.pre)! elasticgraph-indexer_lambda (= 1.1.1.pre)! + elasticgraph-json_ingestion (= 1.1.1.pre)! elasticgraph-lambda_support (= 1.1.1.pre)! elasticgraph-local (= 1.1.1.pre)! elasticgraph-opensearch (= 1.1.1.pre)! 
@@ -787,6 +795,7 @@ CHECKSUMS elasticgraph-indexer (1.1.1.pre) elasticgraph-indexer_autoscaler_lambda (1.1.1.pre) elasticgraph-indexer_lambda (1.1.1.pre) + elasticgraph-json_ingestion (1.1.1.pre) elasticgraph-lambda_support (1.1.1.pre) elasticgraph-local (1.1.1.pre) elasticgraph-opensearch (1.1.1.pre) diff --git a/config/docker_demo/Dockerfile b/config/docker_demo/Dockerfile index c67a71782..2110740fd 100644 --- a/config/docker_demo/Dockerfile +++ b/config/docker_demo/Dockerfile @@ -16,6 +16,7 @@ COPY elasticgraph-datastore_core elasticgraph-datastore_core/ COPY elasticgraph-graphiql elasticgraph-graphiql/ COPY elasticgraph-graphql elasticgraph-graphql/ COPY elasticgraph-indexer elasticgraph-indexer/ +COPY elasticgraph-json_ingestion elasticgraph-json_ingestion/ COPY elasticgraph-local elasticgraph-local/ COPY elasticgraph-opensearch elasticgraph-opensearch/ COPY elasticgraph-query_registry elasticgraph-query_registry/ diff --git a/config/site/support/doctest_helper.rb b/config/site/support/doctest_helper.rb index 6f6d256d3..bec0bef25 100644 --- a/config/site/support/doctest_helper.rb +++ b/config/site/support/doctest_helper.rb @@ -9,6 +9,7 @@ require "elastic_graph/apollo/schema_definition/api_extension" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" require "elastic_graph/schema_definition/api" +require "elastic_graph/schema_definition/extension_module_support" require "elastic_graph/schema_definition/schema_artifact_manager" require "elastic_graph/warehouse/schema_definition/api_extension" require "rspec/mocks" @@ -60,7 +61,9 @@ module ElasticGraph @api = SchemaDefinition::API.new( SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: :camelCase, overrides: {}), true, - extension_modules: extension_modules + extension_modules: SchemaDefinition::ExtensionModuleSupport.build_api_extension_modules( + extension_modules: extension_modules + ) ) # This is required in all schemas, but we don't want to have to put in all our 
examples, @@ -95,7 +98,8 @@ module ElasticGraph ElasticGraph.define_schema do |schema| # `schema.json_schema_version` raises an error when the version is set more than once. # By default we set it above. Here we clear it to allow our example to set it. - schema.state.json_schema_version = nil + schema.state.ingestion_serializer_state.delete(:json_schema_version) + schema.state.ingestion_serializer_state.delete(:json_schema_version_setter_location) end end diff --git a/elasticgraph-apollo/apollo_tests_implementation/Dockerfile b/elasticgraph-apollo/apollo_tests_implementation/Dockerfile index 23442ab26..f2cd8c122 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Dockerfile +++ b/elasticgraph-apollo/apollo_tests_implementation/Dockerfile @@ -14,6 +14,7 @@ COPY elasticgraph-elasticsearch /web/elasticgraph-elasticsearch COPY elasticgraph-graphiql /web/elasticgraph-graphiql COPY elasticgraph-graphql /web/elasticgraph-graphql COPY elasticgraph-indexer /web/elasticgraph-indexer +COPY elasticgraph-json_ingestion /web/elasticgraph-json_ingestion COPY elasticgraph-rack /web/elasticgraph-rack COPY elasticgraph-schema_artifacts /web/elasticgraph-schema_artifacts COPY elasticgraph-schema_definition /web/elasticgraph-schema_definition diff --git a/elasticgraph-apollo/apollo_tests_implementation/Gemfile b/elasticgraph-apollo/apollo_tests_implementation/Gemfile index f082fa258..60b08ec0d 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Gemfile +++ b/elasticgraph-apollo/apollo_tests_implementation/Gemfile @@ -16,6 +16,7 @@ source "https://rubygems.org" graphiql graphql indexer + json_ingestion rack schema_artifacts schema_definition diff --git a/elasticgraph-json_ingestion/.rspec b/elasticgraph-json_ingestion/.rspec new file mode 120000 index 000000000..67e6e21b3 --- /dev/null +++ b/elasticgraph-json_ingestion/.rspec @@ -0,0 +1 @@ +../spec_support/subdir_dot_rspec \ No newline at end of file diff --git a/elasticgraph-json_ingestion/.yardopts 
b/elasticgraph-json_ingestion/.yardopts new file mode 120000 index 000000000..e11a2057f --- /dev/null +++ b/elasticgraph-json_ingestion/.yardopts @@ -0,0 +1 @@ +../config/site/yardopts \ No newline at end of file diff --git a/elasticgraph-json_ingestion/Gemfile b/elasticgraph-json_ingestion/Gemfile new file mode 120000 index 000000000..26cb2ad91 --- /dev/null +++ b/elasticgraph-json_ingestion/Gemfile @@ -0,0 +1 @@ +../Gemfile \ No newline at end of file diff --git a/elasticgraph-json_ingestion/LICENSE.txt b/elasticgraph-json_ingestion/LICENSE.txt new file mode 100644 index 000000000..aa18b5db8 --- /dev/null +++ b/elasticgraph-json_ingestion/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 - 2026 Block, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/elasticgraph-json_ingestion/README.md b/elasticgraph-json_ingestion/README.md new file mode 100644 index 000000000..df6fa1ae2 --- /dev/null +++ b/elasticgraph-json_ingestion/README.md @@ -0,0 +1,27 @@ +# ElasticGraph::JSONIngestion + +Pluggable JSON Schema ingestion serializer for ElasticGraph. + +This gem extracts the JSON Schema generation and validation logic from ElasticGraph's core into a +pluggable extension, following the same pattern as `elasticgraph-warehouse` and `elasticgraph-apollo`. +This is the first step toward supporting alternative ingestion serializers (e.g., Protocol Buffers). + +Higher-level schema-definition entry points use the JSON Schema serializer by default for backward +compatibility, so existing users do not need configuration changes. + +## Dependency Diagram + +```mermaid +graph LR; + classDef targetGemStyle fill:#FADBD8,stroke:#EC7063,color:#000,stroke-width:2px; + classDef otherEgGemStyle fill:#A9DFBF,stroke:#2ECC71,color:#000; + classDef externalGemStyle fill:#E0EFFF,stroke:#70A1D7,color:#2980B9; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + class elasticgraph-json_ingestion targetGemStyle; + elasticgraph-support["elasticgraph-support"]; + elasticgraph-json_ingestion --> elasticgraph-support; + class elasticgraph-support otherEgGemStyle; + elasticgraph-schema_definition["elasticgraph-schema_definition"]; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; + class elasticgraph-schema_definition otherEgGemStyle; +``` diff --git a/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec b/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec new file mode 100644 index 000000000..883f03a21 --- /dev/null +++ b/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec @@ -0,0 +1,41 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require_relative "../elasticgraph-support/lib/elastic_graph/version" + +Gem::Specification.new do |spec| + spec.name = "elasticgraph-json_ingestion" + spec.version = ElasticGraph::VERSION + spec.authors = ["Josh Wilson", "Myron Marston", "Block Engineering"] + spec.email = ["joshuaw@squareup.com"] + spec.homepage = "https://block.github.io/elasticgraph/" + spec.license = "MIT" + spec.summary = "Pluggable JSON Schema ingestion serializer for ElasticGraph." + + spec.metadata = { + "bug_tracker_uri" => "https://github.com/block/elasticgraph/issues", + "changelog_uri" => "https://github.com/block/elasticgraph/releases/tag/v#{ElasticGraph::VERSION}", + "documentation_uri" => "https://block.github.io/elasticgraph/api-docs/v#{ElasticGraph::VERSION}/", + "homepage_uri" => "https://block.github.io/elasticgraph/", + "source_code_uri" => "https://github.com/block/elasticgraph/tree/v#{ElasticGraph::VERSION}/#{spec.name}", + "gem_category" => "extension" + } + + spec.files = Dir.chdir(File.expand_path(__dir__)) do + `git ls-files -z`.split("\x0").reject do |f| + (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features|sig)/|\.(?:git|travis|circleci)|appveyor)}) + end - [".rspec", "Gemfile", ".yardopts"] + end + + spec.required_ruby_version = [">= 3.4", "< 4.1"] + + # This extension is loaded by `elasticgraph-schema_definition` at schema-definition time, so we intentionally + # avoid a runtime dependency here to keep the dependency direction acyclic across gems. 
+ spec.add_development_dependency "elasticgraph-schema_definition", ElasticGraph::VERSION + spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion.rb new file mode 100644 index 000000000..264d88c81 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion.rb @@ -0,0 +1,19 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + # Pluggable JSON Schema ingestion serializer for ElasticGraph. + # + # This gem extracts the JSON Schema generation and validation logic from ElasticGraph's + # core into a pluggable extension, following the same pattern as `elasticgraph-warehouse` + # and `elasticgraph-apollo`. This is the first step toward supporting alternative ingestion + # serializers (e.g., Protocol Buffers). Higher-level schema-definition entry points use it by + # default for backward compatibility. + module JSONIngestion + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb new file mode 100644 index 000000000..6ad6dba20 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb @@ -0,0 +1,93 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/factory_extension" + +module ElasticGraph + module JSONIngestion + # Namespace for all JSON Schema schema definition support. + # + # {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module. + module SchemaDefinition + # Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance + # to add JSON Schema ingestion serializer capabilities. Higher-level schema-definition + # entry points use it by default for backward compatibility, but it can also be explicitly passed in + # `schema_definition_ingestion_serializer_extension_modules` when defining your {ElasticGraph::Local::RakeTasks}. + module APIExtension + # Wires up the factory extension when this module is extended onto an API instance. + # + # @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend + # @return [void] + # @api private + def self.extended(api) + api.instance_variable_get(:@state).ingestion_serializer_state.tap do |state| + state[:allow_omitted_json_schema_fields] = false unless state.key?(:allow_omitted_json_schema_fields) + state[:allow_extra_json_schema_fields] = true unless state.key?(:allow_extra_json_schema_fields) + state[:reserved_type_names] = (state[:reserved_type_names] || ::Set.new).merge([EVENT_ENVELOPE_JSON_SCHEMA_NAME]) + end + + api.factory.extend FactoryExtension + end + + # Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema + # artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique + # version number. The publisher will then include this version number in published events to identify the version of the schema it + # was using. 
This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync. + # + # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly + # have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this + # on every JSON schema change by setting `enforce_json_schema_version` to `false` in your `Rakefile`. + # + # @param version [Integer] current version number of the JSON schema artifact + # @return [void] + # @see Local::RakeTasks#enforce_json_schema_version + def json_schema_version(version) + if !version.is_a?(Integer) || version < 1 + raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}" + end + + if @state.ingestion_serializer_state[:json_schema_version] + raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{@state.ingestion_serializer_state[:json_schema_version]}" + end + + @state.ingestion_serializer_state[:json_schema_version] = version + @state.ingestion_serializer_state[:json_schema_version_setter_location] = caller_locations(1, 1).to_a.first + nil + end + + # Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the + # publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to + # configure this behavior. + # + # @param allow_omitted_fields [bool] Whether nullable fields can be omitted from indexing events. + # @param allow_extra_fields [bool] Whether extra fields (i.e. fields beyond those defined in the schema) can be included in indexing events. + # @return [void] + # + # @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled + # field names in indexing events.
For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher + # accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId` + # is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of + # these to `true` (or none). + def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true) + unless [true, false].include?(allow_omitted_fields) + raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" + end + + unless [true, false].include?(allow_extra_fields) + raise Errors::SchemaError, "`allow_extra_fields` must be true or false" + end + + @state.ingestion_serializer_state[:allow_omitted_json_schema_fields] = allow_omitted_fields + @state.ingestion_serializer_state[:allow_extra_json_schema_fields] = allow_extra_fields + nil + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/built_in_types_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/built_in_types_extension.rb new file mode 100644 index 000000000..0b0e5be4f --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/built_in_types_extension.rb @@ -0,0 +1,49 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends ElasticGraph's built-in types with JSON ingestion configuration. + module BuiltInTypesExtension + # JSON Schema defaults applied to ElasticGraph's built-in scalar types. 
+ BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME = { + "Boolean" => {type: "boolean"}, + "Float" => {type: "number"}, + "ID" => {type: "string"}, + "Int" => {type: "integer", minimum: INT_MIN, maximum: INT_MAX}, + "String" => {type: "string"}, + "Cursor" => {type: "string"}, + "Date" => {type: "string", format: "date"}, + "DateTime" => {type: "string", format: "date-time"}, + "LocalTime" => {type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN}, + "TimeZone" => {type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a.freeze}, + "Untyped" => {type: ["array", "boolean", "integer", "number", "object", "string"].freeze}, + "JsonSafeLong" => {type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX}, + "LongString" => {type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX} + }.freeze + + private + + def register_standard_elastic_graph_types + super + + geo_location = schema_def_state.object_types_by_name.fetch(schema_def_state.type_ref("GeoLocation").to_final_form.name) + + # We use `nullable: false` because `GeoLocation` is indexed as a single `geo_point` field, + # and therefore can't support a `latitude` without a `longitude` or vice-versa. + geo_location.graphql_fields_by_name.fetch(names.latitude).json_schema minimum: -90, maximum: 90, nullable: false + geo_location.graphql_fields_by_name.fetch(names.longitude).json_schema minimum: -180, maximum: 180, nullable: false + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/enum_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/enum_type_extension.rb new file mode 100644 index 000000000..1c94d9e2a --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/enum_type_extension.rb @@ -0,0 +1,29 @@ +# Copyright 2024 - 2026 Block, Inc. 
+# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/field_type/enum_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends enum types with JSON schema behavior. + module EnumTypeExtension + # @private + def configure_derived_scalar_type(scalar_type) + super + scalar_type.json_schema type: "string" + end + + # @private + def to_indexing_field_type + FieldType::Enum.new(super) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb new file mode 100644 index 000000000..62cca4de9 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb @@ -0,0 +1,113 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/built_in_types_extension" +require "elastic_graph/json_ingestion/schema_definition/enum_type_extension" +require "elastic_graph/json_ingestion/schema_definition/field_extension" +require "elastic_graph/json_ingestion/schema_definition/index_extension" +require "elastic_graph/json_ingestion/schema_definition/object_interface_extension" +require "elastic_graph/json_ingestion/schema_definition/results_extension" +require "elastic_graph/json_ingestion/schema_definition/scalar_type_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension" +require "elastic_graph/json_ingestion/schema_definition/union_type_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module applied to `ElasticGraph::SchemaDefinition::Factory` to wire up + # JSON Schema support on Results and SchemaArtifactManager instances. + # + # @api private + module FactoryExtension + # @private + def new_built_in_types(api) + super(api).tap do |built_in_types| + built_in_types.extend BuiltInTypesExtension + end + end + + # @private + def new_enum_type(name) + super(name) do |type| + type.extend EnumTypeExtension + yield type if block_given? + end + end + + # @private + def new_field(**kwargs, &block) + super(**kwargs) do |field| + field.extend FieldExtension + block&.call(field) + end + end + + # @private + def new_index(name, settings, type, &block) + super(name, settings, type) do |index| + index.extend IndexExtension + block&.call(index) + end + end + + # @private + def new_interface_type(name) + super(name) do |type| + type.extend ObjectInterfaceExtension + yield type if block_given? + end + end + + # @private + def new_object_type(name) + super(name) do |type| + type.extend ObjectInterfaceExtension + yield type if block_given? 
+ end + end + + # @private + def new_scalar_type(name) + super(name) do |type| + type.extend ScalarTypeExtension + if (built_in_json_schema_options = BuiltInTypesExtension::BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME[name]) + type.json_schema(**built_in_json_schema_options) + end + yield type if block_given? + end.tap(&:validate_json_schema_configuration!) + end + + # @private + def new_union_type(name) + super(name) do |type| + type.extend UnionTypeExtension + yield type if block_given? + end + end + + # Creates a new Results instance with JSON Schema extensions. + # + # @return [ElasticGraph::SchemaDefinition::Results] the created results instance + def new_results + super.tap do |results| + results.extend ResultsExtension + end + end + + # Creates a new SchemaArtifactManager instance with JSON Schema extensions. + # + # @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] the created artifact manager + def new_schema_artifact_manager(...) + super.tap do |manager| + manager.extend SchemaArtifactManagerExtension + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_extension.rb new file mode 100644 index 000000000..847e3b4ae --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_extension.rb @@ -0,0 +1,98 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_reference" +require "elastic_graph/json_ingestion/schema_definition/json_schema_option_validator" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends schema-definition fields with JSON schema validation behavior. + module FieldExtension + # @return [Hash] JSON schema options for this field + def json_schema_options + @json_schema_options ||= {} + end + + # @return [Boolean] whether this field has been marked as non-nullable in the JSON schema + def non_nullable_in_json_schema + @non_nullable_in_json_schema || false + end + + # Sets whether this field is non-nullable in the JSON schema. + # @param value [Boolean] true to make the field non-nullable + attr_writer :non_nullable_in_json_schema + + # Configures JSON schema options for this field. + # + # @param nullable [Boolean, nil] set to `false` to make this field non-nullable in the JSON schema + # @param options [Hash] additional JSON schema options + # @return [void] + def json_schema(nullable: nil, **options) + if options.key?(:type) + raise Errors::SchemaError, "Cannot override JSON schema type of field `#{name}` with `#{options.fetch(:type)}`" + end + + case nullable + when true + raise Errors::SchemaError, "`nullable: true` is not allowed on a field--just declare the GraphQL field as being nullable (no `!` suffix) instead." + when false + @non_nullable_in_json_schema = true + end + + JSONSchemaOptionValidator.validate!(self, options) + json_schema_options.update(options) + end + + # @private + def to_indexing_field_reference + reference = super + return nil unless reference + + type_for_json_schema = non_nullable_in_json_schema ? 
type.wrap_non_null : type + + Indexing::FieldReference.new( + field_reference: reference.with(type: type_for_json_schema), + json_schema_layers: FieldExtension.compute_json_schema_layers(type_for_json_schema), + json_schema_customizations: json_schema_options + ) + end + + # Computes JSON schema layers from a GraphQL type reference. + # Returns all the JSON schema array/nullable layers of a type, from outermost to innermost. + # For example, `[[Int]]` will return `[:nullable, :array, :nullable, :array, :nullable]`. + # + # @param type_ref [ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] the type reference + # @return [Array] the layers + def self.compute_json_schema_layers(type_ref) + layers, inner_type = peel_json_schema_layers_once(type_ref) + + if layers.empty? || inner_type == type_ref + layers + else + layers + compute_json_schema_layers(inner_type) + end + end + + # Peels one layer of JSON schema type wrapping. + # + # @param type_ref [ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] the type reference + # @return [Array] a pair of [layers, inner_type] + def self.peel_json_schema_layers_once(type_ref) + if type_ref.list? + return [[:array], type_ref.unwrap_list] if type_ref.non_null? + return [[:nullable, :array], type_ref.unwrap_list] + end + + return [[], type_ref.unwrap_non_null] if type_ref.non_null? + [[:nullable], type_ref] + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_type/enum_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_type/enum_extension.rb new file mode 100644 index 000000000..837b2c76b --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_type/enum_extension.rb @@ -0,0 +1,38 @@ +# Copyright 2024 - 2026 Block, Inc. 
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module FieldType
        # JSON-schema-aware decorator around an enum indexing field type. Any method not defined here
        # is forwarded to the wrapped field type by `SimpleDelegator`.
        class Enum < ::SimpleDelegator
          # Enum types have no subfields, so there is no per-field metadata to report.
          #
          # @return [Hash] always an empty hash
          def json_schema_field_metadata_by_field_name
            {}
          end

          # Keeps only the `enum` customization; every other key is discarded for enum-typed fields.
          #
          # @param customizations [Hash] the customizations to format (string keys)
          # @return [Hash] a new hash containing at most the `"enum"` entry
          def format_field_json_schema_customizations(customizations)
            customizations.slice("enum")
          end

          # Describes the enum as a JSON string restricted to the wrapped type's value names.
          #
          # @return [Hash] the JSON schema definition for this enum type
          def to_json_schema
            allowed_values = __getobj__.enum_value_names

            {"type" => "string", "enum" => allowed_values}
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      # Namespace for JSON-schema-aware wrappers around core indexing field types.
      module FieldType
        # Wraps an object/interface indexing field type to add JSON schema serialization.
        class Object < ::SimpleDelegator
          # @param wrapped [ElasticGraph::SchemaDefinition::Indexing::FieldType::Object] the core field type to wrap
          # @param json_schema_options [Hash] JSON schema options from the type definition
          def initialize(wrapped, json_schema_options: {})
            @json_schema_options = json_schema_options
            super(wrapped)
          end

          # @return [Hash] field metadata keyed by field name
          def json_schema_field_metadata_by_field_name
            __getobj__.subfields.to_h { |field| [field.name, field.json_schema_metadata] }
          end

          # Returns the customizations as-is for object types.
          #
          # @param customizations [Hash] the customizations to format
          # @return [Hash] the formatted customizations
          def format_field_json_schema_customizations(customizations)
            customizations
          end

          # Returns the JSON schema for this type, computing it on first use and memoizing it.
          #
          # When the type declares its own JSON schema options they are used verbatim (with stringified
          # keys); otherwise the schema is generated from the wrapped type's subfields.
          #
          # @raise [Errors::SchemaError] if a `sourced_from` subfield also has JSON schema customizations
          # @return [Hash] the JSON schema definition for this object type
          def to_json_schema
            @to_json_schema ||=
              if @json_schema_options.empty?
                build_json_schema_from_subfields
              else
                Support::HashUtil.stringify_keys(@json_schema_options)
              end
          end

          private

          # Generates the object schema from the wrapped type's subfields. The ingestion serializer state
          # is only read here, so memoized calls and types with explicit options never touch it.
          def build_json_schema_from_subfields
            wrapped = __getobj__
            ingestion_state = wrapped.schema_def_state.ingestion_serializer_state

            # Fields sourced from another type are excluded from the schema, as are runtime-script fields.
            other_source_subfields, json_schema_candidate_subfields = wrapped.subfields.partition(&:source)
            validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields)
            json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script)

            required_fields = json_schema_subfields
            required_fields = required_fields.reject(&:nullable?) if ingestion_state[:allow_omitted_json_schema_fields]

            {
              "type" => "object",
              "properties" => json_schema_subfields.to_h { |field| [field.name, field.json_schema] }.merge(json_schema_typename_field),
              "required" => required_fields.map(&:name).freeze,
              # `nil` (dropped by `compact`) when extra fields are allowed; explicit `false` otherwise.
              "additionalProperties" => (false unless ingestion_state[:allow_extra_json_schema_fields]),
              "description" => wrapped.doc_comment
            }.compact.freeze
          end

          # The `__typename` property, pinned to the wrapped type's name.
          def json_schema_typename_field
            type_name = __getobj__.type_name

            {
              "__typename" => {
                "type" => "string",
                "const" => type_name,
                "default" => type_name
              }
            }
          end

          # Raises when any `sourced_from` field carries JSON schema customizations.
          def validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields)
            problem_fields = other_source_subfields.reject { |field| field.json_schema_customizations.empty? }
            return if problem_fields.empty?

            field_descriptions = problem_fields.map(&:name).sort.map { |field| "`#{field}`" }.join(", ")
            raise Errors::SchemaError,
              "`#{__getobj__.type_name}` has #{problem_fields.size} field(s) (#{field_descriptions}) that are `sourced_from` " \
              "another type and also have JSON schema customizations. Instead, put the JSON schema " \
              "customizations on the source type's field definitions."
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module FieldType
        # JSON-schema-aware decorator around a scalar indexing field type.
        class Scalar < ::SimpleDelegator
          # Scalars have no subfields, so there is no per-field metadata.
          #
          # @return [Hash] always an empty hash
          def json_schema_field_metadata_by_field_name
            {}
          end

          # Scalar customizations need no filtering; the argument is handed back untouched.
          #
          # @param customizations [Hash] the customizations to format
          # @return [Hash] the same customizations
          def format_field_json_schema_customizations(customizations)
            customizations
          end

          # Uses the JSON schema options declared on the wrapped scalar type, with stringified keys.
          #
          # @return [Hash] the JSON schema definition for this scalar type
          def to_json_schema
            scalar_options = __getobj__.scalar_type.json_schema_options

            Support::HashUtil.stringify_keys(scalar_options)
          end
        end

        # JSON-schema-aware decorator around a union indexing field type.
        class Union < ::SimpleDelegator
          # Unions have no subfields of their own, so there is no per-field metadata.
          #
          # @return [Hash] always an empty hash
          def json_schema_field_metadata_by_field_name
            {}
          end

          # Union customizations need no filtering; the argument is handed back untouched.
          #
          # @param customizations [Hash] the customizations to format
          # @return [Hash] the same customizations
          def format_field_json_schema_customizations(customizations)
            customizations
          end

          # Describes the union as "exactly one of the subtypes", each referenced by `$ref`, and
          # requires `__typename` to be present.
          #
          # @return [Hash] the JSON schema definition for this union type
          def to_json_schema
            subtype_refs = __getobj__.subtypes_by_name.keys.map do |subtype_name|
              {"$ref" => "#/$defs/#{subtype_name}"}
            end

            {"required" => ["__typename"], "oneOf" => subtype_refs}
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      # Extends indices with JSON-schema-specific event requirements: fields an index depends on
      # (rollover timestamp, routing value) are marked non-nullable in the JSON schema.
      module IndexExtension
        # Runs the core `rollover` behavior, then registers a callback (executed once user definition
        # is complete) that marks every field along the timestamp path as non-nullable.
        #
        # @private
        def rollover(frequency, timestamp_field_path_name)
          super

          schema_def_state.after_user_definition_complete do
            timestamp_field_path = public_field_path(
              timestamp_field_path_name,
              explanation: "it is referenced as an index `rollover` field"
            )

            timestamp_field_path.path_parts.each do |path_part|
              path_part.json_schema(nullable: false)
            end
          end
        end

        # Runs the core `route_with` behavior, then registers a callback (executed once user definition
        # is complete) that marks every field along the routing path as non-nullable; the final field
        # additionally gets a `pattern` taken from the core index's `HAS_NON_WHITE_SPACE_REGEX`.
        #
        # @private
        def route_with(routing_field_path_name)
          super

          schema_def_state.after_user_definition_complete do
            routing_field_path = public_field_path(
              routing_field_path_name,
              explanation: "it is referenced as an index `route_with` field"
            )

            # Every part except the last only needs to be non-nullable.
            routing_field_path.path_parts[0..-2].each do |parent_part|
              parent_part.json_schema(nullable: false)
            end

            routing_field_path.last_part.json_schema(
              nullable: false,
              pattern: ElasticGraph::SchemaDefinition::Indexing::Index::HAS_NON_WHITE_SPACE_REGEX
            )
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # Contains logic related to the JSON schema for ElasticGraph's event envelope.
        #
        # @api private
        module EventEnvelope
          # Builds the JSON schema definition for ElasticGraph's event envelope.
          #
          # The envelope is a closed object (`additionalProperties: false`). `op`, `type`, `id`, `version`
          # and the schema-version key are always required, and `record` is required whenever `op` is
          # `upsert`.
          #
          # @param indexed_type_names [Array<String>] names of indexed types (emitted sorted as the `type` enum)
          # @param json_schema_version [Integer] the JSON schema version number (emitted as a `const`)
          # @return [Hash] the event envelope JSON schema
          def self.json_schema(indexed_type_names, json_schema_version)
            latency_timestamp_schema = {
              "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.",
              "type" => "string",
              "format" => "date-time"
            }

            # Property order is part of the generated output, so it is kept exactly as declared here.
            envelope_properties = {
              "op" => {
                "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.",
                "type" => "string",
                "enum" => %w[upsert]
              },
              "type" => {
                "description" => "The type of object present in `record`.",
                "type" => "string",
                "enum" => indexed_type_names.sort
              },
              "id" => {
                "description" => "The unique identifier of the record.",
                "type" => "string",
                "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
              },
              "version" => {
                "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".',
                "type" => "integer",
                "minimum" => 0,
                "maximum" => (2**63) - 1
              },
              "record" => {
                "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.",
                "type" => "object"
              },
              "latency_timestamps" => {
                "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.",
                "type" => "object",
                "additionalProperties" => false,
                "patternProperties" => {"^\\w+_at$" => latency_timestamp_schema}
              },
              JSON_SCHEMA_VERSION_KEY => {
                "description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.",
                "const" => json_schema_version
              },
              "message_id" => {
                "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.",
                "type" => "string"
              }
            }

            always_required = ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY]
            upsert_condition = {"properties" => {"op" => {"const" => "upsert"}}}

            {
              "type" => "object",
              "description" => "Required by ElasticGraph to wrap every data event.",
              "properties" => envelope_properties,
              "additionalProperties" => false,
              "required" => always_required,
              "if" => upsert_condition,
              "then" => {"required" => ["record"]}
            }
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      # Namespace for JSON-schema-aware indexing components.
      module Indexing
        # Decorates a core indexing field with JSON schema generation. Anything not defined here
        # (`name`, `type`, `mapping`, `doc_comment`, ...) is forwarded to the wrapped field.
        #
        # @api private
        class Field < ::SimpleDelegator
          # JSON schema overrides that automatically apply to specific mapping types so that the JSON schema
          # validation will reject values which cannot be indexed into fields of a specific mapping type.
          #
          # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/number.html Elasticsearch numeric field type documentation
          # @note We don't handle `integer` here because it's the default numeric type (handled by our definition of the `Int` scalar type).
          # @note Likewise, we don't handle `long` here because a custom scalar type must be used for that since GraphQL's `Int` type can't handle long values.
          JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE = {
            "byte" => {"minimum" => -(2**7), "maximum" => (2**7) - 1},
            "short" => {"minimum" => -(2**15), "maximum" => (2**15) - 1},
            "keyword" => {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH},
            "text" => {"maxLength" => DEFAULT_MAX_TEXT_LENGTH}
          }

          # @return [Hash] user-specified JSON schema customizations for this field
          attr_reader :json_schema_customizations

          # @private
          # @param field the core indexing field to wrap
          # @param json_schema_layers [Array<Symbol>] `:nullable`/`:array` layers, outermost first
          # @param json_schema_customizations [Hash] user-specified JSON schema options
          def initialize(field, json_schema_layers:, json_schema_customizations:)
            @json_schema_layers = json_schema_layers
            @json_schema_customizations = json_schema_customizations
            super(field)
          end

          # Returns the (memoized) JSON schema definition for this field.
          #
          # The innermost schema is wrapped once per layer, working from the innermost layer outwards;
          # outer customizations and the doc comment (when present) are then merged on top.
          #
          # @return [Hash] the JSON schema hash
          def json_schema
            @json_schema ||= begin
              layered_schema = json_schema_layers.reverse.reduce(inner_json_schema) do |wrapped_schema, layer|
                process_layer(layer, wrapped_schema)
              end

              customized_schema = layered_schema.merge(outer_json_schema_customizations)
              described_schema = customized_schema.merge({"description" => doc_comment}.compact)

              Support::HashUtil.stringify_keys(described_schema)
            end
          end

          # @return [JSONSchemaFieldMetadata] metadata about this field for inclusion in the JSON schema
          def json_schema_metadata
            JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index)
          end

          # @return [Boolean] whether any of this field's JSON schema layers is `:nullable`
          #
          # NOTE(review): this is also true when only an inner layer is nullable (e.g. layers
          # `[:array, :nullable]`), not just when the field itself may be null -- confirm that is the
          # intended meaning for callers that use it to decide which fields are required.
          def nullable?
            json_schema_layers.include?(:nullable)
          end

          private

          attr_reader :json_schema_layers

          # Schema for the innermost (fully unwrapped) type: a `$ref`, combined via `allOf` with any
          # customizations that apply at this level.
          def inner_json_schema
            user_customizations =
              if user_specified_json_schema_customizations_go_on_outside?
                {} # : ::Hash[::String, untyped]
              else
                Support::HashUtil.stringify_keys(json_schema_customizations)
              end

            # User customizations win over the automatic per-mapping-type overrides.
            mapping_customizations = JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE[mapping["type"]] || {}
            combined = mapping_customizations.merge(user_customizations)
            formatted = indexing_field_type.format_field_json_schema_customizations(combined)

            type_ref = {"$ref" => "#/$defs/#{type.unwrapped_name}"}

            formatted.empty? ? type_ref : {"allOf" => [type_ref, formatted]}
          end

          # User customizations applied to the outermost schema; empty unless the field has an array layer.
          def outer_json_schema_customizations
            if user_specified_json_schema_customizations_go_on_outside?
              Support::HashUtil.stringify_keys(json_schema_customizations)
            else
              {}
            end
          end

          # Customizations go on the outside whenever the field has at least one array layer.
          def user_specified_json_schema_customizations_go_on_outside?
            json_schema_layers.include?(:array)
          end

          # Wraps `schema` in a single layer; returns `nil` for a layer that is neither `:nullable` nor `:array`.
          def process_layer(layer, schema)
            if layer == :nullable
              {"anyOf" => [schema, {"type" => "null"}]}
            elsif layer == :array
              {"type" => "array", "items" => schema}
            end
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # @!parse class FieldReference < ::Data; end
        FieldReference = ::Data.define(
          :field_reference,
          :json_schema_layers,
          :json_schema_customizations
        )

        # Pairs a core indexing field reference with the JSON schema layers and customizations needed
        # to build a JSON-schema-aware {Field} once the reference can be resolved.
        #
        # @api private
        class FieldReference < ::Data
          # Resolves the wrapped reference and decorates the result as a {Field}.
          #
          # @return [Field, nil] `nil` when the wrapped reference does not resolve
          def resolve
            core_field = field_reference.resolve
            return nil unless core_field

            Indexing::Field.new(
              core_field,
              json_schema_layers: json_schema_layers,
              json_schema_customizations: json_schema_customizations
            )
          end

          # @dynamic initialize, with, field_reference, json_schema_layers, json_schema_customizations
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # @!parse class JSONSchemaFieldMetadata; end
        JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index)

        # Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas.
        #
        # @api private
        class JSONSchemaFieldMetadata < ::Data
          # @return [Hash] hash representation suitable for serialization, with the string keys
          #   `"type"` and `"nameInIndex"`
          def to_dumpable_hash
            {"type" => type, "nameInIndex" => name_in_index}
          end

          # @dynamic initialize, type, name_in_index
        end
      end
    end
  end
end

module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # Represents the result of merging a JSON schema with ElasticGraph metadata.
        #
        # Besides the merged `json_schema`, it carries what the merge could not reconcile:
        # `missing_fields` and `missing_types` (names found in the JSON schema with no current, renamed
        # or deleted counterpart), `definition_conflicts` (deprecated elements that overlap with another
        # definition of the same name) and `missing_necessary_fields` (routing/rollover fields absent
        # from the JSON schema).
        #
        # @private
        class JSONSchemaWithMetadata < ::Data.define(
          :json_schema,
          :missing_fields,
          :missing_types,
          :definition_conflicts,
          :missing_necessary_fields
        )
          # @return the version recorded in the JSON schema under `JSON_SCHEMA_VERSION_KEY`
          # @raise [KeyError] if the JSON schema has no version key
          def json_schema_version
            json_schema.fetch(JSON_SCHEMA_VERSION_KEY)
          end

          # Responsible for building `JSONSchemaWithMetadata` instances.
          #
          # A merger is stateful: each deprecated (renamed/deleted) element that is matched while
          # merging is removed from `unused_deprecated_elements`, so after merging, that set holds the
          # deprecated elements no merged JSON schema referred to.
          #
          # @private
          class Merger
            # @return [Set] deprecated elements that have not (yet) matched anything in a merged schema
            attr_reader :unused_deprecated_elements

            # @param schema_def_results the schema definition results to read current field metadata,
            #   rename/delete records and derived indexing type names from
            def initialize(schema_def_results)
              @field_metadata_by_type_and_field_name = schema_def_results.json_schema_field_metadata_by_type_and_field_name
              @renamed_types_by_old_name = schema_def_results.state.renamed_types_by_old_name
              @deleted_types_by_old_name = schema_def_results.state.deleted_types_by_old_name
              @renamed_fields_by_type_name_and_old_field_name = schema_def_results.state.renamed_fields_by_type_name_and_old_field_name
              @deleted_fields_by_type_name_and_old_field_name = schema_def_results.state.deleted_fields_by_type_name_and_old_field_name
              @state = schema_def_results.state
              @derived_indexing_type_names = schema_def_results.derived_indexing_type_names

              # Start with every deprecated element; entries are removed as they get matched.
              @unused_deprecated_elements = (
                @renamed_types_by_old_name.values +
                @deleted_types_by_old_name.values +
                @renamed_fields_by_type_name_and_old_field_name.values.flat_map(&:values) +
                @deleted_fields_by_type_name_and_old_field_name.values.flat_map(&:values)
              ).to_set
            end

            # Returns a copy of `json_schema` in which every property of every type definition (other
            # than the event envelope, and other than `__typename`) gains an `"ElasticGraph"` entry
            # holding that field's dumpable metadata, or `nil` when no metadata could be determined.
            #
            # @param json_schema [Hash] a JSON schema with a `"$defs"` entry
            # @return [JSONSchemaWithMetadata] the merged schema plus everything that could not be reconciled
            def merge_metadata_into(json_schema)
              missing_fields = ::Set.new
              missing_types = ::Set.new
              definition_conflicts = ::Set.new
              old_type_name_by_current_name = {} # : ::Hash[::String, ::String]

              defs = json_schema.fetch("$defs").to_h do |type_name, type_def|
                # Only definitions that have `properties` (and are not the envelope) carry field metadata.
                if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"])
                  current_type_name = determine_current_type_name(
                    type_name,
                    missing_types: missing_types,
                    definition_conflicts: definition_conflicts
                  )

                  # Remember which name this schema used for the type, for the necessary-field check below.
                  if current_type_name
                    old_type_name_by_current_name[current_type_name] = type_name
                  end

                  properties = properties.to_h do |field_name, prop|
                    unless field_name == "__typename"
                      # When the type itself could not be resolved, no field lookup is attempted.
                      field_metadata = current_type_name&.then do |name|
                        field_metadata_for(
                          name,
                          field_name,
                          missing_fields: missing_fields,
                          definition_conflicts: definition_conflicts
                        )
                      end

                      prop = prop.merge({"ElasticGraph" => field_metadata&.to_dumpable_hash})
                    end

                    [field_name, prop]
                  end

                  type_def = type_def.merge({"properties" => properties})
                end

                [type_name, type_def]
              end

              json_schema = json_schema.merge("$defs" => defs)

              JSONSchemaWithMetadata.new(
                json_schema: json_schema,
                missing_fields: missing_fields,
                missing_types: missing_types,
                definition_conflicts: definition_conflicts,
                missing_necessary_fields: identify_missing_necessary_fields(json_schema, old_type_name_by_current_name)
              )
            end

            private

            # Maps a type name found in a JSON schema to the type's current name.
            #
            # Returns `type_name` when the type exists currently, `nil` when it was deleted, and the new
            # name when it was renamed. Otherwise the name is added to `missing_types` and `nil` is
            # returned. A matching deleted/renamed record is removed from the unused set, and when more
            # than one of "exists", "deleted" and "renamed" applies, the deprecated records involved
            # are added to `definition_conflicts`.
            def determine_current_type_name(type_name, missing_types:, definition_conflicts:)
              exists_currently = @field_metadata_by_type_and_field_name.key?(type_name)
              deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) }
              renamed = @renamed_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) }

              if [exists_currently, deleted, renamed].count(&:itself) > 1
                definition_conflicts.merge([deleted, renamed].compact)
              end

              return type_name if exists_currently
              return nil if deleted
              return renamed.name if renamed

              missing_types << type_name
              nil
            end

            # Field-level counterpart of `determine_current_type_name`.
            #
            # Returns the current metadata for `type_name.field_name` when it exists, `nil` when the
            # field was deleted, and the metadata of the renamed-to field when it was renamed. Otherwise
            # the qualified name is added to `missing_fields` and `nil` is returned. Conflicting
            # deprecated records are reported with their name replaced by the qualified field name.
            def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:)
              full_name = "#{type_name}.#{field_name}"

              current_meta = @field_metadata_by_type_and_field_name.dig(type_name, field_name)
              deleted = @deleted_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem|
                @unused_deprecated_elements.delete(elem)
              end
              renamed = @renamed_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem|
                @unused_deprecated_elements.delete(elem)
              end

              if [current_meta, deleted, renamed].count(&:itself) > 1
                definition_conflicts.merge([deleted, renamed].compact.map { |elem| elem.with(name: full_name) })
              end

              return current_meta if current_meta
              return nil if deleted
              return @field_metadata_by_type_and_field_name.dig(type_name, renamed.name) if renamed

              missing_fields << full_name
              nil
            end

            # Collects, across every object type that has its own index definition and is not a derived
            # indexing type, the routing/rollover fields that the merged JSON schema does not contain.
            def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name)
              json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name)

              @state.object_types_by_name.values
                .select { |type| type.has_own_index_def? && !@derived_indexing_type_names.include?(type.name) }
                .flat_map { |object_type| identify_missing_necessary_fields_for_index_def(object_type, object_type.own_index_def, json_schema_resolver) }
            end

            # Checks the routing and rollover-timestamp field paths of one index definition (skipping
            # whichever is not configured) and returns a `MissingNecessaryField` for each one the
            # resolver reports as missing.
            def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver)
              {
                "routing" => index_def.routing_field_path,
                "rollover" => index_def.rollover_config&.timestamp_field_path
              }.compact.filter_map do |field_type, field_path|
                if json_schema_resolver.necessary_path_missing?(field_path)
                  MissingNecessaryField.new(
                    field_type: field_type,
                    fully_qualified_path: field_path.fully_qualified_path_in_index
                  )
                end
              end
            end

            # Answers whether a field path can be followed through a merged JSON schema, using the
            # `"ElasticGraph"` metadata attached to each property.
            class JSONSchemaResolver
              # @param state schema definition state, used to resolve type references by name
              # @param json_schema [Hash] a JSON schema that already has metadata merged in
              # @param old_type_name_by_current_name [Hash] current type name => name used in `json_schema`
              def initialize(state, json_schema, old_type_name_by_current_name)
                @state = state
                @old_type_name_by_current_name = old_type_name_by_current_name
                # Lazily built per type: `nameInIndex` => metadata, for every property that has metadata.
                @meta_by_old_type_and_name_in_index = ::Hash.new do |hash, type_name|
                  properties = json_schema.fetch("$defs").fetch(type_name).fetch("properties")

                  hash[type_name] = properties.filter_map do |name, prop|
                    if (metadata = prop["ElasticGraph"])
                      [metadata.fetch("nameInIndex"), metadata]
                    end
                  end.to_h
                end
              end

              # Walks the path one part at a time, starting from the parent type of the first part.
              # Each part that is found moves `parent_type` on to that field's fully unwrapped type.
              #
              # @return [Boolean] true as soon as one part of the path is reported missing
              def necessary_path_missing?(field_path)
                parent_type = field_path.first_part.parent_type.name

                field_path.path_parts.any? do |path_part|
                  necessary_path_part_missing?(parent_type, path_part.name_in_index) do |meta|
                    parent_type = @state.type_ref(meta.fetch("type")).fully_unwrapped.name
                  end
                end
              end

              private

              # Yields the metadata when the part is found. Returns false (not missing) when the parent
              # type has no known name in this JSON schema, otherwise true exactly when no metadata
              # exists for `name_in_index` on that type.
              def necessary_path_part_missing?(parent_type, name_in_index)
                old_type_name = @old_type_name_by_current_name[parent_type]
                return false unless old_type_name

                meta = @meta_by_old_type_and_name_in_index.dig(old_type_name, name_in_index)
                yield meta if meta
                !meta
              end
            end
          end

          # @!parse class MissingNecessaryField < ::Data; end
          MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path)

          # Identifies a necessary field (`field_type` is `"routing"` or `"rollover"`) that is missing
          # from a JSON schema, by its fully qualified path in the index.
          #
          # @private
          class MissingNecessaryField < ::Data
            # @dynamic initialize, with, field_type, fully_qualified_path
          end
        end
      end
    end
  end
end
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      # Validates JSON-schema-specific configuration supplied through schema definition APIs.
      #
      # @api private
      module JSONSchemaOptionValidator
        module_function

        # Validates JSON schema options against the JSON meta-schema.
        #
        # @param schema_element [Object] the schema element being configured (used in error messages)
        # @param options [Hash] the JSON schema options to validate
        # @raise [Errors::SchemaError] if the options are invalid
        # @return [void]
        def validate!(schema_element, options)
          validatable_json_schema = Support::HashUtil.stringify_keys(options)

          if (error_msg = Support::JSONSchema.strict_meta_schema_validator.validate_with_error_message(validatable_json_schema))
            raise Errors::SchemaError, "Invalid JSON schema options set on #{schema_element}:\n\n#{error_msg}"
          end
        end
      end
    end
  end
end

module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      # Prunes unused type definitions from a given JSON schema.
      #
      # A definition is kept when it is the event envelope, is listed in the envelope's `type` enum,
      # or is reachable from one of those through `$ref` pointers of the form `#/$defs/<name>`.
      #
      # @private
      class JSONSchemaPruner
        # @param original_json_schema [Hash] a JSON schema with a `"$defs"` entry that includes the event envelope
        # @return [Hash] a copy of the schema whose `"$defs"` only contains the reachable definitions,
        #   in their original order
        def self.prune(original_json_schema)
          initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema
            .dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum")

          types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"])

          # The .select will preserve the sort order of the original hash.
          # standard:disable Style/HashSlice -- We intentionally preserve the dumped definition order.
          pruned_defs = original_json_schema["$defs"].select { |type_name, _type_def| types_to_keep.include?(type_name) }
          # standard:enable Style/HashSlice

          original_json_schema.merge("$defs" => pruned_defs)
        end

        # Computes the transitive closure of type names reachable from `source_type_names`.
        #
        # Each name is expanded at most once, so definitions that refer to themselves or to each other
        # terminate instead of recursing without end.
        #
        # @param source_type_names [Array<String>] the names to start from
        # @param original_defs [Hash] all type definitions, keyed by type name
        # @return [Set<String>] the starting names plus every name reachable from them
        private_class_method def self.referenced_type_names(source_type_names, original_defs)
          visited = Set.new
          pending = source_type_names.to_a

          until pending.empty?
            unseen = pending.uniq.reject { |type_name| visited.include?(type_name) }
            visited.merge(unseen)
            pending = collect_ref_names(original_defs.slice(*unseen))
          end

          visited
        end

        # Recursively collects the type names of every `"$ref" => "#/$defs/<name>"` entry found in
        # `hash`, descending into nested hashes and into hashes contained in arrays.
        #
        # @param hash [Hash] a JSON schema fragment
        # @return [Array<String>] referenced type names (may contain duplicates)
        private_class_method def self.collect_ref_names(hash)
          hash.flat_map do |key, value|
            case value
            when ::Hash
              collect_ref_names(value)
            when ::Array
              value.grep(::Hash).flat_map { |subhash| collect_ref_names(subhash) }
            when ::String
              if key == "$ref" && (type = value[%r{\A#/\$defs/(.+)\z}, 1])
                [type]
              else
                []
              end
            else
              []
            end
          end
        end
      end
    end
  end
end
+ module ObjectInterfaceExtension + # @return [Hash] JSON schema options for this type + def json_schema_options + @json_schema_options ||= {} + end + + # Configures JSON schema options for this object or interface type. + # + # @param options [Hash] JSON schema options + # @return [void] + def json_schema(**options) + JSONSchemaOptionValidator.validate!(self, options) + json_schema_options.update(options) + end + + # @private + def to_indexing_field_type + field_type = super + + if field_type.is_a?(ElasticGraph::SchemaDefinition::Indexing::FieldType::Union) + FieldType::Union.new(field_type) + else + FieldType::Object.new(field_type, json_schema_options: json_schema_options) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb new file mode 100644 index 000000000..5f3ac53a1 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb @@ -0,0 +1,115 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/indexing/event_envelope" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::Results} that adds + # JSON Schema generation support. 
+ # + # @private + module ResultsExtension + # @param version [Integer] desired JSON schema version + # @return [Hash] the JSON schema for the requested version, if available + # @raise [Errors::NotFoundError] if the requested JSON schema version is not available + def json_schemas_for(version) + unless available_json_schema_versions.include?(version) + raise Errors::NotFoundError, "The requested json schema version (#{version}) is not available. Available versions: #{available_json_schema_versions.to_a.join(", ")}." + end + + @latest_versioned_json_schema ||= merge_field_metadata_into_json_schema(current_public_json_schema).json_schema + end + + # @return [Set] set of available JSON schema versions + def available_json_schema_versions + @available_json_schema_versions ||= Set[latest_json_schema_version] + end + + # @return [Integer] the current JSON schema version + def latest_json_schema_version + current_public_json_schema[JSON_SCHEMA_VERSION_KEY] + end + + # @private + def json_schema_version_setter_location + state.ingestion_serializer_state[:json_schema_version_setter_location] + end + + # @private + def json_schema_field_metadata_by_type_and_field_name + @json_schema_field_metadata_by_type_and_field_name ||= json_schema_indexing_field_types_by_name + .transform_values(&:json_schema_field_metadata_by_field_name) + end + + # @private + def current_public_json_schema + @current_public_json_schema ||= build_public_json_schema + end + + # @private + def merge_field_metadata_into_json_schema(json_schema) + json_schema_with_metadata_merger.merge_metadata_into(json_schema) + end + + # @private + def unused_deprecated_elements + json_schema_with_metadata_merger.unused_deprecated_elements + end + + private + + def json_schema_with_metadata_merger + @json_schema_with_metadata_merger ||= Indexing::JSONSchemaWithMetadata::Merger.new(self) + end + + def build_public_json_schema + json_schema_version = state.ingestion_serializer_state[:json_schema_version] + if 
json_schema_version.nil? + raise Errors::SchemaError, "`json_schema_version` must be specified in the schema. To resolve, add `schema.json_schema_version 1` in a schema definition block." + end + + root_document_type_names = state.object_types_by_name.values + .select { |type| type.root_document_type? && !type.abstract? } + .reject { |type| derived_indexing_type_names.include?(type.name) } + .map(&:name) + + definitions_by_name = json_schema_indexing_field_types_by_name + .transform_values(&:to_json_schema) + .compact + + { + "$schema" => JSON_META_SCHEMA, + JSON_SCHEMA_VERSION_KEY => json_schema_version, + "$defs" => { + "ElasticGraphEventEnvelope" => Indexing::EventEnvelope.json_schema(root_document_type_names, json_schema_version) + }.merge(definitions_by_name) + } + end + + def json_schema_indexing_field_types_by_name + @json_schema_indexing_field_types_by_name ||= state + .types_by_name + .except("Query") + .values + .reject do |t| + derived_indexing_type_names.include?(t.name) || + # Skip graphql framework types + t.graphql_only? + end + .sort_by(&:name) + .to_h { |type| [type.name, type.to_indexing_field_type] } + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/scalar_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/scalar_type_extension.rb new file mode 100644 index 000000000..6b05328e2 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/scalar_type_extension.rb @@ -0,0 +1,48 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/field_type/scalar_extension" +require "elastic_graph/json_ingestion/schema_definition/json_schema_option_validator" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends scalar types with JSON schema validation and serialization behavior. + module ScalarTypeExtension + # @return [Hash] JSON schema options for this scalar type + def json_schema_options + @json_schema_options ||= {} + end + + # Configures JSON schema options for this scalar type. + # + # @param options [Hash] JSON schema options + # @return [void] + def json_schema(**options) + JSONSchemaOptionValidator.validate!(self, options) + json_schema_options.update(options) + end + + # @private + def to_indexing_field_type + FieldType::Scalar.new(super) + end + + # Validates that json_schema has been configured on this scalar type. + # + # @raise [Errors::SchemaError] if json_schema has not been configured + # @return [void] + def validate_json_schema_configuration! + return unless json_schema_options.empty? + + raise Errors::SchemaError, "Scalar types require `json_schema` to be configured, but `#{name}` lacks `json_schema`." + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb new file mode 100644 index 000000000..f542528de --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb @@ -0,0 +1,250 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" +require "yaml" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::SchemaArtifactManager} that adds + # JSON Schema artifact generation support. + # + # @private + module SchemaArtifactManagerExtension + # Overrides `dump_artifacts` to add JSON schema version bump checking before dumping. + def dump_artifacts + check_if_needs_json_schema_version_bump do |recommended_json_schema_version| + if @enforce_json_schema_version + # @type var setter_location: ::Thread::Backtrace::Location + # We use `_ =` because while `json_schema_version_setter_location` can be nil, + # it'll never be nil if we get here and we want the type to be non-nilable. + setter_location = _ = schema_definition_results.json_schema_version_setter_location + setter_location_path = ::Pathname.new(setter_location.absolute_path.to_s).relative_path_from(::Dir.pwd) + + abort "A change has been attempted to `json_schemas.yaml`, but the `json_schema_version` has not been correspondingly incremented. Please " \ + "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \ + "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \ + " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \ + "Alternately, pass `enforce_json_schema_version: false` to `ElasticGraph::SchemaDefinition::RakeTasks.new` to allow the JSON schemas " \ + "file to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping." + else + @output.puts <<~EOS + WARNING: the `json_schemas.yaml` artifact is being updated without the `json_schema_version` being correspondingly incremented. 
+ This is not recommended for production applications, but is currently allowed because you have set `enforce_json_schema_version: false`. + EOS + end + end + + super + end + + private + + # Overrides the base `artifacts_from_schema_def` method to add JSON schema artifacts. + def artifacts_from_schema_def + base_artifacts = super + + versioned_artifacts = build_desired_versioned_json_schemas(json_schemas_artifact.desired_contents).values.map do |versioned_schema| + new_versioned_json_schema_artifact(versioned_schema) + end + + base_artifacts + [json_schemas_artifact] + versioned_artifacts + end + + def json_schemas_artifact + @json_schemas_artifact ||= new_yaml_artifact( + JSON_SCHEMAS_FILE, + JSONSchemaPruner.prune(schema_definition_results.current_public_json_schema), + extra_comment_lines: [ + "This is the \"public\" JSON schema file and is intended to be provided to publishers so that", + "they can perform code generation and event validation." + ] + ) + end + + def check_if_needs_json_schema_version_bump(&block) + if json_schemas_artifact.out_of_date? 
+ existing_schema_version = json_schemas_artifact.existing_dumped_contents&.dig(JSON_SCHEMA_VERSION_KEY) || -1 + desired_schema_version = json_schemas_artifact.desired_contents[JSON_SCHEMA_VERSION_KEY] + + if existing_schema_version >= desired_schema_version + yield existing_schema_version + 1 + end + end + end + + def build_desired_versioned_json_schemas(current_public_json_schema) + versioned_parsed_yamls = ::Dir.glob(::File.join(@schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v*.yaml")).map do |file| + ::YAML.safe_load_file(file) + end + [current_public_json_schema] + + results_by_json_schema_version = versioned_parsed_yamls.to_h do |parsed_yaml| + merged_schema = @schema_definition_results.merge_field_metadata_into_json_schema(parsed_yaml) + [merged_schema.json_schema_version, merged_schema] + end + + report_json_schema_merge_errors(results_by_json_schema_version.values) + report_json_schema_merge_warnings + + results_by_json_schema_version.transform_values(&:json_schema) + end + + def report_json_schema_merge_errors(merged_results) + json_schema_versions_by_missing_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] + json_schema_versions_by_missing_type = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] + json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] + + merged_results.each do |result| + result.missing_fields.each do |field| + json_schema_versions_by_missing_field[field] << result.json_schema_version + end + + result.missing_types.each do |type| + json_schema_versions_by_missing_type[type] << result.json_schema_version + end + + result.missing_necessary_fields.each do |missing_necessary_field| + json_schema_versions_by_missing_necessary_field[missing_necessary_field] << result.json_schema_version + end + end + + missing_field_errors = 
json_schema_versions_by_missing_field.map do |field, json_schema_versions| + missing_field_error_for(field, json_schema_versions) + end + + missing_type_errors = json_schema_versions_by_missing_type.map do |type, json_schema_versions| + missing_type_error_for(type, json_schema_versions) + end + + missing_necessary_field_errors = json_schema_versions_by_missing_necessary_field.map do |field, json_schema_versions| + missing_necessary_field_error_for(field, json_schema_versions) + end + + definition_conflict_errors = merged_results + .flat_map { |result| result.definition_conflicts.to_a } + .group_by(&:name) + .map do |name, deprecated_elements| + <<~EOS + The schema definition of `#{name}` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: + + #{format_deprecated_elements(deprecated_elements)} + EOS + end + + errors = missing_field_errors + missing_type_errors + missing_necessary_field_errors + definition_conflict_errors + return if errors.empty? + + abort errors.join("\n\n") + end + + def report_json_schema_merge_warnings + unused_elements = @schema_definition_results.unused_deprecated_elements + return if unused_elements.empty? + + @output.puts <<~EOS + The schema definition has #{unused_elements.size} unneeded reference(s) to deprecated schema elements. These can all be safely deleted: + + #{format_deprecated_elements(unused_elements)} + + EOS + end + + def format_deprecated_elements(deprecated_elements) + descriptions = deprecated_elements + .sort_by { |e| [e.defined_at.path, e.defined_at.lineno] } + .map(&:description) + .uniq + + descriptions.each.with_index(1).map { |desc, idx| "#{idx}. #{desc}" }.join("\n") + end + + def missing_field_error_for(qualified_field, json_schema_versions) + type, field = qualified_field.split(".") + + <<~EOS + The `#{qualified_field}` field (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. 
+ ElasticGraph cannot guess what it should do with this field's data when ingesting events at #{old_versions(json_schema_versions)}. + To continue, do one of the following: + + 1. If the `#{qualified_field}` field has been renamed, indicate this by calling `field.renamed_from "#{field}"` on the renamed field. + 2. If the `#{qualified_field}` field has been dropped, indicate this by calling `type.deleted_field "#{field}"` on the `#{type}` type. + 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + # Builds the error message reported when `type` exists in the given old JSON schema versions but not in the current schema definition. + def missing_type_error_for(type, json_schema_versions) + <<~EOS + The `#{type}` type (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this type's data when ingesting events at #{old_versions(json_schema_versions)}. + To continue, do one of the following: + + 1. If the `#{type}` type has been renamed, indicate this by calling `type.renamed_from "#{type}"` on the renamed type. + 2. If the `#{type}` type has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. + 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def missing_necessary_field_error_for(field, json_schema_versions) + path = field.fully_qualified_path.split(".").last + # :nocov: -- we only cover one side of this ternary. + has_or_have = (json_schema_versions.size == 1) ?
"has" : "have" + # :nocov: + + <<~EOS + #{describe_json_schema_versions(json_schema_versions, "and")} #{has_or_have} no field that maps to the #{field.field_type} field path of `#{field.fully_qualified_path}`. + Since the field path is required for #{field.field_type}, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: + + 1. If the `#{field.fully_qualified_path}` field has been renamed, indicate this by calling `field.renamed_from "#{path}"` on the renamed field rather than using `deleted_field`. + 2. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def describe_json_schema_versions(json_schema_versions, conjunction) + json_schema_versions = json_schema_versions.sort + + # Steep doesn't support pattern matching yet, so have to skip type checking here. + __skip__ = case json_schema_versions + in [single_version] + "JSON schema version #{single_version}" + in [version1, version2] + "JSON schema versions #{version1} #{conjunction} #{version2}" + else + *versions, last_version = json_schema_versions + "JSON schema versions #{versions.join(", ")}, #{conjunction} #{last_version}" + end + end + + def old_versions(json_schema_versions) + return "this old version" if json_schema_versions.size == 1 + "these old versions" + end + + def files_noun_phrase(json_schema_versions) + return "its file" if json_schema_versions.size == 1 + "their files" + end + + def new_versioned_json_schema_artifact(desired_contents) + # File name depends on the schema_version field in the json schema. 
+ schema_version = desired_contents[JSON_SCHEMA_VERSION_KEY] + + new_yaml_artifact( + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{schema_version}.yaml"), + desired_contents, + extra_comment_lines: [ + "This JSON schema file contains internal ElasticGraph metadata and should be considered private.", + "The unversioned JSON schema file is public and intended to be provided to publishers." + ] + ) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/union_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/union_type_extension.rb new file mode 100644 index 000000000..663c6087f --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/union_type_extension.rb @@ -0,0 +1,23 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/field_type/union_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends union types with JSON schema behavior. + module UnionTypeExtension + # @private + def to_indexing_field_type + FieldType::Union.new(super) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/spec_helper.rb b/elasticgraph-json_ingestion/spec/spec_helper.rb new file mode 100644 index 000000000..15cf0ed46 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/spec_helper.rb @@ -0,0 +1,16 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +# This file contains RSpec configuration for `elasticgraph-json_ingestion`. 
+# It is loaded by the shared spec helper at `spec_support/spec_helper.rb`. + +RSpec.configure do |config| + config.when_first_matching_example_defined(:json_ingestion_schema) do + require "support/json_ingestion_schema_support" + end +end diff --git a/elasticgraph-json_ingestion/spec/support/json_ingestion_schema_support.rb b/elasticgraph-json_ingestion/spec/support/json_ingestion_schema_support.rb new file mode 100644 index 000000000..4f1b667bc --- /dev/null +++ b/elasticgraph-json_ingestion/spec/support/json_ingestion_schema_support.rb @@ -0,0 +1,31 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/schema_definition/test_support" +require "elastic_graph/json_ingestion/schema_definition/api_extension" + +module ElasticGraph + module JSONIngestion + module SchemaSupport + include ElasticGraph::SchemaDefinition::TestSupport + + def define_json_ingestion_schema(**options, &block) + define_schema( + schema_element_name_form: :snake_case, + ingestion_serializer_extension_modules: [SchemaDefinition::APIExtension], + **options, + &block + ) + end + end + + RSpec.configure do |config| + config.include SchemaSupport, :json_ingestion_schema + end + end +end diff --git a/elasticgraph-schema_definition/spec/support/json_schema_matcher.rb b/elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb similarity index 100% rename from elasticgraph-schema_definition/spec/support/json_schema_matcher.rb rename to elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb diff --git a/elasticgraph-schema_definition/spec/support/json_schema_matcher_spec.rb b/elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb similarity index 100% rename from elasticgraph-schema_definition/spec/support/json_schema_matcher_spec.rb rename to 
elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/api_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/api_extension_spec.rb new file mode 100644 index 000000000..de003cc44 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/api_extension_spec.rb @@ -0,0 +1,204 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/api_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe APIExtension do + def build_api_with_extension + state = ::Data.define(:ingestion_serializer_state).new(ingestion_serializer_state: {}) + + factory = ::Object.new + + api = ::Object.new + api.instance_variable_set(:@state, state) + api.define_singleton_method(:factory) { factory } + api.extend(APIExtension) + + [api, state, factory] + end + + it "extends the api factory with JSON schema factory behavior" do + _api, _state, factory = build_api_with_extension + + expect(factory).to be_a(FactoryExtension) + end + + it "initializes the JSON schema strictness defaults" do + _api, state, = build_api_with_extension + + expect(state.ingestion_serializer_state[:allow_omitted_json_schema_fields]).to eq(false) + expect(state.ingestion_serializer_state[:allow_extra_json_schema_fields]).to eq(true) + end + + it "preserves existing JSON schema strictness settings when extended" do + state = ::Data.define(:ingestion_serializer_state).new( + ingestion_serializer_state: { + allow_omitted_json_schema_fields: true, + allow_extra_json_schema_fields: false + } + ) + + factory = ::Object.new + + api = ::Object.new + 
api.instance_variable_set(:@state, state) + api.define_singleton_method(:factory) { factory } + api.extend(APIExtension) + + expect(state.ingestion_serializer_state[:allow_omitted_json_schema_fields]).to eq(true) + expect(state.ingestion_serializer_state[:allow_extra_json_schema_fields]).to eq(false) + end + + it "merges reserved type names when composed with another ingestion serializer extension" do + state = ::Data.define(:ingestion_serializer_state).new( + ingestion_serializer_state: { + reserved_type_names: Set["ReservedName"] + } + ) + + factory = ::Object.new + + api = ::Object.new + api.instance_variable_set(:@state, state) + api.define_singleton_method(:factory) { factory } + api.extend(APIExtension) + + expect(state.ingestion_serializer_state[:reserved_type_names]).to eq( + Set["ReservedName", EVENT_ENVELOPE_JSON_SCHEMA_NAME] + ) + end + + it "stores the JSON schema version and its setter location" do + api, state, = build_api_with_extension + + expect(api.json_schema_version(3)).to eq(nil) + expect(state.ingestion_serializer_state[:json_schema_version]).to eq(3) + expect(state.ingestion_serializer_state[:json_schema_version_setter_location]).to be_a(::Thread::Backtrace::Location) + end + + it "rejects invalid JSON schema versions" do + api, = build_api_with_extension + + expect { + api.json_schema_version(0) + }.to raise_error(Errors::SchemaError, /must be a positive integer/) + + expect { + api.json_schema_version("3") + }.to raise_error(Errors::SchemaError, /must be a positive integer/) + end + + it "rejects setting the JSON schema version more than once" do + api, = build_api_with_extension + api.json_schema_version(1) + + expect { + api.json_schema_version(2) + }.to raise_error(Errors::SchemaError, /can only be set once/) + end + + it "stores JSON schema strictness settings" do + api, state, = build_api_with_extension + + expect(api.json_schema_strictness(allow_omitted_fields: true, allow_extra_fields: false)).to eq(nil) + 
expect(state.ingestion_serializer_state[:allow_omitted_json_schema_fields]).to eq(true) + expect(state.ingestion_serializer_state[:allow_extra_json_schema_fields]).to eq(false) + end + + it "validates JSON schema strictness arguments" do + api, = build_api_with_extension + + expect { + api.json_schema_strictness(allow_omitted_fields: :sometimes) + }.to raise_error(Errors::SchemaError, /allow_omitted_fields/) + + expect { + api.json_schema_strictness(allow_extra_fields: :sometimes) + }.to raise_error(Errors::SchemaError, /allow_extra_fields/) + end + end + + RSpec.describe APIExtension, :json_ingestion_schema do + it "adds JSON schema generation and artifact dumping through schema definition extension hooks" do + results = define_json_ingestion_schema(reload_schema_artifacts: true, json_schema_version: nil) do |schema| + schema.json_schema_version 2 + schema.json_schema_strictness allow_omitted_fields: true, allow_extra_fields: false + + schema.object_type "Widget" do |type| + type.field "id", "ID!" + type.field "name", "String" + type.index "widgets" + end + end + + expect(results.available_json_schema_versions.to_a).to eq([2]) + expect(results.latest_json_schema_version).to eq(2) + + json_schema = results.json_schemas_for(2) + + expect(json_schema.fetch(JSON_SCHEMA_VERSION_KEY)).to eq(2) + expect(json_schema.fetch("$defs")).to include("ElasticGraphEventEnvelope") + expect(json_schema.dig("$defs", "Widget", "required")).to include("id") + end + + it "exposes the JSON schema version setter location on schema results" do + results = define_json_ingestion_schema(json_schema_version: nil) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |type| + type.field "id", "ID!" 
+ type.index "widgets" + end + end + + expect(results.json_schema_version_setter_location).to be_a(::Thread::Backtrace::Location) + end + + it "rejects user-defined scalar types without a JSON schema definition" do + expect { + define_json_ingestion_schema(json_schema_version: nil) do |schema| + schema.json_schema_version 2 + + schema.scalar_type "Url" do |type| + type.mapping type: "keyword" + end + end + }.to raise_error(Errors::SchemaError, /Scalar types require `json_schema` to be configured, but `Url` lacks `json_schema`/) + end + + it "supports enums whose input and output names are the same" do + results = define_json_ingestion_schema( + derived_type_name_formats: {InputEnum: "%{base}"}, + json_schema_version: nil + ) do |schema| + schema.json_schema_version 2 + + schema.enum_type "Color" do |type| + type.values "RED", "BLUE" + end + + schema.object_type "Widget" do |type| + type.field "id", "ID!" + type.field "color", "Color!" + type.index "widgets" + end + end + + expect(results.graphql_schema_string.scan(/^enum Color\b/)).to eq(["enum Color"]) + expect(results.json_schemas_for(2).dig("$defs", "Color")).to eq({ + "type" => "string", + "enum" => %w[RED BLUE] + }) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/factory_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/factory_extension_spec.rb new file mode 100644 index 000000000..360f2c392 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/factory_extension_spec.rb @@ -0,0 +1,91 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/factory_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe FactoryExtension do + let(:factory_class) do + base_class = ::Class.new do + def new_results + ::Object.new + end + + def new_schema_artifact_manager(*args, **kwargs) + @last_schema_artifact_manager_args = args + @last_schema_artifact_manager_kwargs = kwargs + ::Object.new + end + + attr_reader :last_schema_artifact_manager_args, :last_schema_artifact_manager_kwargs + end + + ::Class.new(base_class) do + prepend FactoryExtension + end + end + + it "extends results and schema artifact managers with JSON schema behavior" do + factory = factory_class.new + + expect(factory.new_results).to be_a(ResultsExtension) + + manager = factory.new_schema_artifact_manager(:positional, key: "value") + expect(manager).to be_a(SchemaArtifactManagerExtension) + expect(factory.last_schema_artifact_manager_args).to eq([:positional]) + expect(factory.last_schema_artifact_manager_kwargs).to eq({key: "value"}) + end + + it "extends schema elements even when no customization block is provided" do + base_class = ::Class.new do + def new_enum_type(name, &block) + build_type(name, &block) + end + + def new_interface_type(name, &block) + build_type(name, &block) + end + + def new_object_type(name, &block) + build_type(name, &block) + end + + def new_scalar_type(name, &block) + build_type(name, &block) + end + + def new_union_type(name, &block) + build_type(name, &block) + end + + private + + def build_type(name) + ::Object.new.tap do |type| + type.define_singleton_method(:name) { name } + yield type + end + end + end + + factory = ::Class.new(base_class) do + prepend FactoryExtension + end.new + + expect(factory.new_enum_type("Color")).to be_a(EnumTypeExtension) + expect(factory.new_interface_type("Node")).to be_a(ObjectInterfaceExtension) + expect(factory.new_object_type("Widget")).to 
be_a(ObjectInterfaceExtension) + expect(factory.new_scalar_type("Boolean")).to be_a(ScalarTypeExtension) + expect(factory.new_union_type("Result")).to be_a(UnionTypeExtension) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/field_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/field_extension_spec.rb new file mode 100644 index 000000000..c87e8189e --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/field_extension_spec.rb @@ -0,0 +1,29 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/field_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe FieldExtension do + it "returns nil when the core indexing field reference is unavailable" do + field_class = ::Class.new do + prepend FieldExtension + + def to_indexing_field_reference + nil + end + end + + expect(field_class.new.to_indexing_field_reference).to be_nil + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_reference_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_reference_spec.rb new file mode 100644 index 000000000..7924f6d87 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_reference_spec.rb @@ -0,0 +1,32 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+#
+# frozen_string_literal: true
+
+require "elastic_graph/json_ingestion/schema_definition/indexing/field_reference"
+
+module ElasticGraph
+  module JSONIngestion
+    module SchemaDefinition
+      module Indexing
+        RSpec.describe FieldReference do
+          it "returns nil when the wrapped field reference cannot be resolved" do
+            # Stand-in for a wrapped field reference whose `resolve` returns nil.
+            wrapped = ::Object.new.tap { |ref| ref.define_singleton_method(:resolve) { nil } }
+
+            reference_under_test = described_class.new(
+              field_reference: wrapped,
+              json_schema_layers: [],
+              json_schema_customizations: {}
+            )
+
+            expect(reference_under_test.resolve).to be_nil
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_spec.rb
new file mode 100644
index 000000000..9152ae21b
--- /dev/null
+++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_spec.rb
@@ -0,0 +1,29 @@
+# Copyright 2024 - 2026 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/json_ingestion/schema_definition/indexing/field"
+
+module ElasticGraph
+  module JSONIngestion
+    module SchemaDefinition
+      module Indexing
+        RSpec.describe Field do
+          it "returns nil for unexpected JSON schema layers" do
+            indexing_field = described_class.new(
+              ::Object.new,
+              json_schema_layers: [],
+              json_schema_customizations: {}
+            )
+            layer_result = indexing_field.send(:process_layer, :unexpected, {"type" => "string"})
+            expect(layer_result).to be_nil
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata_spec.rb
new file mode 100644
index 000000000..7e1a3a4ef
--- /dev/null
+++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata_spec.rb
@@ -0,0 +1,153 @@
+# Copyright 2024 - 2026 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/spec_support/schema_definition_helpers"
+
+module ElasticGraph
+  module JSONIngestion
+    module SchemaDefinition
+      ::RSpec.describe "JSON schema field metadata generation" do # Covers `json_schema_field_metadata_by_type_and_field_name`.
+        include_context "SchemaDefinitionHelpers"
+
+        it "generates no field metadata for built-in scalar and enum types" do
+          metadata_by_type_and_field_name = dump_metadata
+
+          json_schema_field_metadata = %w[
+            Boolean Float ID Int String
+            Cursor Date DateTime DistanceUnit JsonSafeLong LocalTime LongString TimeZone Untyped
+          ].map do |type_name|
+            metadata_by_type_and_field_name.fetch(type_name)
+          end
+
+          expect(json_schema_field_metadata).to all eq({})
+        end
+
+        it "generates field metadata for built-in object types" do
+          metadata_by_field_name = dump_metadata.fetch("GeoLocation")
+
+          expect(metadata_by_field_name).to eq({
+            "latitude" => field_meta_of("Float!", "lat"),
+            "longitude" => field_meta_of("Float!", "lon")
+          })
+        end
+
+        it "generates field metadata for user-defined object types" do
+          metadata_by_field_name = dump_metadata do |schema|
+            schema.object_type "Money" do |t|
+              t.field "amount", "Int"
+              t.field "currency", "String"
+            end
+          end.fetch("Money")
+
+          expect(metadata_by_field_name).to eq({
+            "amount" => field_meta_of("Int", "amount"),
+            "currency" => field_meta_of("String", "currency")
+          })
+        end
+
+        it "respects the type and `name_in_index` on user-defined fields" do
+          metadata_by_field_name = dump_metadata do |schema|
+            schema.object_type "Money" do |t|
+              t.field "amount", "Int!", name_in_index: "amount2"
+              t.field "currency", "[String]!", name_in_index: "currency2"
+            end
+          end.fetch("Money")
+
+          expect(metadata_by_field_name).to eq({
+            "amount" => field_meta_of("Int!", "amount2"),
+            "currency" => field_meta_of("[String]!", "currency2")
+          })
+        end
+
+        it "generates no field metadata for user-defined scalar or enum types since they have no subfields" do
+          metadata_by_type_and_field_name = dump_metadata do |schema|
+            schema.scalar_type "Url" do |t|
+              t.json_schema type: "string"
+              t.mapping type: "keyword"
+            end
+
+            schema.enum_type "Color" do |t|
+              t.value "RED"
+              t.value "GREEN"
+              t.value "BLUE"
+            end
+          end
+
+          json_schema_field_metadata = %w[Url Color].map do |type_name|
+            metadata_by_type_and_field_name.fetch(type_name)
+          end
+
+          expect(json_schema_field_metadata).to all eq({})
+        end
+
+        it "generates no field metadata for user-defined union or interface types" do # NOTE(review): presumably abstract types have no JSON schema properties of their own — confirm.
+          metadata_by_type_and_field_name = dump_metadata do |schema|
+            schema.interface_type "Named" do |t|
+              t.field "name", "String"
+            end
+
+            schema.union_type "Character" do |t|
+              t.subtype "Droid"
+              t.subtype "Human"
+            end
+
+            schema.object_type "Droid" do |t|
+              t.implements "Named"
+              t.field "name", "String"
+              t.field "model", "String"
+            end
+
+            schema.object_type "Human" do |t|
+              t.implements "Named"
+              t.field "name", "String"
+              t.field "home_planet", "String"
+            end
+          end
+
+          json_schema_field_metadata = %w[Named Character].map do |type_name|
+            metadata_by_type_and_field_name.fetch(type_name)
+          end
+
+          expect(json_schema_field_metadata).to all eq({})
+        end
+
+        it "includes the JSON schema field metadata in the versioned JSON schemas but not in the current public JSON schema" do
+          results = define_schema do |schema|
+            schema.object_type "Money" do |t|
+              t.field "amount", "Int"
+              t.field "currency", "String"
+            end
+          end
+
+          amount_path = ["$defs", "Money", "properties", "amount"]
+
+          expect(results.json_schemas_for(1).dig(*amount_path)).to eq({
+            "anyOf" => [{"$ref" => "#/$defs/Int"}, {"type" => "null"}],
+            "ElasticGraph" => {"nameInIndex" => "amount", "type" => "Int"}
+          })
+
+          expect(results.current_public_json_schema.dig(*amount_path)).to eq({
+            "anyOf" => [{"$ref" => "#/$defs/Int"}, {"type" => "null"}]
+          })
+        end
+
+        def dump_metadata(&schema_definition) # Defines a schema and returns its field metadata keyed by type name, then field name.
+          define_schema(&schema_definition).json_schema_field_metadata_by_type_and_field_name
+        end
+
+        def define_schema(&schema_definition) # Pins `schema_element_name_form` to "snake_case" and delegates via `super`.
+          super(schema_element_name_form: "snake_case", &schema_definition)
+        end
+
+        def field_meta_of(type, name_in_index) # Builds the expected metadata value used in the comparisons above.
+          Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index)
+        end
+      end
+    end
+  end
+end
diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb
new file mode 100644
index 000000000..e3091e7c8
--- /dev/null
+++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb
@@ -0,0 +1,1072 @@
+# Copyright 2024 - 2026 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata"
+require "elastic_graph/spec_support/schema_definition_helpers"
+
+module ElasticGraph
+  module JSONIngestion
+    module SchemaDefinition
+      module Indexing
+        ::RSpec.describe JSONSchemaWithMetadata do
+          include_context "SchemaDefinitionHelpers"
+
+          it "ignores derived indexed types that do not show up in the JSON schema" do
+            v1_json_schema = dump_versioned_json_schema do |schema|
+              schema.json_schema_version 1
+
+              schema.object_type "Widget" do |t|
+                t.field "amount", "Float"
+                t.field "cost_currency", "String"
+                t.field "cost_currency_name", "String"
+                t.derive_indexed_type_fields "WidgetCurrency", from_id: "cost_currency" do |derive|
+                  derive.immutable_value "name", from: "cost_currency_name"
+                end
+              end
+
+              schema.object_type "WidgetCurrency" do |t|
+                t.field "id", "ID!"
+ t.field "name", "String" + t.index "widget_currencies" + end + end + + expect(v1_json_schema.fetch("$defs").keys).to include("Widget").and exclude("WidgetCurrency") + end + + context "when merged into an old versioned JSON schema" do + it "maintains the same metadata when a field has not changed" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + expect( + metadata_for(v1_json_schema, "Widget", "amount") + ).to eq(metadata_for(updated_v1_json_schema, "Widget", "amount")).and have_dumped_metadata("amount", "Float") + end + + it "does not record metadata on the `__typename` field since it has special handling in our indexing logic" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + expect( + v1_json_schema.dig("$defs", "Widget", "properties", "__typename").keys + ).to eq(updated_v1_json_schema.dig("$defs", "Widget", "properties", "__typename").keys).and exclude("ElasticGraph") + end + + it "records a changed field `type` so that the correct indexing preparer gets used when events at the old version are ingested" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field 
"amount", "Int" + end + end + + expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Int") + end + + it "records a changed field `name_in_index` so that the field gets written to the correct field in the index" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "description", "String" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "description", "String", name_in_index: "description_text" do |f| + f.mapping type: "text" + end + end + end + + expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") + expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to have_dumped_metadata("description_text", "String") + end + + it "notifies of an issue when a field has been deleted or renamed without recording what happened" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "description", "String" + end + end + + missing_fields = dump_versioned_json_schema_missing_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "full_description", "String", name_in_index: "description" + end + end + + expect(missing_fields).to contain_exactly("Widget.description", "Widget.id") + end + + it "supports renamed fields when `renamed_from` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "description", "String" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do 
|schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "full_description", "String!", name_in_index: "description" do |f| + f.renamed_from "description" + end + end + end + + expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") + expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String!") + end + + it "supports deleted fields when `deleted_field` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "description", "String" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.deleted_field "description" + end + end + + expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") + expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to eq nil + end + + it "notifies of an issue when a type has been deleted or renamed without recording what happened" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Options" do |t| + t.field "size", "Int" + end + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + missing_types = dump_versioned_json_schema_missing_types(v1_json_schema) do |schema| + schema.json_schema_version 2 + + # Widget has been renamed to `Component`. 
+ schema.object_type "Component" do |t| + t.field "amount", "Float" + end + end + + expect(missing_types).to contain_exactly("Options", "Widget") + end + + it "supports renamed types when `renamed_from` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "amount", "Int", name_in_index: "amount_int" + t.renamed_from "Widget" + end + end + + expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount_int", "Int") + end + + it "supports deleted types when `deleted_type` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "id", "ID" + end + + schema.deleted_type "Widget" + end + + expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to eq(nil) + end + + it "supports deleted and renamed fields on a renamed type so long as these are indicated through `deleted_` and `renamed_` API calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "String" + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + 
t.renamed_from "Widget" + + t.field "id", "ID" do |f| + f.renamed_from "token" + end + + t.deleted_field "amount" + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "token")).to have_dumped_metadata("id", "ID") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to eq(nil) + end + + it "keeps track of unused `deleted_field` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.deleted_field "token" # used + t.deleted_field "other" # unused + end + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`type.deleted_field "other"` at #{__FILE__}:#{__LINE__ - 5}) + ] + end + + it "keeps track of unused `renamed_field` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" do |f| + f.renamed_from "token" # used + f.renamed_from "other" # unused + end + end + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`field.renamed_from "other"` at #{__FILE__}:#{__LINE__ - 6}) + ] + end + + it "keeps track of unused `deleted_type` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + 
schema.deleted_type "Widget" # used + schema.deleted_type "Other" # unused + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`schema.deleted_type "Other"` at #{__FILE__}:#{__LINE__ - 4}) + ] + end + + it "keeps track of unused `renamed_type` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "token", "ID" + t.renamed_from "Widget" # used + t.renamed_from "Other" # unused + end + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`type.renamed_from "Other"` at #{__FILE__}:#{__LINE__ - 5}) + ] + end + + context "on a type that is using `route_with`" do + it "does not allow a `route_with` field to be entirely missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID" + t.deleted_field "workspace_id" + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.workspace_id2")] + end + + it "uses the `name_in_index` when determining if a `route_with` field is missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + 
t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID", name_in_index: "workspace_id3" + t.deleted_field "workspace_id" + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.workspace_id3")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID", name_in_index: "workspace_id" do |f| + f.renamed_from "workspace_id" + end + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to include("nameInIndex" => "workspace_id") + end + + it "handles embedded fields when determining if a `route_with` field is missing from an old schema version" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded", "Embedded" + + t.index "widgets" do |f| + f.route_with "embedded.workspace_id" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" + t.deleted_field "embedded" + + t.index "widgets" do |f| + f.route_with "embedded2.workspace_id" + end + end + end + + 
expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.embedded2.workspace_id")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" do |f| + f.renamed_from "embedded" + end + + t.index "widgets" do |f| + f.route_with "embedded2.workspace_id" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "embedded")).to include("nameInIndex" => "embedded2") + end + + it "handles renamed types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to include("nameInIndex" => "workspace_id") + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID" + t.deleted_field "workspace_id" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget2.workspace_id2")] + end + + it "handles deleted types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type 
"Widget" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.deleted_type "Widget" + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget2", "workspace_id")).to eq nil + expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to eq nil + end + end + + context "on a type using `rollover`" do + it "does not allow a `rollover` field to be entirely missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime", name_in_index: "created_at3" + t.deleted_field "created_at" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.created_at3")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime", name_in_index: "created_at" do |f| + f.renamed_from "created_at" + end + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to 
include("nameInIndex" => "created_at") + end + + it "uses the `name_in_index` when determining if a `rollover` field is missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime" + t.deleted_field "created_at" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.created_at2")] + end + + it "handles embedded fields when determining if a `rollover` field is missing from an old schema version" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Embedded" do |t| + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded", "Embedded" + + t.index "widgets" do |f| + f.rollover :yearly, "embedded.created_at" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" + t.deleted_field "embedded" + + t.index "widgets" do |f| + f.rollover :yearly, "embedded2.created_at" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.embedded2.created_at")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do 
|schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" do |f| + f.renamed_from "embedded" + end + + t.index "widgets" do |f| + f.rollover :yearly, "embedded2.created_at" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "embedded")).to include("nameInIndex" => "embedded2") + end + + it "handles renamed types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to include("nameInIndex" => "created_at") + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime" + t.deleted_field "created_at" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget2.created_at2")] + end + + it "handles deleted types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + 
updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.deleted_type "Widget" + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget2", "created_at")).to eq nil + expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to eq nil + end + end + + describe "conflicting definition tracking" do + it "includes a type that exists and is referenced from `deleted_type`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + + schema.deleted_type "Widget" + end + + expect(elements.map(&:description)).to contain_exactly( + %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 4}) + ) + end + + it "includes a type that exists and is referenced from `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + + schema.object_type "Component" do |t| + t.field "id", "ID" + t.renamed_from "Widget" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a type that exists and is referenced from `deleted_type` and `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + + schema.object_type "Component" do |t| + t.field "id", "ID" + t.renamed_from "Widget" + end + + schema.deleted_type "Widget" + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 7}), + %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a type that is 
referenced from `deleted_type` and `renamed_from` but does not exist" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + elements = dump_versioned_json_schema_definition_conflicts(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "id", "ID" + t.renamed_from "Widget" + end + + schema.deleted_type "Widget" + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 7}), + %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a field that exists and is referenced from `deleted_field`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.deleted_field "id" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a field that exists and is referenced from `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "token", "ID" do |f| + f.renamed_from "id" + end + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 6}) + ) + end + + it "includes a field that exists and is referenced from `deleted_field` and `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "token", "ID" do |f| + f.renamed_from "id" + end + t.deleted_field "id" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}), + %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 8}) + ) 
+ end + + it "includes a field that is referenced from `deleted_field` and `renamed_from` but does not exist" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + end + + elements = dump_versioned_json_schema_definition_conflicts(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "token", "ID" do |f| + f.renamed_from "id" + end + t.deleted_field "id" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}), + %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 8}) + ) + end + end + end + + def dump_versioned_json_schema(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).to be_empty + + merge_result.json_schema + end + + def dump_versioned_json_schema_missing_fields(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).not_to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).to be_empty + + merge_result.missing_fields + end + + def dump_versioned_json_schema_definition_conflicts(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).not_to be_empty + expect(merge_result.missing_necessary_fields).to 
be_empty + + merge_result.definition_conflicts + end + + def dump_versioned_json_schema_missing_types(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).not_to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).to be_empty + + merge_result.missing_types + end + + def dump_versioned_json_schema_missing_necessary_fields(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).not_to be_empty + + merge_result.missing_necessary_fields + end + + def dump_versioned_json_schema_unused_deprecated_elements(old_versioned_json_schema = nil, &schema_definition) + results = define_schema(&schema_definition) + results.merge_field_metadata_into_json_schema(old_versioned_json_schema || results.current_public_json_schema) + results.unused_deprecated_elements + end + + def perform_merge(old_versioned_json_schema = nil, &schema_definition) + results = define_schema(&schema_definition) + results.merge_field_metadata_into_json_schema(old_versioned_json_schema || results.current_public_json_schema).tap do + expect(results.unused_deprecated_elements).to be_empty + end + end + + def metadata_for(json_schema, type, field) + json_schema.dig("$defs", type, "properties", field, "ElasticGraph") + end + + def define_schema(&schema_definition) + super(schema_element_name_form: "snake_case", &schema_definition) + end + + def have_dumped_metadata(name_in_index, type) + eq({"nameInIndex" => name_in_index, "type" => type}) + end + + def missing_necessary_field_of(field_type, fully_qualified_path) + 
JSONSchemaWithMetadata::MissingNecessaryField.new(field_type, fully_qualified_path) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb new file mode 100644 index 000000000..aa69cd2aa --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb @@ -0,0 +1,132 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" +require "elastic_graph/spec_support/schema_definition_helpers" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe JSONSchemaPruner do + include_context "SchemaDefinitionHelpers" + + describe ".prune" do + subject { described_class.prune(schema) } + + shared_examples "prunes types not referenced by indexed types" do |expected_type_names| + it do + expect(subject["$defs"].keys).to match_array(expected_type_names) + end + end + + context "when there are indexable types" do + let(:schema) do + dump_schema do |s| + # Widget and Boolean should be present + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "inStock", "Boolean" + t.index "widgets" + end + + # UnindexedWidget and Float should get pruned + s.object_type "UnindexedWidget" do |t| + t.field "id", "ID!" 
+ t.field "cost", "Float" + end + end + end + + it_behaves_like "prunes types not referenced by indexed types", + [EVENT_ENVELOPE_JSON_SCHEMA_NAME, "Boolean", "ID", "Widget"] + end + + context "when there are no types defined" do + let(:schema) { dump_schema } + + it_behaves_like "prunes types not referenced by indexed types", [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + end + + context "when there are no indexable types defined" do + let(:schema) do + dump_schema do |s| + # UnindexedWidget and Float should get pruned + s.object_type "UnindexedWidget" do |t| + t.field "id", "ID!" + t.field "cost", "Float" + end + end + end + + it_behaves_like "prunes types not referenced by indexed types", [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + end + + context "when there are nested types referenced from an indexed type" do + let(:schema) do + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "options", "WidgetOptions" + t.index "widgets" + end + + s.object_type "WidgetOptions" do |t| + t.field "size", "Size" + t.field "color", "Color" + t.field "cost", "Money" + end + + s.enum_type "Size" do |t| + t.value "SMALL" + t.value "MEDIUM" + t.value "LARGE" + end + + s.enum_type "Color" do |t| + t.value "RED" + t.value "YELLOW" + t.value "BLUE" + end + + s.object_type "Money" do |t| + t.field "currency", "Currency" + t.field "amount_cents", "Int" + end + + s.enum_type "Currency" do |t| + t.value "USD" + t.value "CAD" + end + end + end + + it_behaves_like "prunes types not referenced by indexed types", [ + EVENT_ENVELOPE_JSON_SCHEMA_NAME, + "Color", + "Currency", + "ID", + "Int", + "Money", + "Size", + "Widget", + "WidgetOptions" + ] + end + end + + def dump_schema(&schema_definition) + schema_definition_results = define_schema(schema_element_name_form: "snake_case", &schema_definition) + latest_json_schema_version = schema_definition_results.latest_json_schema_version + + schema_definition_results.json_schemas_for(latest_json_schema_version) + end + end + end + end 
+end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb similarity index 100% rename from elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb rename to elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb new file mode 100644 index 000000000..2bb0efdd1 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb @@ -0,0 +1,376 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension" +require "stringio" +require "tmpdir" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe SchemaArtifactManagerExtension do + let(:yaml_artifact_class) do + ::Data.define( + :path, + :desired_contents, + :existing_dumped_contents, + :out_of_date_value, + :extra_comment_lines + ) do + def out_of_date? 
+ out_of_date_value + end + end + end + + let(:deprecated_element_class) do + ::Data.define(:name, :description, :defined_at) + end + + let(:defined_at_class) do + ::Data.define(:path, :lineno) + end + + let(:missing_necessary_field_class) do + ::Data.define(:fully_qualified_path, :field_type) + end + + let(:merged_schema_class) do + ::Data.define( + :json_schema_version, + :json_schema, + :missing_fields, + :missing_types, + :missing_necessary_fields, + :definition_conflicts + ) + end + + let(:fake_manager_results_class) do + ::Class.new do + attr_accessor :current_public_json_schema, :json_schema_version_setter_location, :unused_deprecated_elements + + def merge_field_metadata_into_json_schema(_json_schema) + end + end + end + + def build_manager(schema_definition_results:, enforce_json_schema_version:, schema_artifacts_directory:, artifacts_by_path:, output:) + yaml_artifact_class = self.yaml_artifact_class + + base_class = ::Class.new do + attr_reader :schema_definition_results, :new_yaml_artifact_calls + + def initialize(schema_definition_results, enforce_json_schema_version, schema_artifacts_directory, artifacts_by_path, output) + @schema_definition_results = schema_definition_results + @enforce_json_schema_version = enforce_json_schema_version + @schema_artifacts_directory = schema_artifacts_directory + @artifacts_by_path = artifacts_by_path + @output = output + @new_yaml_artifact_calls = [] + end + + def dump_artifacts + :base_dump + end + + private + + def artifacts_from_schema_def + [:base_artifact] + end + + define_method(:new_yaml_artifact) do |path, contents, extra_comment_lines:| + @new_yaml_artifact_calls << { + path: path, + contents: contents, + extra_comment_lines: extra_comment_lines + } + + @artifacts_by_path.fetch(path) do + yaml_artifact_class.new( + path: path, + desired_contents: contents, + existing_dumped_contents: nil, + out_of_date_value: false, + extra_comment_lines: extra_comment_lines + ) + end + end + end + + ::Class.new(base_class) 
do + prepend SchemaArtifactManagerExtension + end.new( + schema_definition_results, + enforce_json_schema_version, + schema_artifacts_directory, + artifacts_by_path, + output + ) + end + + before do + allow(ElasticGraph::JSONIngestion::SchemaDefinition::JSONSchemaPruner).to receive(:prune) { |json_schema| json_schema } + end + + it "warns when a version bump is needed but enforcement is disabled" do + output = ::StringIO.new + public_schema = {JSON_SCHEMA_VERSION_KEY => 2} + artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: public_schema, + existing_dumped_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + out_of_date_value: true, + extra_comment_lines: [] + ) + results = instance_double(fake_manager_results_class, current_public_json_schema: public_schema, unused_deprecated_elements: []) + + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + artifacts_by_path: {JSON_SCHEMAS_FILE => artifact}, + output: output + ) + + expect(manager.dump_artifacts).to eq(:base_dump) + expect(output.string).to include("WARNING: the `json_schemas.yaml` artifact is being updated") + end + + it "aborts when a version bump is needed and enforcement is enabled" do + output = ::StringIO.new + public_schema = {JSON_SCHEMA_VERSION_KEY => 2} + artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: public_schema, + existing_dumped_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + out_of_date_value: true, + extra_comment_lines: [] + ) + location = instance_double(::Thread::Backtrace::Location, absolute_path: __FILE__, lineno: 123) + results = instance_double( + fake_manager_results_class, + current_public_json_schema: public_schema, + json_schema_version_setter_location: location, + unused_deprecated_elements: [] + ) + + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: true, + schema_artifacts_directory: 
::Dir.mktmpdir, + artifacts_by_path: {JSON_SCHEMAS_FILE => artifact}, + output: output + ) + manager.define_singleton_method(:abort) do |message| + raise message + end + + expect { + manager.dump_artifacts + }.to raise_error(RuntimeError, /schema\.json_schema_version 3/) + end + + it "yields only when a dumped schema is out of date and its version is not newer" do + output = ::StringIO.new + manager = build_manager( + schema_definition_results: instance_double(fake_manager_results_class), + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + artifacts_by_path: {}, + output: output + ) + + current_artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + existing_dumped_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + out_of_date_value: true, + extra_comment_lines: [] + ) + manager.define_singleton_method(:json_schemas_artifact) { current_artifact } + + yielded_versions = [] + manager.send(:check_if_needs_json_schema_version_bump) do |recommended_version| + yielded_versions << recommended_version + end + expect(yielded_versions).to eq([3]) + + clean_artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: {JSON_SCHEMA_VERSION_KEY => 3}, + existing_dumped_contents: nil, + out_of_date_value: false, + extra_comment_lines: [] + ) + manager.define_singleton_method(:json_schemas_artifact) { clean_artifact } + + expect { + manager.send(:check_if_needs_json_schema_version_bump) { raise "should not yield" } + }.not_to raise_error + end + + it "builds public and versioned JSON schema artifacts alongside base artifacts" do + output = ::StringIO.new + schema_artifacts_directory = ::Dir.mktmpdir + ::Dir.mkdir(::File.join(schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY)) + ::File.write( + ::File.join(schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v1.yaml"), + <<~YAML + --- + json_schema_version: 1 + YAML + ) + + public_schema = 
{JSON_SCHEMA_VERSION_KEY => 2} + merged_v1 = merged_schema_class.new( + json_schema_version: 1, + json_schema: {JSON_SCHEMA_VERSION_KEY => 1}, + missing_fields: [], + missing_types: [], + missing_necessary_fields: [], + definition_conflicts: [] + ) + merged_v2 = merged_schema_class.new( + json_schema_version: 2, + json_schema: {JSON_SCHEMA_VERSION_KEY => 2}, + missing_fields: [], + missing_types: [], + missing_necessary_fields: [], + definition_conflicts: [] + ) + + results = instance_double( + fake_manager_results_class, + current_public_json_schema: public_schema, + merge_field_metadata_into_json_schema: nil, + unused_deprecated_elements: [] + ) + expect(results).to receive(:merge_field_metadata_into_json_schema).with({JSON_SCHEMA_VERSION_KEY => 1}).and_return(merged_v1) + expect(results).to receive(:merge_field_metadata_into_json_schema).with(public_schema).and_return(merged_v2) + + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: false, + schema_artifacts_directory: schema_artifacts_directory, + artifacts_by_path: {}, + output: output + ) + + artifacts = manager.send(:artifacts_from_schema_def) + + expect(artifacts.first).to eq(:base_artifact) + expect(artifacts.drop(1).map(&:path)).to contain_exactly( + JSON_SCHEMAS_FILE, + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v1.yaml"), + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v2.yaml") + ) + expect(manager.new_yaml_artifact_calls.map { |call| call[:path] }).to include( + JSON_SCHEMAS_FILE, + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v1.yaml"), + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v2.yaml") + ) + end + + it "reports merge errors for missing fields, missing types, missing necessary fields, and conflicts" do + output = ::StringIO.new + manager = build_manager( + schema_definition_results: instance_double(fake_manager_results_class, unused_deprecated_elements: []), + enforce_json_schema_version: false, + schema_artifacts_directory: 
::Dir.mktmpdir, + artifacts_by_path: {}, + output: output + ) + manager.define_singleton_method(:abort) do |message| + raise message + end + + missing_necessary_field = missing_necessary_field_class.new( + fully_qualified_path: "Widget.metadata.currency", + field_type: "routing" + ) + conflict_a = deprecated_element_class.new( + name: "Widget", + description: "schema.object_type \"Widget\"", + defined_at: defined_at_class.new(path: "config/schema/widget.rb", lineno: 12) + ) + conflict_b = deprecated_element_class.new( + name: "Widget", + description: "schema.deleted_type \"Widget\"", + defined_at: defined_at_class.new(path: "config/schema/deleted_widget.rb", lineno: 4) + ) + merged_result = merged_schema_class.new( + json_schema_version: 3, + json_schema: {JSON_SCHEMA_VERSION_KEY => 3}, + missing_fields: ["Widget.old_name"], + missing_types: ["OldWidget"], + missing_necessary_fields: [missing_necessary_field], + definition_conflicts: [conflict_a, conflict_b] + ) + + expect { + manager.send(:report_json_schema_merge_errors, [merged_result]) + }.to raise_error( + RuntimeError, + /field\.renamed_from "old_name".*schema\.deleted_type "OldWidget".*field has been renamed.*The schema definition of `Widget` has conflicts/m + ) + end + + it "reports warnings for unused deprecated elements" do + output = ::StringIO.new + unused_a = deprecated_element_class.new( + name: "Widget", + description: "schema.deleted_field \"old_name\"", + defined_at: defined_at_class.new(path: "config/schema/widget.rb", lineno: 20) + ) + unused_b = deprecated_element_class.new( + name: "Widget", + description: "schema.deleted_type \"LegacyWidget\"", + defined_at: defined_at_class.new(path: "config/schema/legacy_widget.rb", lineno: 5) + ) + results = instance_double(fake_manager_results_class, unused_deprecated_elements: [unused_a, unused_b]) + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + 
artifacts_by_path: {}, + output: output + ) + + manager.send(:report_json_schema_merge_warnings) + + expect(output.string).to include( + "The schema definition has 2 unneeded reference(s) to deprecated schema elements.", + "1. schema.deleted_type \"LegacyWidget\"", + "2. schema.deleted_field \"old_name\"" + ) + end + + it "formats JSON schema version descriptions and noun helpers" do + output = ::StringIO.new + manager = build_manager( + schema_definition_results: instance_double(fake_manager_results_class, unused_deprecated_elements: []), + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + artifacts_by_path: {}, + output: output + ) + + expect(manager.send(:describe_json_schema_versions, [7], "and")).to eq("JSON schema version 7") + expect(manager.send(:describe_json_schema_versions, [7, 8], "and")).to eq("JSON schema versions 7 and 8") + expect(manager.send(:describe_json_schema_versions, [7, 8, 9], "or")).to eq("JSON schema versions 7, 8, or 9") + expect(manager.send(:old_versions, [7])).to eq("this old version") + expect(manager.send(:old_versions, [7, 8])).to eq("these old versions") + expect(manager.send(:files_noun_phrase, [7])).to eq("its file") + expect(manager.send(:files_noun_phrase, [7, 8])).to eq("their files") + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion_spec.rb new file mode 100644 index 000000000..117d17981 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion_spec.rb @@ -0,0 +1,17 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion" + +module ElasticGraph + RSpec.describe JSONIngestion do + it "exposes the expected module name" do + expect(described_class.name).to eq "ElasticGraph::JSONIngestion" + end + end +end diff --git a/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb b/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb index 044465c40..d4fcd5206 100644 --- a/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb +++ b/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb @@ -213,6 +213,23 @@ class RakeTasks < ::Rake::TaskLib # @dynamic schema_definition_extension_modules, schema_definition_extension_modules= attr_accessor :schema_definition_extension_modules + # List of Ruby modules implementing the ingestion serializer portion of the schema definition API. Defaults to the built-in JSON + # Schema serializer for backward compatibility, but can be set to `[]` to disable it or replaced with a different serializer + # extension. + # + # @return [Array] list of ingestion serializer extension modules + # + # @example Disable the default JSON Schema serializer + # ElasticGraph::Local::RakeTasks.new( + # local_config_yaml: "config/settings/local.yaml", + # path_to_schema: "config/schema.rb" + # ) do |tasks| + # tasks.schema_definition_ingestion_serializer_extension_modules = [] + # end + # + # @dynamic schema_definition_ingestion_serializer_extension_modules, schema_definition_ingestion_serializer_extension_modules= + attr_accessor :schema_definition_ingestion_serializer_extension_modules + # Whether or not to enforce the requirement that the JSON schema version is incremented every time # dumping the JSON schemas results in a changed artifact. Defaults to `true`. 
# @@ -362,6 +379,7 @@ def initialize(local_config_yaml:, path_to_schema:) self.type_name_overrides = {} self.enum_value_overrides_by_type = {} self.schema_definition_extension_modules = [] + self.schema_definition_ingestion_serializer_extension_modules = SchemaDefinition::ExtensionModuleSupport.default_ingestion_serializer_extension_modules self.enforce_json_schema_version = true self.env_port_mapping = {} self.output = $stdout @@ -394,6 +412,7 @@ def initialize(local_config_yaml:, path_to_schema:) type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type, extension_modules: schema_definition_extension_modules, + ingestion_serializer_extension_modules: schema_definition_ingestion_serializer_extension_modules, enforce_json_schema_version: enforce_json_schema_version, output: output ) diff --git a/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs b/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs index f8e0ce2bd..3f1ebc6c4 100644 --- a/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs +++ b/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs @@ -8,6 +8,7 @@ module ElasticGraph attr_accessor type_name_overrides: ::Hash[::Symbol, ::String] attr_accessor enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]] attr_accessor schema_definition_extension_modules: ::Array[::Module] + attr_accessor schema_definition_ingestion_serializer_extension_modules: ::Array[::Module] attr_accessor enforce_json_schema_version: bool attr_accessor elasticsearch_versions: ::Array[::String] attr_accessor opensearch_versions: ::Array[::String] diff --git a/elasticgraph-local/spec/unit/elastic_graph/local/rake_tasks_spec.rb b/elasticgraph-local/spec/unit/elastic_graph/local/rake_tasks_spec.rb index 665644da3..68286620e 100644 --- a/elasticgraph-local/spec/unit/elastic_graph/local/rake_tasks_spec.rb +++ b/elasticgraph-local/spec/unit/elastic_graph/local/rake_tasks_spec.rb @@ -44,6 +44,22 @@ module Local 
expect(output).to include(expected_snippet_1, expected_snippet_2, expected_snippet_3) end + it "passes configured ingestion serializer extensions through to schema definition rake tasks" do + allow(Admin::RakeTasks).to receive(:from_yaml_file) + allow(SchemaDefinition::RakeTasks).to receive(:new) + + RakeTasks.new( + local_config_yaml: config_dir / "settings" / "development.yaml", + path_to_schema: config_dir / "schema.rb" + ) do |tasks| + tasks.schema_definition_ingestion_serializer_extension_modules = [] + end + + expect(SchemaDefinition::RakeTasks).to have_received(:new).with(hash_including( + ingestion_serializer_extension_modules: [] + )) + end + context "when the local config file configures an `elasticsearch` backend" do it "defines elasticsearch tasks" do output = run_rake_with_yaml_changes "-T" do |config| diff --git a/elasticgraph-schema_definition/README.md b/elasticgraph-schema_definition/README.md index b85f79b9e..5c06af483 100644 --- a/elasticgraph-schema_definition/README.md +++ b/elasticgraph-schema_definition/README.md @@ -21,6 +21,9 @@ graph LR; elasticgraph-indexer["elasticgraph-indexer"]; elasticgraph-schema_definition --> elasticgraph-indexer; class elasticgraph-indexer otherEgGemStyle; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; + class elasticgraph-json_ingestion otherEgGemStyle; elasticgraph-schema_artifacts["elasticgraph-schema_artifacts"]; elasticgraph-schema_definition --> elasticgraph-schema_artifacts; class elasticgraph-schema_artifacts otherEgGemStyle; diff --git a/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec b/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec index 740a1a088..175a91285 100644 --- a/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec +++ b/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec @@ -43,6 +43,7 @@ Gem::Specification.new do |spec| 
spec.add_dependency "elasticgraph-graphql", ElasticGraph::VERSION # needed since we validate that scalar `coerce_with` options are valid (which loads scalar coercion adapters) spec.add_dependency "elasticgraph-indexer", ElasticGraph::VERSION # needed since we validate that scalar `prepare_for_indexing_with` options are valid (which loads indexing preparer adapters) + spec.add_dependency "elasticgraph-json_ingestion", ElasticGraph::VERSION # JSON ingestion serializer, auto-applied by default for backward compatibility spec.add_dependency "elasticgraph-schema_artifacts", ElasticGraph::VERSION spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION spec.add_dependency "graphql", "~> 2.5.22" diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb index 4b61944c2..ad1558008 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb @@ -9,6 +9,7 @@ require "elastic_graph/errors" require "elastic_graph/schema_artifacts/runtime_metadata/extension" require "elastic_graph/schema_artifacts/runtime_metadata/graphql_resolver" +require "elastic_graph/schema_definition/extension_module_support" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" require "elastic_graph/schema_definition/results" require "elastic_graph/schema_definition/state" @@ -60,6 +61,7 @@ def initialize( schema_elements, index_document_sizes, extension_modules: [], + ingestion_serializer_extension_modules: ExtensionModuleSupport.default_ingestion_serializer_extension_modules, derived_type_name_formats: {}, type_name_overrides: {}, enum_value_overrides_by_type: {}, @@ -77,7 +79,12 @@ def initialize( @factory = @state.factory - extension_modules.each { |mod| extend(mod) } + api_extension_modules = ExtensionModuleSupport.build_api_extension_modules( + 
extension_modules: extension_modules, + ingestion_serializer_extension_modules: ingestion_serializer_extension_modules + ) + + api_extension_modules.each { |mod| extend(mod) } # These lines must come _after_ the extension modules are applied, so that the extension modules # have a chance to hook into the factory in order to customize built in types if desired. @@ -242,7 +249,7 @@ def union_type(name, &block) # ElasticGraph.define_schema do |schema| # schema.scalar_type "URL" do |t| # t.mapping type: "keyword" - # t.json_schema type: "string", format: "uri" + # t.json_schema type: "string" # end # end def scalar_type(name, &block) @@ -401,69 +408,6 @@ def results @results ||= @factory.new_results end - # Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema - # artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique - # version number. The publisher will then include this version number in published events to identify the version of the schema it - # was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync. - # - # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying constantly - # have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this - # on every JSON schema change by setting `enforce_json_schema_version` to `false` in your `Rakefile`. 
- # - # @param version [Integer] current version number of the JSON schema artifact - # @return [void] - # @see Local::RakeTasks#enforce_json_schema_version - # - # @example Set the JSON schema version to 1 - # ElasticGraph.define_schema do |schema| - # schema.json_schema_version 1 - # end - def json_schema_version(version) - if !version.is_a?(Integer) || version < 1 - raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}" - end - - if @state.json_schema_version - raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{@state.json_schema_version}" - end - - @state.json_schema_version = version - @state.json_schema_version_setter_location = caller_locations(1, 1).to_a.first - nil - end - - # Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the - # publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to - # configure this behavior. - # - # @param allow_omitted_fields [bool] Whether nullable fields can be omitted from indexing events. - # @param allow_extra_fields [bool] Whether extra fields (e.g. beyond fields defined in the schema) can be included in indexing events. - # @return [void] - # - # @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled - # field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher - # accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId` - # is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of - # these to `true` (or none). 
- # - # @example Allow omitted fields and disallow extra fields - # ElasticGraph.define_schema do |schema| - # schema.json_schema_strictness allow_omitted_fields: true, allow_extra_fields: false - # end - def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true) - unless [true, false].include?(allow_omitted_fields) - raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" - end - - unless [true, false].include?(allow_extra_fields) - raise Errors::SchemaError, "`allow_extra_fields` must be true or false" - end - - @state.allow_omitted_json_schema_fields = allow_omitted_fields - @state.allow_extra_json_schema_fields = allow_extra_fields - nil - end - # Registers a customization callback that will be applied to every built-in type automatically provided by ElasticGraph. Provides # an opportunity to customize the built-in types (e.g. to add directives to them or whatever). # diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/extension_module_support.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/extension_module_support.rb new file mode 100644 index 000000000..c49740282 --- /dev/null +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/extension_module_support.rb @@ -0,0 +1,30 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module SchemaDefinition + # Helper methods for composing schema definition extension modules. 
+ # + # @private + module ExtensionModuleSupport + module_function + + def default_ingestion_serializer_extension_modules + require "elastic_graph/json_ingestion/schema_definition/api_extension" + [JSONIngestion::SchemaDefinition::APIExtension] + end + + def build_api_extension_modules( + extension_modules:, + ingestion_serializer_extension_modules: default_ingestion_serializer_extension_modules + ) + (ingestion_serializer_extension_modules + extension_modules).uniq + end + end + end +end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb index 067903eff..7bdcceb69 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb @@ -226,7 +226,9 @@ def new_interface_type(name) def new_object_type(name) @@object_type_new.call(@state, name.to_s) do |object_type| + # :nocov: -- most suites reach this through higher-level APIs or extensions that always pass a block. yield object_type if block_given? + # :nocov: end end @@object_type_new = prevent_non_factory_instantiation_of(SchemaElements::ObjectType) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb deleted file mode 100644 index 605024146..000000000 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. -# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. 
-# -# frozen_string_literal: true - -require "elastic_graph/constants" - -module ElasticGraph - module SchemaDefinition - module Indexing - # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events. - # - # @api private - module EventEnvelope - # @param indexed_type_names [Array] names of the indexed types - # @param json_schema_version [Integer] the version of the JSON schema - # @return [Hash] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`. - def self.json_schema(indexed_type_names, json_schema_version) - { - "type" => "object", - "description" => "Required by ElasticGraph to wrap every data event.", - "properties" => { - "op" => { - "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.", - "type" => "string", - "enum" => %w[upsert] - }, - "type" => { - "description" => "The type of object present in `record`.", - "type" => "string", - # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent. - "enum" => indexed_type_names.sort - }, - "id" => { - "description" => "The unique identifier of the record.", - "type" => "string", - "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH - }, - "version" => { - "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', - "type" => "integer", - "minimum" => 0, - "maximum" => (2**63) - 1 - }, - "record" => { - "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", - "type" => "object" - }, - "latency_timestamps" => { - "description" => "Timestamps from which ElasticGraph measures indexing latency. 
The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", - "type" => "object", - "additionalProperties" => false, - "patternProperties" => { - "^\\w+_at$" => { - "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", - "type" => "string", - "format" => "date-time" - } - } - }, - JSON_SCHEMA_VERSION_KEY => { - "description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.", - "const" => json_schema_version - }, - "message_id" => { - "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", - "type" => "string" - } - }, - "additionalProperties" => false, - "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], - "if" => { - "properties" => { - "op" => {"const" => "upsert"} - } - }, - "then" => {"required" => ["record"]} - } - end - end - end - end -end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb index 5b0c0db1c..51dfd4a1f 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb @@ -7,7 +7,6 @@ # frozen_string_literal: true require "elastic_graph/constants" -require "elastic_graph/schema_definition/indexing/json_schema_field_metadata" require "elastic_graph/schema_definition/indexing/list_counts_mapping" require "elastic_graph/support/hash_util" require "elastic_graph/support/memoizable_data" @@ -22,28 +21,13 @@ class Field < Support::MemoizableData.define( :name, :name_in_index, :type, - 
:json_schema_layers, :indexing_field_type, :accuracy_confidence, - :json_schema_customizations, :mapping_customizations, :source, :runtime_field_script, :doc_comment ) - # JSON schema overrides that automatically apply to specific mapping types so that the JSON schema - # validation will reject values which cannot be indexed into fields of a specific mapping type. - # - # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/number.html Elasticsearch numeric field type documentation - # @note We don't handle `integer` here because it's the default numeric type (handled by our definition of the `Int` scalar type). - # @note Likewise, we don't handle `long` here because a custom scalar type must be used for that since GraphQL's `Int` type can't handle long values. - JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE = { - "byte" => {"minimum" => -(2**7), "maximum" => (2**7) - 1}, - "short" => {"minimum" => -(2**15), "maximum" => (2**15) - 1}, - "keyword" => {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}, - "text" => {"maxLength" => DEFAULT_MAX_TEXT_LENGTH} - } - # @return [Hash] the mapping for this field. The returned hash should be composed entirely # of Ruby primitives that, when converted to a JSON string, match the structure required by # [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html). @@ -63,23 +47,6 @@ def mapping end end - # @return [Hash] the JSON schema definition for this field. The returned object should - # be composed entirely of Ruby primitives that, when converted to a JSON string, match the - # requirements of [the JSON schema spec](https://json-schema.org/). 
- def json_schema - json_schema_layers - .reverse # resolve layers from innermost to outermost wrappings - .reduce(inner_json_schema) { |acc, layer| process_layer(layer, acc) } - .merge(outer_json_schema_customizations) - .merge({"description" => doc_comment}.compact) - .then { |h| Support::HashUtil.stringify_keys(h) } - end - - # @return [JSONSchemaFieldMetadata] additional ElasticGraph metadata to be stored in the JSON schema for this field. - def json_schema_metadata - JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index) - end - # Builds a hash containing the mapping for the provided fields, normalizing it in the same way that the # datastore does so that consistency checks between our index configuration and what's in the datastore # work properly. @@ -107,80 +74,6 @@ def self.normalized_mapping_hash_for(fields) mapping_hash end - - def nullable? - json_schema_layers.include?(:nullable) - end - - private - - def inner_json_schema - user_specified_customizations = - if user_specified_json_schema_customizations_go_on_outside? - {} # : ::Hash[::String, untyped] - else - Support::HashUtil.stringify_keys(json_schema_customizations) - end - - customizations_from_mapping = JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE[mapping["type"]] || {} - customizations = customizations_from_mapping.merge(user_specified_customizations) - customizations = indexing_field_type.format_field_json_schema_customizations(customizations) - - ref = {"$ref" => "#/$defs/#{type.unwrapped_name}"} - return ref if customizations.empty? - - # Combine any customizations with type ref under an "allOf" subschema: - # All of these properties must hold true for the type to be valid. 
- # - # Note that if we simply combine the customizations with the `$ref` - # at the same level, it will not work, because other subschema - # properties are ignored when they are in the same object as a `$ref`: - # https://github.com/json-schema-org/JSON-Schema-Test-Suite/blob/2.0.0/tests/draft7/ref.json#L165-L168 - {"allOf" => [ref, customizations]} - end - - def outer_json_schema_customizations - return {} unless user_specified_json_schema_customizations_go_on_outside? - Support::HashUtil.stringify_keys(json_schema_customizations) - end - - # Indicates if the user-specified JSON schema customizations should go on the inside - # (where they normally go) or on the outside. They only go on the outside when it's - # an array field, because then they apply to the array itself instead of the items in the - # array. - def user_specified_json_schema_customizations_go_on_outside? - json_schema_layers.include?(:array) - end - - def process_layer(layer, schema) - case layer - when :nullable - make_nullable(schema) - when :array - make_array(schema) - else - # :nocov: - layer is only ever `:nullable` or `:array` so we never get here - schema - # :nocov: - end - end - - def make_nullable(schema) - # Here we use "anyOf" to ensure that JSON can either match the schema OR null. - # - # (Using "oneOf" would mean that if we had a schema that also allowed null, - # null would never be allowed, since "oneOf" must match exactly one subschema). 
- { - "anyOf" => [ - schema, - {"type" => "null"} - ] - } - end - - def make_array(schema) - {"type" => "array", "items" => schema} - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb index 070f70db3..f0c3454eb 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb @@ -15,7 +15,6 @@ module Indexing :name_in_index, :type, :mapping_options, - :json_schema_options, :accuracy_confidence, :source, :runtime_field_script, @@ -35,10 +34,8 @@ def resolve name: name, name_in_index: name_in_index, type: type, - json_schema_layers: type.json_schema_layers, indexing_field_type: resolved_type.to_indexing_field_type, accuracy_confidence: accuracy_confidence, - json_schema_customizations: json_schema_options, mapping_customizations: mapping_options, source: source, runtime_field_script: runtime_field_script, @@ -46,7 +43,7 @@ def resolve ) end - # @dynamic initialize, with, name, name_in_index, type, mapping_options, json_schema_options, accuracy_confidence, source, runtime_field_script, doc_comment + # @dynamic initialize, with, name, name_in_index, type, mapping_options, accuracy_confidence, source, runtime_field_script, doc_comment end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb index d2f72fb6f..e1aad284c 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb @@ -23,40 +23,11 @@ module FieldType # # @api private class Enum < ::Data - # 
@return [Hash] the JSON schema for this enum type. - def to_json_schema - {"type" => "string", "enum" => enum_value_names} - end - # @return [Hash] the datastore mapping for this enum type. def to_mapping {"type" => "keyword"} end - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this enum type. - def json_schema_field_metadata_by_field_name - {} - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. - def format_field_json_schema_customizations(customizations) - # Since an enum type already restricts the values to a small set of allowed values, we do not need to keep - # other customizations (such as the `maxLength` field customization EG automatically applies to fields - # indexed as a `keyword`--we don't allow enum values to exceed that length, anyway). - # - # It's desirable to restrict what customizations are applied because when a publisher uses the JSON schema - # to generate code using a library such as https://github.com/pwall567/json-kotlin-schema-codegen, we found - # that the presence of extra field customizations inhibits the library's ability to generate code in the way - # we want (it causes the type of the enum to change since the JSON schema changes from a direct `$ref` to - # being wrapped in an `allOf`). - # - # However, we still want to apply `enum` customizations--this allows a user to "narrow" the set of allowed - # values for a field. For example, a `Currency` enum could contain every currency, and a user may want to - # restrict a specific `currency` field to a subset of currencies (e.g. to just USD, CAD, and EUR). 
- customizations.slice("enum") - end - # @dynamic initialize, enum_value_names end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb index c01fcbd4e..c02e463b6 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb @@ -14,7 +14,7 @@ module ElasticGraph module SchemaDefinition module Indexing module FieldType - # Responsible for the JSON schema and mapping of a {SchemaElements::ObjectType}. + # Responsible for the mapping of a {SchemaElements::ObjectType}. # # @!attribute [r] type_name # @return [String] name of the object type @@ -22,13 +22,11 @@ module FieldType # @return [Array] the subfields of this object type # @!attribute [r] mapping_options # @return [Hash] options to be included in the mapping - # @!attribute [r] json_schema_options - # @return [Hash] options to be included in the JSON schema # @!attribute [r] doc_comment # @return [String, nil] documentation for the type # # @api private - class Object < Support::MemoizableData.define(:schema_def_state, :type_name, :subfields, :mapping_options, :json_schema_options, :doc_comment) + class Object < Support::MemoizableData.define(:schema_def_state, :type_name, :subfields, :mapping_options, :doc_comment) # @return [Hash] the datastore mapping for this object type. def to_mapping @to_mapping ||= begin @@ -41,77 +39,10 @@ def to_mapping end end - # @return [Hash] the JSON schema for this object type. - def to_json_schema - @to_json_schema ||= - if json_schema_options.empty? - # Fields that are `sourced_from` an alternate type must not be included in this types JSON schema, - # since events of this type won't include them. 
- other_source_subfields, json_schema_candidate_subfields = subfields.partition(&:source) - validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) - json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script) - required_fields = json_schema_subfields - required_fields = required_fields.reject(&:nullable?) if schema_def_state.allow_omitted_json_schema_fields - - { - "type" => "object", - "properties" => json_schema_subfields.to_h { |f| [f.name, f.json_schema] }.merge(json_schema_typename_field), - # Note: `__typename` is intentionally not included in the `required` list. If `__typename` is present - # we want it validated (as we do by merging in `json_schema_typename_field`) but we only want - # to require it in the context of a union type. The union's json schema requires the field. - "required" => required_fields.map(&:name).freeze, - "additionalProperties" => (false unless schema_def_state.allow_extra_json_schema_fields), - "description" => doc_comment - }.compact.freeze - else - Support::HashUtil.stringify_keys(json_schema_options) - end - end - - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this object type. - def json_schema_field_metadata_by_field_name - subfields.to_h { |f| [f.name, f.json_schema_metadata] } - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. - def format_field_json_schema_customizations(customizations) - customizations - end - - private - + # @private def after_initialize subfields.freeze end - - # Returns a __typename property which we use for union types. - # - # This must always be set to the name of the type (thus the const value). - # - # We also add a "default" value. This does not impact validation, but rather - # aids tools like our kotlin codegen to save publishers from having to set the - # property explicitly when creating events. 
- def json_schema_typename_field - { - "__typename" => { - "type" => "string", - "const" => type_name, - "default" => type_name - } - } - end - - def validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) - problem_fields = other_source_subfields.reject { |f| f.json_schema_customizations.empty? } - return if problem_fields.empty? - - field_descriptions = problem_fields.map(&:name).sort.map { |f| "`#{f}`" }.join(", ") - raise Errors::SchemaError, - "`#{type_name}` has #{problem_fields.size} field(s) (#{field_descriptions}) that are `sourced_from` " \ - "another type and also have JSON schema customizations. Instead, put the JSON schema " \ - "customizations on the source type's field definitions." - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb index cb9c3132e..600a05c90 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb @@ -27,22 +27,6 @@ def to_mapping Support::HashUtil.stringify_keys(scalar_type.mapping_options) end - # @return [Hash] the JSON schema for this scalar type. - def to_json_schema - Support::HashUtil.stringify_keys(scalar_type.json_schema_options) - end - - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this scalar type. - def json_schema_field_metadata_by_field_name - {} - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. 
- def format_field_json_schema_customizations(customizations) - customizations - end - # @dynamic initialize, scalar_type end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb index e4d6e634f..044ac7795 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb @@ -24,25 +24,6 @@ module FieldType # # @api private class Union < ::Data.define(:subtypes_by_name) - # @return [Hash] the JSON schema for this union type. - def to_json_schema - subtype_json_schemas = subtypes_by_name.keys.map { |name| {"$ref" => "#/$defs/#{name}"} } - - # A union type can represent multiple subtypes, referenced by the "anyOf" clause below. - # We also add a requirement for the presence of __typename to indicate which type - # is being referenced (this property is pre-defined on the type itself as a constant). - # - # Note: Although both "oneOf" and "anyOf" keywords are valid for combining schemas - # to form a union, and validate equivalently when no object can satisfy multiple of the - # subschemas (which is the case here given the __typename requirements are mutually - # exclusive), we chose to use "oneOf" here because it works better with this library: - # https://github.com/pwall567/json-kotlin-schema-codegen - { - "required" => %w[__typename], - "oneOf" => subtype_json_schemas - } - end - # @return [Hash] the datastore mapping for this union type. def to_mapping mapping_subfields = subtypes_by_name.values.map(&:subfields).reduce([], :union) @@ -52,17 +33,6 @@ def to_mapping {"properties" => {"__typename" => {"type" => "keyword"}}} ) end - - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this union type. 
- def json_schema_field_metadata_by_field_name - {} - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. - def format_field_json_schema_customizations(customizations) - customizations - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index 8d59dff2b..009f80709 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -136,8 +136,6 @@ def rollover(frequency, timestamp_field_path_name) raise Errors::SchemaError, "rollover field `#{timestamp_field_path.full_description}` cannot be used for rollover since it is a list field." end - timestamp_field_path.path_parts.each { |f| f.json_schema nullable: false } - self.rollover_config = RolloverConfig.new( frequency: frequency, timestamp_field_path: timestamp_field_path @@ -184,8 +182,6 @@ def route_with(routing_field_path_name) self.routing_field_path = routing_field_path - routing_field_path.path_parts[0..-2].each { |f| f.json_schema nullable: false } - routing_field_path.last_part.json_schema nullable: false, pattern: HAS_NON_WHITE_SPACE_REGEX indexed_type.append_to_documentation "For more performant queries on this type, please filter on `#{routing_field_path_name}` if possible." end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb deleted file mode 100644 index 535d11b2d..000000000 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. 
-# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. -# -# frozen_string_literal: true - -module ElasticGraph - module SchemaDefinition - module Indexing - # @!parse class JSONSchemaFieldMetadata; end - JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index) - - # Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas - # alongside the JSON schema fields. - # - # @!attribute [r] type - # @return [String] name of the ElasticGraph type for this field - # @!attribute [r] name_in_index - # @return [String] name of the field in the index - # - # @api private - class JSONSchemaFieldMetadata < ::Data - # @return [Hash] hash form of the metadata that can be dumped in JSON schema - def to_dumpable_hash - {"type" => type, "nameInIndex" => name_in_index} - end - - # @dynamic initialize, type, name_in_index - end - end - end -end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb deleted file mode 100644 index 820ac7b62..000000000 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. -# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. -# -# frozen_string_literal: true - -require "elastic_graph/constants" - -module ElasticGraph - module SchemaDefinition - module Indexing - # Represents the result of merging a JSON schema with metadata. The result includes both - # the merged JSON schema and a list of `failed_fields` indicating which fields metadata - # could not be determined for. 
- # - # @private - class JSONSchemaWithMetadata < ::Data.define( - # The JSON schema. - :json_schema, - # A set of fields (in the form `Type.field`) that were needed but not found. - :missing_fields, - # A set of type names that were needed but not found. - :missing_types, - # A set of `DeprecatedElement` objects that create conflicting definitions. - :definition_conflicts, - # A set of fields that have been deleted but that must be retained (e.g. for custom shard routing or rollover) - :missing_necessary_fields - ) - def json_schema_version - json_schema.fetch(JSON_SCHEMA_VERSION_KEY) - end - - # Responsible for building `JSONSchemaWithMetadata` instances. - # - # @private - class Merger - # @dynamic unused_deprecated_elements - attr_reader :unused_deprecated_elements - - def initialize(schema_def_results) - @field_metadata_by_type_and_field_name = schema_def_results.json_schema_field_metadata_by_type_and_field_name - @renamed_types_by_old_name = schema_def_results.state.renamed_types_by_old_name - @deleted_types_by_old_name = schema_def_results.state.deleted_types_by_old_name - @renamed_fields_by_type_name_and_old_field_name = schema_def_results.state.renamed_fields_by_type_name_and_old_field_name - @deleted_fields_by_type_name_and_old_field_name = schema_def_results.state.deleted_fields_by_type_name_and_old_field_name - @state = schema_def_results.state - @derived_indexing_type_names = schema_def_results.derived_indexing_type_names - - @unused_deprecated_elements = ( - @renamed_types_by_old_name.values + - @deleted_types_by_old_name.values + - @renamed_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + - @deleted_fields_by_type_name_and_old_field_name.values.flat_map(&:values) - ).to_set - end - - def merge_metadata_into(json_schema) - missing_fields = ::Set.new - missing_types = ::Set.new - definition_conflicts = ::Set.new - old_type_name_by_current_name = {} # : ::Hash[String, String] - - defs = json_schema.fetch("$defs").to_h do |type_name, 
type_def| - if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"]) - current_type_name = determine_current_type_name( - type_name, - missing_types: missing_types, - definition_conflicts: definition_conflicts - ) - - if current_type_name - old_type_name_by_current_name[current_type_name] = type_name - end - - properties = properties.to_h do |field_name, prop| - unless field_name == "__typename" - field_metadata = current_type_name&.then do |name| - field_metadata_for( - name, - field_name, - missing_fields: missing_fields, - definition_conflicts: definition_conflicts - ) - end - - prop = prop.merge({"ElasticGraph" => field_metadata&.to_dumpable_hash}) - end - - [field_name, prop] - end - - type_def = type_def.merge({"properties" => properties}) - end - - [type_name, type_def] - end - - json_schema = json_schema.merge("$defs" => defs) - - JSONSchemaWithMetadata.new( - json_schema: json_schema, - missing_fields: missing_fields, - missing_types: missing_types, - definition_conflicts: definition_conflicts, - missing_necessary_fields: identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) - ) - end - - private - - # Given a historical `type_name`, determines (and returns) the current name for that type. 
- def determine_current_type_name(type_name, missing_types:, definition_conflicts:) - exists_currently = @field_metadata_by_type_and_field_name.key?(type_name) - deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } - renamed = @renamed_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } - - if [exists_currently, deleted, renamed].count(&:itself) > 1 - definition_conflicts.merge([deleted, renamed].compact) - end - - return type_name if exists_currently - return nil if deleted - return renamed.name if renamed - - missing_types << type_name - nil - end - - # Given a historical `type_name` and `field_name` determines (and returns) the field metadata for it. - def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:) - full_name = "#{type_name}.#{field_name}" - - current_meta = @field_metadata_by_type_and_field_name.dig(type_name, field_name) - deleted = @deleted_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| - @unused_deprecated_elements.delete(elem) - end - renamed = @renamed_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| - @unused_deprecated_elements.delete(elem) - end - - if [current_meta, deleted, renamed].count(&:itself) > 1 - definition_conflicts.merge([deleted, renamed].compact.map { |elem| elem.with(name: full_name) }) - end - - return current_meta if current_meta - return nil if deleted - return @field_metadata_by_type_and_field_name.dig(type_name, renamed.name) if renamed - - missing_fields << full_name - nil - end - - def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) - json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name) - version = json_schema.fetch(JSON_SCHEMA_VERSION_KEY) - - @state.object_types_by_name.values - .select { |type| type.has_own_index_def? 
&& !@derived_indexing_type_names.include?(type.name) } - .flat_map { |object_type| identify_missing_necessary_fields_for_index_def(object_type, object_type.own_index_def, json_schema_resolver, version) } - end - - def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver, json_schema_version) - { - "routing" => index_def.routing_field_path, - "rollover" => index_def.rollover_config&.timestamp_field_path - }.compact.filter_map do |field_type, field_path| - if json_schema_resolver.necessary_path_missing?(field_path) - # The JSON schema v # {json_schema_version} artifact has no field that maps to the #{field_type} path of `#{field_path.fully_qualified_path_in_index}`. - - MissingNecessaryField.new( - field_type: field_type, - fully_qualified_path: field_path.fully_qualified_path_in_index - ) - end - end - end - - class JSONSchemaResolver - def initialize(state, json_schema, old_type_name_by_current_name) - @state = state - @old_type_name_by_current_name = old_type_name_by_current_name - @meta_by_old_type_and_name_in_index = ::Hash.new do |hash, type_name| - properties = json_schema.fetch("$defs").fetch(type_name).fetch("properties") - - hash[type_name] = properties.filter_map do |name, prop| - if (metadata = prop["ElasticGraph"]) - [metadata.fetch("nameInIndex"), metadata] - end - end.to_h - end - end - - # Indicates if the given `field_path` is (1) necessary and (2) missing from the JSON schema, indicating a problem. - # - # - Returns `false` is the given `field_path` is present in the JSON schema. - # - Returns `false` is the parent type of `field_path` has not been retained in this JSON schema version - # (in that case, the field path is not necessary). - # - Otherwise, returns `true` since the field path is both necessary and missing. - def necessary_path_missing?(field_path) - parent_type = field_path.first_part.parent_type.name - - field_path.path_parts.any? 
do |path_part| - necessary_path_part_missing?(parent_type, path_part.name_in_index) do |meta| - parent_type = @state.type_ref(meta.fetch("type")).fully_unwrapped.name - end - end - end - - private - - def necessary_path_part_missing?(parent_type, name_in_index) - old_type_name = @old_type_name_by_current_name[parent_type] - return false unless old_type_name - - meta = @meta_by_old_type_and_name_in_index.dig(old_type_name, name_in_index) - yield meta if meta - !meta - end - end - end - - MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path) - end - end - end -end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb deleted file mode 100644 index 7a8323fa6..000000000 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. -# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. -# -# frozen_string_literal: true - -require "elastic_graph/constants" - -module ElasticGraph - module SchemaDefinition - # Prunes unused type definitions from a given JSON schema. 
- # - # @private - class JSONSchemaPruner - def self.prune(original_json_schema) - initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema - .dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum") - - types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"]) - - # The .select will preserve the sort order of the original hash - # standard:disable Style/HashSlice -- https://github.com/soutaro/steep/issues/1503 - pruned_defs = original_json_schema["$defs"].select { |k, _v| types_to_keep.include?(k) } - # standard:enable Style/HashSlice - - original_json_schema.merge("$defs" => pruned_defs) - end - - # Returns a list of type names indicating all types referenced from any type in source_type_names. - private_class_method - def self.referenced_type_names(source_type_names, original_defs) - return Set.new if source_type_names.empty? - - referenced_type_defs = original_defs.slice(*source_type_names) - ref_names = collect_ref_names(referenced_type_defs) - - referenced_type_names(ref_names, original_defs) + source_type_names - end - - private_class_method - def self.collect_ref_names(hash) - hash.flat_map do |key, value| - case value - when ::Hash - collect_ref_names(value) - when ::Array - value.grep(::Hash).flat_map { |subhash| collect_ref_names(subhash) } - when ::String - if key == "$ref" && (type = value[%r{\A#/\$defs/(.+)\z}, 1]) - [type] - else - [] - end - else - [] - end - end - end - end - end -end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb index 454093ff6..89d9ba7f7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb @@ -6,29 +6,22 @@ # # frozen_string_literal: true -require 
"elastic_graph/support/json_schema/meta_schema_validator" - module ElasticGraph module SchemaDefinition module Mixins - # Mixin used to specify non-GraphQL type info (datastore index and JSON schema type info). + # Mixin used to specify non-GraphQL type info on schema elements. # Exists as a mixin so we can apply the same consistent API to every place we need to use this. # Currently it's used in 3 places: # - # - {SchemaElements::ScalarType}: allows specification of how scalars are represented in JSON schema and the index. - # - {SchemaElements::TypeWithSubfields}: allows customization of how an object type is represented in JSON schema and the index. - # - {SchemaElements::Field}: allows customization of a specific field over the field type's standard JSON schema and the index mapping. + # - {SchemaElements::ScalarType}: allows specification of how scalars are represented in the datastore index. + # - {SchemaElements::TypeWithSubfields}: allows customization of how an object type is represented in the datastore index. + # - {SchemaElements::Field}: allows customization of a specific field over the field type's standard index mapping. module HasTypeInfo # @return [Hash] datastore mapping options def mapping_options @mapping_options ||= {} end - # @return [Hash] JSON schema options - def json_schema_options - @json_schema_options ||= {} - end - # Set of mapping parameters that it makes sense to allow customization of, based on # [the Elasticsearch docs](https://www.elastic.co/guide/en/elasticsearch/reference/8.15/mapping-params.html). 
CUSTOMIZABLE_DATASTORE_PARAMS = Set[ @@ -70,7 +63,7 @@ def json_schema_options # ElasticGraph.define_schema do |schema| # schema.scalar_type "URL" do |t| # t.mapping type: "keyword" - # t.json_schema type: "string", format: "uri" + # t.json_schema type: "string" # end # end # @@ -87,13 +80,6 @@ def json_schema_options # t.field "expYear", "Int" do |f| # # Use a smaller numeric type to save space in the datastore # f.mapping type: "short" - # f.json_schema minimum: 2000, maximum: 2099 - # end - # - # t.field "expMonth", "Int" do |f| - # # Use a smaller numeric type to save space in the datastore - # f.mapping type: "byte" - # f.json_schema minimum: 1, maximum: 12 # end # # t.index "cards" @@ -108,73 +94,6 @@ def mapping(**options) mapping_options.update(options) end - - # Defines the [JSON schema](https://json-schema.org/understanding-json-schema/) validations for this field or type. Validations - # defined here will be included in the generated `json_schemas.yaml` artifact, which is used by the ElasticGraph indexer to - # validate events before indexing their data in the datastore. In addition, the publisher may use `json_schemas.yaml` for code - # generation and to apply validation before publishing an event to ElasticGraph. - # - # Can be called multiple times; each time, the options will be merged into the existing options. - # - # This is _required_ on a {SchemaElements::ScalarType} (since we don’t know how a custom scalar type should be represented in - # JSON!). On a {SchemaElements::Field}, this is optional, but can be used to make the JSON schema validation stricter then it - # would otherwise be. For example, you could use `json_schema maxLength: 30` on a `String` field to limit the length. - # - # You can use any of the JSON schema validation keywords here. In addition, `nullable: false` is supported to configure the - # generated JSON schema to disallow `null` values for the field. 
Note that if you define a field with a non-nullable GraphQL type - # (e.g. `Int!`), the JSON schema will automatically disallow nulls. However, as explained in the - # {SchemaElements::TypeWithSubfields#field} documentation, we generally recommend against defining non-nullable GraphQL fields. - # `json_schema nullable: false` will disallow `null` values from being indexed, while still keeping the field nullable in the - # GraphQL schema. If you think you might want to make a field non-nullable in the GraphQL schema some day, it’s a good idea to use - # `json_schema nullable: false` now to ensure every indexed record has a non-null value for the field. - # - # @note We recommend using JSON schema validations in a limited fashion. Validations that are appropriate to apply when data is - # entering the system-of-record are often not appropriate on a secondary index like ElasticGraph. Events that violate a JSON - # schema validation will fail to index (typically they will be sent to the dead letter queue and page an oncall engineer). If an - # ElasticGraph instance is meant to contain all the data of some source system, you probably don’t want it applying stricter - # validations than the source system itself has. We recommend limiting your JSON schema validations to situations where - # violations would prevent ElasticGraph from operating correctly. 
- # - # @param options [Hash] JSON schema options - # @return [void] - # - # @example Define the JSON schema validations of a custom scalar type - # ElasticGraph.define_schema do |schema| - # schema.scalar_type "URL" do |t| - # t.mapping type: "keyword" - # - # # JSON schema has a built-in URI format validator: - # # https://json-schema.org/understanding-json-schema/reference/string.html#resource-identifiers - # t.json_schema type: "string", format: "uri" - # end - # end - # - # @example Define additional validations on a field - # ElasticGraph.define_schema do |schema| - # schema.object_type "Card" do |t| - # t.field "id", "ID!" - # - # t.field "expYear", "Int" do |f| - # # Use JSON schema to ensure the publisher is sending us 4 digit years, not 2 digit years. - # f.json_schema minimum: 2000, maximum: 2099 - # end - # - # t.field "expMonth", "Int" do |f| - # f.json_schema minimum: 1, maximum: 12 - # end - # - # t.index "cards" - # end - # end - def json_schema(**options) - validatable_json_schema = Support::HashUtil.stringify_keys(options) - - if (error_msg = Support::JSONSchema.strict_meta_schema_validator.validate_with_error_message(validatable_json_schema)) - raise Errors::SchemaError, "Invalid JSON schema options set on #{self}:\n\n#{error_msg}" - end - - json_schema_options.update(options) - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb index 58fbe7517..d57b0282f 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb @@ -8,6 +8,7 @@ require "rake/tasklib" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" +require "elastic_graph/schema_definition/extension_module_support" module ElasticGraph module SchemaDefinition @@ -41,6 +42,9 @@ class RakeTasks < 
::Rake::TaskLib # specific enum types. For example, to rename the `DayOfWeek.MONDAY` enum to `DayOfWeek.MON`, pass `{DayOfWeek: {MONDAY: "MON"}}`. # @param extension_modules [Array] List of Ruby modules to extend onto the `SchemaDefinition::API` instance. Designed to # support ElasticGraph extension gems (such as `elasticgraph-apollo`). + # @param ingestion_serializer_extension_modules [Array] List of Ruby modules implementing the ingestion serializer API. + # Defaults to the built-in JSON Schema serializer for backward compatibility, but can be set to `[]` to disable it or replaced + # with a different serializer extension. # @param enforce_json_schema_version [Boolean] Whether or not to enforce the requirement that the JSON schema version is incremented # every time dumping the JSON schemas results in a changed artifact. Generally speaking, you will want this to be `true` for any # ElasticGraph application that is in production as the versioning of JSON schemas is what supports safe schema evolution as it @@ -117,6 +121,7 @@ def initialize( type_name_overrides: {}, enum_value_overrides_by_type: {}, extension_modules: [], + ingestion_serializer_extension_modules: ExtensionModuleSupport.default_ingestion_serializer_extension_modules, enforce_json_schema_version: true, output: $stdout ) @@ -133,6 +138,7 @@ def initialize( @schema_artifacts_directory = schema_artifacts_directory @enforce_json_schema_version = enforce_json_schema_version @extension_modules = extension_modules + @ingestion_serializer_extension_modules = ingestion_serializer_extension_modules @output = output define_tasks @@ -178,6 +184,7 @@ def schema_def_api @schema_element_names, @index_document_sizes, extension_modules: @extension_modules, + ingestion_serializer_extension_modules: @ingestion_serializer_extension_modules, derived_type_name_formats: @derived_type_name_formats, type_name_overrides: @type_name_overrides, enum_value_overrides_by_type: @enum_value_overrides_by_type, diff --git 
a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 8106e9d5f..70ccb30a7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -10,8 +10,6 @@ require "elastic_graph/errors" require "elastic_graph/schema_artifacts/runtime_metadata/schema" require "elastic_graph/schema_artifacts/artifacts_helper_methods" -require "elastic_graph/schema_definition/indexing/event_envelope" -require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" require "elastic_graph/schema_definition/indexing/relationship_resolver" require "elastic_graph/schema_definition/indexing/update_target_resolver" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" @@ -46,53 +44,6 @@ def runtime_metadata @runtime_metadata ||= build_runtime_metadata end - # @param version [Integer] desired JSON schema version - # @return [Hash] the JSON schema for the requested version, if available - # @raise [Errors::NotFoundError] if the requested JSON schema version is not available - def json_schemas_for(version) - unless available_json_schema_versions.include?(version) - raise Errors::NotFoundError, "The requested json schema version (#{version}) is not available. Available versions: #{available_json_schema_versions.to_a.join(", ")}." 
- end - - @latest_versioned_json_schema ||= merge_field_metadata_into_json_schema(current_public_json_schema).json_schema - end - - # @return [Set] set of available JSON schema versions - def available_json_schema_versions - @available_json_schema_versions ||= Set[latest_json_schema_version] - end - - # @return [Hash] the newly generated JSON schema - def latest_json_schema_version - current_public_json_schema[JSON_SCHEMA_VERSION_KEY] - end - - # @private - def json_schema_version_setter_location - state.json_schema_version_setter_location - end - - # @private - def json_schema_field_metadata_by_type_and_field_name - @json_schema_field_metadata_by_type_and_field_name ||= json_schema_indexing_field_types_by_name - .transform_values(&:json_schema_field_metadata_by_field_name) - end - - # @private - def current_public_json_schema - @current_public_json_schema ||= build_public_json_schema - end - - # @private - def merge_field_metadata_into_json_schema(json_schema) - json_schema_with_metadata_merger.merge_metadata_into(json_schema) - end - - # @private - def unused_deprecated_elements - json_schema_with_metadata_merger.unused_deprecated_elements - end - # @private STATIC_SCRIPT_REPO = Scripting::FileSystemRepository.new(::File.join(__dir__.to_s, "scripting", "scripts")) @@ -171,10 +122,6 @@ def aggregation_efficiency_hints_for(derived_indexed_types) EOS end - def json_schema_with_metadata_merger - @json_schema_with_metadata_merger ||= Indexing::JSONSchemaWithMetadata::Merger.new(self) - end - def generate_datastore_config # We need to check this before generating our datastore configuration. # We can't generate a mapping from a recursively defined schema type. @@ -329,44 +276,6 @@ def generate_sdl [type_defs + state.sdl_parts].join("\n\n") end - def build_public_json_schema - json_schema_version = state.json_schema_version - if json_schema_version.nil? - raise Errors::SchemaError, "`json_schema_version` must be specified in the schema. 
To resolve, add `schema.json_schema_version 1` in a schema definition block." - end - - root_document_type_names = state.object_types_by_name.values - .select { |type| type.root_document_type? && !type.abstract? } - .reject { |type| derived_indexing_type_names.include?(type.name) } - .map(&:name) - - definitions_by_name = json_schema_indexing_field_types_by_name - .transform_values(&:to_json_schema) - .compact - - { - "$schema" => JSON_META_SCHEMA, - JSON_SCHEMA_VERSION_KEY => json_schema_version, - "$defs" => { - "ElasticGraphEventEnvelope" => Indexing::EventEnvelope.json_schema(root_document_type_names, json_schema_version) - }.merge(definitions_by_name) - } - end - - def json_schema_indexing_field_types_by_name - @json_schema_indexing_field_types_by_name ||= state - .types_by_name - .except("Query") - .values - .reject do |t| - derived_indexing_type_names.include?(t.name) || - # Skip graphql framework types - t.graphql_only? - end - .sort_by(&:name) - .to_h { |type| [type.name, type.to_indexing_field_type] } - end - def verify_runtime_metadata(runtime_metadata) registered_resolvers = runtime_metadata.graphql_resolvers_by_name diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb index 56d288fae..efcb930c7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb @@ -8,7 +8,6 @@ require "did_you_mean" require "elastic_graph/constants" -require "elastic_graph/schema_definition/json_schema_pruner" require "elastic_graph/support/graphql_gem_loader" require "elastic_graph/support/memoizable_data" require "fileutils" @@ -37,41 +36,10 @@ def initialize(schema_definition_results:, schema_artifacts_directory:, enforce_ @enforce_json_schema_version = enforce_json_schema_version 
@output = output @max_diff_lines = max_diff_lines - - @json_schemas_artifact = new_yaml_artifact( - JSON_SCHEMAS_FILE, - JSONSchemaPruner.prune(schema_definition_results.current_public_json_schema), - extra_comment_lines: [ - "This is the \"public\" JSON schema file and is intended to be provided to publishers so that", - "they can perform code generation and event validation." - ] - ) end # Dumps all the schema artifacts to disk. def dump_artifacts - check_if_needs_json_schema_version_bump do |recommended_json_schema_version| - if @enforce_json_schema_version - # @type var setter_location: ::Thread::Backtrace::Location - # We use `_ =` because while `json_schema_version_setter_location` can be nil, - # it'll never be nil if we get here and we want the type to be non-nilable. - setter_location = _ = schema_definition_results.json_schema_version_setter_location - setter_location_path = ::Pathname.new(setter_location.absolute_path.to_s).relative_path_from(::Dir.pwd) - - abort "A change has been attempted to `json_schemas.yaml`, but the `json_schema_version` has not been correspondingly incremented. Please " \ - "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \ - "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \ - " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \ - "Alternately, pass `enforce_json_schema_version: false` to `ElasticGraph::SchemaDefinition::RakeTasks.new` to allow the JSON schemas " \ - "file to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping." - else - @output.puts <<~EOS - WARNING: the `json_schemas.yaml` artifact is being updated without the `json_schema_version` being correspondingly incremented. 
- This is not recommended for production applications, but is currently allowed because you have set `enforce_json_schema_version: false`. - EOS - end - end - ::FileUtils.mkdir_p(@schema_artifacts_directory) artifacts.each { |artifact| artifact.dump(@output) } end @@ -112,18 +80,11 @@ def artifacts_from_schema_def # schema elements. graphql_schema = ::GraphQL::Schema.from_definition(schema_definition_results.graphql_schema_string).to_definition.chomp - unversioned_artifacts = [ + [ new_yaml_artifact(DATASTORE_CONFIG_FILE, schema_definition_results.datastore_config), new_yaml_artifact(RUNTIME_METADATA_FILE, pruned_runtime_metadata(graphql_schema).to_dumpable_hash), - @json_schemas_artifact, new_raw_artifact(GRAPHQL_SCHEMA_FILE, "\n" + graphql_schema) ] - - versioned_artifacts = build_desired_versioned_json_schemas(@json_schemas_artifact.desired_contents).values.map do |versioned_schema| - new_versioned_json_schema_artifact(versioned_schema) - end - - unversioned_artifacts + versioned_artifacts end def notify_about_unused_type_name_overrides @@ -173,157 +134,6 @@ def notify_about_unused_enum_value_overrides EOS end - def build_desired_versioned_json_schemas(current_public_json_schema) - versioned_parsed_yamls = ::Dir.glob(::File.join(@schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v*.yaml")).map do |file| - ::YAML.safe_load_file(file) - end + [current_public_json_schema] - - results_by_json_schema_version = versioned_parsed_yamls.to_h do |parsed_yaml| - merged_schema = @schema_definition_results.merge_field_metadata_into_json_schema(parsed_yaml) - [merged_schema.json_schema_version, merged_schema] - end - - report_json_schema_merge_errors(results_by_json_schema_version.values) - report_json_schema_merge_warnings - - results_by_json_schema_version.transform_values(&:json_schema) - end - - def report_json_schema_merge_errors(merged_results) - json_schema_versions_by_missing_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, 
::Array[::Integer]] - json_schema_versions_by_missing_type = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] - json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] - - merged_results.each do |result| - result.missing_fields.each do |field| - json_schema_versions_by_missing_field[field] << result.json_schema_version - end - - result.missing_types.each do |type| - json_schema_versions_by_missing_type[type] << result.json_schema_version - end - - result.missing_necessary_fields.each do |missing_necessary_field| - json_schema_versions_by_missing_necessary_field[missing_necessary_field] << result.json_schema_version - end - end - - missing_field_errors = json_schema_versions_by_missing_field.map do |field, json_schema_versions| - missing_field_error_for(field, json_schema_versions) - end - - missing_type_errors = json_schema_versions_by_missing_type.map do |type, json_schema_versions| - missing_type_error_for(type, json_schema_versions) - end - - missing_necessary_field_errors = json_schema_versions_by_missing_necessary_field.map do |field, json_schema_versions| - missing_necessary_field_error_for(field, json_schema_versions) - end - - definition_conflict_errors = merged_results - .flat_map { |result| result.definition_conflicts.to_a } - .group_by(&:name) - .map do |name, deprecated_elements| - <<~EOS - The schema definition of `#{name}` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: - - #{format_deprecated_elements(deprecated_elements)} - EOS - end - - errors = missing_field_errors + missing_type_errors + missing_necessary_field_errors + definition_conflict_errors - return if errors.empty? - - abort errors.join("\n\n") - end - - def report_json_schema_merge_warnings - unused_elements = @schema_definition_results.unused_deprecated_elements - return if unused_elements.empty? 
- - @output.puts <<~EOS - The schema definition has #{unused_elements.size} unneeded reference(s) to deprecated schema elements. These can all be safely deleted: - - #{format_deprecated_elements(unused_elements)} - - EOS - end - - def format_deprecated_elements(deprecated_elements) - descriptions = deprecated_elements - .sort_by { |e| [e.defined_at.path, e.defined_at.lineno] } - .map(&:description) - .uniq - - descriptions.each.with_index(1).map { |desc, idx| "#{idx}. #{desc}" }.join("\n") - end - - def missing_field_error_for(qualified_field, json_schema_versions) - type, field = qualified_field.split(".") - - <<~EOS - The `#{qualified_field}` field (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this field's data when ingesting events at #{old_versions(json_schema_versions)}. - To continue, do one of the following: - - 1. If the `#{qualified_field}` field has been renamed, indicate this by calling `field.renamed_from "#{field}"` on the renamed field. - 2. If the `#{qualified_field}` field has been dropped, indicate this by calling `type.deleted_field "#{field}"` on the `#{type}` type. - 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. - EOS - end - - def missing_type_error_for(type, json_schema_versions) - <<~EOS - The `#{type}` type (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this type's data when ingesting events at #{old_versions(json_schema_versions)}. - To continue, do one of the following: - - 1. 
If the `#{type}` type has been renamed, indicate this by calling `type.renamed_from "#{type}"` on the renamed type. - 2. If the `#{type}` field has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. - 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. - EOS - end - - def missing_necessary_field_error_for(field, json_schema_versions) - path = field.fully_qualified_path.split(".").last - # :nocov: -- we only cover one side of this ternary. - has_or_have = (json_schema_versions.size == 1) ? "has" : "have" - # :nocov: - - <<~EOS - #{describe_json_schema_versions(json_schema_versions, "and")} #{has_or_have} no field that maps to the #{field.field_type} field path of `#{field.fully_qualified_path}`. - Since the field path is required for #{field.field_type}, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: - - 1. If the `#{field.fully_qualified_path}` field has been renamed, indicate this by calling `field.renamed_from "#{path}"` on the renamed field rather than using `deleted_field`. - 2. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. - EOS - end - - def describe_json_schema_versions(json_schema_versions, conjunction) - json_schema_versions = json_schema_versions.sort - - # Steep doesn't support pattern matching yet, so have to skip type checking here. 
- __skip__ = case json_schema_versions - in [single_version] - "JSON schema version #{single_version}" - in [version1, version2] - "JSON schema versions #{version1} #{conjunction} #{version2}" - else - *versions, last_version = json_schema_versions - "JSON schema versions #{versions.join(", ")}, #{conjunction} #{last_version}" - end - end - - def old_versions(json_schema_versions) - return "this old version" if json_schema_versions.size == 1 - "these old versions" - end - - def files_noun_phrase(json_schema_versions) - return "its file" if json_schema_versions.size == 1 - "their files" - end - def artifacts_out_of_date_error(out_of_date_artifacts) # @type var diffs: ::Array[[SchemaArtifact[untyped], ::String]] diffs = [] @@ -378,20 +188,6 @@ def new_yaml_artifact(file_name, desired_contents, extra_comment_lines: []) ) end - def new_versioned_json_schema_artifact(desired_contents) - # File name depends on the schema_version field in the json schema. - schema_version = desired_contents[JSON_SCHEMA_VERSION_KEY] - - new_yaml_artifact( - ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{schema_version}.yaml"), - desired_contents, - extra_comment_lines: [ - "This JSON schema file contains internal ElasticGraph metadata and should be considered private.", - "The unversioned JSON schema file is public and intended to be provided to publishers." - ] - ) - end - def new_raw_artifact(file_name, desired_contents) SchemaArtifact.new( ::File.join(@schema_artifacts_directory, file_name), @@ -402,17 +198,6 @@ def new_raw_artifact(file_name, desired_contents) ) end - def check_if_needs_json_schema_version_bump(&block) - if @json_schemas_artifact.out_of_date? 
- existing_schema_version = @json_schemas_artifact.existing_dumped_contents&.dig(JSON_SCHEMA_VERSION_KEY) || -1 - desired_schema_version = @json_schemas_artifact.desired_contents[JSON_SCHEMA_VERSION_KEY] - - if existing_schema_version >= desired_schema_version - yield existing_schema_version + 1 - end - end - end - def pruned_runtime_metadata(graphql_schema_string) schema = ::GraphQL::Schema.from_definition(graphql_schema_string) runtime_meta = schema_definition_results.runtime_metadata diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb index 99c6e3fb8..f4f07c8dc 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb @@ -405,19 +405,11 @@ def register_standard_elastic_graph_types # As per the Elasticsearch docs, the field MUST come in named `lat` in Elastisearch (but we want the full name in GraphQL). t.field names.latitude, "Float", name_in_index: "lat" do |f| f.documentation "Angular distance north or south of the Earth's equator, measured in degrees from -90 to +90." - - # Note: we use `nullable: false` because we index it as a single `geo_point` field, and therefore can't - # support a `latitude` without a `longitude` or vice-versa. - f.json_schema minimum: -90, maximum: 90, nullable: false end # As per the Elasticsearch docs, the field MUST come in named `lon` in Elastisearch (but we want the full name in GraphQL). t.field names.longitude, "Float", name_in_index: "lon" do |f| f.documentation "Angular distance east or west of the Prime Meridian at Greenwich, UK, measured in degrees from -180 to +180." 
- - # Note: we use `nullable: false` because we index it as a single `geo_point` field, and therefore can't - # support a `latitude` without a `longitude` or vice-versa. - f.json_schema minimum: -180, maximum: 180, nullable: false end t.mapping type: "geo_point" @@ -659,17 +651,15 @@ def register_standard_elastic_graph_types # Registers the standard GraphQL scalar types. Note that the SDL for the scalar type itself isn't # included in the dumped SDL, but registering it allows us to derive a filter for each, - # which we need. In addition, this lets us define the mapping and JSON schema for each standard - # scalar type. + # which we need. In addition, this lets us define the mapping for each standard scalar type. + # Ingestion serializers can layer their own built-in configuration on top. def register_standard_graphql_scalars schema_def_api.scalar_type "Boolean" do |t| t.mapping type: "boolean" - t.json_schema type: "boolean" end schema_def_api.scalar_type "Float" do |t| t.mapping type: "double" - t.json_schema type: "number" t.customize_aggregated_values_type do |avt| # not nullable, since sum(empty_set) == 0 @@ -709,12 +699,10 @@ def register_standard_graphql_scalars schema_def_api.scalar_type "ID" do |t| t.mapping type: "keyword" - t.json_schema type: "string" end schema_def_api.scalar_type "Int" do |t| t.mapping type: "integer" - t.json_schema type: "integer", minimum: INT_MIN, maximum: INT_MAX t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::Integer", defined_at: "elastic_graph/indexer/indexing_preparers/integer" @@ -729,7 +717,6 @@ def register_standard_graphql_scalars schema_def_api.scalar_type "String" do |t| t.mapping type: "keyword" - t.json_schema type: "string" t.customize_filter_input_type do |fit| fit.field names.contains, schema_def_state.type_ref("StringContains").as_filter_input.name do |f| @@ -753,12 +740,11 @@ def register_standard_graphql_scalars def register_custom_elastic_graph_scalars schema_def_api.scalar_type "Cursor" 
do |t| - # Technically, we don't use the mapping or json_schema on this type since it's a return-only + # Technically, we don't use the mapping or ingestion config on this type since it's a return-only # type and isn't indexed. However, `scalar_type` requires them to be set (since custom scalars # defined by users will need those set) so we set them here to what they would be if we actually # used them. t.mapping type: "keyword" - t.json_schema type: "string" t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::Cursor", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/cursor" @@ -771,7 +757,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "Date" do |t| t.mapping type: "date", format: DATASTORE_DATE_FORMAT - t.json_schema type: "string", format: "date" t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::Date", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/date" @@ -791,7 +776,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "DateTime" do |t| t.mapping type: "date", format: DATASTORE_DATE_TIME_FORMAT - t.json_schema type: "string", format: "date-time" t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::DateTime", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/date_time" t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::DateTime", @@ -882,8 +866,6 @@ def register_custom_elastic_graph_scalars t.mapping type: "date", format: "HH:mm:ss||HH:mm:ss.S||HH:mm:ss.SS||HH:mm:ss.SSS" - t.json_schema type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN - t.customize_aggregated_values_type do |avt| define_exact_min_max_and_approx_avg_on_aggregated_values(avt, "LocalTime") do |adjective:, full_name:| <<~EOS @@ -896,7 +878,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "TimeZone" do |t| t.mapping type: "keyword" - t.json_schema type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a 
t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::TimeZone", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/time_zone" @@ -913,8 +894,6 @@ def register_custom_elastic_graph_scalars # https://github.com/json-schema-org/json-schema-spec/blob/draft-07/schema.json#L23-L29 # # ...except we are omitting `null` here; it'll be added by the nullability decorator if the field is defined as nullable. - t.json_schema type: ["array", "boolean", "integer", "number", "object", "string"] - # In the index we store this as a JSON string in a `keyword` field. t.mapping type: "keyword" @@ -939,7 +918,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "JsonSafeLong" do |t| t.mapping type: "long" - t.json_schema type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::JsonSafeLong", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/longs" @@ -983,7 +961,6 @@ def register_custom_elastic_graph_scalars # to do if we ingest them as strings. (The `pattern` regex to validate the range # would be *extremely* complicated). 
t.mapping type: "long" - t.json_schema type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::LongString", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/longs" t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::Integer", diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb index 0bdb27b60..57a1f67ac 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb @@ -145,7 +145,7 @@ def derived_graphql_types derived_scalar_types = schema_def_state.factory.new_scalar_type(name) do |t| t.mapping type: "keyword" - t.json_schema type: "string" + configure_derived_scalar_type(t) t.graphql_only graphql_only? end.derived_graphql_types @@ -156,6 +156,14 @@ def derived_graphql_types end end + # Hook for extensions to customize the scalar type derived from an enum type. + # @param scalar_type [ScalarType] the scalar type to configure + # @return [void] + # @api private + def configure_derived_scalar_type(scalar_type) + # No-op by default; extensions (e.g. JSONIngestion) override this. 
+ end + # @return [Indexing::FieldType::Enum] indexing representation of this enum type def to_indexing_field_type Indexing::FieldType::Enum.new(values_by_name.keys) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb index 9bf47dab0..882d193f9 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb @@ -83,8 +83,6 @@ module SchemaElements # @private # @!attribute [rw] computation_detail # @private - # @!attribute [rw] non_nullable_in_json_schema - # @private # @!attribute [rw] as_input # @private class Field < Struct.new( @@ -93,7 +91,7 @@ class Field < Struct.new( :aggregated_values_customizations, :sort_order_enum_value_customizations, :args, :sortable, :filterable, :aggregatable, :groupable, :highlightable, :graphql_only, :source, :runtime_field_script, :relationship, :singular_name, - :computation_detail, :non_nullable_in_json_schema, :as_input, + :computation_detail, :as_input, :name_in_index, :resolver ) include Mixins::HasDocumentation @@ -137,7 +135,6 @@ def initialize( # the `_name` suffix on the attribute for clarity. 
singular_name: singular, name_in_index: name_in_index, - non_nullable_in_json_schema: false, as_input: as_input, resolver: resolver ) @@ -453,22 +450,6 @@ def on_each_generated_schema_element(&customization_block) customize_sort_order_enum_values(&customization_block) end - # (see Mixins::HasTypeInfo#json_schema) - def json_schema(nullable: nil, **options) - if options.key?(:type) - raise Errors::SchemaError, "Cannot override JSON schema type of field `#{name}` with `#{options.fetch(:type)}`" - end - - case nullable - when true - raise Errors::SchemaError, "`nullable: true` is not allowed on a field--just declare the GraphQL field as being nullable (no `!` suffix) instead." - when false - self.non_nullable_in_json_schema = true - end - - super(**options) - end - # (see Mixins::HasTypeInfo#mapping) def mapping(**options) # ElasticGraph has special handling for the nested type (e.g. we generate sub-aggregation types in the GraphQL schema for @@ -965,9 +946,8 @@ def to_indexing_field_reference Indexing::FieldReference.new( name: name, name_in_index: name_in_index, - type: non_nullable_in_json_schema ? 
type.wrap_non_null : type, + type: type, mapping_options: mapping_options, - json_schema_options: json_schema_options, accuracy_confidence: accuracy_confidence, source: source, runtime_field_script: runtime_field_script, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb index 8e2703cd2..282a7db59 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb @@ -25,7 +25,7 @@ module SchemaElements # ElasticGraph.define_schema do |schema| # schema.scalar_type "URL" do |t| # t.mapping type: "keyword" - # t.json_schema type: "string", format: "uri" + # t.json_schema type: "string" # end # end # @@ -60,7 +60,7 @@ class ScalarType < Struct.new( include Mixins::HasReadableToSAndInspect.new { |t| t.name } # `HasTypeInfo` provides the following methods: - # @dynamic mapping_options, json_schema_options + # @dynamic mapping_options include Mixins::HasTypeInfo # @dynamic graphql_only? @@ -78,13 +78,8 @@ def initialize(schema_def_state, name) yield self - missing = [ - ("`mapping`" if mapping_options.empty?), - ("`json_schema`" if json_schema_options.empty?) - ].compact - - if missing.any? - raise Errors::SchemaError, "Scalar types require `mapping` and `json_schema` to be configured, but `#{name}` lacks #{missing.join(" and ")}." + if mapping_options.empty? + raise Errors::SchemaError, "Scalar types require `mapping` to be configured, but `#{name}` lacks `mapping`." end if (placeholder = inferred_grouping_missing_value_placeholder) @@ -350,14 +345,17 @@ def inferred_grouping_missing_value_placeholder # JSON schema min/max only constrains newly indexed values, not existing data that may fall outside the range before the constraints were added. 
# This is an edge case where the long range may exceed safe float precision. # In this case, users can set grouping_missing_value_placeholder to nil. - if (json_schema_options[:minimum] || LONG_STRING_MIN) >= JSON_SAFE_LONG_MIN && - (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX + ingestion_min = respond_to?(:json_schema_options) ? json_schema_options[:minimum] : nil + ingestion_max = respond_to?(:json_schema_options) ? json_schema_options[:maximum] : nil + if (ingestion_min || LONG_STRING_MIN) >= JSON_SAFE_LONG_MIN && + (ingestion_max || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX inferred_numeric_placeholder_for_integer_type end elsif mapping_type == "unsigned_long" # Similar to the checks above for long except we only need to check the max # (since the min is zero even if not specified) - if (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX + ingestion_max = respond_to?(:json_schema_options) ? json_schema_options[:maximum] : nil + if (ingestion_max || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX inferred_numeric_placeholder_for_integer_type end elsif INTEGER_TYPES.include?(mapping_type) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb index 483d296bf..22c574977 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb @@ -171,20 +171,6 @@ def with_reverted_override schema_def_state.type_ref(type_namer.revert_override_for(name)) end - # Returns all the JSON schema array/nullable layers of a type, from outermost to innermost. 
- # For example, [[Int]] will return [:nullable, :array, :nullable, :array, :nullable] - def json_schema_layers - @json_schema_layers ||= begin - layers, inner_type = peel_json_schema_layers_once - - if layers.empty? || inner_type == self - layers - else - layers + inner_type.json_schema_layers - end - end - end - # Most of ElasticGraph's derived GraphQL types have a static suffix (e.g. the full type name # is source_type + suffix). This is a map of all of these. STATIC_FORMAT_NAME_BY_CATEGORY = TypeNamer::REQUIRED_PLACEHOLDERS.filter_map do |format_name, placeholders| @@ -304,16 +290,6 @@ def after_initialize Mixins::VerifiesGraphQLName.verify_name!(unwrapped_name) end - def peel_json_schema_layers_once - if list? - return [[:array], unwrap_list] if non_null? - return [[:nullable, :array], unwrap_list] - end - - return [[], unwrap_non_null] if non_null? - [[:nullable], self] - end - def matches_format_of?(category) format_name = STATIC_FORMAT_NAME_BY_CATEGORY.fetch(category) type_namer.matches_format?(name, format_name) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb index eb45743ac..7d2114f9c 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb @@ -150,7 +150,7 @@ def name # Therefore, we recommend limiting your use of `!` to only a few situations such as defining a type’s primary key (e.g. # `t.field "id", "ID!"`) or defining a list field (e.g. `t.field "authors", "[String!]!"`) since empty lists already provide a # "no data" representation. You can still configure the ElasticGraph indexer to require a non-null value for a field using - # `f.json_schema nullable: false`. 
+ # `f.json_schema nullable: false` (when using `elasticgraph-json_ingestion`). # # @note ElasticGraph’s understanding of datastore capabilities may override your configured # `aggregatable`/`filterable`/`groupable`/`sortable` options. For example, a field indexed as `text` for full text search will @@ -477,7 +477,6 @@ def to_indexing_field_type type_name: name, subfields: indexing_fields_by_name_in_index.values.map(&:to_indexing_field).compact, mapping_options: mapping_options, - json_schema_options: json_schema_options, doc_comment: doc_comment ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb index 401ecfd21..f5db1c0d8 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb @@ -6,7 +6,6 @@ # # frozen_string_literal: true -require "elastic_graph/constants" require "elastic_graph/errors" require "elastic_graph/schema_definition/factory" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" @@ -40,8 +39,7 @@ class State < Struct.new( :deleted_types_by_old_name, :renamed_fields_by_type_name_and_old_field_name, :deleted_fields_by_type_name_and_old_field_name, - :json_schema_version, - :json_schema_version_setter_location, + :ingestion_serializer_state, :graphql_extension_modules, :graphql_resolvers_by_name, :built_in_graphql_resolvers, @@ -53,9 +51,7 @@ class State < Struct.new( :type_refs_by_name, :output, :type_namer, - :enum_value_namer, - :allow_omitted_json_schema_fields, - :allow_extra_json_schema_fields + :enum_value_namer ) include Mixins::HasReadableToSAndInspect.new @@ -88,8 +84,7 @@ def self.with( deleted_types_by_old_name: {}, renamed_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} }, deleted_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} }, - 
json_schema_version_setter_location: nil, - json_schema_version: nil, + ingestion_serializer_state: {}, graphql_extension_modules: [], graphql_resolvers_by_name: {}, built_in_graphql_resolvers: ::Set.new, @@ -104,9 +99,7 @@ def self.with( name_overrides: type_name_overrides ), enum_value_namer: SchemaElements::EnumValueNamer.new(enum_value_overrides_by_type), - output: output, - allow_omitted_json_schema_fields: false, - allow_extra_json_schema_fields: true + output: output ) end @@ -213,12 +206,11 @@ def field_path_resolver private - RESERVED_TYPE_NAMES = [EVENT_ENVELOPE_JSON_SCHEMA_NAME].to_set - def register_type(type, additional_type_index = nil) name = (_ = type).name - if RESERVED_TYPE_NAMES.include?(name) + reserved_names = ingestion_serializer_state[:reserved_type_names] + if reserved_names&.include?(name) raise Errors::SchemaError, "`#{name}` cannot be used as a schema type because it is a reserved name." end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb index 7084db978..656c3789a 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb @@ -7,8 +7,10 @@ # frozen_string_literal: true require "elastic_graph/errors" +require "elastic_graph/schema_artifacts/from_disk" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" require "elastic_graph/schema_definition/api" +require "elastic_graph/schema_definition/extension_module_support" require "elastic_graph/schema_definition/schema_artifact_manager" module ElasticGraph @@ -25,6 +27,7 @@ def define_schema( index_document_sizes: true, json_schema_version: 1, extension_modules: [], + ingestion_serializer_extension_modules: ExtensionModuleSupport.default_ingestion_serializer_extension_modules, derived_type_name_formats: {}, 
type_name_overrides: {}, enum_value_overrides_by_type: {}, @@ -42,6 +45,7 @@ def define_schema( index_document_sizes: index_document_sizes, json_schema_version: json_schema_version, extension_modules: extension_modules, + ingestion_serializer_extension_modules: ingestion_serializer_extension_modules, derived_type_name_formats: derived_type_name_formats, type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type, @@ -56,6 +60,7 @@ def define_schema_with_schema_elements( index_document_sizes: true, json_schema_version: 1, extension_modules: [], + ingestion_serializer_extension_modules: ExtensionModuleSupport.default_ingestion_serializer_extension_modules, derived_type_name_formats: {}, type_name_overrides: {}, enum_value_overrides_by_type: {}, @@ -66,6 +71,7 @@ def define_schema_with_schema_elements( schema_elements, index_document_sizes, extension_modules: extension_modules, + ingestion_serializer_extension_modules: ingestion_serializer_extension_modules, derived_type_name_formats: derived_type_name_formats, type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type, @@ -75,8 +81,8 @@ def define_schema_with_schema_elements( yield api if block_given? # Set the json_schema_version to the provided value, if needed. - if !json_schema_version.nil? && api.state.json_schema_version.nil? - api.json_schema_version json_schema_version + if api.respond_to?(:json_schema_version) && !json_schema_version.nil? && api.state.ingestion_serializer_state[:json_schema_version].nil? + api.public_send(:json_schema_version, json_schema_version) end # :nocov: -- the else branch and code past this aren't used by tests in elasticgraph-schema_definition. 
diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs new file mode 100644 index 000000000..0ea47a435 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs @@ -0,0 +1,13 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module APIExtension: ::ElasticGraph::SchemaDefinition::API + def json_schema_version: (::Integer) -> void + def json_schema_strictness: (?allow_omitted_fields: bool, ?allow_extra_fields: bool) -> void + def results: () -> (::ElasticGraph::SchemaDefinition::Results & ResultsExtension) + + def self.extended: (::ElasticGraph::SchemaDefinition::API & APIExtension) -> void + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs new file mode 100644 index 000000000..bed69cb23 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs @@ -0,0 +1,11 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module EventEnvelope + def self.json_schema: (::Array[::String], ::Integer) -> ::Hash[::String, untyped] + end + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs new file mode 100644 index 000000000..98948fbca --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs @@ -0,0 +1,19 @@ +module 
ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + class JSONSchemaFieldMetadata + attr_reader type: ::String + attr_reader name_in_index: ::String + + def initialize: ( + type: ::String, + name_in_index: ::String + ) -> void + + def to_dumpable_hash: () -> {"type" => ::String, "nameInIndex" => ::String} + end + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs new file mode 100644 index 000000000..d25d7eb92 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs @@ -0,0 +1,99 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + class JSONSchemaWithMetadataSupertype + attr_reader json_schema: ::Hash[::String, untyped] + attr_reader missing_fields: ::Set[::String] + attr_reader missing_types: ::Set[::String] + attr_reader definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + + def initialize: ( + json_schema: ::Hash[::String, untyped], + missing_fields: ::Set[::String], + missing_types: ::Set[::String], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], + missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + ) -> void + + def with: ( + ?json_schema: ::Hash[::String, untyped], + ?missing_fields: ::Set[::String], + ?missing_types: ::Set[::String], + ?definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], + ?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + ) -> instance + end + + class 
JSONSchemaWithMetadata < JSONSchemaWithMetadataSupertype + def json_schema_version: () -> ::Integer + + class Merger + @field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]] + @renamed_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @deleted_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @state: ::ElasticGraph::SchemaDefinition::State + @derived_indexing_type_names: ::Set[::String] + + attr_reader unused_deprecated_elements: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + + def initialize: ((::ElasticGraph::SchemaDefinition::Results & ResultsExtension)) -> void + def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata + + private + + def determine_current_type_name: ( + ::String, + missing_types: ::Set[::String], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + ) -> ::String? + + def field_metadata_for: ( + ::String, + ::String, + missing_fields: ::Set[::String], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + ) -> JSONSchemaFieldMetadata? 
+ + def identify_missing_necessary_fields: ( + ::Hash[::String, untyped], + ::Hash[::String, ::String] + ) -> ::Array[MissingNecessaryField] + + def identify_missing_necessary_fields_for_index_def: ( + ::ElasticGraph::SchemaDefinition::indexableType, + ::ElasticGraph::SchemaDefinition::Indexing::Index, + JSONSchemaResolver + ) -> ::Array[MissingNecessaryField] + + class JSONSchemaResolver + @state: ::ElasticGraph::SchemaDefinition::State + @old_type_name_by_current_name: ::Hash[::String, ::String] + @meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]] + + def initialize: (::ElasticGraph::SchemaDefinition::State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void + def necessary_path_missing?: (::ElasticGraph::SchemaDefinition::SchemaElements::FieldPath) -> bool + + private + + def necessary_path_part_missing?: (::String, ::String) { (::Hash[::String, untyped]) -> void } -> bool + end + end + + class MissingNecessaryField + attr_reader field_type: ::String + attr_reader fully_qualified_path: ::String + + def initialize: ( + field_type: ::String, + fully_qualified_path: ::String + ) -> void + end + end + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs new file mode 100644 index 000000000..898a7660d --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs @@ -0,0 +1,14 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + class JSONSchemaPruner + def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] + + private + + def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String] + def self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String] + end + end + end +end 
diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs new file mode 100644 index 000000000..002b4e85a --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs @@ -0,0 +1,15 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module ResultsExtension : ::ElasticGraph::SchemaDefinition::Results + include ::ElasticGraph::_SchemaArtifacts + + def json_schema_version_setter_location: () -> ::Thread::Backtrace::Location? + def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] + def current_public_json_schema: () -> ::Hash[::String, untyped] + def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> Indexing::JSONSchemaWithMetadata + def unused_deprecated_elements: () -> ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs new file mode 100644 index 000000000..e23a16d49 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs @@ -0,0 +1,28 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaArtifactManagerExtension : ::ElasticGraph::SchemaDefinition::SchemaArtifactManager + attr_reader schema_definition_results: (::ElasticGraph::SchemaDefinition::Results & ResultsExtension) + + private + + @json_schemas_artifact: ::ElasticGraph::SchemaDefinition::SchemaArtifact[untyped]? 
+ + def artifacts_from_schema_def: () -> ::Array[::ElasticGraph::SchemaDefinition::SchemaArtifact[untyped]] + def json_schemas_artifact: () -> ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]] + def check_if_needs_json_schema_version_bump: () { (::Integer) -> void } -> void + def build_desired_versioned_json_schemas: (::Hash[::String, untyped]) -> ::Hash[::Integer, ::Hash[::String, untyped]] + def report_json_schema_merge_errors: (::Array[Indexing::JSONSchemaWithMetadata]) -> void + def report_json_schema_merge_warnings: () -> void + def format_deprecated_elements: (::Enumerable[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]) -> ::String + def missing_field_error_for: (::String, ::Array[::Integer]) -> ::String + def missing_type_error_for: (::String, ::Array[::Integer]) -> ::String + def missing_necessary_field_error_for: (Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String + def describe_json_schema_versions: (::Array[::Integer], ::String) -> ::String + def old_versions: (::Array[::Integer]) -> ::String + def files_noun_phrase: (::Array[::Integer]) -> ::String + def new_versioned_json_schema_artifact: (::Hash[::String, untyped]) -> ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]] + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs index 604ed7842..3aa0445d0 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs @@ -55,13 +55,13 @@ module ElasticGraph SchemaArtifacts::RuntimeMetadata::SchemaElementNames, bool, ?extension_modules: ::Array[::Module], + ?ingestion_serializer_extension_modules: ::Array[::Module], ?derived_type_name_formats: ::Hash[::Symbol, ::String], ?type_name_overrides: ::Hash[::Symbol, 
::String], ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?output: io ) -> void - def json_schema_strictness: (?allow_omitted_fields: bool, ?allow_extra_fields: bool) -> void def raw_sdl: (::String) -> void def object_type: (::String) { (SchemaElements::ObjectType) -> void } -> void def interface_type: (::String) { (SchemaElements::InterfaceType) -> void } -> void @@ -72,7 +72,6 @@ module ElasticGraph def as_active_instance: { () -> void } -> void @results: Results? def results: () -> Results - def json_schema_version: (::Integer) -> void def register_graphql_extension: (::Module, defined_at: ::String, **untyped) -> void def register_graphql_resolver: (::Symbol, ::Class, defined_at: ::String, ?built_in: bool, **untyped) -> void def on_built_in_types: () { (SchemaElements::graphQLType) -> void } -> void diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/event_envelope.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/event_envelope.rbs deleted file mode 100644 index aa609f3de..000000000 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/event_envelope.rbs +++ /dev/null @@ -1,9 +0,0 @@ -module ElasticGraph - module SchemaDefinition - module Indexing - module EventEnvelope - def self.json_schema: (::Array[::String], ::Integer) -> ::Hash[::String, untyped] - end - end - end -end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs index 432e62034..5230505ec 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs @@ -5,10 +5,8 @@ module ElasticGraph attr_reader name: ::String attr_reader name_in_index: ::String attr_reader type: SchemaElements::TypeReference - attr_reader 
json_schema_layers: jsonSchemaLayersArray attr_reader indexing_field_type: _FieldType attr_reader accuracy_confidence: Field::accuracyConfidence - attr_reader json_schema_customizations: ::Hash[::Symbol, untyped] attr_reader mapping_customizations: ::Hash[::Symbol, untyped] attr_reader source: SchemaElements::FieldSource? attr_accessor runtime_field_script: ::String? @@ -18,10 +16,8 @@ module ElasticGraph name: ::String, name_in_index: ::String, type: SchemaElements::TypeReference, - json_schema_layers: jsonSchemaLayersArray, indexing_field_type: _FieldType, accuracy_confidence: Field::accuracyConfidence, - json_schema_customizations: ::Hash[::Symbol, untyped], mapping_customizations: ::Hash[::Symbol, untyped], source: SchemaElements::FieldSource?, runtime_field_script: ::String?, @@ -32,10 +28,8 @@ module ElasticGraph ?name: ::String, ?name_in_index: ::String, ?type: SchemaElements::TypeReference, - ?json_schema_layers: jsonSchemaLayersArray, ?indexing_field_type: _FieldType, ?accuracy_confidence: Field::accuracyConfidence, - ?json_schema_customizations: ::Hash[::Symbol, untyped], ?mapping_customizations: ::Hash[::Symbol, untyped], ?source: SchemaElements::FieldSource?, ?runtime_field_script: ::String?, @@ -44,25 +38,11 @@ module ElasticGraph end class Field < FieldSupertype - JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE: ::Hash[::String, untyped] - type accuracyConfidence = SchemaElements::Field::accuracyConfidence @mapping: ::Hash[::String, untyped]? 
def mapping: () -> ::Hash[::String, untyped] - def json_schema: () -> ::Hash[::String, untyped] - def json_schema_metadata: () -> JSONSchemaFieldMetadata def self.normalized_mapping_hash_for: (::Array[Field]) -> ::Hash[::String, untyped] - - def inner_json_schema: () -> ::Hash[::String, untyped] - def outer_json_schema_customizations: () -> ::Hash[::String, untyped] - - def nullable?: () -> bool - - def user_specified_json_schema_customizations_go_on_outside?: () -> bool - def process_layer: (::Symbol, ::Hash[::String, untyped]) -> ::Hash[::String, untyped] - def make_nullable: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] - def make_array: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs index 55a8f0724..89071a597 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs @@ -6,7 +6,6 @@ module ElasticGraph attr_reader name_in_index: ::String attr_reader type: SchemaElements::TypeReference attr_reader mapping_options: ::Hash[::Symbol, untyped] - attr_reader json_schema_options: ::Hash[::Symbol, untyped] attr_reader accuracy_confidence: Field::accuracyConfidence attr_reader source: SchemaElements::FieldSource? attr_reader runtime_field_script: ::String? 
@@ -17,7 +16,6 @@ module ElasticGraph name_in_index: ::String, type: SchemaElements::TypeReference, mapping_options: ::Hash[::Symbol, untyped], - json_schema_options: ::Hash[::Symbol, untyped], accuracy_confidence: Field::accuracyConfidence, source: SchemaElements::FieldSource?, runtime_field_script: ::String?, @@ -29,7 +27,6 @@ module ElasticGraph ?name_in_index: ::String, ?type: SchemaElements::TypeReference, ?mapping_options: ::Hash[::Symbol, untyped], - ?json_schema_options: ::Hash[::Symbol, untyped], ?accuracy_confidence: Field::accuracyConfidence, ?source: SchemaElements::FieldSource?, ?runtime_field_script: ::String?, diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs index 5808a0709..d01805f99 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs @@ -3,9 +3,6 @@ module ElasticGraph module Indexing interface _FieldType def to_mapping: () -> ::Hash[::String, untyped] - def to_json_schema: () -> ::Hash[::String, untyped] - def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] - def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs index 5dd23dbaa..e1b90df44 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs @@ -3,29 +3,26 @@ module ElasticGraph module Indexing module FieldType class ObjectSuperType + 
attr_reader schema_def_state: State attr_reader type_name: ::String attr_reader subfields: ::Array[Field] attr_reader mapping_options: Mixins::HasTypeInfo::optionsHash - attr_reader json_schema_options: Mixins::HasTypeInfo::optionsHash attr_reader doc_comment: ::String? - attr_reader schema_def_state: State def initialize: ( + schema_def_state: State, type_name: ::String, subfields: ::Array[Field], mapping_options: Mixins::HasTypeInfo::optionsHash, - json_schema_options: Mixins::HasTypeInfo::optionsHash, - doc_comment: ::String?, - schema_def_state: State + doc_comment: ::String? ) -> void def with: ( + ?schema_def_state: State, ?type_name: ::String, ?subfields: ::Array[Field], ?mapping_options: Mixins::HasTypeInfo::optionsHash, - ?json_schema_options: Mixins::HasTypeInfo::optionsHash, - ?doc_comment: ::String?, - ?schema_def_state: State + ?doc_comment: ::String? ) -> Object end @@ -34,12 +31,6 @@ module ElasticGraph include Support::_MemoizableDataClass @to_mapping: ::Hash[::String, untyped]? - @to_json_schema: ::Hash[::String, untyped]? 
- - private - - def json_schema_typename_field: () -> ::Hash[::String, untyped] - def validate_sourced_fields_have_no_json_schema_overrides: (::Array[Field]) -> void end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rbs deleted file mode 100644 index b025eda15..000000000 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rbs +++ /dev/null @@ -1,17 +0,0 @@ -module ElasticGraph - module SchemaDefinition - module Indexing - class JSONSchemaFieldMetadata - attr_reader type: ::String - attr_reader name_in_index: ::String - - def initialize: ( - type: ::String, - name_in_index: ::String - ) -> void - - def to_dumpable_hash: () -> {"type" => ::String, "nameInIndex" => ::String} - end - end - end -end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs deleted file mode 100644 index dcc37b607..000000000 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs +++ /dev/null @@ -1,98 +0,0 @@ -module ElasticGraph - module SchemaDefinition - module Indexing - class JSONSchemaWithMetadataSupertype - attr_reader json_schema: ::Hash[::String, untyped] - attr_reader missing_fields: ::Set[::String] - attr_reader missing_types: ::Set[::String] - attr_reader definition_conflicts: ::Set[SchemaElements::DeprecatedElement] - attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - - def initialize: ( - json_schema: ::Hash[::String, untyped], - missing_fields: ::Set[::String], - missing_types: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement], - 
missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - ) -> void - - def with: ( - ?json_schema: ::Hash[::String, untyped], - ?missing_fields: ::Set[::String], - ?missing_types: ::Set[::String], - ?definition_conflicts: ::Set[SchemaElements::DeprecatedElement], - ?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - ) -> instance - end - - class JSONSchemaWithMetadata < JSONSchemaWithMetadataSupertype - def json_schema_version: () -> ::Integer - - class Merger - @field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]] - @renamed_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] - @deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] - @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - @state: State - @derived_indexing_type_names: ::Set[::String] - - attr_reader unused_deprecated_elements: ::Set[SchemaElements::DeprecatedElement] - - def initialize: (Results) -> void - def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata - - private - - def determine_current_type_name: ( - ::String, - missing_types: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement] - ) -> ::String? - - def field_metadata_for: ( - ::String, - ::String, - missing_fields: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement] - ) -> JSONSchemaFieldMetadata? 
- - def identify_missing_necessary_fields: ( - ::Hash[::String, untyped], - ::Hash[::String, ::String] - ) -> ::Array[MissingNecessaryField] - - def identify_missing_necessary_fields_for_index_def: ( - indexableType, - Index, - JSONSchemaResolver, - ::Integer - ) -> ::Array[MissingNecessaryField] - - class JSONSchemaResolver - @state: State - @old_type_name_by_current_name: ::Hash[::String, ::String] - @meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]] - - def initialize: (State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void - def necessary_path_missing?: (SchemaElements::FieldPath) -> bool - - private - - def necessary_path_part_missing?: (::String, ::String) { (::Hash[::String, untyped]) -> void } -> bool - end - end - - class MissingNecessaryField - attr_reader field_type: ::String - attr_reader fully_qualified_path: ::String - - def initialize: ( - field_type: ::String, - fully_qualified_path: ::String - ) -> void - end - end - end - end -end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs deleted file mode 100644 index 8c5f323a2..000000000 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs +++ /dev/null @@ -1,9 +0,0 @@ -module ElasticGraph - module SchemaDefinition - class JSONSchemaPruner - def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] - def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String] - def self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String] - end - end -end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs index 1f3c1391b..665759c4c 100644 --- 
a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs @@ -6,10 +6,8 @@ module ElasticGraph type optionsHash = ::Hash[::Symbol, untyped] attr_reader mapping_options: optionsHash - attr_reader json_schema_options: optionsHash def mapping: (**untyped) -> void - def json_schema: (**untyped) -> void end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs index 9578488de..9b22bb7ff 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs @@ -11,6 +11,7 @@ module ElasticGraph ?type_name_overrides: ::Hash[::Symbol, ::String], ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?extension_modules: ::Array[::Module], + ?ingestion_serializer_extension_modules: ::Array[::Module], ?enforce_json_schema_version: bool, ?output: io ) -> void @@ -26,6 +27,7 @@ module ElasticGraph @schema_artifacts_directory: ::String | ::Pathname @enforce_json_schema_version: bool @extension_modules: ::Array[::Module] + @ingestion_serializer_extension_modules: ::Array[::Module] @output: io def define_tasks: () -> void diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index c361ab4ba..212249d9b 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -6,30 +6,17 @@ module ElasticGraph end class Results < ResultsSupertype - include _SchemaArtifacts include Support::_MemoizableDataClass - def json_schema_version_setter_location: () -> 
::Thread::Backtrace::Location? - def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] - def current_public_json_schema: () -> ::Hash[::String, untyped] - def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> Indexing::JSONSchemaWithMetadata - def unused_deprecated_elements: () -> ::Set[SchemaElements::DeprecatedElement] def derived_indexing_type_names: () -> ::Set[::String] @graphql_schema_string: ::String? @datastore_config: ::Hash[::String, untyped] @runtime_metadata: SchemaArtifacts::RuntimeMetadata::Schema? - @current_json_schemas: ::Hash[::String, untyped]? @static_script_repo: Scripting::FileSystemRepository? - @available_json_schema_versions: ::Set[::Integer]? @no_circular_dependencies: bool? @field_path_resolver: SchemaElements::FieldPath::Resolver? - @json_schema_indexing_field_types_by_name: ::Hash[::String, Indexing::_FieldType]? @derived_indexing_type_names: ::Set[::String]? - @json_schema_field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]]? - @current_public_json_schema: ::Hash[::String, untyped]? - @latest_versioned_json_schema: ::Hash[::String, untyped]? - @json_schema_with_metadata_merger: Indexing::JSONSchemaWithMetadata::Merger? STATIC_SCRIPT_REPO: Scripting::FileSystemRepository @@ -37,14 +24,11 @@ module ElasticGraph def define_root_graphql_type: () -> void def aggregation_efficiency_hints_for: (::Array[Indexing::DerivedIndexedType]) -> ::String? 
- def json_schema_with_metadata_merger: () -> Indexing::JSONSchemaWithMetadata::Merger def generate_datastore_config: () -> ::Hash[::String, untyped] def build_dynamic_scripts: () -> ::Array[Scripting::Script] def build_runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Schema def identify_extra_update_targets_by_object_type_name: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] def generate_sdl: () -> ::String - def build_public_json_schema: () -> ::Hash[::String, untyped] - def json_schema_indexing_field_types_by_name: () -> ::Hash[::String, Indexing::_FieldType] def verify_runtime_metadata: (SchemaArtifacts::RuntimeMetadata::Schema) -> void def strip_trailing_whitespace: (::String) -> ::String def check_for_circular_dependencies!: () -> void diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs index b4b079de0..a3fd4af47 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs @@ -22,22 +22,11 @@ module ElasticGraph @output: io @max_diff_lines: ::Integer @artifacts: ::Array[SchemaArtifact[untyped]]? 
- @json_schemas_artifact: SchemaArtifact[untyped] def artifacts: () -> ::Array[SchemaArtifact[untyped]] def artifacts_from_schema_def: () -> ::Array[SchemaArtifact[untyped]] def notify_about_unused_type_name_overrides: () -> void def notify_about_unused_enum_value_overrides: () -> void - def build_desired_versioned_json_schemas: (::Hash[::String, untyped]) -> ::Hash[::Integer, ::Hash[::String, untyped]] - def report_json_schema_merge_errors: (::Array[Indexing::JSONSchemaWithMetadata]) -> void - def report_json_schema_merge_warnings: () -> void - def format_deprecated_elements: (::Enumerable[SchemaElements::DeprecatedElement]) -> ::String - def missing_field_error_for: (::String, ::Array[::Integer]) -> ::String - def missing_type_error_for: (::String, ::Array[::Integer]) -> ::String - def missing_necessary_field_error_for: (Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String - def describe_json_schema_versions: (::Array[::Integer], ::String) -> ::String - def old_versions: (::Array[::Integer]) -> ::String - def files_noun_phrase: (::Array[::Integer]) -> ::String def artifacts_out_of_date_error: (::Array[SchemaArtifact[untyped]]) -> ::String def truncate_diff: (::String, ::Integer) -> [::String, ::String] @@ -47,9 +36,7 @@ module ElasticGraph ?extra_comment_lines: ::Array[::String] ) -> SchemaArtifact[::Hash[::String, untyped]] - def new_versioned_json_schema_artifact: (::Hash[::String, untyped]) -> SchemaArtifact[::Hash[::String, untyped]] def new_raw_artifact: (::String, ::String) -> SchemaArtifact[::String] - def check_if_needs_json_schema_version_bump: () { (::Integer) -> void } -> void def pruned_runtime_metadata: (::String) -> SchemaArtifacts::RuntimeMetadata::Schema end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs index be91e03a3..81faa1505 100644 --- 
a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs @@ -22,7 +22,6 @@ module ElasticGraph attr_accessor computation_detail: SchemaArtifacts::RuntimeMetadata::ComputationDetail attr_reader filter_customizations: ::Array[^(Field) -> void] attr_reader sort_order_enum_value_customizations: ::Array[^(SortOrderEnumValue) -> void] - attr_reader non_nullable_in_json_schema: bool attr_reader source: FieldSource? attr_accessor relationship: Relationship? attr_reader resolver: SchemaArtifacts::RuntimeMetadata::ConfiguredGraphQLResolver? diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs index 9d418a38f..388116f00 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs @@ -30,9 +30,6 @@ module ElasticGraph def scalar_type_needing_grouped_by_object?: () -> bool def with_reverted_override: () -> TypeReference - @json_schema_layers: jsonSchemaLayersArray? 
- def json_schema_layers: () -> jsonSchemaLayersArray - def to_final_form: (?as_input: bool) -> TypeReference STATIC_FORMAT_NAME_BY_CATEGORY: ::Hash[::Symbol, ::Symbol] @@ -59,8 +56,6 @@ module ElasticGraph private - def peel_json_schema_layers_once: () -> [jsonSchemaLayersArray, TypeReference] - def matches_format_of?: (::Symbol) -> bool def parent_aggregation_type: (::Array[::String]) -> ::String def renamed_with_same_wrappings: (::String) -> TypeReference diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs index 01bb06fa1..570ac49e3 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs @@ -9,6 +9,7 @@ module ElasticGraph attr_reader scalar_types_by_name: ::Hash[::String, SchemaElements::ScalarType] attr_reader enum_types_by_name: ::Hash[::String, SchemaElements::EnumType] attr_reader implementations_by_interface_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::TypeWithSubfields]] + attr_reader union_types_by_member_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::UnionType]] attr_reader sdl_parts: ::Array[::String] attr_reader paginated_collection_element_types: ::Set[::String] attr_reader user_defined_fields: ::Set[SchemaElements::Field] @@ -16,8 +17,7 @@ module ElasticGraph attr_reader deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] attr_reader renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] attr_reader deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - attr_accessor json_schema_version: ::Integer? - attr_accessor json_schema_version_setter_location: ::Thread::Backtrace::Location? 
+ attr_accessor ingestion_serializer_state: ::Hash[::Symbol, untyped] attr_reader graphql_extension_modules: ::Array[SchemaArtifacts::RuntimeMetadata::GraphQLExtension] attr_reader graphql_resolvers_by_name: ::Hash[::Symbol, SchemaArtifacts::RuntimeMetadata::GraphQLResolver] attr_reader built_in_graphql_resolvers: ::Set[::Symbol] @@ -27,11 +27,9 @@ module ElasticGraph attr_accessor user_definition_complete_callbacks: ::Array[^() -> void] attr_accessor sub_aggregation_paths_by_type: ::Hash[Mixins::SupportsFilteringAndAggregation, ::Array[SchemaElements::SubAggregationPath]] attr_accessor type_refs_by_name: ::Hash[::String, SchemaElements::TypeReference] + attr_accessor output: io attr_reader type_namer: SchemaElements::TypeNamer attr_reader enum_value_namer: SchemaElements::EnumValueNamer - attr_accessor output: io - attr_accessor allow_omitted_json_schema_fields: bool - attr_accessor allow_extra_json_schema_fields: bool def initialize: ( api: API, @@ -42,6 +40,7 @@ module ElasticGraph scalar_types_by_name: ::Hash[::String, SchemaElements::ScalarType], enum_types_by_name: ::Hash[::String, SchemaElements::EnumType], implementations_by_interface_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::TypeWithSubfields]], + union_types_by_member_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::UnionType]], sdl_parts: ::Array[::String], paginated_collection_element_types: ::Set[::String], user_defined_fields: ::Set[SchemaElements::Field], @@ -49,8 +48,7 @@ module ElasticGraph deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement], renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]], deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]], - json_schema_version: Integer?, - json_schema_version_setter_location: ::Thread::Backtrace::Location?, + ingestion_serializer_state: ::Hash[::Symbol, untyped], 
graphql_extension_modules: ::Array[SchemaArtifacts::RuntimeMetadata::GraphQLExtension], graphql_resolvers_by_name: ::Hash[::Symbol, SchemaArtifacts::RuntimeMetadata::GraphQLResolver], built_in_graphql_resolvers: ::Set[::Symbol], @@ -60,11 +58,9 @@ module ElasticGraph user_definition_complete_callbacks: ::Array[^() -> void], sub_aggregation_paths_by_type: ::Hash[Mixins::SupportsFilteringAndAggregation, ::Array[SchemaElements::SubAggregationPath]], type_refs_by_name: ::Hash[::String, SchemaElements::TypeReference], + output: io, type_namer: SchemaElements::TypeNamer, enum_value_namer: SchemaElements::EnumValueNamer, - output: io, - allow_omitted_json_schema_fields: bool, - allow_extra_json_schema_fields: bool, ) -> void end @@ -110,7 +106,6 @@ module ElasticGraph private - RESERVED_TYPE_NAMES: ::Set[::String] def register_type: [T] (T & SchemaElements::graphQLType, ?::Hash[::String, T]?) -> T end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs index 211039ae3..60836dd7c 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs @@ -7,24 +7,26 @@ module ElasticGraph ?index_document_sizes: bool, ?json_schema_version: ::Integer, ?extension_modules: ::Array[::Module], + ?ingestion_serializer_extension_modules: ::Array[::Module], ?derived_type_name_formats: ::Hash[::Symbol, ::String], ?type_name_overrides: ::Hash[::Symbol, ::String], ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?output: io?, ?reload_schema_artifacts: bool, - ) ?{ (API) -> void } -> _SchemaArtifacts + ) ?{ (API) -> void } -> (Results | SchemaArtifacts::FromDisk) def define_schema_with_schema_elements: ( SchemaArtifacts::RuntimeMetadata::SchemaElementNames, ?index_document_sizes: bool, ?json_schema_version: 
::Integer, ?extension_modules: ::Array[::Module], + ?ingestion_serializer_extension_modules: ::Array[::Module], ?derived_type_name_formats: ::Hash[::Symbol, ::String], ?type_name_overrides: ::Hash[::Symbol, ::String], ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?output: io?, ?reload_schema_artifacts: bool, - ) ?{ (API) -> void } -> _SchemaArtifacts + ) ?{ (API) -> void } -> (Results | SchemaArtifacts::FromDisk) DOC_COMMENTS: ::String diff --git a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb index 8466e2e61..b9372b25a 100644 --- a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb +++ b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb @@ -276,6 +276,28 @@ module SchemaDefinition expect(unprefixed_schema).to eq(uncustomized_graphql_schema) end + it "supports disabling the default JSON Schema ingestion serializer" do + ::File.write("schema.rb", <<~RUBY) + ElasticGraph.define_schema do |schema| + schema.object_type "Component" do |t| + t.field "id", "ID!" 
+ t.index "components" + end + end + RUBY + + output = run_rake("schema_artifacts:dump", ingestion_serializer_extension_modules: []) + + expect(output.lines).to include( + a_string_including("Dumped", DATASTORE_CONFIG_FILE), + a_string_including("Dumped", RUNTIME_METADATA_FILE), + a_string_including("Dumped", GRAPHQL_SCHEMA_FILE) + ) + expect(output.lines).to exclude(a_string_including(JSON_SCHEMAS_FILE)) + expect(read_artifact(JSON_SCHEMAS_FILE)).to eq(false) + expect(read_artifact(versioned_json_schema_file(1))).to eq(false) + end + it "generates separate input vs output enums by default, but allows them to be the same if desired" do write_elastic_graph_schema_def_code(json_schema_version: 1) @@ -1062,6 +1084,7 @@ def run_rake( pretend_tty: false, path_to_schema: "schema.rb", include_extension_module: true, + ingestion_serializer_extension_modules: ExtensionModuleSupport.default_ingestion_serializer_extension_modules, derived_type_name_formats: {}, type_name_overrides: {}, enum_value_overrides_by_type: {} @@ -1087,6 +1110,7 @@ def as_active_instance schema_artifacts_directory: "config/schema/artifacts", enforce_json_schema_version: enforce_json_schema_version, extension_modules: [extension_module].compact, + ingestion_serializer_extension_modules: ingestion_serializer_extension_modules, derived_type_name_formats: derived_type_name_formats, type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type, diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb index b0850973a..632d04ed2 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb @@ -49,6 +49,72 @@ module 
SchemaDefinition EOS end + it "exposes ingestion serializer APIs by default for backward compatibility" do + api = API.new(schema_elements, true, extension_modules: [Module.new]) + + expect(api).to respond_to(:json_schema_version) + expect(api).to respond_to(:json_schema_strictness) + expect(api.results).to respond_to(:json_schemas_for) + expect(api.results).to respond_to(:available_json_schema_versions) + end + + it "allows the factory to build an object type even when no block is provided" do + api = API.new(schema_elements, true) + + object_type = api.factory.new_object_type("Widget") + + expect(object_type.name).to eq("Widget") + end + + it "allows the default ingestion serializer APIs to be explicitly disabled" do + api = API.new(schema_elements, true, ingestion_serializer_extension_modules: []) + + expect(api).not_to respond_to(:json_schema_version) + expect(api).not_to respond_to(:json_schema_strictness) + expect(api.results).not_to respond_to(:json_schemas_for) + expect(api.results).not_to respond_to(:available_json_schema_versions) + end + + it "does not try to configure a JSON schema version when ingestion serializer extensions are disabled" do + expect { + define_schema(ingestion_serializer_extension_modules: []) do |schema| + schema.object_type("Widget") do |t| + t.field "id", "ID" + end + end + }.not_to raise_error + end + + it "keeps ingestion serializer APIs disabled through the test support helpers" do + results = define_schema_with_schema_elements( + schema_elements, + ingestion_serializer_extension_modules: [] + ) do |schema| + schema.object_type("Widget") do |t| + t.field "id", "ID" + end + end + + expect(results).not_to respond_to(:json_schemas_for) + expect(results).not_to respond_to(:available_json_schema_versions) + end + + it "raises an error when trying to register a reserved type name" do + reserved_extension = Module.new do + def self.extended(api) + api.instance_variable_get(:@state).ingestion_serializer_state[:reserved_type_names] = 
Set["ReservedName"] + end + end + + expect { + define_schema(ingestion_serializer_extension_modules: [reserved_extension]) do |schema| + schema.object_type "ReservedName" do |t| + t.field "id", "ID!" + end + end + }.to raise_error Errors::SchemaError, a_string_including("`ReservedName`", "reserved name") + end + it "raises a clear error when there is no active API instance" do expect { ElasticGraph.define_schema { |schema| } diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/enum_type_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/enum_type_spec.rb index 758571e15..9e618d538 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/enum_type_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/enum_type_spec.rb @@ -139,6 +139,24 @@ module SchemaDefinition expect(result.lines.grep(/ColorInput/)).to be_empty end + it "handles enums where the input and output names are the same" do + result = define_schema(derived_type_name_formats: {InputEnum: "%{base}"}) do |schema| + schema.enum_type "Color" do |t| + t.value "RED" + t.value "BLUE" + end + + schema.object_type "Widget" do |type| + type.field "id", "ID!" + type.field "color", "Color!" 
+ type.index "widgets" + end + end + + # Only one `enum Color` should appear (no separate input enum) + expect(result.scan(/^enum Color\b/).size).to eq(1) + end + it "allows the input variant to be customized using `customize_derived_types`" do result = define_schema do |schema| schema.enum_type "Color" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb deleted file mode 100644 index bd24d3c13..000000000 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb +++ /dev/null @@ -1,1070 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. -# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. -# -# frozen_string_literal: true - -require "elastic_graph/spec_support/schema_definition_helpers" -require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" - -module ElasticGraph - module SchemaDefinition - module Indexing - ::RSpec.describe JSONSchemaWithMetadata do - include_context "SchemaDefinitionHelpers" - - it "ignores derived indexed types that do not show up in the JSON schema" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - t.field "cost_currency", "String" - t.field "cost_currency_name", "String" - t.derive_indexed_type_fields "WidgetCurrency", from_id: "cost_currency" do |derive| - derive.immutable_value "name", from: "cost_currency_name" - end - end - - schema.object_type "WidgetCurrency" do |t| - t.field "id", "ID!" 
- t.field "name", "String" - t.index "widget_currencies" - end - end - - expect(v1_json_schema.fetch("$defs").keys).to include("Widget").and exclude("WidgetCurrency") - end - - context "when merged into an old versioned JSON schema" do - it "maintains the same metadata when a field has not changed" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - expect( - metadata_for(v1_json_schema, "Widget", "amount") - ).to eq(metadata_for(updated_v1_json_schema, "Widget", "amount")).and have_dumped_metadata("amount", "Float") - end - - it "does not record metadata on the `__typename` field since it has special handling in our indexing logic" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - expect( - v1_json_schema.dig("$defs", "Widget", "properties", "__typename").keys - ).to eq(updated_v1_json_schema.dig("$defs", "Widget", "properties", "__typename").keys).and exclude("ElasticGraph") - end - - it "records a changed field `type` so that the correct indexing preparer gets used when events at the old version are ingested" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field 
"amount", "Int" - end - end - - expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") - expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Int") - end - - it "records a changed field `name_in_index` so that the field gets written to the correct field in the index" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "description", "String" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "description", "String", name_in_index: "description_text" do |f| - f.mapping type: "text" - end - end - end - - expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") - expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to have_dumped_metadata("description_text", "String") - end - - it "notifies of an issue when a field has been deleted or renamed without recording what happened" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "description", "String" - end - end - - missing_fields = dump_versioned_json_schema_missing_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "full_description", "String", name_in_index: "description" - end - end - - expect(missing_fields).to contain_exactly("Widget.description", "Widget.id") - end - - it "supports renamed fields when `renamed_from` is used" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "description", "String" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do 
|schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "full_description", "String!", name_in_index: "description" do |f| - f.renamed_from "description" - end - end - end - - expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") - expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String!") - end - - it "supports deleted fields when `deleted_field` is used" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "description", "String" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.deleted_field "description" - end - end - - expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") - expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to eq nil - end - - it "notifies of an issue when a type has been deleted or renamed without recording what happened" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Options" do |t| - t.field "size", "Int" - end - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - missing_types = dump_versioned_json_schema_missing_types(v1_json_schema) do |schema| - schema.json_schema_version 2 - - # Widget has been renamed to `Component`. 
- schema.object_type "Component" do |t| - t.field "amount", "Float" - end - end - - expect(missing_types).to contain_exactly("Options", "Widget") - end - - it "supports renamed types when `renamed_from` is used" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Component" do |t| - t.field "amount", "Int", name_in_index: "amount_int" - t.renamed_from "Widget" - end - end - - expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") - expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount_int", "Int") - end - - it "supports deleted types when `deleted_type` is used" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "amount", "Float" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Component" do |t| - t.field "id", "ID" - end - - schema.deleted_type "Widget" - end - - expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") - expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to eq(nil) - end - - it "supports deleted and renamed fields on a renamed type so long as these are indicated through `deleted_` and `renamed_` API calls" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "token", "String" - t.field "amount", "Float" - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Component" do |t| - 
t.renamed_from "Widget" - - t.field "id", "ID" do |f| - f.renamed_from "token" - end - - t.deleted_field "amount" - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "token")).to have_dumped_metadata("id", "ID") - expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to eq(nil) - end - - it "keeps track of unused `deleted_field` calls" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "token", "ID" - end - end - - unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.deleted_field "token" # used - t.deleted_field "other" # unused - end - end - - expect(unused_deprecated_elements.map(&:description)).to eq [ - %(`type.deleted_field "other"` at #{__FILE__}:#{__LINE__ - 5}) - ] - end - - it "keeps track of unused `renamed_field` calls" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "token", "ID" - end - end - - unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" do |f| - f.renamed_from "token" # used - f.renamed_from "other" # unused - end - end - end - - expect(unused_deprecated_elements.map(&:description)).to eq [ - %(`field.renamed_from "other"` at #{__FILE__}:#{__LINE__ - 6}) - ] - end - - it "keeps track of unused `deleted_type` calls" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "token", "ID" - end - end - - unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| - schema.json_schema_version 2 - - 
schema.deleted_type "Widget" # used - schema.deleted_type "Other" # unused - end - - expect(unused_deprecated_elements.map(&:description)).to eq [ - %(`schema.deleted_type "Other"` at #{__FILE__}:#{__LINE__ - 4}) - ] - end - - it "keeps track of unused `renamed_type` calls" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "token", "ID" - end - end - - unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Component" do |t| - t.field "token", "ID" - t.renamed_from "Widget" # used - t.renamed_from "Other" # unused - end - end - - expect(unused_deprecated_elements.map(&:description)).to eq [ - %(`type.renamed_from "Other"` at #{__FILE__}:#{__LINE__ - 5}) - ] - end - - context "on a type that is using `route_with`" do - it "does not allow a `route_with` field to be entirely missing from an old version of the schema" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "workspace_id", "ID" - - t.index "widgets" do |f| - f.route_with "workspace_id" - end - end - end - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "workspace_id2", "ID" - t.deleted_field "workspace_id" - - t.index "widgets" do |f| - f.route_with "workspace_id2" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.workspace_id2")] - end - - it "uses the `name_in_index` when determining if a `route_with` field is missing from an old version of the schema" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - 
t.field "id", "ID" - t.field "workspace_id", "ID" - - t.index "widgets" do |f| - f.route_with "workspace_id" - end - end - end - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "workspace_id2", "ID", name_in_index: "workspace_id3" - t.deleted_field "workspace_id" - - t.index "widgets" do |f| - f.route_with "workspace_id2" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.workspace_id3")] - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "workspace_id2", "ID", name_in_index: "workspace_id" do |f| - f.renamed_from "workspace_id" - end - - t.index "widgets" do |f| - f.route_with "workspace_id2" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to include("nameInIndex" => "workspace_id") - end - - it "handles embedded fields when determining if a `route_with` field is missing from an old schema version" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Embedded" do |t| - t.field "workspace_id", "ID" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded", "Embedded" - - t.index "widgets" do |f| - f.route_with "embedded.workspace_id" - end - end - end - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Embedded" do |t| - t.field "workspace_id", "ID" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded2", "Embedded" - t.deleted_field "embedded" - - t.index "widgets" do |f| - f.route_with "embedded2.workspace_id" - end - end - end - - 
expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.embedded2.workspace_id")] - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Embedded" do |t| - t.field "workspace_id", "ID" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded2", "Embedded" do |f| - f.renamed_from "embedded" - end - - t.index "widgets" do |f| - f.route_with "embedded2.workspace_id" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "embedded")).to include("nameInIndex" => "embedded2") - end - - it "handles renamed types" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "workspace_id", "ID" - - t.index "widgets" do |f| - f.route_with "workspace_id" - end - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget2" do |t| - t.field "id", "ID" - t.field "workspace_id", "ID" - t.renamed_from "Widget" - - t.index "widgets" do |f| - f.route_with "workspace_id" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to include("nameInIndex" => "workspace_id") - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget2" do |t| - t.field "id", "ID" - t.field "workspace_id2", "ID" - t.deleted_field "workspace_id" - t.renamed_from "Widget" - - t.index "widgets" do |f| - f.route_with "workspace_id2" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget2.workspace_id2")] - end - - it "handles deleted types" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type 
"Widget" do |t| - t.field "id", "ID" - t.field "workspace_id", "ID" - - t.index "widgets" do |f| - f.route_with "workspace_id" - end - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.deleted_type "Widget" - - schema.object_type "Widget2" do |t| - t.field "id", "ID" - t.field "workspace_id", "ID" - - t.index "widgets" do |f| - f.route_with "workspace_id" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget2", "workspace_id")).to eq nil - expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to eq nil - end - end - - context "on a type using `rollover`" do - it "does not allow a `rollover` field to be entirely missing from an old version of the schema" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at", "DateTime" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at" - end - end - end - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at2", "DateTime", name_in_index: "created_at3" - t.deleted_field "created_at" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at2" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.created_at3")] - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at2", "DateTime", name_in_index: "created_at" do |f| - f.renamed_from "created_at" - end - - t.index "widgets" do |f| - f.rollover :yearly, "created_at2" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to 
include("nameInIndex" => "created_at") - end - - it "uses the `name_in_index` when determining if a `rollover` field is missing from an old version of the schema" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at", "DateTime" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at" - end - end - end - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at2", "DateTime" - t.deleted_field "created_at" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at2" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.created_at2")] - end - - it "handles embedded fields when determining if a `rollover` field is missing from an old schema version" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Embedded" do |t| - t.field "created_at", "DateTime" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded", "Embedded" - - t.index "widgets" do |f| - f.rollover :yearly, "embedded.created_at" - end - end - end - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Embedded" do |t| - t.field "created_at", "DateTime" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded2", "Embedded" - t.deleted_field "embedded" - - t.index "widgets" do |f| - f.rollover :yearly, "embedded2.created_at" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.embedded2.created_at")] - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do 
|schema| - schema.json_schema_version 2 - - schema.object_type "Embedded" do |t| - t.field "created_at", "DateTime" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded2", "Embedded" do |f| - f.renamed_from "embedded" - end - - t.index "widgets" do |f| - f.rollover :yearly, "embedded2.created_at" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "embedded")).to include("nameInIndex" => "embedded2") - end - - it "handles renamed types" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at", "DateTime" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at" - end - end - end - - updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget2" do |t| - t.field "id", "ID" - t.field "created_at", "DateTime" - t.renamed_from "Widget" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to include("nameInIndex" => "created_at") - - missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget2" do |t| - t.field "id", "ID" - t.field "created_at2", "DateTime" - t.deleted_field "created_at" - t.renamed_from "Widget" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at2" - end - end - end - - expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget2.created_at2")] - end - - it "handles deleted types" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "created_at", "DateTime" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at" - end - end - end - - 
updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.deleted_type "Widget" - - schema.object_type "Widget2" do |t| - t.field "id", "ID" - t.field "created_at", "DateTime" - - t.index "widgets" do |f| - f.rollover :yearly, "created_at" - end - end - end - - expect(metadata_for(updated_v1_json_schema, "Widget2", "created_at")).to eq nil - expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to eq nil - end - end - - describe "conflicting definition tracking" do - it "includes a type that exists and is referenced from `deleted_type`" do - elements = dump_versioned_json_schema_definition_conflicts do |schema| - schema.object_type "Widget" do |t| - t.field "id", "ID" - end - - schema.deleted_type "Widget" - end - - expect(elements.map(&:description)).to contain_exactly( - %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 4}) - ) - end - - it "includes a type that exists and is referenced from `renamed_from`" do - elements = dump_versioned_json_schema_definition_conflicts do |schema| - schema.object_type "Widget" do |t| - t.field "id", "ID" - end - - schema.object_type "Component" do |t| - t.field "id", "ID" - t.renamed_from "Widget" - end - end - - expect(elements.map(&:description)).to contain_exactly( - %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 5}) - ) - end - - it "includes a type that exists and is referenced from `deleted_type` and `renamed_from`" do - elements = dump_versioned_json_schema_definition_conflicts do |schema| - schema.object_type "Widget" do |t| - t.field "id", "ID" - end - - schema.object_type "Component" do |t| - t.field "id", "ID" - t.renamed_from "Widget" - end - - schema.deleted_type "Widget" - end - - expect(elements.map(&:description)).to contain_exactly( - %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 7}), - %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 5}) - ) - end - - it "includes a type that is 
referenced from `deleted_type` and `renamed_from` but does not exist" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "token", "ID" - end - end - - elements = dump_versioned_json_schema_definition_conflicts(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Component" do |t| - t.field "id", "ID" - t.renamed_from "Widget" - end - - schema.deleted_type "Widget" - end - - expect(elements.map(&:description)).to contain_exactly( - %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 7}), - %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 5}) - ) - end - - it "includes a field that exists and is referenced from `deleted_field`" do - elements = dump_versioned_json_schema_definition_conflicts do |schema| - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.deleted_field "id" - end - end - - expect(elements.map(&:description)).to contain_exactly( - %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}) - ) - end - - it "includes a field that exists and is referenced from `renamed_from`" do - elements = dump_versioned_json_schema_definition_conflicts do |schema| - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "token", "ID" do |f| - f.renamed_from "id" - end - end - end - - expect(elements.map(&:description)).to contain_exactly( - %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 6}) - ) - end - - it "includes a field that exists and is referenced from `deleted_field` and `renamed_from`" do - elements = dump_versioned_json_schema_definition_conflicts do |schema| - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "token", "ID" do |f| - f.renamed_from "id" - end - t.deleted_field "id" - end - end - - expect(elements.map(&:description)).to contain_exactly( - %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}), - %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 8}) - ) 
- end - - it "includes a field that is referenced from `deleted_field` and `renamed_from` but does not exist" do - v1_json_schema = dump_versioned_json_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Widget" do |t| - t.field "id", "ID" - end - end - - elements = dump_versioned_json_schema_definition_conflicts(v1_json_schema) do |schema| - schema.json_schema_version 2 - - schema.object_type "Widget" do |t| - t.field "token", "ID" do |f| - f.renamed_from "id" - end - t.deleted_field "id" - end - end - - expect(elements.map(&:description)).to contain_exactly( - %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}), - %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 8}) - ) - end - end - end - - def dump_versioned_json_schema(old_versioned_json_schema = nil, &schema_definition) - merge_result = perform_merge(old_versioned_json_schema, &schema_definition) - - expect(merge_result.missing_fields).to be_empty - expect(merge_result.missing_types).to be_empty - expect(merge_result.definition_conflicts).to be_empty - expect(merge_result.missing_necessary_fields).to be_empty - - merge_result.json_schema - end - - def dump_versioned_json_schema_missing_fields(old_versioned_json_schema = nil, &schema_definition) - merge_result = perform_merge(old_versioned_json_schema, &schema_definition) - - expect(merge_result.missing_fields).not_to be_empty - expect(merge_result.missing_types).to be_empty - expect(merge_result.definition_conflicts).to be_empty - expect(merge_result.missing_necessary_fields).to be_empty - - merge_result.missing_fields - end - - def dump_versioned_json_schema_definition_conflicts(old_versioned_json_schema = nil, &schema_definition) - merge_result = perform_merge(old_versioned_json_schema, &schema_definition) - - expect(merge_result.missing_fields).to be_empty - expect(merge_result.missing_types).to be_empty - expect(merge_result.definition_conflicts).not_to be_empty - expect(merge_result.missing_necessary_fields).to 
be_empty - - merge_result.definition_conflicts - end - - def dump_versioned_json_schema_missing_types(old_versioned_json_schema = nil, &schema_definition) - merge_result = perform_merge(old_versioned_json_schema, &schema_definition) - - expect(merge_result.missing_fields).to be_empty - expect(merge_result.missing_types).not_to be_empty - expect(merge_result.definition_conflicts).to be_empty - expect(merge_result.missing_necessary_fields).to be_empty - - merge_result.missing_types - end - - def dump_versioned_json_schema_missing_necessary_fields(old_versioned_json_schema = nil, &schema_definition) - merge_result = perform_merge(old_versioned_json_schema, &schema_definition) - - expect(merge_result.missing_fields).to be_empty - expect(merge_result.missing_types).to be_empty - expect(merge_result.definition_conflicts).to be_empty - expect(merge_result.missing_necessary_fields).not_to be_empty - - merge_result.missing_necessary_fields - end - - def dump_versioned_json_schema_unused_deprecated_elements(old_versioned_json_schema = nil, &schema_definition) - results = define_schema(&schema_definition) - results.merge_field_metadata_into_json_schema(old_versioned_json_schema || results.current_public_json_schema) - results.unused_deprecated_elements - end - - def perform_merge(old_versioned_json_schema = nil, &schema_definition) - results = define_schema(&schema_definition) - results.merge_field_metadata_into_json_schema(old_versioned_json_schema || results.current_public_json_schema).tap do - expect(results.unused_deprecated_elements).to be_empty - end - end - - def metadata_for(json_schema, type, field) - json_schema.dig("$defs", type, "properties", field, "ElasticGraph") - end - - def define_schema(&schema_definition) - super(schema_element_name_form: "snake_case", &schema_definition) - end - - def have_dumped_metadata(name_in_index, type) - eq({"nameInIndex" => name_in_index, "type" => type}) - end - - def missing_necessary_field_of(field_type, fully_qualified_path) - 
JSONSchemaWithMetadata::MissingNecessaryField.new(field_type, fully_qualified_path) - end - end - end - end -end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb deleted file mode 100644 index 6c0aa3799..000000000 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. -# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. -# -# frozen_string_literal: true - -require "elastic_graph/spec_support/schema_definition_helpers" - -module ElasticGraph - module SchemaDefinition - ::RSpec.describe "JSON schema field metadata generation" do - include_context "SchemaDefinitionHelpers" - - it "generates no field metadata for built-in scalar and enum types" do - metadata_by_type_and_field_name = dump_metadata - - json_schema_field_metadata = %w[ - Boolean Float ID Int String - Cursor Date DateTime DistanceUnit JsonSafeLong LocalTime LongString TimeZone Untyped - ].map do |type_name| - metadata_by_type_and_field_name.fetch(type_name) - end - - expect(json_schema_field_metadata).to all eq({}) - end - - it "generates field metadata for built-in object types" do - metadata_by_field_name = dump_metadata.fetch("GeoLocation") - - expect(metadata_by_field_name).to eq({ - "latitude" => field_meta_of("Float!", "lat"), - "longitude" => field_meta_of("Float!", "lon") - }) - end - - it "generates field metadata for user-defined object types" do - metadata_by_field_name = dump_metadata do |schema| - schema.object_type "Money" do |t| - t.field "amount", "Int" - t.field "currency", "String" - end - end.fetch("Money") - - expect(metadata_by_field_name).to eq({ - "amount" => field_meta_of("Int", 
"amount"), - "currency" => field_meta_of("String", "currency") - }) - end - - it "respects the type and `name_in_index` on user-defined fields" do - metadata_by_field_name = dump_metadata do |schema| - schema.object_type "Money" do |t| - t.field "amount", "Int!", name_in_index: "amount2" - t.field "currency", "[String]!", name_in_index: "currency2" - end - end.fetch("Money") - - expect(metadata_by_field_name).to eq({ - "amount" => field_meta_of("Int!", "amount2"), - "currency" => field_meta_of("[String]!", "currency2") - }) - end - - it "generates no field metadata for user-defined scalar or enum types since they have no subfields" do - metadata_by_type_and_field_name = dump_metadata do |schema| - schema.scalar_type "Url" do |t| - t.json_schema type: "string" - t.mapping type: "keyword" - end - - schema.enum_type "Color" do |t| - t.value "RED" - t.value "GREEN" - t.value "BLUE" - end - end - - json_schema_field_metadata = %w[Url Color].map do |type_name| - metadata_by_type_and_field_name.fetch(type_name) - end - - expect(json_schema_field_metadata).to all eq({}) - end - - it "generates no field metadata for user-defined union or interface types since the JSON schema" do - metadata_by_type_and_field_name = dump_metadata do |schema| - schema.interface_type "Named" do |t| - t.field "name", "String" - end - - schema.union_type "Character" do |t| - t.subtype "Droid" - t.subtype "Human" - end - - schema.object_type "Droid" do |t| - t.implements "Named" - t.field "name", "String" - t.field "model", "String" - end - - schema.object_type "Human" do |t| - t.implements "Named" - t.field "name", "String" - t.field "home_planet", "String" - end - end - - json_schema_field_metadata = %w[Named Character].map do |type_name| - metadata_by_type_and_field_name.fetch(type_name) - end - - expect(json_schema_field_metadata).to all eq({}) - end - - it "includes the JSON schema field metadata in the versioned JSON schemas but not in the current public JSON schema" do - results = 
define_schema do |schema| - schema.object_type "Money" do |t| - t.field "amount", "Int" - t.field "currency", "String" - end - end - - amount_path = ["$defs", "Money", "properties", "amount"] - - expect(results.json_schemas_for(1).dig(*amount_path)).to eq({ - "anyOf" => [{"$ref" => "#/$defs/Int"}, {"type" => "null"}], - "ElasticGraph" => {"nameInIndex" => "amount", "type" => "Int"} - }) - - expect(results.current_public_json_schema.dig(*amount_path)).to eq({ - "anyOf" => [{"$ref" => "#/$defs/Int"}, {"type" => "null"}] - }) - end - - def dump_metadata(&schema_definition) - define_schema(&schema_definition).json_schema_field_metadata_by_type_and_field_name - end - - def define_schema(&schema_definition) - super(schema_element_name_form: "snake_case", &schema_definition) - end - - def field_meta_of(type, name_in_index) - Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index) - end - end - end -end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb deleted file mode 100644 index af9298366..000000000 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright 2024 - 2026 Block, Inc. -# -# Use of this source code is governed by an MIT-style -# license that can be found in the LICENSE file or at -# https://opensource.org/licenses/MIT. 
-# -# frozen_string_literal: true - -require "elastic_graph/constants" -require "elastic_graph/spec_support/schema_definition_helpers" -require "elastic_graph/schema_definition/json_schema_pruner" - -module ElasticGraph - module SchemaDefinition - RSpec.describe JSONSchemaPruner do - include_context "SchemaDefinitionHelpers" - - describe ".prune" do - subject { described_class.prune(schema) } - - shared_examples "prunes types not referenced by indexed types" do |expected_type_names| - it do - expect(subject["$defs"].keys).to match_array(expected_type_names) - end - end - - context "when there are indexable types" do - let(:schema) do - dump_schema do |s| - # Widget and Boolean should be present - s.object_type "Widget" do |t| - t.field "id", "ID!" - t.field "inStock", "Boolean" - t.index "widgets" - end - - # UnindexedWidget and Float should get pruned - s.object_type "UnindexedWidget" do |t| - t.field "id", "ID!" - t.field "cost", "Float" - end - end - end - - it_behaves_like "prunes types not referenced by indexed types", - [EVENT_ENVELOPE_JSON_SCHEMA_NAME, "Boolean", "ID", "Widget"] - end - - context "when there are no types defined" do - let(:schema) { dump_schema } - - it_behaves_like "prunes types not referenced by indexed types", [EVENT_ENVELOPE_JSON_SCHEMA_NAME] - end - - context "when there are no indexable types defined" do - let(:schema) do - dump_schema do |s| - # UnindexedWidget and Float should get pruned - s.object_type "UnindexedWidget" do |t| - t.field "id", "ID!" - t.field "cost", "Float" - end - end - end - - it_behaves_like "prunes types not referenced by indexed types", [EVENT_ENVELOPE_JSON_SCHEMA_NAME] - end - - context "when there are nested types referenced from an indexed type" do - let(:schema) do - dump_schema do |s| - s.object_type "Widget" do |t| - t.field "id", "ID!" 
- t.field "options", "WidgetOptions" - t.index "widgets" - end - - s.object_type "WidgetOptions" do |t| - t.field "size", "Size" - t.field "color", "Color" - t.field "cost", "Money" - end - - s.enum_type "Size" do |t| - t.value "SMALL" - t.value "MEDIUM" - t.value "LARGE" - end - - s.enum_type "Color" do |t| - t.value "RED" - t.value "YELLOW" - t.value "BLUE" - end - - s.object_type "Money" do |t| - t.field "currency", "Currency" - t.field "amount_cents", "Int" - end - - s.enum_type "Currency" do |t| - t.value "USD" - t.value "CAD" - end - end - end - - it_behaves_like "prunes types not referenced by indexed types", [ - EVENT_ENVELOPE_JSON_SCHEMA_NAME, - "Color", - "Currency", - "ID", - "Int", - "Money", - "Size", - "Widget", - "WidgetOptions" - ] - end - end - - def dump_schema(&schema_definition) - schema_definition_results = define_schema(schema_element_name_form: "snake_case", &schema_definition) - latest_json_schema_version = schema_definition_results.latest_json_schema_version - - schema_definition_results.json_schemas_for(latest_json_schema_version) - end - end - end -end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb index 1835088ba..a527ae0d0 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb @@ -81,6 +81,38 @@ module SchemaDefinition end end + it "raises a clear error when the rollover field is not a date or datetime type" do + expect { + index_definition_metadata_for("widgets") do |i| + i.rollover :monthly, "group_id" + end + }.to raise_error(Errors::SchemaError, a_string_including("cannot be used for rollover", "not a")) + end + 
+ it "raises a clear error when the rollover field is a list field" do + expect { + index_definition_metadata_for("widgets", on_my_type: ->(t) { t.field "timestamps", "[DateTime!]!" }) do |i| + i.rollover :monthly, "timestamps" + end + }.to raise_error(Errors::SchemaError, a_string_including("cannot be used for rollover", "list field")) + end + + it "raises a clear error when the route_with field is not a leaf field" do + expect { + index_definition_metadata_for("widgets") do |i| + i.route_with "nested_fields_gql" + end + }.to raise_error(Errors::SchemaError, a_string_including("cannot be used for routing", "not a leaf field")) + end + + it "raises a clear error for a nested field path that cannot be resolved" do + expect { + index_definition_metadata_for("widgets") do |i| + i.route_with "nested_fields_gql.nonexistent_field" + end + }.to raise_error(Errors::SchemaError, a_string_including("cannot be resolved", "Verify that all fields and types")) + end + it "dumps the `default_sort_fields`" do widgets = index_definition_metadata_for("widgets") do |i| i.default_sort "created_at", :asc, "group_id", :desc diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb index 19c9707dd..fcb0cd91d 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb @@ -286,6 +286,17 @@ module SchemaDefinition expect(grouping_missing_value_placeholder).to be_nil end + it "does not infer placeholder for unsigned_long types when ingestion serializer extensions are disabled" do + metadata = define_schema(ingestion_serializer_extension_modules: []) do |s| + s.scalar_type "CustomScalar" do |t| + t.mapping 
type: "unsigned_long" + t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" + end + end.runtime_metadata.scalar_types_by_name.fetch("CustomScalar") + + expect(metadata.grouping_missing_value_placeholder).to be_nil + end + describe "boundary conditions for JSON-safe long ranges" do it "does not infer placeholder when exactly at safe boundaries with default coercion adapter" do grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX) diff --git a/elasticgraph-support/README.md b/elasticgraph-support/README.md index 91f3096dc..8464cbc88 100644 --- a/elasticgraph-support/README.md +++ b/elasticgraph-support/README.md @@ -45,6 +45,9 @@ graph LR; elasticgraph-indexer["elasticgraph-indexer"]; elasticgraph-indexer --> elasticgraph-support; class elasticgraph-indexer otherEgGemStyle; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + elasticgraph-json_ingestion --> elasticgraph-support; + class elasticgraph-json_ingestion otherEgGemStyle; elasticgraph-opensearch["elasticgraph-opensearch"]; elasticgraph-opensearch --> elasticgraph-support; class elasticgraph-opensearch otherEgGemStyle; diff --git a/spec_support/lib/elastic_graph/spec_support/schema_definition_helpers.rb b/spec_support/lib/elastic_graph/spec_support/schema_definition_helpers.rb index 0c3cf8538..5bb9d4ada 100644 --- a/spec_support/lib/elastic_graph/spec_support/schema_definition_helpers.rb +++ b/spec_support/lib/elastic_graph/spec_support/schema_definition_helpers.rb @@ -18,6 +18,7 @@ def define_schema_with_schema_elements( index_document_sizes: true, json_schema_version: 1, extension_modules: [], + ingestion_serializer_extension_modules: ElasticGraph::SchemaDefinition::ExtensionModuleSupport.default_ingestion_serializer_extension_modules, derived_type_name_formats: {}, type_name_overrides: {}, enum_value_overrides_by_type: {}, @@ -29,6 
+30,7 @@ def define_schema_with_schema_elements( index_document_sizes: index_document_sizes, json_schema_version: json_schema_version, extension_modules: extension_modules, + ingestion_serializer_extension_modules: ingestion_serializer_extension_modules, derived_type_name_formats: derived_type_name_formats, type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type,