diff --git a/CODEBASE_OVERVIEW.md b/CODEBASE_OVERVIEW.md index d6e62ebfd..8cc77e3b8 100644 --- a/CODEBASE_OVERVIEW.md +++ b/CODEBASE_OVERVIEW.md @@ -111,6 +111,7 @@ graph LR; rackup["rackup"]; rake["rake"]; webrick["webrick"]; + elasticgraph-json_ingestion["eg-json_ingestion"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; graphql["graphql"]; elasticgraph --> elasticgraph-support; @@ -125,6 +126,7 @@ graph LR; elasticgraph-local --> webrick; elasticgraph-schema_definition --> elasticgraph-graphql; elasticgraph-schema_definition --> elasticgraph-indexer; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; elasticgraph-schema_definition --> elasticgraph-schema_artifacts; elasticgraph-schema_definition --> elasticgraph-support; elasticgraph-schema_definition --> graphql; @@ -141,6 +143,7 @@ graph LR; class rackup externalGemCatStyle; class rake externalGemCatStyle; class webrick externalGemCatStyle; + class elasticgraph-json_ingestion otherEgGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class graphql externalGemCatStyle; click thor href "https://rubygems.org/gems/thor" "Open on RubyGems.org" _blank; @@ -192,12 +195,13 @@ graph LR; click opensearch-ruby href "https://rubygems.org/gems/opensearch-ruby" "Open on RubyGems.org" _blank; ``` -### Extensions (5 gems) +### Extensions (6 gems) These libraries extend ElasticGraph to provide optional but commonly needed functionality. * [elasticgraph-apollo](elasticgraph-apollo/README.md): Transforms an ElasticGraph project into an Apollo subgraph. * [elasticgraph-health_check](elasticgraph-health_check/README.md): Provides a health check for high availability ElasticGraph deployments. +* [elasticgraph-json_ingestion](elasticgraph-json_ingestion/README.md): JSON Schema ingestion support for ElasticGraph. * [elasticgraph-query_interceptor](elasticgraph-query_interceptor/README.md): Intercepts ElasticGraph datastore queries. 
* [elasticgraph-query_registry](elasticgraph-query_registry/README.md): Provides a source-controlled query registry for ElasticGraph applications. * [elasticgraph-warehouse](elasticgraph-warehouse/README.md): Extends ElasticGraph to support ingestion into a data warehouse. @@ -216,6 +220,7 @@ graph LR; apollo-federation["apollo-federation"]; elasticgraph-health_check["eg-health_check"]; elasticgraph-datastore_core["eg-datastore_core"]; + elasticgraph-json_ingestion["eg-json_ingestion"]; elasticgraph-query_interceptor["eg-query_interceptor"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; elasticgraph-query_registry["eg-query_registry"]; @@ -228,6 +233,7 @@ graph LR; elasticgraph-health_check --> elasticgraph-datastore_core; elasticgraph-health_check --> elasticgraph-graphql; elasticgraph-health_check --> elasticgraph-support; + elasticgraph-json_ingestion --> elasticgraph-support; elasticgraph-query_interceptor --> elasticgraph-graphql; elasticgraph-query_interceptor --> elasticgraph-schema_artifacts; elasticgraph-query_registry --> elasticgraph-graphql; @@ -242,6 +248,7 @@ graph LR; class apollo-federation externalGemCatStyle; class elasticgraph-health_check targetGemStyle; class elasticgraph-datastore_core otherEgGemStyle; + class elasticgraph-json_ingestion targetGemStyle; class elasticgraph-query_interceptor targetGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class elasticgraph-query_registry targetGemStyle; diff --git a/Gemfile.lock b/Gemfile.lock index 88c017452..b13f9085f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -128,6 +128,12 @@ PATH elasticgraph-support (= 1.1.1.pre) hashdiff (~> 1.2, >= 1.2.1) +PATH + remote: elasticgraph-json_ingestion + specs: + elasticgraph-json_ingestion (1.1.1.pre) + elasticgraph-support (= 1.1.1.pre) + PATH remote: elasticgraph-lambda_support specs: @@ -192,6 +198,7 @@ PATH elasticgraph-schema_definition (1.1.1.pre) elasticgraph-graphql (= 1.1.1.pre) elasticgraph-indexer (= 1.1.1.pre) + 
elasticgraph-json_ingestion (= 1.1.1.pre) elasticgraph-schema_artifacts (= 1.1.1.pre) elasticgraph-support (= 1.1.1.pre) graphql (~> 2.5.22) @@ -696,6 +703,7 @@ DEPENDENCIES elasticgraph-indexer (= 1.1.1.pre)! elasticgraph-indexer_autoscaler_lambda (= 1.1.1.pre)! elasticgraph-indexer_lambda (= 1.1.1.pre)! + elasticgraph-json_ingestion (= 1.1.1.pre)! elasticgraph-lambda_support (= 1.1.1.pre)! elasticgraph-local (= 1.1.1.pre)! elasticgraph-opensearch (= 1.1.1.pre)! @@ -787,6 +795,7 @@ CHECKSUMS elasticgraph-indexer (1.1.1.pre) elasticgraph-indexer_autoscaler_lambda (1.1.1.pre) elasticgraph-indexer_lambda (1.1.1.pre) + elasticgraph-json_ingestion (1.1.1.pre) elasticgraph-lambda_support (1.1.1.pre) elasticgraph-local (1.1.1.pre) elasticgraph-opensearch (1.1.1.pre) diff --git a/config/docker_demo/Dockerfile b/config/docker_demo/Dockerfile index c67a71782..2110740fd 100644 --- a/config/docker_demo/Dockerfile +++ b/config/docker_demo/Dockerfile @@ -16,6 +16,7 @@ COPY elasticgraph-datastore_core elasticgraph-datastore_core/ COPY elasticgraph-graphiql elasticgraph-graphiql/ COPY elasticgraph-graphql elasticgraph-graphql/ COPY elasticgraph-indexer elasticgraph-indexer/ +COPY elasticgraph-json_ingestion elasticgraph-json_ingestion/ COPY elasticgraph-local elasticgraph-local/ COPY elasticgraph-opensearch elasticgraph-opensearch/ COPY elasticgraph-query_registry elasticgraph-query_registry/ diff --git a/elasticgraph-apollo/apollo_tests_implementation/Dockerfile b/elasticgraph-apollo/apollo_tests_implementation/Dockerfile index 23442ab26..f2cd8c122 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Dockerfile +++ b/elasticgraph-apollo/apollo_tests_implementation/Dockerfile @@ -14,6 +14,7 @@ COPY elasticgraph-elasticsearch /web/elasticgraph-elasticsearch COPY elasticgraph-graphiql /web/elasticgraph-graphiql COPY elasticgraph-graphql /web/elasticgraph-graphql COPY elasticgraph-indexer /web/elasticgraph-indexer +COPY elasticgraph-json_ingestion 
/web/elasticgraph-json_ingestion COPY elasticgraph-rack /web/elasticgraph-rack COPY elasticgraph-schema_artifacts /web/elasticgraph-schema_artifacts COPY elasticgraph-schema_definition /web/elasticgraph-schema_definition diff --git a/elasticgraph-apollo/apollo_tests_implementation/Gemfile b/elasticgraph-apollo/apollo_tests_implementation/Gemfile index f082fa258..60b08ec0d 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Gemfile +++ b/elasticgraph-apollo/apollo_tests_implementation/Gemfile @@ -16,6 +16,7 @@ source "https://rubygems.org" graphiql graphql indexer + json_ingestion rack schema_artifacts schema_definition diff --git a/elasticgraph-json_ingestion/.rspec b/elasticgraph-json_ingestion/.rspec new file mode 120000 index 000000000..67e6e21b3 --- /dev/null +++ b/elasticgraph-json_ingestion/.rspec @@ -0,0 +1 @@ +../spec_support/subdir_dot_rspec \ No newline at end of file diff --git a/elasticgraph-json_ingestion/.yardopts b/elasticgraph-json_ingestion/.yardopts new file mode 120000 index 000000000..e11a2057f --- /dev/null +++ b/elasticgraph-json_ingestion/.yardopts @@ -0,0 +1 @@ +../config/site/yardopts \ No newline at end of file diff --git a/elasticgraph-json_ingestion/Gemfile b/elasticgraph-json_ingestion/Gemfile new file mode 120000 index 000000000..26cb2ad91 --- /dev/null +++ b/elasticgraph-json_ingestion/Gemfile @@ -0,0 +1 @@ +../Gemfile \ No newline at end of file diff --git a/elasticgraph-json_ingestion/LICENSE.txt b/elasticgraph-json_ingestion/LICENSE.txt new file mode 100644 index 000000000..aa18b5db8 --- /dev/null +++ b/elasticgraph-json_ingestion/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 - 2026 Block, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/elasticgraph-json_ingestion/README.md b/elasticgraph-json_ingestion/README.md new file mode 100644 index 000000000..66a77ff07 --- /dev/null +++ b/elasticgraph-json_ingestion/README.md @@ -0,0 +1,22 @@ +# ElasticGraph::JSONIngestion + +JSON Schema ingestion support for ElasticGraph. + +This gem contains ElasticGraph's JSON Schema generation and validation support for ingestion. 
+ +## Dependency Diagram + +```mermaid +graph LR; + classDef targetGemStyle fill:#FADBD8,stroke:#EC7063,color:#000,stroke-width:2px; + classDef otherEgGemStyle fill:#A9DFBF,stroke:#2ECC71,color:#000; + classDef externalGemStyle fill:#E0EFFF,stroke:#70A1D7,color:#2980B9; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + class elasticgraph-json_ingestion targetGemStyle; + elasticgraph-support["elasticgraph-support"]; + elasticgraph-json_ingestion --> elasticgraph-support; + class elasticgraph-support otherEgGemStyle; + elasticgraph-schema_definition["elasticgraph-schema_definition"]; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; + class elasticgraph-schema_definition otherEgGemStyle; +``` diff --git a/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec b/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec new file mode 100644 index 000000000..b688e407f --- /dev/null +++ b/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec @@ -0,0 +1,41 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require_relative "../elasticgraph-support/lib/elastic_graph/version" + +Gem::Specification.new do |spec| + spec.name = "elasticgraph-json_ingestion" + spec.version = ElasticGraph::VERSION + spec.authors = ["Josh Wilson", "Myron Marston", "Block Engineering"] + spec.email = ["joshuaw@squareup.com"] + spec.homepage = "https://block.github.io/elasticgraph/" + spec.license = "MIT" + spec.summary = "JSON Schema ingestion support for ElasticGraph." 
+ + spec.metadata = { + "bug_tracker_uri" => "https://github.com/block/elasticgraph/issues", + "changelog_uri" => "https://github.com/block/elasticgraph/releases/tag/v#{ElasticGraph::VERSION}", + "documentation_uri" => "https://block.github.io/elasticgraph/api-docs/v#{ElasticGraph::VERSION}/", + "homepage_uri" => "https://block.github.io/elasticgraph/", + "source_code_uri" => "https://github.com/block/elasticgraph/tree/v#{ElasticGraph::VERSION}/#{spec.name}", + "gem_category" => "extension" + } + + spec.files = Dir.chdir(File.expand_path(__dir__)) do + `git ls-files -z`.split("\x0").reject do |f| + (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features|sig)/|\.(?:git|travis|circleci)|appveyor)}) + end - [".rspec", "Gemfile", ".yardopts"] + end + + spec.required_ruby_version = [">= 3.4", "< 4.1"] + + # This extension is loaded by `elasticgraph-schema_definition` at schema-definition time, so we intentionally + # avoid a runtime dependency here to keep the dependency direction acyclic across gems. + spec.add_development_dependency "elasticgraph-schema_definition", ElasticGraph::VERSION + spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion.rb new file mode 100644 index 000000000..872d2ddb2 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion.rb @@ -0,0 +1,13 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + # JSON Schema ingestion support for ElasticGraph. 
+ module JSONIngestion + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb new file mode 100644 index 000000000..f601d689b --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb @@ -0,0 +1,91 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/factory_extension" + +module ElasticGraph + module JSONIngestion + # Namespace for all JSON Schema schema definition support. + # + # {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module. + module SchemaDefinition + # Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance + # to add JSON Schema ingestion support. + module APIExtension + # Seeds the default JSON schema strictness settings, reserves the event envelope type name, and wires up the factory extension when this module is extended onto an API instance. + # + # @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend + # @return [void] + # @api private + def self.extended(api) + api.state.ingestion_serializer_state.tap do |state| + state[:allow_omitted_json_schema_fields] = false unless state.key?(:allow_omitted_json_schema_fields) + state[:allow_extra_json_schema_fields] = true unless state.key?(:allow_extra_json_schema_fields) + state[:reserved_type_names] = (state[:reserved_type_names] || ::Set.new).merge([EVENT_ENVELOPE_JSON_SCHEMA_NAME]) + end + + api.factory.extend FactoryExtension + end + + # Defines the version number of the current JSON schema.
Importantly, every time a change is made that impacts the JSON schema + # artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique + # version number. The publisher will then include this version number in published events to identify the version of the schema it + # was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync. + # + # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly + # have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this + # on every JSON schema change by setting `enforce_json_schema_version` to `false` in your `Rakefile`. + # + # @param version [Integer] current version number of the JSON schema artifact + # @return [void] + # @see Local::RakeTasks#enforce_json_schema_version + def json_schema_version(version) + if !version.is_a?(Integer) || version < 1 + raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}" + end + + if @state.ingestion_serializer_state[:json_schema_version] + raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{@state.ingestion_serializer_state[:json_schema_version]}" + end + + @state.ingestion_serializer_state[:json_schema_version] = version + @state.ingestion_serializer_state[:json_schema_version_setter_location] = caller_locations(1, 1).to_a.first + nil + end + + # Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the + # publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to + # configure this behavior.
+ # + # @param allow_omitted_fields [Boolean] Whether nullable fields can be omitted from indexing events. + # @param allow_extra_fields [Boolean] Whether extra fields (i.e. beyond fields defined in the schema) can be included in indexing events. + # @return [void] + # + # @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled + # field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher + # accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId` + # is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of + # these to `true` (or none). + def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true) + unless [true, false].include?(allow_omitted_fields) + raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" + end + + unless [true, false].include?(allow_extra_fields) + raise Errors::SchemaError, "`allow_extra_fields` must be true or false" + end + + @state.ingestion_serializer_state[:allow_omitted_json_schema_fields] = allow_omitted_fields + @state.ingestion_serializer_state[:allow_extra_json_schema_fields] = allow_extra_fields + nil + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/built_in_types_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/built_in_types_extension.rb new file mode 100644 index 000000000..36e496368 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/built_in_types_extension.rb @@ -0,0 +1,60 @@ +# Copyright 2024 - 2026 Block, Inc.
+# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends ElasticGraph's built-in types with JSON ingestion configuration. + module BuiltInTypesExtension + # JSON Schema defaults applied to ElasticGraph's built-in scalar types. + BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME = { + "Boolean" => {type: "boolean"}, + "Float" => {type: "number"}, + "ID" => {type: "string"}, + "Int" => {type: "integer", minimum: INT_MIN, maximum: INT_MAX}, + "String" => {type: "string"}, + "Cursor" => {type: "string"}, + "Date" => {type: "string", format: "date"}, + "DateTime" => {type: "string", format: "date-time"}, + "LocalTime" => {type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN}, + "TimeZone" => {type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a.freeze}, + "Untyped" => {type: ["array", "boolean", "integer", "number", "object", "string"].freeze}, + "JsonSafeLong" => {type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX}, + "LongString" => {type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX} + }.freeze + + # Returns JSON schema defaults for a built-in scalar type. + # + # These are applied eagerly while the scalar is being instantiated so built-in scalar types still satisfy + # `validate_json_schema_configuration!` during schema-definition time. 
+ # + # @param type_name [String] + # @return [Hash, nil] + def self.json_schema_options_for_scalar(type_name) + BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME[type_name] + end + + private + + def register_standard_elastic_graph_types + super + + geo_location = schema_def_state.object_types_by_name.fetch(schema_def_state.type_ref("GeoLocation").to_final_form.name) + + # We use `nullable: false` because `GeoLocation` is indexed as a single `geo_point` field, + # and therefore can't support a `latitude` without a `longitude` or vice-versa. + geo_location.graphql_fields_by_name.fetch(names.latitude).json_schema minimum: -90, maximum: 90, nullable: false + geo_location.graphql_fields_by_name.fetch(names.longitude).json_schema minimum: -180, maximum: 180, nullable: false + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/enum_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/enum_type_extension.rb new file mode 100644 index 000000000..657827508 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/enum_type_extension.rb @@ -0,0 +1,29 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends enum types with JSON schema behavior. 
+ module EnumTypeExtension + # @private + def configure_derived_scalar_type(scalar_type) + super + scalar_type.json_schema type: "string" + end + + # @private + def to_indexing_field_type + Indexing::FieldType::Enum.new(super) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb new file mode 100644 index 000000000..58afa5f89 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb @@ -0,0 +1,113 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/built_in_types_extension" +require "elastic_graph/json_ingestion/schema_definition/enum_type_extension" +require "elastic_graph/json_ingestion/schema_definition/field_extension" +require "elastic_graph/json_ingestion/schema_definition/indexing/index" +require "elastic_graph/json_ingestion/schema_definition/object_interface_extension" +require "elastic_graph/json_ingestion/schema_definition/results_extension" +require "elastic_graph/json_ingestion/schema_definition/scalar_type_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension" +require "elastic_graph/json_ingestion/schema_definition/union_type_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module applied to `ElasticGraph::SchemaDefinition::Factory` to wire up + # JSON Schema support on Results and SchemaArtifactManager instances. 
+ # + # @api private + module FactoryExtension + # @private + def new_built_in_types(api) + super(api).tap do |built_in_types| + built_in_types.extend BuiltInTypesExtension + end + end + + # @private + def new_enum_type(name) + super(name) do |type| + type.extend EnumTypeExtension + yield type if block_given? + end + end + + # @private + def new_field(**kwargs, &block) + super(**kwargs) do |field| + field.extend FieldExtension + block&.call(field) + end + end + + # @private + def new_index(name, settings, type, &block) + super(name, settings, type) do |index| + index.extend Indexing::IndexExtension + block&.call(index) + end + end + + # @private + def new_interface_type(name) + super(name) do |type| + type.extend ObjectInterfaceExtension + yield type if block_given? + end + end + + # @private + def new_object_type(name) + super(name) do |type| + type.extend ObjectInterfaceExtension + yield type if block_given? + end + end + + # @private + def new_scalar_type(name) + super(name) do |type| + type.extend ScalarTypeExtension + if (built_in_json_schema_options = BuiltInTypesExtension.json_schema_options_for_scalar(name)) + type.json_schema(**built_in_json_schema_options) + end + yield type if block_given? + end.tap(&:validate_json_schema_configuration!) + end + + # @private + def new_union_type(name) + super(name) do |type| + type.extend UnionTypeExtension + yield type if block_given? + end + end + + # Creates a new Results instance with JSON Schema extensions. + # + # @return [ElasticGraph::SchemaDefinition::Results] the created results instance + def new_results + super.tap do |results| + results.extend ResultsExtension + end + end + + # Creates a new SchemaArtifactManager instance with JSON Schema extensions. + # + # @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] the created artifact manager + def new_schema_artifact_manager(...) 
+ super.tap do |manager| + manager.extend SchemaArtifactManagerExtension + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_extension.rb new file mode 100644 index 000000000..847e3b4ae --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/field_extension.rb @@ -0,0 +1,98 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_reference" +require "elastic_graph/json_ingestion/schema_definition/json_schema_option_validator" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends schema-definition fields with JSON schema validation behavior. + module FieldExtension + # @return [Hash] JSON schema options for this field + def json_schema_options + @json_schema_options ||= {} + end + + # @return [Boolean] whether this field has been marked as non-nullable in the JSON schema + def non_nullable_in_json_schema + @non_nullable_in_json_schema || false + end + + # Sets whether this field is non-nullable in the JSON schema. + # @param value [Boolean] true to make the field non-nullable + attr_writer :non_nullable_in_json_schema + + # Configures JSON schema options for this field. 
+ # + # @param nullable [Boolean, nil] set to `false` to make this field non-nullable in the JSON schema + # @param options [Hash] additional JSON schema options + # @return [void] + def json_schema(nullable: nil, **options) + if options.key?(:type) + raise Errors::SchemaError, "Cannot override JSON schema type of field `#{name}` with `#{options.fetch(:type)}`" + end + + case nullable + when true + raise Errors::SchemaError, "`nullable: true` is not allowed on a field--just declare the GraphQL field as being nullable (no `!` suffix) instead." + when false + @non_nullable_in_json_schema = true + end + + JSONSchemaOptionValidator.validate!(self, options) + json_schema_options.update(options) + end + + # @private + def to_indexing_field_reference + reference = super + return nil unless reference + + type_for_json_schema = non_nullable_in_json_schema ? type.wrap_non_null : type + + Indexing::FieldReference.new( + field_reference: reference.with(type: type_for_json_schema), + json_schema_layers: FieldExtension.compute_json_schema_layers(type_for_json_schema), + json_schema_customizations: json_schema_options + ) + end + + # Computes JSON schema layers from a GraphQL type reference. + # Returns all the JSON schema array/nullable layers of a type, from outermost to innermost. + # For example, `[[Int]]` will return `[:nullable, :array, :nullable, :array, :nullable]`. + # + # @param type_ref [ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] the type reference + # @return [Array] the layers + def self.compute_json_schema_layers(type_ref) + layers, inner_type = peel_json_schema_layers_once(type_ref) + + if layers.empty? || inner_type == type_ref + layers + else + layers + compute_json_schema_layers(inner_type) + end + end + + # Peels one layer of JSON schema type wrapping. 
+ # + # @param type_ref [ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] the type reference + # @return [Array] a pair of [layers, inner_type] + def self.peel_json_schema_layers_once(type_ref) + if type_ref.list? + return [[:array], type_ref.unwrap_list] if type_ref.non_null? + return [[:nullable, :array], type_ref.unwrap_list] + end + + return [[], type_ref.unwrap_non_null] if type_ref.non_null? + [[:nullable], type_ref] + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb new file mode 100644 index 000000000..9677a8024 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb @@ -0,0 +1,89 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # Contains logic related to the JSON schema for ElasticGraph's event envelope. + # + # @api private + module EventEnvelope + # Builds the JSON schema definition for ElasticGraph's event envelope. + # + # @param indexed_type_names [Array] names of indexed types + # @param json_schema_version [Integer] the JSON schema version number + # @return [Hash] the event envelope JSON schema + def self.json_schema(indexed_type_names, json_schema_version) + { + "type" => "object", + "description" => "Required by ElasticGraph to wrap every data event.", + "properties" => { + "op" => { + "description" => "Indicates what type of operation the event represents. 
For now, only `upsert` is supported, but we plan to support other operations in the future.", + "type" => "string", + "enum" => %w[upsert] + }, + "type" => { + "description" => "The type of object present in `record`.", + "type" => "string", + "enum" => indexed_type_names.sort + }, + "id" => { + "description" => "The unique identifier of the record.", + "type" => "string", + "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH + }, + "version" => { + "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', + "type" => "integer", + "minimum" => 0, + "maximum" => (2**63) - 1 + }, + "record" => { + "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", + "type" => "object" + }, + "latency_timestamps" => { + "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", + "type" => "object", + "additionalProperties" => false, + "patternProperties" => { + "^\\w+_at$" => { + "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", + "type" => "string", + "format" => "date-time" + } + } + }, + JSON_SCHEMA_VERSION_KEY => { + "description" => "The version of the JSON schema the publisher was using when the event was published. 
ElasticGraph will use the JSON schema matching this version to process the event.", + "const" => json_schema_version + }, + "message_id" => { + "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", + "type" => "string" + } + }, + "additionalProperties" => false, + "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], + "if" => { + "properties" => { + "op" => {"const" => "upsert"} + } + }, + "then" => {"required" => ["record"]} + } + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field.rb new file mode 100644 index 000000000..40c8f5a6b --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field.rb @@ -0,0 +1,116 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "delegate" +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata" +require "elastic_graph/support/hash_util" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Namespace for JSON-schema-aware indexing components. + module Indexing + # Wraps an indexing field with JSON schema generation behavior. + # + # @api private + class Field < ::SimpleDelegator + # JSON schema overrides that automatically apply to specific mapping types so that the JSON schema + # validation will reject values which cannot be indexed into fields of a specific mapping type. 
+ # + # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/number.html Elasticsearch numeric field type documentation + # @note We don't handle `integer` here because it's the default numeric type (handled by our definition of the `Int` scalar type). + # @note Likewise, we don't handle `long` here because a custom scalar type must be used for that since GraphQL's `Int` type can't handle long values. + JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE = { + "byte" => {"minimum" => -(2**7), "maximum" => (2**7) - 1}, + "short" => {"minimum" => -(2**15), "maximum" => (2**15) - 1}, + "keyword" => {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}, + "text" => {"maxLength" => DEFAULT_MAX_TEXT_LENGTH} + } + + # @return [Hash] user-specified JSON schema customizations for this field + attr_reader :json_schema_customizations + + # @private + def initialize(field, json_schema_layers:, json_schema_customizations:) + @json_schema_layers = json_schema_layers + @json_schema_customizations = json_schema_customizations + super(field) + end + + # Returns the JSON schema definition for this field. + # + # @return [Hash] the JSON schema hash + def json_schema + @json_schema ||= + json_schema_layers + .reverse + .reduce(inner_json_schema) { |acc, layer| process_layer(layer, acc) } + .merge(outer_json_schema_customizations) + .merge({"description" => doc_comment}.compact) + .then { |hash| Support::HashUtil.stringify_keys(hash) } + end + + # @return [JSONSchemaFieldMetadata] metadata about this field for inclusion in the JSON schema + def json_schema_metadata + JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index) + end + + def nullable? + json_schema_layers.include?(:nullable) + end + + private + + attr_reader :json_schema_layers + + def inner_json_schema + user_specified_customizations = + if user_specified_json_schema_customizations_go_on_outside? 
+ {} # : ::Hash[::String, untyped] + else + Support::HashUtil.stringify_keys(json_schema_customizations) + end + + customizations_from_mapping = JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE[mapping["type"]] || {} + customizations = customizations_from_mapping.merge(user_specified_customizations) + customizations = indexing_field_type.format_field_json_schema_customizations(customizations) + + ref = {"$ref" => "#/$defs/#{type.unwrapped_name}"} + return ref if customizations.empty? + + {"allOf" => [ref, customizations]} + end + + def outer_json_schema_customizations + return {} unless user_specified_json_schema_customizations_go_on_outside? + Support::HashUtil.stringify_keys(json_schema_customizations) + end + + def user_specified_json_schema_customizations_go_on_outside? + json_schema_layers.include?(:array) + end + + def process_layer(layer, schema) + case layer + when :nullable + { + "anyOf" => [ + schema, + {"type" => "null"} + ] + } + when :array + {"type" => "array", "items" => schema} + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_reference.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_reference.rb new file mode 100644 index 000000000..0890a49c0 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_reference.rb @@ -0,0 +1,44 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # @!parse class FieldReference < ::Data; end + FieldReference = ::Data.define( + :field_reference, + :json_schema_layers, + :json_schema_customizations + ) + + # A JSON-schema-aware wrapper around the core indexing field reference. + # + # @api private + class FieldReference < ::Data + # Resolves this field reference into a JSON-schema-aware {Field}, or `nil` if unresolvable. + # + # @return [Field, nil] + def resolve + return nil unless (resolved_field = field_reference.resolve) + + Indexing::Field.new( + resolved_field, + json_schema_layers: json_schema_layers, + json_schema_customizations: json_schema_customizations + ) + end + + # @dynamic initialize, with, field_reference, json_schema_layers, json_schema_customizations + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb new file mode 100644 index 000000000..219aa340d --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb @@ -0,0 +1,46 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "delegate" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # Contains JSON-schema-aware wrappers around core indexing field types. + module FieldType + # Wraps an enum indexing field type to add JSON schema serialization. + # + # We use a wrapper here rather than `extend` because the core enum field type is a frozen `Data` object. 
+ class Enum < ::SimpleDelegator + # Enums have no subfields, so there is no additional ElasticGraph metadata to contribute. + # + # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this enum type. + def json_schema_field_metadata_by_field_name + {} + end + + # Enum field customizations are limited to the `enum` keyword. The field type itself already provides + # the JSON schema `type`, and object-style keywords such as `properties` do not apply to enum values. + # + # @param customizations [Hash] the customizations to format + # @return [Hash] the filtered customizations + def format_field_json_schema_customizations(customizations) + customizations.slice("enum") + end + + # @return [Hash] the JSON schema definition for this enum type + def to_json_schema + {"type" => "string", "enum" => __getobj__.enum_value_names} + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb new file mode 100644 index 000000000..ed78cb0d9 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb @@ -0,0 +1,94 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "delegate" +require "elastic_graph/support/hash_util" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # Contains JSON-schema-aware wrappers around core indexing field types. + module FieldType + # Wraps an object/interface indexing field type to add JSON schema serialization. 
+ class Object < ::SimpleDelegator + # @param wrapped [ElasticGraph::SchemaDefinition::Indexing::FieldType::Object] the core field type to wrap + # @param json_schema_options [Hash] JSON schema options from the type definition + def initialize(wrapped, json_schema_options: {}) + @json_schema_options = json_schema_options + super(wrapped) + end + + # @return [Hash] field metadata keyed by field name + def json_schema_field_metadata_by_field_name + __getobj__.subfields.to_h { |field| [field.name, field.json_schema_metadata] } + end + + # Returns the customizations as-is for object types. + # + # @param customizations [Hash] the customizations to format + # @return [Hash] the formatted customizations + def format_field_json_schema_customizations(customizations) + customizations + end + + # @return [Hash] the JSON schema definition for this object type + def to_json_schema + wrapped = __getobj__ + ingestion_state = wrapped.schema_def_state.ingestion_serializer_state + + @to_json_schema ||= + if @json_schema_options.empty? + other_source_subfields, json_schema_candidate_subfields = wrapped.subfields.partition(&:source) + validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) + json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script) + required_fields = json_schema_subfields + required_fields = required_fields.reject(&:nullable?) 
if ingestion_state[:allow_omitted_json_schema_fields] + + { + "type" => "object", + "properties" => json_schema_subfields.to_h { |field| [field.name, field.json_schema] }.merge(json_schema_typename_field), + "required" => required_fields.map(&:name).freeze, + "additionalProperties" => (false unless ingestion_state[:allow_extra_json_schema_fields]), + "description" => wrapped.doc_comment + }.compact.freeze + else + Support::HashUtil.stringify_keys(@json_schema_options) + end + end + + private + + def json_schema_typename_field + type_name = __getobj__.type_name + + { + "__typename" => { + "type" => "string", + "const" => type_name, + "default" => type_name + } + } + end + + def validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) + problem_fields = other_source_subfields.reject { |field| field.json_schema_customizations.empty? } + return if problem_fields.empty? + + field_descriptions = problem_fields.map(&:name).sort.map { |field| "`#{field}`" }.join(", ") + raise Errors::SchemaError, + "`#{type_name}` has #{problem_fields.size} field(s) (#{field_descriptions}) that are `sourced_from` " \ + "another type and also have JSON schema customizations. Instead, put the JSON schema " \ + "customizations on the source type's field definitions." + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb new file mode 100644 index 000000000..96816cf5c --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb @@ -0,0 +1,41 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "delegate" +require "elastic_graph/support/hash_util" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module FieldType + # Wraps a scalar indexing field type to add JSON schema serialization. + class Scalar < ::SimpleDelegator + # @return [Hash] empty hash, as scalar types have no subfields + def json_schema_field_metadata_by_field_name + {} + end + + # Returns the customizations as-is for scalar types. + # + # @param customizations [Hash] the customizations to format + # @return [Hash] the formatted customizations + def format_field_json_schema_customizations(customizations) + customizations + end + + # @return [Hash] the JSON schema definition for this scalar type + def to_json_schema + Support::HashUtil.stringify_keys(__getobj__.scalar_type.json_schema_options) + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb new file mode 100644 index 000000000..ab4d12b9b --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb @@ -0,0 +1,45 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "delegate" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module FieldType + # Wraps a union indexing field type to add JSON schema serialization. + class Union < ::SimpleDelegator + # @return [Hash] empty hash, as union types have no subfields + def json_schema_field_metadata_by_field_name + {} + end + + # Returns the customizations as-is for union types. 
+ # + # @param customizations [Hash] the customizations to format + # @return [Hash] the formatted customizations + def format_field_json_schema_customizations(customizations) + customizations + end + + # @return [Hash] the JSON schema definition for this union type + def to_json_schema + subtype_json_schemas = __getobj__.subtypes_by_name.keys.map { |name| {"$ref" => "#/$defs/#{name}"} } + + { + "required" => %w[__typename], + "oneOf" => subtype_json_schemas + } + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/index.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/index.rb new file mode 100644 index 000000000..2b34f50eb --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/index.rb @@ -0,0 +1,41 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # Extends indices with JSON-schema-specific event requirements. 
+ module IndexExtension + # @private + def rollover(frequency, timestamp_field_path_name) + super + + schema_def_state.after_user_definition_complete do + public_field_path(timestamp_field_path_name, explanation: "it is referenced as an index `rollover` field") + .path_parts + .each { |field| field.json_schema nullable: false } + end + end + + # @private + def route_with(routing_field_path_name) + super + + schema_def_state.after_user_definition_complete do + routing_field_path = public_field_path(routing_field_path_name, explanation: "it is referenced as an index `route_with` field") + + routing_field_path.path_parts[0..-2].each { |field| field.json_schema nullable: false } + routing_field_path.last_part.json_schema nullable: false, pattern: ElasticGraph::SchemaDefinition::Indexing::Index::HAS_NON_WHITE_SPACE_REGEX + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb new file mode 100644 index 000000000..e1e1cefc8 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb @@ -0,0 +1,30 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # @!parse class JSONSchemaFieldMetadata; end + JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index) + + # Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas. 
+ # + # @api private + class JSONSchemaFieldMetadata < ::Data + # @return [Hash] hash representation suitable for serialization + def to_dumpable_hash + {"type" => type, "nameInIndex" => name_in_index} + end + + # @dynamic initialize, type, name_in_index + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb new file mode 100644 index 000000000..de8d74e6b --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb @@ -0,0 +1,217 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # Represents the result of merging a JSON schema with ElasticGraph metadata. + # + # @private + class JSONSchemaWithMetadata < ::Data.define( + :json_schema, + :missing_fields, + :missing_types, + :definition_conflicts, + :missing_necessary_fields + ) + def json_schema_version + json_schema.fetch(JSON_SCHEMA_VERSION_KEY) + end + + # Responsible for building `JSONSchemaWithMetadata` instances. 
+ # + # @private + class Merger + attr_reader :unused_deprecated_elements + + def initialize(schema_def_results) + @field_metadata_by_type_and_field_name = schema_def_results.json_schema_field_metadata_by_type_and_field_name + @renamed_types_by_old_name = schema_def_results.state.renamed_types_by_old_name + @deleted_types_by_old_name = schema_def_results.state.deleted_types_by_old_name + @renamed_fields_by_type_name_and_old_field_name = schema_def_results.state.renamed_fields_by_type_name_and_old_field_name + @deleted_fields_by_type_name_and_old_field_name = schema_def_results.state.deleted_fields_by_type_name_and_old_field_name + @state = schema_def_results.state + @derived_indexing_type_names = schema_def_results.derived_indexing_type_names + + @unused_deprecated_elements = ( + @renamed_types_by_old_name.values + + @deleted_types_by_old_name.values + + @renamed_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + + @deleted_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + ).to_set + end + + def merge_metadata_into(json_schema) + missing_fields = ::Set.new + missing_types = ::Set.new + definition_conflicts = ::Set.new + old_type_name_by_current_name = {} # : ::Hash[::String, ::String] + + defs = json_schema.fetch("$defs").to_h do |type_name, type_def| + if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"]) + current_type_name = determine_current_type_name( + type_name, + missing_types: missing_types, + definition_conflicts: definition_conflicts + ) + + if current_type_name + old_type_name_by_current_name[current_type_name] = type_name + end + + properties = properties.to_h do |field_name, prop| + unless field_name == "__typename" + field_metadata = current_type_name&.then do |name| + field_metadata_for( + name, + field_name, + missing_fields: missing_fields, + definition_conflicts: definition_conflicts + ) + end + + prop = prop.merge({"ElasticGraph" => field_metadata&.to_dumpable_hash}) + end + + 
[field_name, prop] + end + + type_def = type_def.merge({"properties" => properties}) + end + + [type_name, type_def] + end + + json_schema = json_schema.merge("$defs" => defs) + + JSONSchemaWithMetadata.new( + json_schema: json_schema, + missing_fields: missing_fields, + missing_types: missing_types, + definition_conflicts: definition_conflicts, + missing_necessary_fields: identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) + ) + end + + private + + def determine_current_type_name(type_name, missing_types:, definition_conflicts:) + exists_currently = @field_metadata_by_type_and_field_name.key?(type_name) + deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } + renamed = @renamed_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } + + if [exists_currently, deleted, renamed].count(&:itself) > 1 + definition_conflicts.merge([deleted, renamed].compact) + end + + return type_name if exists_currently + return nil if deleted + return renamed.name if renamed + + missing_types << type_name + nil + end + + def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:) + full_name = "#{type_name}.#{field_name}" + + current_meta = @field_metadata_by_type_and_field_name.dig(type_name, field_name) + deleted = @deleted_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| + @unused_deprecated_elements.delete(elem) + end + renamed = @renamed_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| + @unused_deprecated_elements.delete(elem) + end + + if [current_meta, deleted, renamed].count(&:itself) > 1 + definition_conflicts.merge([deleted, renamed].compact.map { |elem| elem.with(name: full_name) }) + end + + return current_meta if current_meta + return nil if deleted + return @field_metadata_by_type_and_field_name.dig(type_name, renamed.name) if renamed + + missing_fields << full_name + nil + 
end + + def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) + json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name) + + @state.object_types_by_name.values + .select { |type| type.has_own_index_def? && !@derived_indexing_type_names.include?(type.name) } + .flat_map { |object_type| identify_missing_necessary_fields_for_index_def(object_type, object_type.own_index_def, json_schema_resolver) } + end + + def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver) + { + "routing" => index_def.routing_field_path, + "rollover" => index_def.rollover_config&.timestamp_field_path + }.compact.filter_map do |field_type, field_path| + if json_schema_resolver.necessary_path_missing?(field_path) + MissingNecessaryField.new( + field_type: field_type, + fully_qualified_path: field_path.fully_qualified_path_in_index + ) + end + end + end + + class JSONSchemaResolver + def initialize(state, json_schema, old_type_name_by_current_name) + @state = state + @old_type_name_by_current_name = old_type_name_by_current_name + @meta_by_old_type_and_name_in_index = ::Hash.new do |hash, type_name| + properties = json_schema.fetch("$defs").fetch(type_name).fetch("properties") + + hash[type_name] = properties.filter_map do |name, prop| + if (metadata = prop["ElasticGraph"]) + [metadata.fetch("nameInIndex"), metadata] + end + end.to_h + end + end + + def necessary_path_missing?(field_path) + parent_type = field_path.first_part.parent_type.name + + field_path.path_parts.any? 
do |path_part| + necessary_path_part_missing?(parent_type, path_part.name_in_index) do |meta| + parent_type = @state.type_ref(meta.fetch("type")).fully_unwrapped.name + end + end + end + + private + + def necessary_path_part_missing?(parent_type, name_in_index) + old_type_name = @old_type_name_by_current_name[parent_type] + return false unless old_type_name + + meta = @meta_by_old_type_and_name_in_index.dig(old_type_name, name_in_index) + yield meta if meta + !meta + end + end + end + + # @!parse class MissingNecessaryField < ::Data; end + MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path) + + # @private + class MissingNecessaryField < ::Data + # @dynamic initialize, with, field_type, fully_qualified_path + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_option_validator.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_option_validator.rb new file mode 100644 index 000000000..76fd9a7f5 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_option_validator.rb @@ -0,0 +1,37 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/support/hash_util" +require "elastic_graph/support/json_schema/meta_schema_validator" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Validates JSON-schema-specific configuration supplied through schema definition APIs. + # + # @api private + module JSONSchemaOptionValidator + module_function + + # Validates JSON schema options against the JSON meta-schema. 
+ # + # @param schema_element [Object] the schema element being configured (used in error messages) + # @param options [Hash] the JSON schema options to validate + # @raise [Errors::SchemaError] if the options are invalid + # @return [void] + def validate!(schema_element, options) + validatable_json_schema = Support::HashUtil.stringify_keys(options) + + if (error_msg = Support::JSONSchema.strict_meta_schema_validator.validate_with_error_message(validatable_json_schema)) + raise Errors::SchemaError, "Invalid JSON schema options set on #{schema_element}:\n\n#{error_msg}" + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb new file mode 100644 index 000000000..1222a6488 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb @@ -0,0 +1,62 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Prunes unused type definitions from a given JSON schema. + # + # @private + class JSONSchemaPruner + def self.prune(original_json_schema) + initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema + .dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum") + + types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"]) + + # The .select will preserve the sort order of the original hash. + # standard:disable Style/HashSlice -- We intentionally preserve the dumped definition order. 
+          pruned_defs = original_json_schema["$defs"].select { |type_name, _type_def| types_to_keep.include?(type_name) }
+          # standard:enable Style/HashSlice
+
+          original_json_schema.merge("$defs" => pruned_defs)
+        end
+
+        private_class_method def self.referenced_type_names(source_type_names, original_defs, seen = Set.new)
+          # Follow only names not yet visited, so cyclic or widely shared `$ref`s cannot recurse without end.
+          unvisited = source_type_names.reject { |name| seen.include?(name) }
+          return seen if unvisited.empty?
+
+          ref_names = collect_ref_names(original_defs.slice(*unvisited))
+          referenced_type_names(ref_names, original_defs, seen.merge(unvisited))
+        end
+
+        private_class_method def self.collect_ref_names(hash)
+          hash.flat_map do |key, value|
+            case value
+            when ::Hash
+              collect_ref_names(value)
+            when ::Array
+              value.grep(::Hash).flat_map { |subhash| collect_ref_names(subhash) }
+            when ::String
+              if key == "$ref" && (type = value[%r{\A#/\$defs/(.+)\z}, 1])
+                [type]
+              else
+                []
+              end
+            else
+              []
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/object_interface_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/object_interface_extension.rb
new file mode 100644
index 000000000..9433ee979
--- /dev/null
+++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/object_interface_extension.rb
@@ -0,0 +1,45 @@
+# Copyright 2024 - 2026 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/object"
+require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/union"
+require "elastic_graph/json_ingestion/schema_definition/json_schema_option_validator"
+
+module ElasticGraph
+  module JSONIngestion
+    module SchemaDefinition
+      # Extends object and interface types with JSON schema behavior.
+ module ObjectInterfaceExtension + # @return [Hash] JSON schema options for this type + def json_schema_options + @json_schema_options ||= {} + end + + # Configures JSON schema options for this object or interface type. + # + # @param options [Hash] JSON schema options + # @return [void] + def json_schema(**options) + JSONSchemaOptionValidator.validate!(self, options) + json_schema_options.update(options) + end + + # @private + def to_indexing_field_type + field_type = super + + if field_type.is_a?(ElasticGraph::SchemaDefinition::Indexing::FieldType::Union) + Indexing::FieldType::Union.new(field_type) + else + Indexing::FieldType::Object.new(field_type, json_schema_options: json_schema_options) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb new file mode 100644 index 000000000..5f3ac53a1 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb @@ -0,0 +1,115 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/indexing/event_envelope" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::Results} that adds + # JSON Schema generation support. 
+ # + # @private + module ResultsExtension + # @param version [Integer] desired JSON schema version + # @return [Hash] the JSON schema for the requested version, if available + # @raise [Errors::NotFoundError] if the requested JSON schema version is not available + def json_schemas_for(version) + unless available_json_schema_versions.include?(version) + raise Errors::NotFoundError, "The requested json schema version (#{version}) is not available. Available versions: #{available_json_schema_versions.to_a.join(", ")}." + end + + @latest_versioned_json_schema ||= merge_field_metadata_into_json_schema(current_public_json_schema).json_schema + end + + # @return [Set] set of available JSON schema versions + def available_json_schema_versions + @available_json_schema_versions ||= Set[latest_json_schema_version] + end + + # @return [Integer] the current JSON schema version + def latest_json_schema_version + current_public_json_schema[JSON_SCHEMA_VERSION_KEY] + end + + # @private + def json_schema_version_setter_location + state.ingestion_serializer_state[:json_schema_version_setter_location] + end + + # @private + def json_schema_field_metadata_by_type_and_field_name + @json_schema_field_metadata_by_type_and_field_name ||= json_schema_indexing_field_types_by_name + .transform_values(&:json_schema_field_metadata_by_field_name) + end + + # @private + def current_public_json_schema + @current_public_json_schema ||= build_public_json_schema + end + + # @private + def merge_field_metadata_into_json_schema(json_schema) + json_schema_with_metadata_merger.merge_metadata_into(json_schema) + end + + # @private + def unused_deprecated_elements + json_schema_with_metadata_merger.unused_deprecated_elements + end + + private + + def json_schema_with_metadata_merger + @json_schema_with_metadata_merger ||= Indexing::JSONSchemaWithMetadata::Merger.new(self) + end + + def build_public_json_schema + json_schema_version = state.ingestion_serializer_state[:json_schema_version] + if 
json_schema_version.nil? + raise Errors::SchemaError, "`json_schema_version` must be specified in the schema. To resolve, add `schema.json_schema_version 1` in a schema definition block." + end + + root_document_type_names = state.object_types_by_name.values + .select { |type| type.root_document_type? && !type.abstract? } + .reject { |type| derived_indexing_type_names.include?(type.name) } + .map(&:name) + + definitions_by_name = json_schema_indexing_field_types_by_name + .transform_values(&:to_json_schema) + .compact + + { + "$schema" => JSON_META_SCHEMA, + JSON_SCHEMA_VERSION_KEY => json_schema_version, + "$defs" => { + "ElasticGraphEventEnvelope" => Indexing::EventEnvelope.json_schema(root_document_type_names, json_schema_version) + }.merge(definitions_by_name) + } + end + + def json_schema_indexing_field_types_by_name + @json_schema_indexing_field_types_by_name ||= state + .types_by_name + .except("Query") + .values + .reject do |t| + derived_indexing_type_names.include?(t.name) || + # Skip graphql framework types + t.graphql_only? + end + .sort_by(&:name) + .to_h { |type| [type.name, type.to_indexing_field_type] } + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/scalar_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/scalar_type_extension.rb new file mode 100644 index 000000000..06eb7959f --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/scalar_type_extension.rb @@ -0,0 +1,48 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar" +require "elastic_graph/json_ingestion/schema_definition/json_schema_option_validator" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends scalar types with JSON schema validation and serialization behavior. + module ScalarTypeExtension + # @return [Hash] JSON schema options for this scalar type + def json_schema_options + @json_schema_options ||= {} + end + + # Configures JSON schema options for this scalar type. + # + # @param options [Hash] JSON schema options + # @return [void] + def json_schema(**options) + JSONSchemaOptionValidator.validate!(self, options) + json_schema_options.update(options) + end + + # @private + def to_indexing_field_type + Indexing::FieldType::Scalar.new(super) + end + + # Validates that json_schema has been configured on this scalar type. + # + # @raise [Errors::SchemaError] if json_schema has not been configured + # @return [void] + def validate_json_schema_configuration! + return unless json_schema_options.empty? + + raise Errors::SchemaError, "Scalar types require `json_schema` to be configured, but `#{name}` lacks `json_schema`." + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb new file mode 100644 index 000000000..f542528de --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb @@ -0,0 +1,250 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" +require "yaml" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::SchemaArtifactManager} that adds + # JSON Schema artifact generation support. + # + # @private + module SchemaArtifactManagerExtension + # Overrides `dump_artifacts` to add JSON schema version bump checking before dumping. + def dump_artifacts + check_if_needs_json_schema_version_bump do |recommended_json_schema_version| + if @enforce_json_schema_version + # @type var setter_location: ::Thread::Backtrace::Location + # We use `_ =` because while `json_schema_version_setter_location` can be nil, + # it'll never be nil if we get here and we want the type to be non-nilable. + setter_location = _ = schema_definition_results.json_schema_version_setter_location + setter_location_path = ::Pathname.new(setter_location.absolute_path.to_s).relative_path_from(::Dir.pwd) + + abort "A change has been attempted to `json_schemas.yaml`, but the `json_schema_version` has not been correspondingly incremented. Please " \ + "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \ + "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \ + " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \ + "Alternately, pass `enforce_json_schema_version: false` to `ElasticGraph::SchemaDefinition::RakeTasks.new` to allow the JSON schemas " \ + "file to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping." + else + @output.puts <<~EOS + WARNING: the `json_schemas.yaml` artifact is being updated without the `json_schema_version` being correspondingly incremented. 
+ This is not recommended for production applications, but is currently allowed because you have set `enforce_json_schema_version: false`. + EOS + end + end + + super + end + + private + + # Overrides the base `artifacts_from_schema_def` method to add JSON schema artifacts. + def artifacts_from_schema_def + base_artifacts = super + + versioned_artifacts = build_desired_versioned_json_schemas(json_schemas_artifact.desired_contents).values.map do |versioned_schema| + new_versioned_json_schema_artifact(versioned_schema) + end + + base_artifacts + [json_schemas_artifact] + versioned_artifacts + end + + def json_schemas_artifact + @json_schemas_artifact ||= new_yaml_artifact( + JSON_SCHEMAS_FILE, + JSONSchemaPruner.prune(schema_definition_results.current_public_json_schema), + extra_comment_lines: [ + "This is the \"public\" JSON schema file and is intended to be provided to publishers so that", + "they can perform code generation and event validation." + ] + ) + end + + def check_if_needs_json_schema_version_bump(&block) + if json_schemas_artifact.out_of_date? 
+ existing_schema_version = json_schemas_artifact.existing_dumped_contents&.dig(JSON_SCHEMA_VERSION_KEY) || -1 + desired_schema_version = json_schemas_artifact.desired_contents[JSON_SCHEMA_VERSION_KEY] + + if existing_schema_version >= desired_schema_version + yield existing_schema_version + 1 + end + end + end + + def build_desired_versioned_json_schemas(current_public_json_schema) + versioned_parsed_yamls = ::Dir.glob(::File.join(@schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v*.yaml")).map do |file| + ::YAML.safe_load_file(file) + end + [current_public_json_schema] + + results_by_json_schema_version = versioned_parsed_yamls.to_h do |parsed_yaml| + merged_schema = @schema_definition_results.merge_field_metadata_into_json_schema(parsed_yaml) + [merged_schema.json_schema_version, merged_schema] + end + + report_json_schema_merge_errors(results_by_json_schema_version.values) + report_json_schema_merge_warnings + + results_by_json_schema_version.transform_values(&:json_schema) + end + + def report_json_schema_merge_errors(merged_results) + json_schema_versions_by_missing_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] + json_schema_versions_by_missing_type = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] + json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] + + merged_results.each do |result| + result.missing_fields.each do |field| + json_schema_versions_by_missing_field[field] << result.json_schema_version + end + + result.missing_types.each do |type| + json_schema_versions_by_missing_type[type] << result.json_schema_version + end + + result.missing_necessary_fields.each do |missing_necessary_field| + json_schema_versions_by_missing_necessary_field[missing_necessary_field] << result.json_schema_version + end + end + + missing_field_errors = 
json_schema_versions_by_missing_field.map do |field, json_schema_versions| + missing_field_error_for(field, json_schema_versions) + end + + missing_type_errors = json_schema_versions_by_missing_type.map do |type, json_schema_versions| + missing_type_error_for(type, json_schema_versions) + end + + missing_necessary_field_errors = json_schema_versions_by_missing_necessary_field.map do |field, json_schema_versions| + missing_necessary_field_error_for(field, json_schema_versions) + end + + definition_conflict_errors = merged_results + .flat_map { |result| result.definition_conflicts.to_a } + .group_by(&:name) + .map do |name, deprecated_elements| + <<~EOS + The schema definition of `#{name}` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: + + #{format_deprecated_elements(deprecated_elements)} + EOS + end + + errors = missing_field_errors + missing_type_errors + missing_necessary_field_errors + definition_conflict_errors + return if errors.empty? + + abort errors.join("\n\n") + end + + def report_json_schema_merge_warnings + unused_elements = @schema_definition_results.unused_deprecated_elements + return if unused_elements.empty? + + @output.puts <<~EOS + The schema definition has #{unused_elements.size} unneeded reference(s) to deprecated schema elements. These can all be safely deleted: + + #{format_deprecated_elements(unused_elements)} + + EOS + end + + def format_deprecated_elements(deprecated_elements) + descriptions = deprecated_elements + .sort_by { |e| [e.defined_at.path, e.defined_at.lineno] } + .map(&:description) + .uniq + + descriptions.each.with_index(1).map { |desc, idx| "#{idx}. #{desc}" }.join("\n") + end + + def missing_field_error_for(qualified_field, json_schema_versions) + type, field = qualified_field.split(".") + + <<~EOS + The `#{qualified_field}` field (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. 
+ ElasticGraph cannot guess what it should do with this field's data when ingesting events at #{old_versions(json_schema_versions)}. + To continue, do one of the following: + + 1. If the `#{qualified_field}` field has been renamed, indicate this by calling `field.renamed_from "#{field}"` on the renamed field. + 2. If the `#{qualified_field}` field has been dropped, indicate this by calling `type.deleted_field "#{field}"` on the `#{type}` type. + 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def missing_type_error_for(type, json_schema_versions) + <<~EOS + The `#{type}` type (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this type's data when ingesting events at #{old_versions(json_schema_versions)}. + To continue, do one of the following: + + 1. If the `#{type}` type has been renamed, indicate this by calling `type.renamed_from "#{type}"` on the renamed type. + 2. If the `#{type}` field has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. + 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def missing_necessary_field_error_for(field, json_schema_versions) + path = field.fully_qualified_path.split(".").last + # :nocov: -- we only cover one side of this ternary. + has_or_have = (json_schema_versions.size == 1) ? 
"has" : "have" + # :nocov: + + <<~EOS + #{describe_json_schema_versions(json_schema_versions, "and")} #{has_or_have} no field that maps to the #{field.field_type} field path of `#{field.fully_qualified_path}`. + Since the field path is required for #{field.field_type}, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: + + 1. If the `#{field.fully_qualified_path}` field has been renamed, indicate this by calling `field.renamed_from "#{path}"` on the renamed field rather than using `deleted_field`. + 2. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def describe_json_schema_versions(json_schema_versions, conjunction) + json_schema_versions = json_schema_versions.sort + + # Steep doesn't support pattern matching yet, so have to skip type checking here. + __skip__ = case json_schema_versions + in [single_version] + "JSON schema version #{single_version}" + in [version1, version2] + "JSON schema versions #{version1} #{conjunction} #{version2}" + else + *versions, last_version = json_schema_versions + "JSON schema versions #{versions.join(", ")}, #{conjunction} #{last_version}" + end + end + + def old_versions(json_schema_versions) + return "this old version" if json_schema_versions.size == 1 + "these old versions" + end + + def files_noun_phrase(json_schema_versions) + return "its file" if json_schema_versions.size == 1 + "their files" + end + + def new_versioned_json_schema_artifact(desired_contents) + # File name depends on the schema_version field in the json schema. 
+ schema_version = desired_contents[JSON_SCHEMA_VERSION_KEY] + + new_yaml_artifact( + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{schema_version}.yaml"), + desired_contents, + extra_comment_lines: [ + "This JSON schema file contains internal ElasticGraph metadata and should be considered private.", + "The unversioned JSON schema file is public and intended to be provided to publishers." + ] + ) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/union_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/union_type_extension.rb new file mode 100644 index 000000000..c7c117b24 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/union_type_extension.rb @@ -0,0 +1,23 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/union" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extends union types with JSON schema behavior. + module UnionTypeExtension + # @private + def to_indexing_field_type + Indexing::FieldType::Union.new(super) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/spec_helper.rb b/elasticgraph-json_ingestion/spec/spec_helper.rb new file mode 100644 index 000000000..67077ddbd --- /dev/null +++ b/elasticgraph-json_ingestion/spec/spec_helper.rb @@ -0,0 +1,31 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion" +require "elastic_graph/json_ingestion/schema_definition/api_extension" +require "elastic_graph/schema_definition/test_support" + +module ElasticGraph + module JSONIngestion + module SchemaSupport + include ElasticGraph::SchemaDefinition::TestSupport + + def define_json_ingestion_schema(**options, &block) + define_schema( + schema_element_name_form: :snake_case, + **options, + &block + ) + end + end + end +end + +RSpec.configure do |config| + config.include ElasticGraph::JSONIngestion::SchemaSupport, :json_ingestion_schema +end diff --git a/elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb b/elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb new file mode 100644 index 000000000..1fd76bf37 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb @@ -0,0 +1,153 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +# :nocov: -- this is spec support code that is only exercised by the json-ingestion matcher specs, +# but repo-wide coverage runs track all `spec/**/*.rb` files. 
+require "elastic_graph/support/json_schema/meta_schema_validator" +require "elastic_graph/support/json_schema/validator_factory" +require "json" + +RSpec::Matchers.define :have_json_schema_like do |type, expected_schema, include_typename: true, ignore_descriptions: false| + diffable + + attr_reader :actual, :expected + + chain :which_matches do |*expected_matches| + @expected_matches = expected_matches + end + + chain :and_fails_to_match do |*expected_non_matches| + @expected_non_matches = expected_non_matches + end + + match do |full_schema| + modified_expected_schema = if include_typename && expected_schema.key?("properties") + with_typename(type, expected_schema) + else + expected_schema + end + .then { |schema| normalize(schema) } + + @expected = JSON.pretty_generate(modified_expected_schema) + + @raw_actual_schema = normalize(full_schema.fetch("$defs").fetch(type)) + actual_schema = ignore_descriptions ? strip_descriptions_from(@raw_actual_schema) : @raw_actual_schema + @actual = JSON.pretty_generate(actual_schema) + + @validator_factory = ElasticGraph::Support::JSONSchema::ValidatorFactory.new(schema: full_schema, sanitize_pii: false) + + @meta_schema_validation_errors = ElasticGraph::Support::JSONSchema.elastic_graph_internal_meta_schema_validator.validate(@raw_actual_schema) + + if @meta_schema_validation_errors.empty? && actual_schema == modified_expected_schema + validator = @validator_factory.validator_for(type) + + @match_failures = (@expected_matches || []).filter_map.with_index do |payload, index| + if (failure = validator.validate_with_error_message(payload)) + match_failure_description(payload, index, failure) + end + end + + @non_match_failures = (@expected_non_matches || []).filter_map.with_index do |payload, index| + if validator.valid?(payload) + non_match_failure_description(payload, index) + end + end + + @match_failures.empty? && @non_match_failures.empty? 
+ else + @match_failures = @non_match_failures = [] + false + end + end + + failure_message do |_actual_schema| + if @meta_schema_validation_errors.any? + <<~EOS + expected the generated JSON schema for `#{type}` to be valid according to the JSON schema meta-schema, but got validation errors: + + #{@meta_schema_validation_errors.map { |e| JSON.pretty_generate(e) }.join("\n\n")} + + + Actual schema: + #{JSON.pretty_generate(@raw_actual_schema)} + EOS + elsif @match_failures.any? + <<~EOS + expected given JSON payloads matched the JSON schema, but one or more did not. + + #{@match_failures.join("\n\n")} + EOS + elsif @non_match_failures.any? + <<~EOS + expected given JSON payloads to not match the JSON schema, but one or more did. + + #{@non_match_failures.join("\n\n")} + EOS + else + <<~EOS + expected valid JSON schema[1] but got JSON schema[2]. + + [1] Expected schema: + #{expected} + + [2] Actual schema: + #{actual} + EOS + end + end + + def match_failure_description(payload, index, failure) + <<~EOS + Failure at index #{index} from payload: + + #{JSON.pretty_generate(payload)} + + #{failure} + EOS + end + + def non_match_failure_description(payload, index) + <<~EOS + Failure at index #{index} from payload: + + #{JSON.pretty_generate(payload)} + EOS + end + + def normalize(schema) + ::JSON.parse(::JSON.generate(schema.sort.to_h)) + end + + def with_typename(type, schema) + new_schema = schema.dup + new_schema["properties"] = schema["properties"].merge({ + "__typename" => { + "type" => "string", + "const" => type, + "default" => type + } + }) + new_schema + end + + def strip_descriptions_from(object) + case object + when ::Hash + object.filter_map do |key, value| + unless key == "description" + [key, strip_descriptions_from(value)] + end + end.to_h + when ::Array + object.map { |e| strip_descriptions_from(e) } + else + object + end + end +end +# :nocov: diff --git a/elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb 
b/elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb new file mode 100644 index 000000000..0023e877b --- /dev/null +++ b/elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb @@ -0,0 +1,190 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "support/json_schema_matcher" + +# Note: this spec exists to verify our custom JSON schema matcher works +# properly. It doesn't really validate ElasticGraph itself, and if it +# becomes a burden to maintain, consider deleting it. +RSpec.describe "JSON schema matcher", aggregate_failures: false do + it "passes when the schema is valid and the same" do + type = "MyType" + schema = { + "type" => "object", + "properties" => { + "id" => { + "anyOf" => [ + {"type" => "string"}, + {"type" => "null"} + ] + } + }, + "required" => %w[id] + } + + expect(schema_with(type, schema)).to have_json_schema_like(type, schema, include_typename: false) + end + + it "treats string and symbol keys as equivalent because they dump the same" do + string_key_schema = { + "type" => "object", + "properties" => { + "name" => {"type" => "string"} + } + } + + symbol_key_schema = { + type: "object", + properties: { + "name" => {type: "string"} + } + } + + schema = { + "$schema" => ::ElasticGraph::JSON_META_SCHEMA, + "$defs" => { + "StringType" => string_key_schema, + "SymbolType" => symbol_key_schema + } + } + + expect(schema).to have_json_schema_like("StringType", symbol_key_schema, include_typename: false) + expect(schema).to have_json_schema_like("SymbolType", string_key_schema, include_typename: false) + end + + it "fails when the expected schema has an invalid value" do + type = "InvalidType" + schema = { + "type" => "object", + "properties" => { + "is_happy" => { + "anyOf" => [ + {"type" => 7}, + {"type" 
=> "null"} + ] + } + } + } + + expect { + expect(schema_with(type, schema)).to have_json_schema_like(type, schema) + }.to fail_with("but got validation errors") + end + + it "fails when the expected schema has an unknown field" do + type = "InvalidType" + schema = { + "type" => "object", + "properties" => { + "is_happy" => { + "anyOf" => [ + {"type" => "boolean", "foo" => 3}, + {"type" => "null"} + ] + } + } + } + + expect { + expect(schema_with(type, schema)).to have_json_schema_like(type, schema) + }.to fail_with("but got validation errors") + end + + it "fails when the expected and actual schemas are different but both valid" do + schema1 = { + "type" => "object", + "properties" => { + "is_happy" => { + "anyOf" => [ + {"type" => "boolean"}, + {"type" => "null"} + ] + } + } + } + + schema2 = { + "type" => "object", + "properties" => { + "is_happy" => { + "anyOf" => [ + {"type" => "string"}, + {"type" => "null"} + ] + } + } + } + + schema = { + "$schema" => ::ElasticGraph::JSON_META_SCHEMA, + "$defs" => { + "Type1" => schema1, + "Type2" => schema2 + } + } + + expect { + expect(schema).to have_json_schema_like("Type1", schema2) + }.to fail_with("but got JSON schema") + end + + it "uses the validator that allows extra `ElasticGraph` metadata in the JSON schema" do + type = "MyType" + schema = { + "type" => "object", + "properties" => { + "id" => { + "anyOf" => [ + {"type" => "string"}, + {"type" => "null"} + ], + "ElasticGraph" => { + "type" => "String", + "nameInIndex" => "id" + } + } + }, + "required" => %w[id] + } + + expect(schema_with(type, schema)).to have_json_schema_like(type, schema, include_typename: false) + end + + context "when `which_matches(...).and_fails_to_match(...)` is used" do + type = "Type" + schema = {"type" => "number"} + + it "passes when it correctly matches or fails to match as specified" do + expect(schema_with(type, schema)).to have_json_schema_like(type, schema) + .which_matches(1, 2, 3).and_fails_to_match("foo", "bar", nil) + end + + 
it "fails when one of the expected matches does not match" do + expect { + expect(schema_with(type, schema)).to have_json_schema_like(type, schema) + .which_matches(1, "bar", 3).and_fails_to_match("foo", "bazz", nil) + }.to fail_with("Failure at index 1 from payload", "bar") + end + + it "fails when one of the expected non matches does match" do + expect { + expect(schema_with(type, schema)).to have_json_schema_like(type, schema) + .which_matches(1, 2, 3).and_fails_to_match("foo", "bar", nil, 17) + }.to fail_with("Failure at index 3 from payload", "17") + end + end + + def schema_with(type, schema) + {"$schema" => ::ElasticGraph::JSON_META_SCHEMA, "$defs" => {type => schema}} + end + + def fail_with(*snippets) + raise_error(::RSpec::Expectations::ExpectationNotMetError, a_string_including(*snippets)) + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/api_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/api_extension_spec.rb new file mode 100644 index 000000000..d83f95d28 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/api_extension_spec.rb @@ -0,0 +1,207 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/api_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe APIExtension do + def build_api_with_extension + state = ::Data.define(:ingestion_serializer_state).new(ingestion_serializer_state: {}) + + factory = ::Object.new + + api = ::Object.new + api.instance_variable_set(:@state, state) + api.define_singleton_method(:state) { state } + api.define_singleton_method(:factory) { factory } + api.extend(APIExtension) + + [api, state, factory] + end + + it "extends the api factory with JSON schema factory behavior" do + _api, _state, factory = build_api_with_extension + + expect(factory).to be_a(FactoryExtension) + end + + it "initializes the JSON schema strictness defaults" do + _api, state, = build_api_with_extension + + expect(state.ingestion_serializer_state[:allow_omitted_json_schema_fields]).to eq(false) + expect(state.ingestion_serializer_state[:allow_extra_json_schema_fields]).to eq(true) + end + + it "preserves existing JSON schema strictness settings when extended" do + state = ::Data.define(:ingestion_serializer_state).new( + ingestion_serializer_state: { + allow_omitted_json_schema_fields: true, + allow_extra_json_schema_fields: false + } + ) + + factory = ::Object.new + + api = ::Object.new + api.instance_variable_set(:@state, state) + api.define_singleton_method(:state) { state } + api.define_singleton_method(:factory) { factory } + api.extend(APIExtension) + + expect(state.ingestion_serializer_state[:allow_omitted_json_schema_fields]).to eq(true) + expect(state.ingestion_serializer_state[:allow_extra_json_schema_fields]).to eq(false) + end + + it "merges reserved type names when composed with another ingestion serializer extension" do + state = ::Data.define(:ingestion_serializer_state).new( + ingestion_serializer_state: { + reserved_type_names: Set["ReservedName"] + } + ) + + factory = ::Object.new + + api = 
::Object.new + api.instance_variable_set(:@state, state) + api.define_singleton_method(:state) { state } + api.define_singleton_method(:factory) { factory } + api.extend(APIExtension) + + expect(state.ingestion_serializer_state[:reserved_type_names]).to eq( + Set["ReservedName", EVENT_ENVELOPE_JSON_SCHEMA_NAME] + ) + end + + it "stores the JSON schema version and its setter location" do + api, state, = build_api_with_extension + + expect(api.json_schema_version(3)).to eq(nil) + expect(state.ingestion_serializer_state[:json_schema_version]).to eq(3) + expect(state.ingestion_serializer_state[:json_schema_version_setter_location]).to be_a(::Thread::Backtrace::Location) + end + + it "rejects invalid JSON schema versions" do + api, = build_api_with_extension + + expect { + api.json_schema_version(0) + }.to raise_error(Errors::SchemaError, /must be a positive integer/) + + expect { + api.json_schema_version("3") + }.to raise_error(Errors::SchemaError, /must be a positive integer/) + end + + it "rejects setting the JSON schema version more than once" do + api, = build_api_with_extension + api.json_schema_version(1) + + expect { + api.json_schema_version(2) + }.to raise_error(Errors::SchemaError, /can only be set once/) + end + + it "stores JSON schema strictness settings" do + api, state, = build_api_with_extension + + expect(api.json_schema_strictness(allow_omitted_fields: true, allow_extra_fields: false)).to eq(nil) + expect(state.ingestion_serializer_state[:allow_omitted_json_schema_fields]).to eq(true) + expect(state.ingestion_serializer_state[:allow_extra_json_schema_fields]).to eq(false) + end + + it "validates JSON schema strictness arguments" do + api, = build_api_with_extension + + expect { + api.json_schema_strictness(allow_omitted_fields: :sometimes) + }.to raise_error(Errors::SchemaError, /allow_omitted_fields/) + + expect { + api.json_schema_strictness(allow_extra_fields: :sometimes) + }.to raise_error(Errors::SchemaError, /allow_extra_fields/) + end + end + 
+ RSpec.describe APIExtension, :json_ingestion_schema do + it "adds JSON schema generation and artifact dumping through schema definition extension hooks" do + results = define_json_ingestion_schema(reload_schema_artifacts: true, json_schema_version: nil) do |schema| + schema.json_schema_version 2 + schema.json_schema_strictness allow_omitted_fields: true, allow_extra_fields: false + + schema.object_type "Widget" do |type| + type.field "id", "ID!" + type.field "name", "String" + type.index "widgets" + end + end + + expect(results.available_json_schema_versions.to_a).to eq([2]) + expect(results.latest_json_schema_version).to eq(2) + + json_schema = results.json_schemas_for(2) + + expect(json_schema.fetch(JSON_SCHEMA_VERSION_KEY)).to eq(2) + expect(json_schema.fetch("$defs")).to include("ElasticGraphEventEnvelope") + expect(json_schema.dig("$defs", "Widget", "required")).to include("id") + end + + it "exposes the JSON schema version setter location on schema results" do + results = define_json_ingestion_schema(json_schema_version: nil) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |type| + type.field "id", "ID!" 
+ type.index "widgets" + end + end + + expect(results.json_schema_version_setter_location).to be_a(::Thread::Backtrace::Location) + end + + it "rejects user-defined scalar types without a JSON schema definition" do + expect { + define_json_ingestion_schema(json_schema_version: nil) do |schema| + schema.json_schema_version 2 + + schema.scalar_type "Url" do |type| + type.mapping type: "keyword" + end + end + }.to raise_error(Errors::SchemaError, /Scalar types require `json_schema` to be configured, but `Url` lacks `json_schema`/) + end + + it "supports enums whose input and output names are the same" do + results = define_json_ingestion_schema( + derived_type_name_formats: {InputEnum: "%{base}"}, + json_schema_version: nil + ) do |schema| + schema.json_schema_version 2 + + schema.enum_type "Color" do |type| + type.values "RED", "BLUE" + end + + schema.object_type "Widget" do |type| + type.field "id", "ID!" + type.field "color", "Color!" + type.index "widgets" + end + end + + expect(results.graphql_schema_string.scan(/^enum Color\b/)).to eq(["enum Color"]) + expect(results.json_schemas_for(2).dig("$defs", "Color")).to eq({ + "type" => "string", + "enum" => %w[RED BLUE] + }) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/factory_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/factory_extension_spec.rb new file mode 100644 index 000000000..360f2c392 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/factory_extension_spec.rb @@ -0,0 +1,91 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/factory_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe FactoryExtension do + let(:factory_class) do + base_class = ::Class.new do + def new_results + ::Object.new + end + + def new_schema_artifact_manager(*args, **kwargs) + @last_schema_artifact_manager_args = args + @last_schema_artifact_manager_kwargs = kwargs + ::Object.new + end + + attr_reader :last_schema_artifact_manager_args, :last_schema_artifact_manager_kwargs + end + + ::Class.new(base_class) do + prepend FactoryExtension + end + end + + it "extends results and schema artifact managers with JSON schema behavior" do + factory = factory_class.new + + expect(factory.new_results).to be_a(ResultsExtension) + + manager = factory.new_schema_artifact_manager(:positional, key: "value") + expect(manager).to be_a(SchemaArtifactManagerExtension) + expect(factory.last_schema_artifact_manager_args).to eq([:positional]) + expect(factory.last_schema_artifact_manager_kwargs).to eq({key: "value"}) + end + + it "extends schema elements even when no customization block is provided" do + base_class = ::Class.new do + def new_enum_type(name, &block) + build_type(name, &block) + end + + def new_interface_type(name, &block) + build_type(name, &block) + end + + def new_object_type(name, &block) + build_type(name, &block) + end + + def new_scalar_type(name, &block) + build_type(name, &block) + end + + def new_union_type(name, &block) + build_type(name, &block) + end + + private + + def build_type(name) + ::Object.new.tap do |type| + type.define_singleton_method(:name) { name } + yield type + end + end + end + + factory = ::Class.new(base_class) do + prepend FactoryExtension + end.new + + expect(factory.new_enum_type("Color")).to be_a(EnumTypeExtension) + expect(factory.new_interface_type("Node")).to be_a(ObjectInterfaceExtension) + expect(factory.new_object_type("Widget")).to 
be_a(ObjectInterfaceExtension) + expect(factory.new_scalar_type("Boolean")).to be_a(ScalarTypeExtension) + expect(factory.new_union_type("Result")).to be_a(UnionTypeExtension) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/field_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/field_extension_spec.rb new file mode 100644 index 000000000..c87e8189e --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/field_extension_spec.rb @@ -0,0 +1,29 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/field_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe FieldExtension do + it "returns nil when the core indexing field reference is unavailable" do + field_class = ::Class.new do + prepend FieldExtension + + def to_indexing_field_reference + nil + end + end + + expect(field_class.new.to_indexing_field_reference).to be_nil + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_reference_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_reference_spec.rb new file mode 100644 index 000000000..7924f6d87 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_reference_spec.rb @@ -0,0 +1,32 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_reference" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + RSpec.describe FieldReference do + it "returns nil when the wrapped field reference cannot be resolved" do + unresolved_field_reference = ::Object.new + unresolved_field_reference.define_singleton_method(:resolve) { nil } + + field_reference = described_class.new( + field_reference: unresolved_field_reference, + json_schema_layers: [], + json_schema_customizations: {} + ) + + expect(field_reference.resolve).to be_nil + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_spec.rb new file mode 100644 index 000000000..9152ae21b --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/field_spec.rb @@ -0,0 +1,29 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + RSpec.describe Field do + it "returns nil for unexpected JSON schema layers" do + field = described_class.new( + ::Object.new, + json_schema_layers: [], + json_schema_customizations: {} + ) + + expect(field.send(:process_layer, :unexpected, {"type" => "string"})).to be_nil + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata_spec.rb new file mode 100644 index 000000000..7e1a3a4ef --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata_spec.rb @@ -0,0 +1,153 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/spec_support/schema_definition_helpers" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + ::RSpec.describe "JSON schema field metadata generation" do + include_context "SchemaDefinitionHelpers" + + it "generates no field metadata for built-in scalar and enum types" do + metadata_by_type_and_field_name = dump_metadata + + json_schema_field_metadata = %w[ + Boolean Float ID Int String + Cursor Date DateTime DistanceUnit JsonSafeLong LocalTime LongString TimeZone Untyped + ].map do |type_name| + metadata_by_type_and_field_name.fetch(type_name) + end + + expect(json_schema_field_metadata).to all eq({}) + end + + it "generates field metadata for built-in object types" do + metadata_by_field_name = dump_metadata.fetch("GeoLocation") + + expect(metadata_by_field_name).to eq({ + "latitude" => field_meta_of("Float!", "lat"), + "longitude" => field_meta_of("Float!", "lon") + }) + end + + it "generates field metadata for user-defined object types" do + metadata_by_field_name = dump_metadata do |schema| + schema.object_type "Money" do |t| + t.field "amount", "Int" + t.field "currency", "String" + end + end.fetch("Money") + + expect(metadata_by_field_name).to eq({ + "amount" => field_meta_of("Int", "amount"), + "currency" => field_meta_of("String", "currency") + }) + end + + it "respects the type and `name_in_index` on user-defined fields" do + metadata_by_field_name = dump_metadata do |schema| + schema.object_type "Money" do |t| + t.field "amount", "Int!", name_in_index: "amount2" + t.field "currency", "[String]!", name_in_index: "currency2" + end + end.fetch("Money") + + expect(metadata_by_field_name).to eq({ + "amount" => field_meta_of("Int!", "amount2"), + "currency" => field_meta_of("[String]!", "currency2") + }) + end + + it "generates no field metadata for user-defined scalar or enum types since they have no subfields" do + metadata_by_type_and_field_name = dump_metadata do |schema| + 
schema.scalar_type "Url" do |t| + t.json_schema type: "string" + t.mapping type: "keyword" + end + + schema.enum_type "Color" do |t| + t.value "RED" + t.value "GREEN" + t.value "BLUE" + end + end + + json_schema_field_metadata = %w[Url Color].map do |type_name| + metadata_by_type_and_field_name.fetch(type_name) + end + + expect(json_schema_field_metadata).to all eq({}) + end + + it "generates no field metadata for user-defined union or interface types since the JSON schema" do + metadata_by_type_and_field_name = dump_metadata do |schema| + schema.interface_type "Named" do |t| + t.field "name", "String" + end + + schema.union_type "Character" do |t| + t.subtype "Droid" + t.subtype "Human" + end + + schema.object_type "Droid" do |t| + t.implements "Named" + t.field "name", "String" + t.field "model", "String" + end + + schema.object_type "Human" do |t| + t.implements "Named" + t.field "name", "String" + t.field "home_planet", "String" + end + end + + json_schema_field_metadata = %w[Named Character].map do |type_name| + metadata_by_type_and_field_name.fetch(type_name) + end + + expect(json_schema_field_metadata).to all eq({}) + end + + it "includes the JSON schema field metadata in the versioned JSON schemas but not in the current public JSON schema" do + results = define_schema do |schema| + schema.object_type "Money" do |t| + t.field "amount", "Int" + t.field "currency", "String" + end + end + + amount_path = ["$defs", "Money", "properties", "amount"] + + expect(results.json_schemas_for(1).dig(*amount_path)).to eq({ + "anyOf" => [{"$ref" => "#/$defs/Int"}, {"type" => "null"}], + "ElasticGraph" => {"nameInIndex" => "amount", "type" => "Int"} + }) + + expect(results.current_public_json_schema.dig(*amount_path)).to eq({ + "anyOf" => [{"$ref" => "#/$defs/Int"}, {"type" => "null"}] + }) + end + + def dump_metadata(&schema_definition) + define_schema(&schema_definition).json_schema_field_metadata_by_type_and_field_name + end + + def define_schema(&schema_definition) + 
super(schema_element_name_form: "snake_case", &schema_definition) + end + + def field_meta_of(type, name_in_index) + Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb new file mode 100644 index 000000000..e3091e7c8 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb @@ -0,0 +1,1072 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" +require "elastic_graph/spec_support/schema_definition_helpers" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + ::RSpec.describe JSONSchemaWithMetadata do + include_context "SchemaDefinitionHelpers" + + it "ignores derived indexed types that do not show up in the JSON schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + t.field "cost_currency", "String" + t.field "cost_currency_name", "String" + t.derive_indexed_type_fields "WidgetCurrency", from_id: "cost_currency" do |derive| + derive.immutable_value "name", from: "cost_currency_name" + end + end + + schema.object_type "WidgetCurrency" do |t| + t.field "id", "ID!" 
+ t.field "name", "String" + t.index "widget_currencies" + end + end + + expect(v1_json_schema.fetch("$defs").keys).to include("Widget").and exclude("WidgetCurrency") + end + + context "when merged into an old versioned JSON schema" do + it "maintains the same metadata when a field has not changed" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + expect( + metadata_for(v1_json_schema, "Widget", "amount") + ).to eq(metadata_for(updated_v1_json_schema, "Widget", "amount")).and have_dumped_metadata("amount", "Float") + end + + it "does not record metadata on the `__typename` field since it has special handling in our indexing logic" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + expect( + v1_json_schema.dig("$defs", "Widget", "properties", "__typename").keys + ).to eq(updated_v1_json_schema.dig("$defs", "Widget", "properties", "__typename").keys).and exclude("ElasticGraph") + end + + it "records a changed field `type` so that the correct indexing preparer gets used when events at the old version are ingested" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field 
"amount", "Int" + end + end + + expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Int") + end + + it "records a changed field `name_in_index` so that the field gets written to the correct field in the index" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "description", "String" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "description", "String", name_in_index: "description_text" do |f| + f.mapping type: "text" + end + end + end + + expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") + expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to have_dumped_metadata("description_text", "String") + end + + it "notifies of an issue when a field has been deleted or renamed without recording what happened" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "description", "String" + end + end + + missing_fields = dump_versioned_json_schema_missing_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "full_description", "String", name_in_index: "description" + end + end + + expect(missing_fields).to contain_exactly("Widget.description", "Widget.id") + end + + it "supports renamed fields when `renamed_from` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "description", "String" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do 
|schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "full_description", "String!", name_in_index: "description" do |f| + f.renamed_from "description" + end + end + end + + expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") + expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String!") + end + + it "supports deleted fields when `deleted_field` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "description", "String" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.deleted_field "description" + end + end + + expect(metadata_for(v1_json_schema, "Widget", "description")).to have_dumped_metadata("description", "String") + expect(metadata_for(updated_v1_json_schema, "Widget", "description")).to eq nil + end + + it "notifies of an issue when a type has been deleted or renamed without recording what happened" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Options" do |t| + t.field "size", "Int" + end + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + missing_types = dump_versioned_json_schema_missing_types(v1_json_schema) do |schema| + schema.json_schema_version 2 + + # Widget has been renamed to `Component`. 
+ schema.object_type "Component" do |t| + t.field "amount", "Float" + end + end + + expect(missing_types).to contain_exactly("Options", "Widget") + end + + it "supports renamed types when `renamed_from` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "amount", "Int", name_in_index: "amount_int" + t.renamed_from "Widget" + end + end + + expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount_int", "Int") + end + + it "supports deleted types when `deleted_type` is used" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "id", "ID" + end + + schema.deleted_type "Widget" + end + + expect(metadata_for(v1_json_schema, "Widget", "amount")).to have_dumped_metadata("amount", "Float") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to eq(nil) + end + + it "supports deleted and renamed fields on a renamed type so long as these are indicated through `deleted_` and `renamed_` API calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "String" + t.field "amount", "Float" + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + 
t.renamed_from "Widget" + + t.field "id", "ID" do |f| + f.renamed_from "token" + end + + t.deleted_field "amount" + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "token")).to have_dumped_metadata("id", "ID") + expect(metadata_for(updated_v1_json_schema, "Widget", "amount")).to eq(nil) + end + + it "keeps track of unused `deleted_field` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.deleted_field "token" # used + t.deleted_field "other" # unused + end + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`type.deleted_field "other"` at #{__FILE__}:#{__LINE__ - 5}) + ] + end + + it "keeps track of unused `renamed_field` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" do |f| + f.renamed_from "token" # used + f.renamed_from "other" # unused + end + end + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`field.renamed_from "other"` at #{__FILE__}:#{__LINE__ - 6}) + ] + end + + it "keeps track of unused `deleted_type` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + 
schema.deleted_type "Widget" # used + schema.deleted_type "Other" # unused + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`schema.deleted_type "Other"` at #{__FILE__}:#{__LINE__ - 4}) + ] + end + + it "keeps track of unused `renamed_type` calls" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + unused_deprecated_elements = dump_versioned_json_schema_unused_deprecated_elements(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "token", "ID" + t.renamed_from "Widget" # used + t.renamed_from "Other" # unused + end + end + + expect(unused_deprecated_elements.map(&:description)).to eq [ + %(`type.renamed_from "Other"` at #{__FILE__}:#{__LINE__ - 5}) + ] + end + + context "on a type that is using `route_with`" do + it "does not allow a `route_with` field to be entirely missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID" + t.deleted_field "workspace_id" + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.workspace_id2")] + end + + it "uses the `name_in_index` when determining if a `route_with` field is missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + 
t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID", name_in_index: "workspace_id3" + t.deleted_field "workspace_id" + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.workspace_id3")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID", name_in_index: "workspace_id" do |f| + f.renamed_from "workspace_id" + end + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to include("nameInIndex" => "workspace_id") + end + + it "handles embedded fields when determining if a `route_with` field is missing from an old schema version" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded", "Embedded" + + t.index "widgets" do |f| + f.route_with "embedded.workspace_id" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" + t.deleted_field "embedded" + + t.index "widgets" do |f| + f.route_with "embedded2.workspace_id" + end + end + end + + 
expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget.embedded2.workspace_id")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" do |f| + f.renamed_from "embedded" + end + + t.index "widgets" do |f| + f.route_with "embedded2.workspace_id" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "embedded")).to include("nameInIndex" => "embedded2") + end + + it "handles renamed types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to include("nameInIndex" => "workspace_id") + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "workspace_id2", "ID" + t.deleted_field "workspace_id" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.route_with "workspace_id2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("routing", "Widget2.workspace_id2")] + end + + it "handles deleted types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type 
"Widget" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.deleted_type "Widget" + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "workspace_id", "ID" + + t.index "widgets" do |f| + f.route_with "workspace_id" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget2", "workspace_id")).to eq nil + expect(metadata_for(updated_v1_json_schema, "Widget", "workspace_id")).to eq nil + end + end + + context "on a type using `rollover`" do + it "does not allow a `rollover` field to be entirely missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime", name_in_index: "created_at3" + t.deleted_field "created_at" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.created_at3")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime", name_in_index: "created_at" do |f| + f.renamed_from "created_at" + end + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to 
include("nameInIndex" => "created_at") + end + + it "uses the `name_in_index` when determining if a `rollover` field is missing from an old version of the schema" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime" + t.deleted_field "created_at" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.created_at2")] + end + + it "handles embedded fields when determining if a `rollover` field is missing from an old schema version" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Embedded" do |t| + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded", "Embedded" + + t.index "widgets" do |f| + f.rollover :yearly, "embedded.created_at" + end + end + end + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" + t.deleted_field "embedded" + + t.index "widgets" do |f| + f.rollover :yearly, "embedded2.created_at" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget.embedded2.created_at")] + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do 
|schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded2", "Embedded" do |f| + f.renamed_from "embedded" + end + + t.index "widgets" do |f| + f.rollover :yearly, "embedded2.created_at" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "embedded")).to include("nameInIndex" => "embedded2") + end + + it "handles renamed types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to include("nameInIndex" => "created_at") + + missing_necessary_fields = dump_versioned_json_schema_missing_necessary_fields(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "created_at2", "DateTime" + t.deleted_field "created_at" + t.renamed_from "Widget" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at2" + end + end + end + + expect(missing_necessary_fields).to eq [missing_necessary_field_of("rollover", "Widget2.created_at2")] + end + + it "handles deleted types" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + 
updated_v1_json_schema = dump_versioned_json_schema(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.deleted_type "Widget" + + schema.object_type "Widget2" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + + t.index "widgets" do |f| + f.rollover :yearly, "created_at" + end + end + end + + expect(metadata_for(updated_v1_json_schema, "Widget2", "created_at")).to eq nil + expect(metadata_for(updated_v1_json_schema, "Widget", "created_at")).to eq nil + end + end + + describe "conflicting definition tracking" do + it "includes a type that exists and is referenced from `deleted_type`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + + schema.deleted_type "Widget" + end + + expect(elements.map(&:description)).to contain_exactly( + %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 4}) + ) + end + + it "includes a type that exists and is referenced from `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + + schema.object_type "Component" do |t| + t.field "id", "ID" + t.renamed_from "Widget" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a type that exists and is referenced from `deleted_type` and `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + + schema.object_type "Component" do |t| + t.field "id", "ID" + t.renamed_from "Widget" + end + + schema.deleted_type "Widget" + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 7}), + %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a type that is 
referenced from `deleted_type` and `renamed_from` but does not exist" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "token", "ID" + end + end + + elements = dump_versioned_json_schema_definition_conflicts(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Component" do |t| + t.field "id", "ID" + t.renamed_from "Widget" + end + + schema.deleted_type "Widget" + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.renamed_from "Widget"` at #{__FILE__}:#{__LINE__ - 7}), + %(`schema.deleted_type "Widget"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a field that exists and is referenced from `deleted_field`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.deleted_field "id" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}) + ) + end + + it "includes a field that exists and is referenced from `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "token", "ID" do |f| + f.renamed_from "id" + end + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 6}) + ) + end + + it "includes a field that exists and is referenced from `deleted_field` and `renamed_from`" do + elements = dump_versioned_json_schema_definition_conflicts do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "token", "ID" do |f| + f.renamed_from "id" + end + t.deleted_field "id" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}), + %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 8}) + ) 
+ end + + it "includes a field that is referenced from `deleted_field` and `renamed_from` but does not exist" do + v1_json_schema = dump_versioned_json_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Widget" do |t| + t.field "id", "ID" + end + end + + elements = dump_versioned_json_schema_definition_conflicts(v1_json_schema) do |schema| + schema.json_schema_version 2 + + schema.object_type "Widget" do |t| + t.field "token", "ID" do |f| + f.renamed_from "id" + end + t.deleted_field "id" + end + end + + expect(elements.map(&:description)).to contain_exactly( + %(`type.deleted_field "id"` at #{__FILE__}:#{__LINE__ - 5}), + %(`field.renamed_from "id"` at #{__FILE__}:#{__LINE__ - 8}) + ) + end + end + end + + def dump_versioned_json_schema(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).to be_empty + + merge_result.json_schema + end + + def dump_versioned_json_schema_missing_fields(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).not_to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).to be_empty + + merge_result.missing_fields + end + + def dump_versioned_json_schema_definition_conflicts(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).not_to be_empty + expect(merge_result.missing_necessary_fields).to 
be_empty + + merge_result.definition_conflicts + end + + def dump_versioned_json_schema_missing_types(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).not_to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).to be_empty + + merge_result.missing_types + end + + def dump_versioned_json_schema_missing_necessary_fields(old_versioned_json_schema = nil, &schema_definition) + merge_result = perform_merge(old_versioned_json_schema, &schema_definition) + + expect(merge_result.missing_fields).to be_empty + expect(merge_result.missing_types).to be_empty + expect(merge_result.definition_conflicts).to be_empty + expect(merge_result.missing_necessary_fields).not_to be_empty + + merge_result.missing_necessary_fields + end + + def dump_versioned_json_schema_unused_deprecated_elements(old_versioned_json_schema = nil, &schema_definition) + results = define_schema(&schema_definition) + results.merge_field_metadata_into_json_schema(old_versioned_json_schema || results.current_public_json_schema) + results.unused_deprecated_elements + end + + def perform_merge(old_versioned_json_schema = nil, &schema_definition) + results = define_schema(&schema_definition) + results.merge_field_metadata_into_json_schema(old_versioned_json_schema || results.current_public_json_schema).tap do + expect(results.unused_deprecated_elements).to be_empty + end + end + + def metadata_for(json_schema, type, field) + json_schema.dig("$defs", type, "properties", field, "ElasticGraph") + end + + def define_schema(&schema_definition) + super(schema_element_name_form: "snake_case", &schema_definition) + end + + def have_dumped_metadata(name_in_index, type) + eq({"nameInIndex" => name_in_index, "type" => type}) + end + + def missing_necessary_field_of(field_type, fully_qualified_path) + 
JSONSchemaWithMetadata::MissingNecessaryField.new(field_type, fully_qualified_path) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb new file mode 100644 index 000000000..aa69cd2aa --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb @@ -0,0 +1,132 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" +require "elastic_graph/spec_support/schema_definition_helpers" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe JSONSchemaPruner do + include_context "SchemaDefinitionHelpers" + + describe ".prune" do + subject { described_class.prune(schema) } + + shared_examples "prunes types not referenced by indexed types" do |expected_type_names| + it do + expect(subject["$defs"].keys).to match_array(expected_type_names) + end + end + + context "when there are indexable types" do + let(:schema) do + dump_schema do |s| + # Widget and Boolean should be present + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "inStock", "Boolean" + t.index "widgets" + end + + # UnindexedWidget and Float should get pruned + s.object_type "UnindexedWidget" do |t| + t.field "id", "ID!" 
+ t.field "cost", "Float" + end + end + end + + it_behaves_like "prunes types not referenced by indexed types", + [EVENT_ENVELOPE_JSON_SCHEMA_NAME, "Boolean", "ID", "Widget"] + end + + context "when there are no types defined" do + let(:schema) { dump_schema } + + it_behaves_like "prunes types not referenced by indexed types", [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + end + + context "when there are no indexable types defined" do + let(:schema) do + dump_schema do |s| + # UnindexedWidget and Float should get pruned + s.object_type "UnindexedWidget" do |t| + t.field "id", "ID!" + t.field "cost", "Float" + end + end + end + + it_behaves_like "prunes types not referenced by indexed types", [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + end + + context "when there are nested types referenced from an indexed type" do + let(:schema) do + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "options", "WidgetOptions" + t.index "widgets" + end + + s.object_type "WidgetOptions" do |t| + t.field "size", "Size" + t.field "color", "Color" + t.field "cost", "Money" + end + + s.enum_type "Size" do |t| + t.value "SMALL" + t.value "MEDIUM" + t.value "LARGE" + end + + s.enum_type "Color" do |t| + t.value "RED" + t.value "YELLOW" + t.value "BLUE" + end + + s.object_type "Money" do |t| + t.field "currency", "Currency" + t.field "amount_cents", "Int" + end + + s.enum_type "Currency" do |t| + t.value "USD" + t.value "CAD" + end + end + end + + it_behaves_like "prunes types not referenced by indexed types", [ + EVENT_ENVELOPE_JSON_SCHEMA_NAME, + "Color", + "Currency", + "ID", + "Int", + "Money", + "Size", + "Widget", + "WidgetOptions" + ] + end + end + + def dump_schema(&schema_definition) + schema_definition_results = define_schema(schema_element_name_form: "snake_case", &schema_definition) + latest_json_schema_version = schema_definition_results.latest_json_schema_version + + schema_definition_results.json_schemas_for(latest_json_schema_version) + end + end + end + end 
+end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb new file mode 100644 index 000000000..92bdbf4ea --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb @@ -0,0 +1,3073 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/spec_support/schema_definition_helpers" +require "support/json_schema_matcher" + +module ElasticGraph + module SchemaDefinition + ::RSpec.describe "JSON schema generation" do + include_context "SchemaDefinitionHelpers" + json_schema_id = {"allOf" => [{"$ref" => "#/$defs/ID"}, {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}]} + json_schema_float = {"$ref" => "#/$defs/Float"} + json_schema_integer = {"$ref" => "#/$defs/Int"} + json_schema_string = {"allOf" => [{"$ref" => "#/$defs/String"}, {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}]} + json_schema_null = {"type" => "null"} + + context "on ElasticGraph built-in types, it generates the expected JSON schema" do + attr_reader :json_schema + + before(:context) do + @json_schema = dump_schema do |s| + # Include a random version number to ensure it's getting used correctly + s.json_schema_version 42 + + # Include a basic indexed type here to validate that the envelope is getting + # generated correctly (we'll ignore it below) + s.object_type "Widget" do |t| + t.field "id", "ID!" 
+ t.index "widgets" + end + end + @tested_types = ::Set.new + end + + after(:context) do + built_in_types = @json_schema.fetch("$defs").keys - ["Widget"] + input_enum_types = %w[DateTimeUnitInput DistanceUnitInput MatchesQueryAllowedEditsPerTerm] + + # Input enum types are named with an `Input` suffix. The JSON schema only contains the types we index, which are output types, + # and therefore it does not have the input enum types. + untested_types = built_in_types - @tested_types.to_a - input_enum_types - ["Query"] + + expect(untested_types).to be_empty, + "It appears that #{untested_types.size} built-in type(s) lack test coverage in `json_schema_spec.rb`. " \ + "Cover them with a test to fix this failure, or ignore this if not running the entire set of built-in type tests:\n\n" \ + "- #{untested_types.sort.join("\n- ")}" + end + + example "for `#{EVENT_ENVELOPE_JSON_SCHEMA_NAME}`" do + expect(json_schema).to have_json_schema_like(EVENT_ENVELOPE_JSON_SCHEMA_NAME, { + "type" => "object", + "properties" => { + "op" => {"type" => "string", "enum" => %w[upsert]}, + "type" => {"type" => "string", "enum" => ["Widget"]}, + "id" => {"type" => "string", "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}, + "version" => {"type" => "integer", "minimum" => 0, "maximum" => (2**63) - 1}, + "record" => {"type" => "object"}, + "latency_timestamps" => { + "type" => "object", + "additionalProperties" => false, + "patternProperties" => {"^\\w+_at$" => {"type" => "string", "format" => "date-time"}} + }, + JSON_SCHEMA_VERSION_KEY => {"const" => 42}, + "message_id" => {"type" => "string"} + }, + "additionalProperties" => false, + "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], + "if" => {"properties" => {"op" => {"const" => "upsert"}}}, + "then" => {"required" => ["record"]} + }, include_typename: false, ignore_descriptions: true) + end + + %w[ID String].each do |type_name| + example "for `#{type_name}`" do + expect(json_schema).to have_json_schema_like(type_name, { + 
"type" => "string" + }).which_matches("abc", "a" * DEFAULT_MAX_KEYWORD_LENGTH, "a" * (DEFAULT_MAX_KEYWORD_LENGTH + 1)) + .and_fails_to_match(0, nil, true) + end + end + + example "for `Int`" do + expect(json_schema).to have_json_schema_like("Int", { + "type" => "integer", + "minimum" => -2147483648, + "maximum" => 2147483647 + }).which_matches(0, 1, -1, INT_MAX, INT_MIN) + .and_fails_to_match("a", 0.5, true, INT_MAX + 1, INT_MIN - 1) + end + + example "for `Boolean`" do + expect(json_schema).to have_json_schema_like("Boolean", { + "type" => "boolean" + }).which_matches(true, false) + .and_fails_to_match("true", "false", "yes", "no", 1, 0, nil) + end + + example "for `Float`" do + expect(json_schema).to have_json_schema_like("Float", { + "type" => "number" + }).which_matches(0, 1, -1, 0.1, -99.0) + .and_fails_to_match("a", true, nil) + end + + example "for `TimeZone`" do + expect(json_schema).to have_json_schema_like("TimeZone", { + "type" => "string", + "enum" => GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a + }) + .which_matches("America/Los_Angeles") + .and_fails_to_match("America/Seattle") # America/Seattle is not a valid time zone. 
+ end + + example "for `Untyped`" do + expect(json_schema).to have_json_schema_like("Untyped", { + "type" => %w[array boolean integer number object string] + }).which_matches( + 3, + 3.75, + "string", + true, + %w[a b], + {"some" => "data"}, + {"some" => {"nested" => {"data" => [1, true, "3"]}}} + ).and_fails_to_match(nil) + end + + example "for `GeoLocation`" do + expect(json_schema).to have_json_schema_like("GeoLocation", { + "type" => "object", + "properties" => { + "latitude" => { + "allOf" => [ + json_schema_float, + {"minimum" => -90, "maximum" => 90} + ] + }, + "longitude" => { + "allOf" => [ + json_schema_float, + {"minimum" => -180, "maximum" => 180} + ] + } + }, + "required" => %w[latitude longitude] + }, ignore_descriptions: true).which_matches( + {"latitude" => 0, "longitude" => 0}, + {"latitude" => -90, "longitude" => -180}, + {"latitude" => 90, "longitude" => 180} + ).and_fails_to_match( + nil, + {}, + {"latitude" => "0", "longitude" => "1"}, + {"latitude" => -91, "longitude" => 0}, + {"latitude" => 91, "longitude" => 0}, + {"latitude" => 0, "longitude" => -181}, + {"latitude" => 0, "longitude" => 181}, + {"latitude" => nil, "longitude" => 0}, + {"latitude" => 0, "longitude" => nil} + ) + end + + example "for `Cursor`" do + expect(json_schema).to have_json_schema_like("Cursor", {"type" => "string"}) + .which_matches("abc") + .and_fails_to_match(0, nil, true) + end + + example "for `Date`" do + expect(json_schema).to have_json_schema_like("Date", {"type" => "string", "format" => "date"}) + .which_matches("2023-01-01", "1999-12-31") # yyyy-MM-dd + .and_fails_to_match(0, nil, true, "01-01-2023", "0000-00-00", "2023-13-40") + end + + example "for `DateUnit`" do + expect(json_schema).to have_json_schema_like("DateUnit", { + "enum" => %w[DAY], "type" => "string" + }).which_matches(*%w[DAY]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `DateGroupingTruncationUnit`" do + expect(json_schema).to 
have_json_schema_like("DateGroupingTruncationUnit", { + "enum" => %w[YEAR QUARTER MONTH WEEK DAY], "type" => "string" + }).which_matches(*%w[YEAR QUARTER MONTH WEEK DAY]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `DateTime`" do + expect(json_schema).to have_json_schema_like("DateTime", { + "type" => "string", "format" => "date-time" + }).which_matches("2023-01-01T00:00:00.000Z", "1999-12-31T23:59:59.999Z") # T: yyyy-MM-dd'T'HH:mm:ss.SSSZ + .and_fails_to_match(0, nil, true, "01-01-2023", "0000-00-00 00:00", "2023-13-40 45:33") + end + + example "for `DateTimeUnit`" do + expect(json_schema).to have_json_schema_like("DateTimeUnit", { + "enum" => %w[DAY HOUR MINUTE SECOND MILLISECOND], "type" => "string" + }).which_matches(*%w[DAY HOUR MINUTE SECOND MILLISECOND]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `DateTimeGroupingTruncationUnit`" do + expect(json_schema).to have_json_schema_like("DateTimeGroupingTruncationUnit", { + "enum" => %w[YEAR QUARTER MONTH WEEK DAY HOUR MINUTE SECOND], "type" => "string" + }).which_matches(*%w[YEAR QUARTER MONTH WEEK DAY HOUR MINUTE SECOND]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `DayOfWeek`" do + expect(json_schema).to have_json_schema_like("DayOfWeek", { + "enum" => %w[MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY SUNDAY], "type" => "string" + }).which_matches(*%w[MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY SUNDAY]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `DistanceUnit`" do + expect(json_schema).to have_json_schema_like("DistanceUnit", { + "enum" => %w[MILE YARD FOOT INCH KILOMETER METER CENTIMETER MILLIMETER NAUTICAL_MILE], "type" => "string" + }).which_matches(*%w[MILE YARD FOOT INCH KILOMETER METER CENTIMETER MILLIMETER NAUTICAL_MILE]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for 
`JsonSafeLong`" do + expect(json_schema).to have_json_schema_like("JsonSafeLong", { + "maximum" => JSON_SAFE_LONG_MAX, + "minimum" => JSON_SAFE_LONG_MIN, + "type" => "integer" + }).which_matches(0, JSON_SAFE_LONG_MIN, JSON_SAFE_LONG_MAX) + .and_fails_to_match(0.5, nil, true, JSON_SAFE_LONG_MAX + 1, JSON_SAFE_LONG_MIN - 1) + end + + example "for `LocalTime`" do + expect(json_schema).to have_json_schema_like("LocalTime", { + "type" => "string", + "pattern" => VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN + }) + .which_matches("01:23:45", "14:56:39.000", "23:59:01.1", "23:59:01.12", "23:59:01.13") # HH:mm:ss, HH:mm:ss.S, HH:mm:ss.SS, HH:mm:ss.SSS + .and_fails_to_match(0, nil, true, "abc", "99:00:00", "59:59.999Z", "01:23:45.1234", "14:56:39a000") + end + + example "for `LocalTimeUnit`" do + expect(json_schema).to have_json_schema_like("LocalTimeUnit", { + "enum" => %w[HOUR MINUTE SECOND MILLISECOND], "type" => "string" + }).which_matches(*%w[HOUR MINUTE SECOND MILLISECOND]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `LocalTimeGroupingTruncationUnit`" do + expect(json_schema).to have_json_schema_like("LocalTimeGroupingTruncationUnit", { + "enum" => %w[HOUR MINUTE SECOND], "type" => "string" + }).which_matches(*%w[HOUR MINUTE SECOND]) + .and_fails_to_match(0, nil, true, "literally any other string") + end + + example "for `LongString`" do + expect(json_schema).to have_json_schema_like("LongString", { + "maximum" => LONG_STRING_MAX, + "minimum" => LONG_STRING_MIN, + "type" => "integer" + }) + .which_matches(0, LONG_STRING_MAX, LONG_STRING_MIN) + .and_fails_to_match(0.5, nil, true, LONG_STRING_MIN - 1, LONG_STRING_MAX + 1) + end + + it "excludes `Query`" do + expect(json_schema.fetch("$defs").keys).to exclude "Query" + end + + def have_json_schema_like(type_name, *args, **kwargs) + @tested_types << type_name + super(type_name, *args, **kwargs) + end + end + + it "allows any valid JSON type for a nullable `Untyped` field" do + 
json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "j1", "Untyped" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "j1" => json_schema_ref("Untyped", is_keyword_type: true) + }, + "required" => %w[j1] + }).which_matches( + {"j1" => 3}, + {"j1" => 3.75}, + {"j1" => "string"}, + {"j1" => "a" * DEFAULT_MAX_KEYWORD_LENGTH}, + {"j1" => nil}, + {"j1" => true}, + {"j1" => %w[a b]}, + {"j1" => {"some" => "data"}}, + {"j1" => {"some" => {"nested" => {"data" => [1, true, "3"]}}}} + ).and_fails_to_match( + {"j1" => "a" * (DEFAULT_MAX_KEYWORD_LENGTH + 1)} + ) + end + + it "does not duplicate `required` fields when 2 GraphQL fields are both backed by the same indexing field" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "name", "String!" + t.field "name2", "String!", name_in_index: "name", graphql_only: true + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "name" => json_schema_ref("String!") + }, + "required" => %w[name] + }) + end + + it "does not allow multiple indexing fields with the same name because that would result in multiple JSON schema fields flowing into the same index field but with conflicting values" do + expect { + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "name", "String!" 
+ t.field "name2", "String!", name_in_index: "name" + end + end + }.to raise_error Errors::SchemaError, a_string_including("Duplicate indexing field", "MyType: name", "set `graphql_only: true`") + end + + it "raises an exception when `json_schema` on a field definition has invalid json schema option values" do + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "foo", "String" do |f| + expect { + f.json_schema maxLength: "twelve" + }.to raise_error Errors::SchemaError, a_string_including("Invalid JSON schema options", "foo: String", "maxLength") + + expect(f.json_schema_options).to be_empty + + # Demonstrate that `maxLength` with an int value is allowed + f.json_schema maxLength: 12 + end + end + end + end + + it "does not allow the extra `ElasticGraph` metadata that ElasticGraph adds itself" do + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "foo", "String" do |f| + expect { + f.json_schema ElasticGraph: {type: "String"} + }.to raise_error Errors::SchemaError, a_string_including("Invalid JSON schema options", "foo: String", '"data_pointer": "/ElasticGraph"') + + expect(f.json_schema_options).to be_empty + + # Demonstrate that `maxLength` with an int value is allowed + f.json_schema maxLength: 12 + end + end + end + end + + it "raises an exception when `json_schema` on a field definition has invalid json schema option names" do + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "foo", "String" do |f| + expect { + f.json_schema longestLength: 14 # maxLength is correct, not longestLength + }.to raise_error Errors::SchemaError, a_string_including("Invalid JSON schema options", "foo: String", "longestLength") + end + end + end + end + + it "raises an exception when `json_schema` on a scalar type has invalid json schema option values" do + dump_schema do |s| + s.scalar_type "MyType" do |t| + t.mapping type: "keyword" + + expect { + t.json_schema type: "string", maxLength: "twelve" + }.to raise_error Errors::SchemaError, 
a_string_including("Invalid JSON schema options", "MyType", "twelve") + + # Demonstrate that `maxLength` with an int value is allowed + t.json_schema type: "string", maxLength: 12 + end + end + end + + it "raises an exception when `json_schema` on a scalar type has invalid json schema option values" do + dump_schema do |s| + s.scalar_type "MyType" do |t| + t.mapping type: "keyword" + + expect { + t.json_schema type: "string", longestLength: 14 # maxLength is correct, not longestLength + }.to raise_error Errors::SchemaError, a_string_including("Invalid JSON schema options", "MyType", "longestLength") + + t.json_schema type: "string" + end + end + end + + it "raises an exception when `json_schema` on an object type has invalid json schema option values" do + dump_schema do |s| + s.object_type "MyType" do |t| + expect { + t.json_schema type: "string", maxLength: "twelve" + }.to raise_error Errors::SchemaError, a_string_including("Invalid JSON schema options", "MyType", "twelve") + + # Demonstrate that `maxLength` with an int value is allowed + t.json_schema type: "string", maxLength: 12 + end + end + end + + it "raises an exception when `json_schema` on a scalar type has invalid json schema option values" do + dump_schema do |s| + s.object_type "MyType" do |t| + t.mapping type: "keyword" + + expect { + t.json_schema type: "string", longestLength: 14 # maxLength is correct, not longestLength + }.to raise_error Errors::SchemaError, a_string_including("Invalid JSON schema options", "MyType", "longestLength") + end + end + end + + context "for a field that is `sourced_from` a related type" do + it "excludes the `source_from` field because it comes from another source type and will be represented in the JSON schema of that type" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "name", "String!" + + t.index "widgets" + end + + s.object_type "Component" do |t| + t.field "id", "ID!" 
+ t.relates_to_one "widget", "Widget", via: "component_ids", dir: :in + + t.field "widget_name", "String!" do |f| + f.sourced_from "widget", "name" + end + + t.index "components" do |i| + i.has_had_multiple_sources! + end + end + end + + expect(json_schema).to have_json_schema_like("Component", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!") + }, + "required" => %w[id] + }) + end + + it "does not allow any JSON schema customizations of the field because they should be configured on the source type itself" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "size", "Int" + + t.index "widgets" + end + + s.object_type "Component" do |t| + t.field "id", "ID!" + t.relates_to_one "widget", "Widget", via: "component_ids", dir: :in + + # Here we call `json_schema` after `sourced_from`... + t.field "widget_name", "String!" do |f| + f.sourced_from "widget", "name" + f.json_schema minLength: 4 + end + + # ...vs here we call it before. We do this to demonstrate the order doesn't matter. + t.field "widget_size", "Int" do |f| + f.json_schema minimum: 0 + f.sourced_from "widget", "size" + end + + t.index "components" do |i| + i.has_had_multiple_sources! + end + end + end + }.to raise_error a_string_including( + "Component` has 2 field(s) (`widget_name`, `widget_size`)", + "also have JSON schema customizations" + ) + end + end + + %w[ID String].first(1).each do |graphql_type| + it "limits the length of `#{graphql_type}!` fields based on datastore limits" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "foo", "#{graphql_type}!" 
+ end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "foo" => { + "allOf" => [ + {"$ref" => "#/$defs/#{graphql_type}"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} + ] + } + }, + "required" => %w[foo] + }).which_matches( + {"foo" => "abc"}, + {"foo" => "a" * DEFAULT_MAX_KEYWORD_LENGTH} + ).and_fails_to_match( + {"foo" => "a" * (DEFAULT_MAX_KEYWORD_LENGTH + 1)}, + {"foo" => nil}, + {"foo" => -129}, + {"foo" => 128} + ) + end + + it "limits the length of `#{graphql_type}` fields based on datastore limits" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "foo", graphql_type + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "foo" => { + "anyOf" => [ + { + "allOf" => [ + {"$ref" => "#/$defs/#{graphql_type}"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} + ] + }, + {"type" => "null"} + ] + } + }, + "required" => %w[foo] + }).which_matches( + {"foo" => "abc"}, + {"foo" => nil}, + {"foo" => "a" * DEFAULT_MAX_KEYWORD_LENGTH} + ).and_fails_to_match( + {"foo" => "a" * (DEFAULT_MAX_KEYWORD_LENGTH + 1)}, + {"foo" => -129}, + {"foo" => 128} + ) + end + + it "uses a larger `maxLength` for a #{graphql_type} if the mapping type is set to `text`" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "foo", "#{graphql_type}!" 
do |f| + f.mapping type: "text" + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "foo" => { + "allOf" => [ + {"$ref" => "#/$defs/#{graphql_type}"}, + {"maxLength" => DEFAULT_MAX_TEXT_LENGTH} + ] + } + }, + "required" => %w[foo] + }).which_matches( + {"foo" => "abc"}, + {"foo" => "a" * DEFAULT_MAX_TEXT_LENGTH} + ).and_fails_to_match( + {"foo" => "a" * (DEFAULT_MAX_TEXT_LENGTH + 1)}, + {"foo" => nil}, + {"foo" => -129}, + {"foo" => 128} + ) + end + end + + it "limits the size of custom `keyword` types based on datastore limits" do + json_schema = dump_schema do |s| + s.scalar_type "MyString" do |t| + t.json_schema type: "string" + t.mapping type: "keyword" + end + + s.object_type "MyType" do |t| + t.field "foo", "MyString" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "foo" => { + "anyOf" => [ + { + "allOf" => [ + {"$ref" => "#/$defs/MyString"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} + ] + }, + {"type" => "null"} + ] + } + }, + "required" => %w[foo] + }).which_matches( + {"foo" => "abc"}, + {"foo" => nil}, + {"foo" => "a" * DEFAULT_MAX_KEYWORD_LENGTH} + ).and_fails_to_match( + {"foo" => "a" * (DEFAULT_MAX_KEYWORD_LENGTH + 1)}, + {"foo" => -129}, + {"foo" => 128} + ) + end + + it "allows the `maxLength` to be overridden on keyword and text fields" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "id", "ID!" do |f| + f.json_schema maxLength: 50 + end + + t.field "string", "String!" 
do |f| + f.json_schema maxLength: 100 + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "id" => { + "allOf" => [ + {"$ref" => "#/$defs/ID"}, + {"maxLength" => 50} + ] + }, + "string" => { + "allOf" => [ + {"$ref" => "#/$defs/String"}, + {"maxLength" => 100} + ] + } + }, + "required" => %w[id string] + }) + end + + it "does not include `maxLength` on enum fields since we already limit the values" do + json_schema = dump_schema do |s| + s.enum_type "Color" do |t| + t.value "RED" + t.value "GREEN" + t.value "BLUE" + end + + s.object_type "MyType" do |t| + t.field "color1", "Color!" + + t.field "color2", "Color!" do |f| + f.json_schema maxLength: 50 + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "color1" => {"$ref" => "#/$defs/Color"}, + "color2" => {"$ref" => "#/$defs/Color"} + }, + "required" => %w[color1 color2] + }).which_matches( + {"color1" => "RED", "color2" => "GREEN"}, + {"color1" => "BLUE", "color2" => "RED"} + ).and_fails_to_match( + {"color1" => "YELLOW", "color2" => "GREEN"}, + {"color1" => "BLUE", "color2" => "BROWN"} + ) + end + + it "limits byte types based on the datastore mapping type range" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "byte", "Int!" 
do |f| + f.mapping type: "byte" + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "byte" => { + "allOf" => [ + json_schema_integer, + {"minimum" => -128, "maximum" => 127} + ] + } + }, + "required" => %w[byte] + }).which_matches( + {"byte" => 0}, + {"byte" => -128}, + {"byte" => 127} + ).and_fails_to_match( + {"byte" => "a"}, + {"byte" => nil}, + {"byte" => -129}, + {"byte" => 128} + ) + end + + it "limits short types based on the datastore mapping type range" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "short", "Int!" do |f| + f.mapping type: "short" + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "short" => { + "allOf" => [ + json_schema_integer, + {"minimum" => -32_768, "maximum" => 32_767} + ] + } + }, + "required" => %w[short] + }).which_matches( + {"short" => 0}, + {"short" => -32_768}, + {"short" => 32_767} + ).and_fails_to_match( + {"short" => "a"}, + {"short" => nil}, + {"short" => -32_769}, + {"short" => 32_768} + ) + end + + it "limits integer types based on the datastore mapping type range" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "integer", "Int!" 
do |f| + f.mapping type: "integer" + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "integer" => json_schema_ref("Int!") + }, + "required" => %w[integer] + }).which_matches( + {"integer" => 0}, + {"integer" => INT_MAX}, + {"integer" => INT_MIN} + ).and_fails_to_match( + {"integer" => "a"}, + {"integer" => nil}, + {"integer" => INT_MAX + 1}, + {"integer" => INT_MIN - 1} + ) + end + + it "supports nullable fields by wrapping the schema in 'anyOf' with a 'null' type" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "is_happy", "Boolean" + t.field "size", "Float" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "is_happy" => json_schema_ref("Boolean"), + "size" => json_schema_ref("Float") + }, + "required" => %w[is_happy size] + }) + end + + it "returns a JSON schema for a type with arrays" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "color", "[String!]" + t.field "amount_cents", "[Int!]!" + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "color" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_string + }, + json_schema_null + ] + }, + "amount_cents" => { + "type" => "array", + "items" => json_schema_integer + } + }, + "required" => %w[color amount_cents] + }) + end + + it "returns a JSON schema for a type with enums" do + json_schema = dump_schema do |s| + s.enum_type "Color" do |t| + t.values "RED", "BLUE", "GREEN" + end + + s.enum_type "Size" do |t| + t.values "SMALL", "MEDIUM", "LARGE" + end + + s.object_type "Widget" do |t| + t.field "size", "Size!" 
+ t.field "color", "Color" + end + end + + expect(json_schema).to have_json_schema_like("Size", { + "type" => "string", + "enum" => %w[SMALL MEDIUM LARGE] + }) + + expect(json_schema).to have_json_schema_like("Color", { + "type" => "string", + "enum" => %w[RED BLUE GREEN] + }) + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "size" => json_schema_ref("Size!"), + "color" => json_schema_ref("Color") + }, + "required" => %w[size color] + }) + end + + it "respects enum value overrides" do + json_schema = dump_schema(enum_value_overrides_by_type: { + Color: {RED: "REDISH", BLUE: "BLUEISH"} + }) do |s| + s.enum_type "Color" do |t| + t.values "RED", "BLUE", "GREEN" + end + end + + expect(json_schema).to have_json_schema_like("Color", { + "type" => "string", + "enum" => %w[REDISH BLUEISH GREEN] + }) + end + + it "uses `enum` for an Enum with a single value" do + json_schema = dump_schema do |s| + s.enum_type "Color" do |t| + t.values "RED" + end + end + + expect(json_schema).to have_json_schema_like("Color", { + "type" => "string", + "enum" => ["RED"] + }) + end + + it "returns a JSON schema for a type with objects" do + json_schema = dump_schema do |s| + s.object_type "Color" do |t| + t.field "red", "Int!" + t.field "green", "Int!" + t.field "blue", "Int!" + end + + s.object_type "WidgetOptions" do |t| + t.field "color", "String!" + t.field "color_breakdown", "Color!" 
+ end + + s.object_type "Widget" do |t| + t.field "options", "WidgetOptions" + end + end + + expect(json_schema).to have_json_schema_like("Color", { + "type" => "object", + "properties" => { + "red" => json_schema_ref("Int!"), + "green" => json_schema_ref("Int!"), + "blue" => json_schema_ref("Int!") + }, + "required" => %w[red green blue] + }) + + expect(json_schema).to have_json_schema_like("WidgetOptions", { + "type" => "object", + "properties" => { + "color" => json_schema_ref("String!"), + "color_breakdown" => json_schema_ref("Color!") + }, + "required" => %w[color color_breakdown] + }) + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "options" => json_schema_ref("WidgetOptions") + }, + "required" => %w[options] + }) + end + + it "returns a JSON schema with definitions for custom scalar types" do + json_schema = dump_schema do |s| + s.scalar_type "PhoneNumber" do |t| + t.mapping type: "keyword" + t.json_schema type: "string", format: "^\\+[1-9][0-9]{1,14}$" + end + end + + expect(json_schema).to have_json_schema_like("PhoneNumber", { + "type" => "string", + "format" => "^\\+[1-9][0-9]{1,14}$" + }) + end + + it "returns a JSON schema for a type with wrapped enums" do + json_schema = dump_schema do |s| + s.enum_type "Size" do |t| + t.values "SMALL", "MEDIUM", "LARGE" + end + + s.object_type "Widget" do |t| + t.field "null_array_null", "[Size]" + t.field "non_null_array_null", "[Size]!" + t.field "null_array_non_null", "[Size!]" + t.field "non_null_array_non_null", "[Size!]!" 
+ t.field "null_null_array_null", "[[Size]]" + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "null_array_null" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_ref("Size") + }, + json_schema_null + ] + }, + "non_null_array_null" => { + "type" => "array", + "items" => json_schema_ref("Size") + }, + "null_array_non_null" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_ref("Size!") + }, + json_schema_null + ] + }, + "non_null_array_non_null" => { + "type" => "array", + "items" => json_schema_ref("Size!") + }, + "null_null_array_null" => { + "anyOf" => [ + { + "type" => "array", + "items" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_ref("Size") + }, + json_schema_null + ] + } + }, + json_schema_null + ] + } + }, + "required" => %w[null_array_null non_null_array_null null_array_non_null non_null_array_non_null null_null_array_null] + }) + end + + it "returns a JSON schema for a type with wrapped objects" do + json_schema = dump_schema do |s| + s.object_type "Color" do |t| + t.field "red", "Int!" + t.field "green", "Int!" + t.field "blue", "Int!" + end + + s.object_type "WidgetOptions" do |t| + t.field "color_breakdown", "Color!" + end + + s.object_type "Widget" do |t| + t.field "nullable", "WidgetOptions" + t.field "non_null", "WidgetOptions!" + t.field "null_array_null", "[WidgetOptions]" do |f| + f.mapping type: "object" + end + t.field "non_null_array_null", "[WidgetOptions]!" do |f| + f.mapping type: "object" + end + t.field "null_array_non_null", "[WidgetOptions!]" do |f| + f.mapping type: "object" + end + t.field "non_null_array_non_null", "[WidgetOptions!]!" 
do |f| + f.mapping type: "object" + end + t.field "null_null_array_null", "[[WidgetOptions]]" do |f| + f.mapping type: "object" + end + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "nullable" => json_schema_ref("WidgetOptions"), + "non_null" => json_schema_ref("WidgetOptions!"), + "null_array_null" => { + "anyOf" => [ + { + "type" => "array", + "items" => { + "anyOf" => [ + {"$ref" => "#/$defs/WidgetOptions"}, + json_schema_null + ] + } + }, + json_schema_null + ] + }, + "non_null_array_null" => { + "type" => "array", + "items" => { + "anyOf" => [ + {"$ref" => "#/$defs/WidgetOptions"}, + json_schema_null + ] + } + }, + "null_array_non_null" => { + "anyOf" => [ + { + "type" => "array", + "items" => {"$ref" => "#/$defs/WidgetOptions"} + }, + json_schema_null + ] + }, + "non_null_array_non_null" => { + "type" => "array", + "items" => { + "$ref" => "#/$defs/WidgetOptions" + } + }, + "null_null_array_null" => { + "anyOf" => [ + { + "type" => "array", + "items" => { + "anyOf" => [ + { + "type" => "array", + "items" => { + "anyOf" => [ + {"$ref" => "#/$defs/WidgetOptions"}, + json_schema_null + ] + } + }, + json_schema_null + ] + } + }, + json_schema_null + ] + } + }, + "required" => %w[nullable non_null null_array_null non_null_array_null null_array_non_null non_null_array_non_null null_null_array_null] + }) + end + + context "on an indexed type with a rollover index" do + it "makes the JSON schema for a rollover index timestamp field on the indexed type non-nullable since a target index cannot be chosen without it" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" 
+ t.field "created_at", "DateTime" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "created_at" => json_schema_ref("DateTime!") + }, + "required" => %w[id created_at] + }) + + expect(json_schema).to have_json_schema_like("DateTime", { + "type" => "string", + "format" => "date-time" + }) + end + + it "does not break other configured JSON schema customizations when forcing the non-nullability on a rollover timestamp field" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "created_at", "DateTime" do |f| + f.json_schema pattern: "\w+" + end + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "created_at" => { + "allOf" => [ + {"$ref" => "#/$defs/DateTime"}, + {"pattern" => "\w+"} + ] + } + }, + "required" => %w[id created_at] + }) + + expect(json_schema).to have_json_schema_like("DateTime", { + "type" => "string", + "format" => "date-time" + }) + end + + it "supports nested timestamp fields, applying non-nullability to every field in the path" do + json_schema = dump_schema do |s| + s.object_type "WidgetTimestamps" do |t| + t.field "created_at", "DateTime" + end + + s.object_type "Widget" do |t| + t.field "id", "ID!" 
+ t.field "timestamps", "WidgetTimestamps" + t.index "widgets" do |i| + i.rollover :monthly, "timestamps.created_at" + end + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "timestamps" => json_schema_ref("WidgetTimestamps!") + }, + "required" => %w[id timestamps] + }) + + expect(json_schema).to have_json_schema_like("WidgetTimestamps", { + "type" => "object", + "properties" => { + "created_at" => json_schema_ref("DateTime!") + }, + "required" => %w[created_at] + }) + + expect(json_schema).to have_json_schema_like("DateTime", { + "type" => "string", + "format" => "date-time" + }) + end + + it "raises an error if the timestamp field specified in `rollover` is absent from the index" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("Field `Widget.created_at` cannot be resolved, but it is referenced as an index `rollover` field.")) + end + + it "allows the timestamp field to be an indexing-only field since it need not be exposed to GraphQL clients" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime", indexing_only: true + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + }.not_to raise_error + end + + it "allows the timestamp field to be a `DateTime` or `Date` field" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_on", "Date" + t.index "widgets" do |i| + i.rollover :monthly, "created_on" + end + end + end + }.not_to raise_error + + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + 
}.not_to raise_error + end + + it "allows the timestamp field to be a non-nullable field" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_on", "Date!" + t.index "widgets" do |i| + i.rollover :monthly, "created_on" + end + end + end + }.not_to raise_error + + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "DateTime!" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + }.not_to raise_error + end + + it "raises an error if a nested rollover timestamp field references an undefined type" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "options", "WidgetOptions" + t.index "widgets" do |i| + i.rollover :monthly, "options.created_at" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including( + "Field `Widget.options.created_at` cannot be resolved", + "Verify that all fields and types referenced by `options.created_at` are defined." 
+ )) + end + + it "raises an error if a rollover timestamp field references an object type" do + expect { + dump_schema do |s| + s.object_type "WidgetOpts" do |t| + t.field "size", "Int" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "opts", "WidgetOpts" + t.index "widgets" do |i| + i.rollover :monthly, "opts" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("rollover field `Widget.opts: WidgetOpts` cannot be used for rollover since it is not a `Date` or `DateTime` field")) + end + + it "raises an error if a rollover timestamp field references an enum type" do + expect { + dump_schema do |s| + s.enum_type "Color" do |t| + t.values "RED", "GREEN", "BLUE" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "color", "Color" + t.index "widgets" do |i| + i.rollover :monthly, "color" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("rollover field `Widget.color: Color` cannot be used for rollover since it is not a `Date` or `DateTime` field")) + end + + it "raises an error if a rollover timestamp field references an scalar type that can't be used for rollover" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "String" # not a DateTime! 
+ t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("rollover field `Widget.created_at: String` cannot be used for rollover since it is not a `Date` or `DateTime` field")) + end + + it "respects configured type name overrides when determining if a rollover field is a valid type" do + expect { + dump_schema(type_name_overrides: {"Date" => "Etad", "DateTime" => "EmitEtad"}) do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_at", "EmitEtad" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + + s.object_type "Component" do |t| + t.field "id", "ID" + t.field "created_on", "Etad" + t.index "widgets" do |i| + i.rollover :monthly, "created_on" + end + end + + s.object_type "Part" do |t| + t.field "id", "ID" + t.field "created_at", "String" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including( + "rollover field `Part.created_at: String` cannot be used for rollover since it is not a `Etad` or `EmitEtad` field" + )) + end + + it "raises an error if a rollover timestamp field references a list field" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_ats", "[DateTime]" + t.index "widgets" do |i| + i.rollover :monthly, "created_ats" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("rollover field `Widget.created_ats: [DateTime]` cannot be used for rollover since it is a list field.")) + + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "created_ons", "[Date]" + t.index "widgets" do |i| + i.rollover :monthly, "created_ons" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("rollover field `Widget.created_ons: [Date]` cannot be used for rollover since it is a list field.")) + end + + it "allows the 
timestamp field specified in `rollover` to be defined after the `index` call" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" do |i| + i.rollover :monthly, "created_at" + end + + t.field "created_at", "DateTime" + end + end + }.not_to raise_error + end + end + + context "on an indexed type with custom shard routing" do + it "makes the custom routing field non-nullable in the JSON schema since we cannot target a shard without it" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "user_id", "ID" + + t.index "widgets" do |i| + i.route_with "user_id" + end + end + end + + expect(json_schema).to have_json_schema_like("ID", { + "type" => "string" + }) + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "user_id" => shard_routing_string_field + }, + "required" => %w[id user_id] + }, ignore_descriptions: true).which_matches( + {"id" => "abc", "user_id" => "def"}, + {"id" => "abc", "user_id" => " d"}, + {"id" => "abc", "user_id" => "\td"}, + {"id" => "abc", "user_id" => "d\n"} + ).and_fails_to_match( + {"id" => "abc", "user_id" => nil}, + {"id" => "abc", "user_id" => ""}, + {"id" => "abc", "user_id" => " "}, + {"id" => "abc", "user_id" => " \t"}, + {"id" => "abc", "user_id" => " \n"} + ) + end + + it "supports nested routing fields, applying non-nullability to every field in the path" do + json_schema = dump_schema do |s| + s.object_type "WidgetIDs" do |t| + t.field "user_id", "ID" + end + + s.object_type "Widget" do |t| + t.field "id", "ID!" 
+ t.field "widget_ids", "WidgetIDs" + t.index "widgets" do |i| + i.route_with "widget_ids.user_id" + end + end + end + + expect(json_schema).to have_json_schema_like("WidgetIDs", { + "type" => "object", + "properties" => { + "user_id" => shard_routing_string_field + }, + "required" => ["user_id"] + }, ignore_descriptions: true) + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "widget_ids" => json_schema_ref("WidgetIDs!") + }, + "required" => %w[id widget_ids] + }, ignore_descriptions: true) + end + + it "raises an error if the specified custom shard routing field is absent from the index" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" do |i| + i.route_with "user_id" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("Field `Widget.user_id` cannot be resolved, but it is referenced as an index `route_with` field.")) + end + + it "raises an error if a shard routing field references an object type" do + expect { + dump_schema do |s| + s.object_type "WidgetOpts" do |t| + t.field "size", "Int" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "opts", "WidgetOpts" + t.index "widgets" do |i| + i.route_with "opts" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("shard routing field `Widget.opts: WidgetOpts` cannot be used for routing since it is not a leaf field.")) + end + + it "raises an error if a shard routing field references a list field" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "tags", "[String]" + t.index "widgets" do |i| + i.route_with "tags" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("shard routing field `Widget.tags: [String]` cannot be used for routing since it is not a leaf field.")) + end + + it "allows the custom shard routing field to be an 
indexing-only field since it need not be exposed to GraphQL clients" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "user_id", "ID", indexing_only: true + t.index "widgets" do |i| + i.route_with "user_id" + end + end + end + }.not_to raise_error + end + + it "raises an error if a nested custom shard routing field references an undefined type" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "options", "WidgetOptions" + t.index "widgets" do |i| + i.route_with "options.user_id" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including( + "Field `Widget.options.user_id` cannot be resolved", + "Verify that all fields and types referenced by `options.user_id` are defined" + )) + end + + it "allows the custom shard routing field to be nullable or non-null" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "user_id", "ID" + t.index "widgets" do |i| + i.route_with "user_id" + end + end + end + }.not_to raise_error + + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "user_id", "ID!" 
+ t.index "widgets" do |i| + i.route_with "user_id" + end + end + end + }.not_to raise_error + end + + it "allows the specified custom shard routing field to be defined after `index`" do + expect { + dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" do |i| + i.route_with "user_id" + end + + t.field "user_id", "ID" + end + end + }.not_to raise_error + end + + it "mentions the expected field in the error message when dealing with nested fields" do + expect { + dump_schema do |s| + s.object_type "Nested" do |t| + t.field "user_id", "ID" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" do |i| + i.route_with "nested.user_id" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("Field `Widget.nested.user_id` cannot be resolved, but it is referenced as an index `route_with` field.")) + end + + it "does not include a confusing 'must come after' message..." do + expect { + dump_schema do |s| + s.object_type "Nested" do |t| + t.field "user_id", "ID" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "nested", "Nested" + t.index "widgets" do |i| + i.route_with "nested.some_id" + end + end + end + }.to raise_error(Errors::SchemaError, a_string_including("Field `Widget.nested.some_id` cannot be resolved, but it is referenced as an index `route_with` field").and(excluding("must come before"))) + end + end + + it "correctly overwrites built-in type customizations" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "month", "Int" do |f| + f.mapping type: "byte" + f.json_schema minimum: 0, maximum: 99 + end + + t.field "year", "Int" do |f| + f.mapping type: "short" + f.json_schema minimum: 2000, maximum: 2099 + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "month" => { + "anyOf" => [ + { + "allOf" => [ + json_schema_integer, + {"minimum" => 0, "maximum" => 99} + ] 
+ }, + json_schema_null + ] + }, + "year" => { + "anyOf" => [ + { + "allOf" => [ + json_schema_integer, + {"minimum" => 2000, "maximum" => 2099} + ] + }, + json_schema_null + ] + } + }, + "required" => %w[month year] + }) + end + + it "allows JSON schema options to be built up over multiple `json_schema` calls" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "month", "Int" do |f| + f.json_schema minimum: 0 + f.json_schema maximum: 99 + f.json_schema minimum: 20 # demonstrate that the last call wins + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "month" => { + "anyOf" => [ + { + "allOf" => [ + json_schema_integer, + {"minimum" => 20, "maximum" => 99} + ] + }, + json_schema_null + ] + } + }, + "required" => %w[month] + }) + end + + it "correctly restricts enum types with customizations" do + json_schema = dump_schema do |s| + s.enum_type "Color" do |t| + t.values "RED", "ORANGE", "YELLOW", "GREEN", "BLUE", "INDIGO", "VIOLET" + end + + s.object_type "MyType" do |t| + t.field "primaryColor", "Color!" do |f| + f.json_schema enum: %w[RED YELLOW BLUE] + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "primaryColor" => { + "allOf" => [ + {"$ref" => "#/$defs/Color"}, + {"enum" => %w[RED YELLOW BLUE]} + ] + } + }, + "required" => %w[primaryColor] + }) + end + + it "applies customizations defined on a list field to the JSON schema array instead of applying them to the items" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "tags", "[String!]!" 
do |f| + f.json_schema uniqueItems: true, maxItems: 1000 + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "tags" => { + "type" => "array", + "items" => json_schema_string, + "uniqueItems" => true, + "maxItems" => 1000 + } + }, + "required" => %w[tags] + }) + end + + it "still applies customizations from the mapping type to array items" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "values", "[Int!]!" do |f| + f.json_schema minItems: 1 + f.mapping type: "short" + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "values" => { + "type" => "array", + "items" => { + "allOf" => [ + json_schema_integer, + {"minimum" => -32768, "maximum" => 32767} + ] + }, + "minItems" => 1 + } + }, + "required" => %w[values] + }) + end + + it "raises a Errors::SchemaError when a conflicting type is specified" do + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "built_in_scalar_replaced", "String!" do |f| + expect { + f.json_schema type: "boolean" + }.to raise_error(Errors::SchemaError, a_string_including( + "Cannot override JSON schema type of field `built_in_scalar_replaced` with `boolean`" + )) + end + end + end + end + + it "respects `json_schema` replacements set on a field definition, except when conflicting" do + json_schema = dump_schema do |s| + s.scalar_type "MyText" do |t| + t.json_schema type: "string" + t.mapping type: "keyword" + end + + s.object_type "MyType" do |t| + t.field "built_in_scalar_augmented", "String!" do |f| + f.json_schema minLength: 4 + end + t.field "custom_scalar", "MyText!" + t.field "custom_scalar_augmented", "MyText!" 
do |f| + f.json_schema minLength: 4 + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "built_in_scalar_augmented" => { + "allOf" => [ + {"$ref" => "#/$defs/String"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH, "minLength" => 4} + ] + }, + "custom_scalar" => json_schema_ref("MyText!", is_keyword_type: true), + "custom_scalar_augmented" => { + "allOf" => [ + {"$ref" => "#/$defs/MyText"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH, "minLength" => 4} + ] + } + }, + "required" => %w[built_in_scalar_augmented custom_scalar custom_scalar_augmented] + }) + end + + it "respects `json_schema` customizations set on an object type definition" do + define_point = lambda do |s| + s.object_type "Point" do |t| + t.field "x", "Float" + t.field "y", "Float" + t.json_schema type: "array", items: [{type: "number"}, {type: "number"}] + end + end + + define_my_type = lambda do |s| + s.object_type "MyType" do |t| + t.field "location", "Point" + end + end + + # We should get the same json schema regardless of which type is defined first. 
+ type_before_reference_json_schema = dump_schema do |s| + define_point.call(s) + define_my_type.call(s) + end + + type_after_reference_json_schema = dump_schema do |s| + define_my_type.call(s) + define_point.call(s) + end + + expect(type_before_reference_json_schema).to eq(type_after_reference_json_schema) + .and have_json_schema_like("Point", { + "type" => "array", + "items" => [ + {"type" => "number"}, + {"type" => "number"} + ] + }).which_matches( + [0, 0], + [1, 2], + [1234567890, 1234567890] + ).and_fails_to_match( + [nil, nil], + %w[a b], + nil + ) + end + + describe "indexing-only fields" do + it "allows the indexing-only fields to specify their customized json schema" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "date", "String", indexing_only: true do |f| + f.mapping type: "date" + f.json_schema format: "date-time" + end + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "date" => { + "anyOf" => [ + { + "allOf" => [ + {"$ref": "#/$defs/String"}, + {"format" => "date-time"} + ] + }, + json_schema_null + ] + } + }, + "required" => %w[date] + }) + end + + it "allows the indexing-only fields to be objects with nested fields" do + json_schema = dump_schema do |s| + s.object_type "NestedType" do |t| + t.field "name", "String!" 
+ end + + s.object_type "MyType" do |t| + t.field "nested", "NestedType!", indexing_only: true + end + end + + expect(json_schema).to have_json_schema_like("NestedType", { + "type" => "object", + "properties" => { + "name" => json_schema_ref("String!") + }, + "required" => ["name"] + }) + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "nested" => json_schema_ref("NestedType!") + }, + "required" => %w[nested] + }) + end + + it "raises an error when same mapping field is defined twice with different JSON schemas" do + expect { + dump_schema do |s| + s.object_type "Card" do |t| + t.field "meta", "Int" do |f| + f.mapping type: "integer" + f.json_schema minimum: 10 + end + + t.field "meta", "Int", indexing_only: true do |f| + f.mapping type: "integer" + f.json_schema minimum: 20 + end + end + end + }.to raise_error Errors::SchemaError, a_string_including("Duplicate indexing field", "Card", "meta", "graphql_only: true") + end + end + + it "generates the JSON schema of an array for a `paginated_collection_field`" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.paginated_collection_field "names", "String" + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "names" => { + "type" => "array", + "items" => json_schema_string + } + }, + "required" => %w[names] + }) + end + + it "honors JSON schema customizations of a `paginated_collection_field`" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.paginated_collection_field "names", "String" do |f| + f.json_schema uniqueItems: true, maxItems: 1000 + end + end + end + + expect(json_schema).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "names" => { + "type" => "array", + "items" => json_schema_string, + "uniqueItems" => true, + "maxItems" => 1000 + } + }, + "required" => %w[names] + }) + end + + describe "relation fields" do + 
context "on a relation with an outbound foreign key" do + it "includes a non-null foreign key field if the GraphQL relation field is non-null" do + json_schema = dump_schema do |s| + s.object_type "OtherType" do |t| + t.field "id", "ID!" + end + + s.object_type "MyType" do |t| + t.relates_to_one "other", "OtherType!", via: "other_id", dir: :out + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "other_id" => json_schema_ref("ID!") + }, + "required" => %w[other_id] + }) + end + + it "includes a nullable foreign key field if the GraphQL relation field is nullable" do + json_schema = dump_schema do |s| + s.object_type "OtherType" do |t| + t.field "id", "ID!" + end + + s.object_type "MyType" do |t| + t.relates_to_one "other", "OtherType", via: "other_id", dir: :out + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "other_id" => json_schema_ref("ID") + }, + "required" => %w[other_id] + }) + end + + it "includes an array foreign key field if its a `relates_to_many` field" do + json_schema = dump_schema do |s| + s.object_type "OtherType" do |t| + t.field "id", "ID!" + t.index "other_type" + end + + s.object_type "MyType" do |t| + t.field "id", "ID!" 
+ t.relates_to_many "others", "OtherType", via: "other_ids", dir: :out, singular: "other" + t.index "my_type" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "other_ids" => { + "type" => "array", + "items" => json_schema_id + } + }, + "required" => %w[id other_ids] + }) + end + + it "includes a non-null `id` field if the relation is self-referential, even if there is no `id` GraphQL field (for a `relates_to_one` case)" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.relates_to_one "parent", "MyType!", via: "parent_id", dir: :out + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "parent_id" => json_schema_ref("ID!"), + "id" => json_schema_ref("ID!") + }, + "required" => %w[parent_id id] + }) + end + end + + context "on a relation with an inbound foreign key" do + it "includes the foreign key field when the relation is self-referential, regardless of the details of the relation (nullable or not, one or many)" do + json_schema = dump_schema do |s| + s.object_type "MyTypeOneNullable" do |t| + t.field "id", "ID!" + t.relates_to_one "parent", "MyTypeOneNullable", via: "children_ids", dir: :in + t.index "my_type1" + end + + s.object_type "MyTypeOneNonNull" do |t| + t.field "id", "ID!" + t.relates_to_one "parent", "MyTypeOneNonNull!", via: "children_ids", dir: :in + t.index "my_type2" + end + + s.object_type "MyTypeBothDirections" do |t| + t.field "id", "ID!" + t.relates_to_one "parent", "MyTypeBothDirections!", via: "children_ids", dir: :in + t.relates_to_many "children", "MyTypeBothDirections", via: "children_ids", dir: :out, singular: "child" + t.index "my_type2" + end + + s.object_type "MyTypeMany" do |t| + t.field "id", "ID!" 
+ t.relates_to_many "children", "MyTypeMany", via: "parent_id", dir: :in, singular: "child" + t.index "my_type3" + end + end + + expect(json_schema).to have_json_schema_like("MyTypeOneNullable", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + # technically this would probably be an array field, but there's not enough info on this side of the relation to know. + # When the other side is also defined (as in `both_dirs`) it is more accurate. + "children_ids" => json_schema_ref("ID") + }, + "required" => %w[id children_ids] + }) + + expect(json_schema).to have_json_schema_like("MyTypeOneNonNull", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + # technically this would probably be an array field, but there's not enough info on this side of the relation to know. + # When the other side is also defined (see another test) it is more accurate. + "children_ids" => json_schema_ref("ID!") + }, + "required" => %w[id children_ids] + }) + + expect(json_schema).to have_json_schema_like("MyTypeBothDirections", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "children_ids" => { + "type" => "array", + "items" => json_schema_id + } + }, + "required" => %w[id children_ids] + }) + + expect(json_schema).to have_json_schema_like("MyTypeMany", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "parent_id" => json_schema_ref("ID") + }, + "required" => %w[id parent_id] + }) + end + end + + it "prefers defined fields to fields inferred by relations when the same field is created by both, as defined fields are more accurate" do + json_schema = dump_schema do |s| + s.object_type "CardInferred" do |t| + t.relates_to_one "cloned_from_card", "CardInferred", via: "cloned_from_card_id", dir: :out + end + + s.object_type "CardExplicit" do |t| + t.relates_to_one "cloned_from_card", "CardInferred", via: "cloned_from_card_id", dir: :out + t.field "cloned_from_card_id", "ID!" 
+ end + end + + expect(json_schema).to have_json_schema_like("CardInferred", { + "type" => "object", + "properties" => { + "cloned_from_card_id" => json_schema_ref("ID"), + "id" => json_schema_ref("ID!") + }, + "required" => %w[cloned_from_card_id id] + }) + + expect(json_schema).to have_json_schema_like("CardExplicit", { + "type" => "object", + "properties" => { + "cloned_from_card_id" => json_schema_ref("ID!") + }, + "required" => %w[cloned_from_card_id] + }) + end + end + + context "`nullable:` option inside `json_schema`" do + it "forces field that is nullable in GraphQL to be non-nullable in the generated JSON schema" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "size", "Float" do |f| + f.json_schema nullable: false + end + t.field "cost", "Float" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "size" => json_schema_ref("Float!"), + "cost" => json_schema_ref("Float") + }, + "required" => %w[size cost] + }) + end + + it "has no effect on an already-non-nullable field" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "id", "ID!" + t.field "size", "Float!" do |f| + f.json_schema nullable: false + end + t.field "cost", "Float!" 
+ t.index "my_type" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "size" => json_schema_ref("Float!"), + "cost" => json_schema_ref("Float!") + }, + "required" => %w[id size cost] + }) + end + + it "forces wrapped field that is nullable in GraphQL to be non-nullable in the generated JSON schema" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "size", "[[Float!]]" do |f| + f.json_schema nullable: false + end + t.field "cost", "[[Float!]]" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "size" => { + "type" => "array", + "items" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_float + }, + json_schema_null + ] + } + }, + "cost" => { + "anyOf" => [ + { + "type" => "array", + "items" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_float + }, + json_schema_null + ] + } + }, + json_schema_null + ] + } + }, + "required" => %w[size cost] + }) + end + + it "has no effect on an already-non-nullable wrapped field" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "size", "[[Float!]]!" do |f| + f.json_schema nullable: false + end + t.field "cost", "[[Float!]]!" 
+ end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "size" => { + "type" => "array", + "items" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_float + }, + json_schema_null + ] + } + }, + "cost" => { + "type" => "array", + "items" => { + "anyOf" => [ + { + "type" => "array", + "items" => json_schema_float + }, + json_schema_null + ] + } + } + }, + "required" => %w[size cost] + }) + end + + it "raises an exception on `nullable: true` because we cannot allow that for non-null GraphQL fields and `nullable: true` does nothing on an already nullable GraphQL field`" do + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "size", "[[Float!]]" do |f| + expect { + f.json_schema nullable: true + }.to raise_error(Errors::SchemaError, a_string_including("`nullable: true` is not allowed on a field--just declare the GraphQL field as being nullable (no `!` suffix) instead.")) + end + end + end + end + + it "is not allowed on an object or scalar type (it is only intended for use on fields)" do + dump_schema do |s| + s.object_type "MyType" do |t| + expect { + t.json_schema nullable: false + }.to raise_error(Errors::SchemaError, a_string_including("Invalid JSON schema options", "nullable")) + end + + s.scalar_type "ScalarType" do |t| + t.mapping type: "boolean" + t.json_schema type: "boolean" + + expect { + t.json_schema nullable: false + }.to raise_error(Errors::SchemaError, a_string_including("Invalid JSON schema options", "nullable")) + end + end + end + end + + it "dumps object schemas with a __typename property" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "id", "ID!" 
+ end + end + + expect(json_schema.dig("$defs", "MyType", "properties", "__typename")).to eq({ + "const" => "MyType", + "default" => "MyType", + "type" => "string" + }) + end + + shared_examples_for "a type with subtypes" do |type_def_method| + context "composed of 2 indexed types" do + it "generates separate json schemas for the two subtypes and the supertype" do + schemas = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "amount_cents", "Int!" + link_subtype_to_supertype(t, "Thing") + t.index "widgets" + end + + s.object_type "Component" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "weight", "Int!" + link_subtype_to_supertype(t, "Thing") + t.index "components" + end + + s.public_send type_def_method, "Thing" do |t| + link_supertype_to_subtypes(t, "Widget", "Component") + end + end + + expect(schemas).to have_json_schema_like("Widget", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "name" => json_schema_ref("String!"), + "amount_cents" => json_schema_ref("Int!") + }, + "required" => %w[id name amount_cents] + }) + + expect(schemas).to have_json_schema_like("Component", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "name" => json_schema_ref("String!"), + "weight" => json_schema_ref("Int!") + }, + "required" => %w[id name weight] + }) + + expect(schemas).to have_json_schema_like("Thing", { + "required" => [ + "__typename" + ], + "oneOf" => [ + { + "$ref" => "#/$defs/Widget" + }, + { + "$ref" => "#/$defs/Component" + } + ] + }) + + type_definitions = schemas.fetch("$defs") + expect(type_definitions.keys).to include("Thing") + expect(envelope_type_enum_values(type_definitions)).to contain_exactly("Widget", "Component") + end + end + + context "that is itself indexed" do + it "uses `oneOf` to produce a JSON schema that exclusively validates one or the other type" do + json_schema = dump_schema do |s| + s.object_type 
"Widget" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "amount_cents", "Int!" + link_subtype_to_supertype(t, "Thing") + end + + s.object_type "Component" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "weight", "Int!" + link_subtype_to_supertype(t, "Thing") + end + + s.public_send type_def_method, "Thing" do |t| + link_supertype_to_subtypes(t, "Widget", "Component") + t.index "things" + end + end + + expect(json_schema).to have_json_schema_like("Thing", { + "required" => ["__typename"], + "oneOf" => [ + {"$ref" => "#/$defs/Widget"}, + {"$ref" => "#/$defs/Component"} + ] + }).which_matches( + {"id" => "1", "name" => "foo", "amount_cents" => 12, "__typename" => "Widget"}, + {"id" => "1", "name" => "foo", "weight" => 12, "__typename" => "Component"} + ).and_fails_to_match( + {"id" => "1", "name" => "foo", "__typename" => "Widget"}, + nil + ) + end + + it "includes concrete subtypes (not the abstract supertype) in the event envelope type enum" do + json_schema = dump_schema do |s| + # PhysicalStore has its own index + s.object_type "PhysicalStore" do |t| + t.field "id", "ID!" + t.field "name", "String!" + link_subtype_to_supertype(t, "Store") + t.index "physical_stores" + end + + # OnlineStore and MobileStore inherit index from Store + s.object_type "OnlineStore" do |t| + t.field "id", "ID!" + t.field "name", "String!" + link_subtype_to_supertype(t, "Store") + end + + s.object_type "MobileStore" do |t| + t.field "id", "ID!" + t.field "name", "String!" + link_subtype_to_supertype(t, "Store") + end + + s.public_send type_def_method, "Store" do |t| + link_supertype_to_subtypes(t, "PhysicalStore", "OnlineStore", "MobileStore") + t.index "stores" + end + end + + # All concrete types should be in the enum: + # - PhysicalStore (has its own "physical_stores" index) + # - OnlineStore and MobileStore (inherit "stores" index from Store) + # The abstract Store type should NOT be in the enum. 
+ type_definitions = json_schema.fetch("$defs") + expect(envelope_type_enum_values(type_definitions)).to contain_exactly("PhysicalStore", "OnlineStore", "MobileStore") + end + end + + context "that is an embedded type" do + it "uses `oneOf` to produce a JSON schema that exclusively validates one or the other type" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "name", "String!" + t.field "amount_cents", "Int!" + link_subtype_to_supertype(t, "Thing") + end + + s.object_type "Component" do |t| + t.field "name", "String!" + t.field "weight", "Int!" + link_subtype_to_supertype(t, "Thing") + end + + s.public_send type_def_method, "Thing" do |t| + link_supertype_to_subtypes(t, "Widget", "Component") + end + + s.object_type "MyType" do |t| + t.field "id", "ID!" + t.field "thing", "Thing!" + t.index "my_type" + end + end + + expect(json_schema).to have_json_schema_like("Thing", { + "required" => ["__typename"], + "oneOf" => [ + {"$ref" => "#/$defs/Widget"}, + {"$ref" => "#/$defs/Component"} + ] + }) + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "thing" => json_schema_ref("Thing!"), + "__typename" => { + "type" => "string", + "const" => "MyType", + "default" => "MyType" + } + }, + "required" => %w[id thing] + }).which_matches( + {"id" => "a", "thing" => {"id" => "a", "name" => "foo", "amount_cents" => 12, "__typename" => "Widget"}}, + {"id" => "a", "thing" => {"id" => "a", "name" => "foo", "weight" => 12, "__typename" => "Component"}} + ).and_fails_to_match( + {"id" => "a", "name" => "foo", "__typename" => "Widget"}, + {"id" => "a", "thing" => nil}, + nil + ) + end + + it "generates a JSON schema that correctly allows null values when the supertype field is nullable" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "name", "String!" + t.field "amount_cents", "Int!" 
+ link_subtype_to_supertype(t, "Thing") + end + + s.object_type "Component" do |t| + t.field "name", "String!" + t.field "weight", "Int!" + link_subtype_to_supertype(t, "Thing") + end + + s.public_send type_def_method, "Thing" do |t| + link_supertype_to_subtypes(t, "Widget", "Component") + end + + s.object_type "MyType" do |t| + t.field "thing", "Thing" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "thing" => json_schema_ref("Thing"), + "__typename" => { + "type" => "string", + "const" => "MyType", + "default" => "MyType" + } + }, + "required" => %w[thing] + }).which_matches( + {"thing" => {"id" => "a", "name" => "foo", "amount_cents" => 12, "__typename" => "Widget"}}, + {"thing" => {"id" => "a", "name" => "foo", "weight" => 12, "__typename" => "Component"}}, + {"thing" => nil} + ).and_fails_to_match( + {"name" => "foo", "__typename" => "Widget"}, + nil + ) + end + + it "allows the same field on two subtypes to have different json_schema" do + json_schema = dump_schema do |s| + s.object_type "Person" do |t| + t.field "name", "String" do |f| + f.json_schema nullable: false + end + t.field "nationality", "String!" + link_subtype_to_supertype(t, "Inventor") + end + + s.object_type "Company" do |t| + t.field "name", "String" do |f| + f.json_schema maxLength: 20 + end + t.field "stock_ticker", "String!" 
+ link_subtype_to_supertype(t, "Inventor") + end + + s.public_send type_def_method, "Inventor" do |t| + link_supertype_to_subtypes(t, "Person", "Company") + end + end + + expect(json_schema).to have_json_schema_like("Person", { + "type" => "object", + "properties" => { + "name" => json_schema_ref("String!"), + "nationality" => json_schema_ref("String!") + }, + "required" => %w[name nationality] + }) + + expect(json_schema).to have_json_schema_like("Company", { + "type" => "object", + "properties" => { + "name" => { + "anyOf" => [ + { + "allOf" => [ + {"$ref" => "#/$defs/String"}, + {"maxLength" => 20} + ] + }, + {"type" => "null"} + ] + }, + "stock_ticker" => json_schema_ref("String!") + }, + "required" => %w[name stock_ticker] + }) + end + end + end + + context "on a type union" do + include_examples "a type with subtypes", :union_type do + def link_subtype_to_supertype(object_type, supertype_name) + # nothing to do; the linkage happens via a `subtypes` call on the supertype + end + + def link_supertype_to_subtypes(union_type, *subtype_names) + union_type.subtypes(*subtype_names) + end + end + end + + context "on an interface type" do + include_examples "a type with subtypes", :interface_type do + def link_subtype_to_supertype(object_type, interface_name) + object_type.implements interface_name + end + + def link_supertype_to_subtypes(interface_type, *subtype_names) + # nothing to do; the linkage happens via an `implements` call on the subtype + end + end + + it "supports interface recursion (e.g. an interface that implements an interface)" do + json_schema = dump_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "amount_cents", "Int!" + t.implements "WidgetOrComponent" + end + + s.object_type "Component" do |t| + t.field "id", "ID!" + t.field "name", "String!" + t.field "weight", "Int!" 
+ t.implements "WidgetOrComponent" + end + + s.interface_type "WidgetOrComponent" do |t| + t.implements "Thing" + end + + s.object_type "Object" do |t| + t.field "id", "ID!" + t.field "description", "String!" + t.implements "Thing" + end + + s.interface_type "Thing" do |t| + t.field "id", "ID!" + t.index "things" + end + end + + expect(json_schema).to have_json_schema_like("Thing", { + "required" => ["__typename"], + "oneOf" => [ + {"$ref" => "#/$defs/Widget"}, + {"$ref" => "#/$defs/Component"}, + {"$ref" => "#/$defs/Object"} + ] + }).which_matches( + {"id" => "1", "name" => "foo", "amount_cents" => 12, "__typename" => "Widget"}, + {"id" => "1", "name" => "foo", "weight" => 12, "__typename" => "Component"}, + {"id" => "1", "description" => "foo", "__typename" => "Object"} + ).and_fails_to_match( + {"id" => "1", "name" => "foo", "__typename" => "Widget"}, + nil + ) + end + end + + it "dumps the types by name in alphabetical order (minus the envelope type at the start) for consistent dump output" do + schemas1 = all_type_definitions_for do |s| + s.object_type "AType" do |t| + t.field "id", "ID!" + t.index "a_type" + end + + s.object_type "BType" do |t| + t.field "id", "ID!" + t.index "b_type" + end + end + + schemas2 = all_type_definitions_for do |s| + s.object_type "BType" do |t| + t.field "id", "ID!" + t.index "b_type" + end + + s.object_type "AType" do |t| + t.field "id", "ID!" + t.index "a_type" + end + end + + # The types should have alphabetical keys (except the envelope always goes first; hence the `drop(1)`) + expect(schemas1.keys.drop(1)).to eq schemas1.keys.drop(1).sort + expect(schemas2.keys.drop(1)).to eq schemas2.keys.drop(1).sort + + # ...and the types should be alphabetically listed within the envelope, too. 
+ expect(envelope_type_enum_values(schemas1)).to eq %w[AType BType] + expect(envelope_type_enum_values(schemas2)).to eq %w[AType BType] + end + + it "does not dump a schema for a derived indexed type because it cannot be directly ingested by the indexer" do + schemas = all_type_definitions_for do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "workspace_id", "ID" + t.index "widgets" + t.derive_indexed_type_fields "WidgetWorkspace", from_id: "workspace_id" do |derive| + derive.append_only_set "widget_ids", from: "id" + end + end + + s.object_type "WidgetWorkspace" do |t| + t.field "id", "ID!" + t.field "widget_ids", "[ID!]!" + t.index "widget_workspaces" + end + end + + expect(schemas.keys).to include(EVENT_ENVELOPE_JSON_SCHEMA_NAME, "Widget") + expect(schemas.keys).to exclude("WidgetWorkspace") + expect(envelope_type_enum_values(schemas)).to eq ["Widget"] + end + + it "raises a clear error if the schema defines a type with a reserved name" do + dump_schema do |s| + expect { + s.object_type EVENT_ENVELOPE_JSON_SCHEMA_NAME + }.to raise_error Errors::SchemaError, a_string_including(EVENT_ENVELOPE_JSON_SCHEMA_NAME, "reserved name") + end + end + + it "sets json_schema_version to the specified (valid) value" do + result = define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version 1 + end.json_schemas_for(1) + + expect(result[JSON_SCHEMA_VERSION_KEY]).to eq(1) + end + + it "fails if json_schema_version is set to invalid values" do + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version 0.5 + end + }.to raise_error(Errors::SchemaError, a_string_including("must be a positive integer. Specified version: 0.5")) + + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version "asd" + end + }.to raise_error(Errors::SchemaError, a_string_including("must be a positive integer. 
Specified version: asd")) + + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version 0 + end + }.to raise_error(Errors::SchemaError, a_string_including("must be a positive integer. Specified version: 0")) + + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version(-1) + end + }.to raise_error(Errors::SchemaError, a_string_including("must be a positive integer. Specified version: -1")) + end + + it "fails if json_schema_version is left unset" do + expect { + define_schema(schema_element_name_form: "snake_case", json_schema_version: nil) {}.available_json_schema_versions + }.to raise_error(Errors::SchemaError, a_string_including("must be specified in the schema")) + end + + it "fails if json_schema_version is set multiple times" do + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version 1 + s.json_schema_version 2 + end + }.to raise_error(Errors::SchemaError, a_string_including("can only be set once", "Previously-set version: 1")) + end + + it "is unable to return a non-existent schema version" do + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.json_schema_version 1 + end.json_schemas_for(2) + }.to raise_error(Errors::NotFoundError, a_string_including("The requested json schema version (2) is not available", "Available versions: 1")) + end + + it "ignores runtime fields during json schema generation" do + json_schema = dump_schema do |schema| + schema.object_type "Widget" do |t| + t.field "test_runtime_field", "String" do |f| + f.runtime_script "example test script" + end + end + end + + widget_def = json_schema.fetch("$defs").fetch("Widget") + expect(widget_def["properties"].keys).not_to include("test_runtime_field") + end + + it "omits nullable fields from `required` if `allow_omitted_fields` is `true`" do + json_schema = dump_schema do |schema| + schema.json_schema_strictness allow_omitted_fields: true + 
schema.object_type "Widget" do |t| + t.field "test_omitted_field", "String" + t.field "test_expected_field", "String!" + end + end + + widget_def = json_schema.fetch("$defs").fetch("Widget") + expect(widget_def["required"]).to contain_exactly("test_expected_field") + end + + it "includes nullable fields in `required` if `allow_omitted_fields` is `false`" do + json_schema = dump_schema do |schema| + schema.json_schema_strictness allow_omitted_fields: false + schema.object_type "Widget" do |t| + t.field "nullable_string", "String" + t.field "nonnull_string", "String!" + end + end + + widget_def = json_schema.fetch("$defs").fetch("Widget") + expect(widget_def["required"]).to contain_exactly("nonnull_string", "nullable_string") + end + + it "disallows additional properties if `allow_extra_fields` is `false`" do + json_schema = dump_schema do |schema| + schema.json_schema_strictness allow_extra_fields: false + schema.object_type "Widget" do |t| + t.field "test_expected_field", "String!" + end + end + + widget_def = json_schema.fetch("$defs").fetch("Widget") + expect(widget_def["additionalProperties"]).to eq(false) + end + + it "implicitly allows additional properties (the JSON schema default) if `allow_extra_fields` is `true`" do + json_schema = dump_schema do |schema| + schema.json_schema_strictness allow_extra_fields: true + schema.object_type "Widget" do |t| + t.field "test_expected_field", "String!" 
+ end + end + widget_def = json_schema.fetch("$defs").fetch("Widget") + expect(widget_def.keys).not_to include "additionalProperties" + end + + it "raises an error when `json_schema_strictness` is called with invalid `allow_omitted_fields` value" do + expect { + dump_schema do |s| + s.json_schema_strictness allow_omitted_fields: "true" + end + }.to raise_error(Errors::SchemaError, a_string_including("`allow_omitted_fields` must be true or false")) + end + + it "raises an error when `json_schema_strictness` is called with invalid `allow_extra_fields` value" do + expect { + dump_schema do |s| + s.json_schema_strictness allow_extra_fields: "true" + end + }.to raise_error(Errors::SchemaError, a_string_including("`allow_extra_fields` must be true or false")) + end + + it "includes description fields from documentation in the JSON schema" do + json_schema = dump_schema do |schema| + schema.object_type "Widget" do |t| + t.documentation "A reusable widget component." + + t.field "id", "ID!" do |f| + f.documentation "The Widget's unique identifier." + end + + t.field "name", "String" do |f| + f.documentation "The display name of the widget." 
+ end + + t.field "undocumented_field", "Int" + + t.index "widgets" + end + end + + widget_schema = json_schema.dig("$defs", "Widget") + + expect(widget_schema["description"]).to eq("A reusable widget component.") + + expect(widget_schema.dig("properties", "id", "description")).to eq("The Widget's unique identifier.") + expect(widget_schema.dig("properties", "name", "description")).to eq("The display name of the widget.") + + expect(widget_schema.dig("properties", "undocumented_field")).not_to have_key("description") + end + + def all_type_definitions_for(&schema_definition) + dump_schema(&schema_definition).fetch("$defs") + end + + def dump_schema(type_name_overrides: {}, enum_value_overrides_by_type: {}, &schema_definition) + define_schema( + schema_element_name_form: "snake_case", + type_name_overrides: type_name_overrides, + enum_value_overrides_by_type: enum_value_overrides_by_type, + &schema_definition + ).current_public_json_schema + end + + def envelope_type_enum_values(schemas) + schemas.dig(EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum") + end + + def json_schema_ref(type, is_keyword_type: %w[ID! ID String! 
String].include?(type)) + if type.end_with?("!") + basic_json_schema_ref = {"$ref" => "#/$defs/#{type.delete_suffix("!")}"} + + if is_keyword_type + { + "allOf" => [ + basic_json_schema_ref, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} + ] + } + else + basic_json_schema_ref + end + else + { + "anyOf" => [ + json_schema_ref("#{type}!", is_keyword_type: is_keyword_type), + {"type" => "null"} + ] + } + end + end + + def shard_routing_string_field + { + "allOf" => [ + {"$ref" => "#/$defs/ID"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH, "pattern" => Indexing::Index::HAS_NON_WHITE_SPACE_REGEX} + ] + } + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb new file mode 100644 index 000000000..2bb0efdd1 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb @@ -0,0 +1,376 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension" +require "stringio" +require "tmpdir" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe SchemaArtifactManagerExtension do + let(:yaml_artifact_class) do + ::Data.define( + :path, + :desired_contents, + :existing_dumped_contents, + :out_of_date_value, + :extra_comment_lines + ) do + def out_of_date? 
+ out_of_date_value + end + end + end + + let(:deprecated_element_class) do + ::Data.define(:name, :description, :defined_at) + end + + let(:defined_at_class) do + ::Data.define(:path, :lineno) + end + + let(:missing_necessary_field_class) do + ::Data.define(:fully_qualified_path, :field_type) + end + + let(:merged_schema_class) do + ::Data.define( + :json_schema_version, + :json_schema, + :missing_fields, + :missing_types, + :missing_necessary_fields, + :definition_conflicts + ) + end + + let(:fake_manager_results_class) do + ::Class.new do + attr_accessor :current_public_json_schema, :json_schema_version_setter_location, :unused_deprecated_elements + + def merge_field_metadata_into_json_schema(_json_schema) + end + end + end + + def build_manager(schema_definition_results:, enforce_json_schema_version:, schema_artifacts_directory:, artifacts_by_path:, output:) + yaml_artifact_class = self.yaml_artifact_class + + base_class = ::Class.new do + attr_reader :schema_definition_results, :new_yaml_artifact_calls + + def initialize(schema_definition_results, enforce_json_schema_version, schema_artifacts_directory, artifacts_by_path, output) + @schema_definition_results = schema_definition_results + @enforce_json_schema_version = enforce_json_schema_version + @schema_artifacts_directory = schema_artifacts_directory + @artifacts_by_path = artifacts_by_path + @output = output + @new_yaml_artifact_calls = [] + end + + def dump_artifacts + :base_dump + end + + private + + def artifacts_from_schema_def + [:base_artifact] + end + + define_method(:new_yaml_artifact) do |path, contents, extra_comment_lines:| + @new_yaml_artifact_calls << { + path: path, + contents: contents, + extra_comment_lines: extra_comment_lines + } + + @artifacts_by_path.fetch(path) do + yaml_artifact_class.new( + path: path, + desired_contents: contents, + existing_dumped_contents: nil, + out_of_date_value: false, + extra_comment_lines: extra_comment_lines + ) + end + end + end + + ::Class.new(base_class) 
do + prepend SchemaArtifactManagerExtension + end.new( + schema_definition_results, + enforce_json_schema_version, + schema_artifacts_directory, + artifacts_by_path, + output + ) + end + + before do + allow(ElasticGraph::JSONIngestion::SchemaDefinition::JSONSchemaPruner).to receive(:prune) { |json_schema| json_schema } + end + + it "warns when a version bump is needed but enforcement is disabled" do + output = ::StringIO.new + public_schema = {JSON_SCHEMA_VERSION_KEY => 2} + artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: public_schema, + existing_dumped_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + out_of_date_value: true, + extra_comment_lines: [] + ) + results = instance_double(fake_manager_results_class, current_public_json_schema: public_schema, unused_deprecated_elements: []) + + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + artifacts_by_path: {JSON_SCHEMAS_FILE => artifact}, + output: output + ) + + expect(manager.dump_artifacts).to eq(:base_dump) + expect(output.string).to include("WARNING: the `json_schemas.yaml` artifact is being updated") + end + + it "aborts when a version bump is needed and enforcement is enabled" do + output = ::StringIO.new + public_schema = {JSON_SCHEMA_VERSION_KEY => 2} + artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: public_schema, + existing_dumped_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + out_of_date_value: true, + extra_comment_lines: [] + ) + location = instance_double(::Thread::Backtrace::Location, absolute_path: __FILE__, lineno: 123) + results = instance_double( + fake_manager_results_class, + current_public_json_schema: public_schema, + json_schema_version_setter_location: location, + unused_deprecated_elements: [] + ) + + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: true, + schema_artifacts_directory: 
::Dir.mktmpdir, + artifacts_by_path: {JSON_SCHEMAS_FILE => artifact}, + output: output + ) + manager.define_singleton_method(:abort) do |message| + raise message + end + + expect { + manager.dump_artifacts + }.to raise_error(RuntimeError, /schema\.json_schema_version 3/) + end + + it "yields only when a dumped schema is out of date and its version is not newer" do + output = ::StringIO.new + manager = build_manager( + schema_definition_results: instance_double(fake_manager_results_class), + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + artifacts_by_path: {}, + output: output + ) + + current_artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + existing_dumped_contents: {JSON_SCHEMA_VERSION_KEY => 2}, + out_of_date_value: true, + extra_comment_lines: [] + ) + manager.define_singleton_method(:json_schemas_artifact) { current_artifact } + + yielded_versions = [] + manager.send(:check_if_needs_json_schema_version_bump) do |recommended_version| + yielded_versions << recommended_version + end + expect(yielded_versions).to eq([3]) + + clean_artifact = yaml_artifact_class.new( + path: JSON_SCHEMAS_FILE, + desired_contents: {JSON_SCHEMA_VERSION_KEY => 3}, + existing_dumped_contents: nil, + out_of_date_value: false, + extra_comment_lines: [] + ) + manager.define_singleton_method(:json_schemas_artifact) { clean_artifact } + + expect { + manager.send(:check_if_needs_json_schema_version_bump) { raise "should not yield" } + }.not_to raise_error + end + + it "builds public and versioned JSON schema artifacts alongside base artifacts" do + output = ::StringIO.new + schema_artifacts_directory = ::Dir.mktmpdir + ::Dir.mkdir(::File.join(schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY)) + ::File.write( + ::File.join(schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v1.yaml"), + <<~YAML + --- + json_schema_version: 1 + YAML + ) + + public_schema = 
{JSON_SCHEMA_VERSION_KEY => 2} + merged_v1 = merged_schema_class.new( + json_schema_version: 1, + json_schema: {JSON_SCHEMA_VERSION_KEY => 1}, + missing_fields: [], + missing_types: [], + missing_necessary_fields: [], + definition_conflicts: [] + ) + merged_v2 = merged_schema_class.new( + json_schema_version: 2, + json_schema: {JSON_SCHEMA_VERSION_KEY => 2}, + missing_fields: [], + missing_types: [], + missing_necessary_fields: [], + definition_conflicts: [] + ) + + results = instance_double( + fake_manager_results_class, + current_public_json_schema: public_schema, + merge_field_metadata_into_json_schema: nil, + unused_deprecated_elements: [] + ) + expect(results).to receive(:merge_field_metadata_into_json_schema).with({JSON_SCHEMA_VERSION_KEY => 1}).and_return(merged_v1) + expect(results).to receive(:merge_field_metadata_into_json_schema).with(public_schema).and_return(merged_v2) + + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: false, + schema_artifacts_directory: schema_artifacts_directory, + artifacts_by_path: {}, + output: output + ) + + artifacts = manager.send(:artifacts_from_schema_def) + + expect(artifacts.first).to eq(:base_artifact) + expect(artifacts.drop(1).map(&:path)).to contain_exactly( + JSON_SCHEMAS_FILE, + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v1.yaml"), + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v2.yaml") + ) + expect(manager.new_yaml_artifact_calls.map { |call| call[:path] }).to include( + JSON_SCHEMAS_FILE, + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v1.yaml"), + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v2.yaml") + ) + end + + it "reports merge errors for missing fields, missing types, missing necessary fields, and conflicts" do + output = ::StringIO.new + manager = build_manager( + schema_definition_results: instance_double(fake_manager_results_class, unused_deprecated_elements: []), + enforce_json_schema_version: false, + schema_artifacts_directory: 
::Dir.mktmpdir, + artifacts_by_path: {}, + output: output + ) + manager.define_singleton_method(:abort) do |message| + raise message + end + + missing_necessary_field = missing_necessary_field_class.new( + fully_qualified_path: "Widget.metadata.currency", + field_type: "routing" + ) + conflict_a = deprecated_element_class.new( + name: "Widget", + description: "schema.object_type \"Widget\"", + defined_at: defined_at_class.new(path: "config/schema/widget.rb", lineno: 12) + ) + conflict_b = deprecated_element_class.new( + name: "Widget", + description: "schema.deleted_type \"Widget\"", + defined_at: defined_at_class.new(path: "config/schema/deleted_widget.rb", lineno: 4) + ) + merged_result = merged_schema_class.new( + json_schema_version: 3, + json_schema: {JSON_SCHEMA_VERSION_KEY => 3}, + missing_fields: ["Widget.old_name"], + missing_types: ["OldWidget"], + missing_necessary_fields: [missing_necessary_field], + definition_conflicts: [conflict_a, conflict_b] + ) + + expect { + manager.send(:report_json_schema_merge_errors, [merged_result]) + }.to raise_error( + RuntimeError, + /field\.renamed_from "old_name".*schema\.deleted_type "OldWidget".*field has been renamed.*The schema definition of `Widget` has conflicts/m + ) + end + + it "reports warnings for unused deprecated elements" do + output = ::StringIO.new + unused_a = deprecated_element_class.new( + name: "Widget", + description: "schema.deleted_field \"old_name\"", + defined_at: defined_at_class.new(path: "config/schema/widget.rb", lineno: 20) + ) + unused_b = deprecated_element_class.new( + name: "Widget", + description: "schema.deleted_type \"LegacyWidget\"", + defined_at: defined_at_class.new(path: "config/schema/legacy_widget.rb", lineno: 5) + ) + results = instance_double(fake_manager_results_class, unused_deprecated_elements: [unused_a, unused_b]) + manager = build_manager( + schema_definition_results: results, + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + 
artifacts_by_path: {}, + output: output + ) + + manager.send(:report_json_schema_merge_warnings) + + expect(output.string).to include( + "The schema definition has 2 unneeded reference(s) to deprecated schema elements.", + "1. schema.deleted_type \"LegacyWidget\"", + "2. schema.deleted_field \"old_name\"" + ) + end + + it "formats JSON schema version descriptions and noun helpers" do + output = ::StringIO.new + manager = build_manager( + schema_definition_results: instance_double(fake_manager_results_class, unused_deprecated_elements: []), + enforce_json_schema_version: false, + schema_artifacts_directory: ::Dir.mktmpdir, + artifacts_by_path: {}, + output: output + ) + + expect(manager.send(:describe_json_schema_versions, [7], "and")).to eq("JSON schema version 7") + expect(manager.send(:describe_json_schema_versions, [7, 8], "and")).to eq("JSON schema versions 7 and 8") + expect(manager.send(:describe_json_schema_versions, [7, 8, 9], "or")).to eq("JSON schema versions 7, 8, or 9") + expect(manager.send(:old_versions, [7])).to eq("this old version") + expect(manager.send(:old_versions, [7, 8])).to eq("these old versions") + expect(manager.send(:files_noun_phrase, [7])).to eq("its file") + expect(manager.send(:files_noun_phrase, [7, 8])).to eq("their files") + end + end + end + end +end diff --git a/elasticgraph-schema_definition/README.md b/elasticgraph-schema_definition/README.md index b85f79b9e..5c06af483 100644 --- a/elasticgraph-schema_definition/README.md +++ b/elasticgraph-schema_definition/README.md @@ -21,6 +21,9 @@ graph LR; elasticgraph-indexer["elasticgraph-indexer"]; elasticgraph-schema_definition --> elasticgraph-indexer; class elasticgraph-indexer otherEgGemStyle; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; + class elasticgraph-json_ingestion otherEgGemStyle; elasticgraph-schema_artifacts["elasticgraph-schema_artifacts"]; elasticgraph-schema_definition --> 
elasticgraph-schema_artifacts; class elasticgraph-schema_artifacts otherEgGemStyle; diff --git a/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec b/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec index 740a1a088..175a91285 100644 --- a/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec +++ b/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec @@ -43,6 +43,7 @@ Gem::Specification.new do |spec| spec.add_dependency "elasticgraph-graphql", ElasticGraph::VERSION # needed since we validate that scalar `coerce_with` options are valid (which loads scalar coercion adapters) spec.add_dependency "elasticgraph-indexer", ElasticGraph::VERSION # needed since we validate that scalar `prepare_for_indexing_with` options are valid (which loads indexing preparer adapters) + spec.add_dependency "elasticgraph-json_ingestion", ElasticGraph::VERSION # JSON ingestion serializer, auto-applied by default for backward compatibility spec.add_dependency "elasticgraph-schema_artifacts", ElasticGraph::VERSION spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION spec.add_dependency "graphql", "~> 2.5.22" diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb index 4b61944c2..28b34dd4c 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb @@ -7,6 +7,8 @@ # frozen_string_literal: true require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/api_extension" +require "elastic_graph/schema_definition/json_ingestion_compatibility" require "elastic_graph/schema_artifacts/runtime_metadata/extension" require "elastic_graph/schema_artifacts/runtime_metadata/graphql_resolver" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" @@ -77,6 +79,10 
@@ def initialize( @factory = @state.factory + # Apply built-in JSON ingestion support for backward compatibility, then + # allow any explicitly provided extensions to customize the API as well. + extend JSONIngestion::SchemaDefinition::APIExtension + extension_modules.each { |mod| extend(mod) } # These lines must come _after_ the extension modules are applied, so that the extension modules @@ -242,7 +248,7 @@ def union_type(name, &block) # ElasticGraph.define_schema do |schema| # schema.scalar_type "URL" do |t| # t.mapping type: "keyword" - # t.json_schema type: "string", format: "uri" + # t.json_schema type: "string" # end # end def scalar_type(name, &block) @@ -401,69 +407,6 @@ def results @results ||= @factory.new_results end - # Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema - # artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique - # version number. The publisher will then include this version number in published events to identify the version of the schema it - # was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync. - # - # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying constantly - # have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this - # on every JSON schema change by setting `enforce_json_schema_version` to `false` in your `Rakefile`. 
- # - # @param version [Integer] current version number of the JSON schema artifact - # @return [void] - # @see Local::RakeTasks#enforce_json_schema_version - # - # @example Set the JSON schema version to 1 - # ElasticGraph.define_schema do |schema| - # schema.json_schema_version 1 - # end - def json_schema_version(version) - if !version.is_a?(Integer) || version < 1 - raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}" - end - - if @state.json_schema_version - raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{@state.json_schema_version}" - end - - @state.json_schema_version = version - @state.json_schema_version_setter_location = caller_locations(1, 1).to_a.first - nil - end - - # Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the - # publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to - # configure this behavior. - # - # @param allow_omitted_fields [bool] Whether nullable fields can be omitted from indexing events. - # @param allow_extra_fields [bool] Whether extra fields (e.g. beyond fields defined in the schema) can be included in indexing events. - # @return [void] - # - # @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled - # field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher - # accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId` - # is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of - # these to `true` (or none). 
- # - # @example Allow omitted fields and disallow extra fields - # ElasticGraph.define_schema do |schema| - # schema.json_schema_strictness allow_omitted_fields: true, allow_extra_fields: false - # end - def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true) - unless [true, false].include?(allow_omitted_fields) - raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" - end - - unless [true, false].include?(allow_extra_fields) - raise Errors::SchemaError, "`allow_extra_fields` must be true or false" - end - - @state.allow_omitted_json_schema_fields = allow_omitted_fields - @state.allow_extra_json_schema_fields = allow_extra_fields - nil - end - # Registers a customization callback that will be applied to every built-in type automatically provided by ElasticGraph. Provides # an opportunity to customize the built-in types (e.g. to add directives to them or whatever). # diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb index 067903eff..7bdcceb69 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb @@ -226,7 +226,9 @@ def new_interface_type(name) def new_object_type(name) @@object_type_new.call(@state, name.to_s) do |object_type| + # :nocov: -- most suites reach this through higher-level APIs or extensions that always pass a block. yield object_type if block_given? 
+ # :nocov: end end @@object_type_new = prevent_non_factory_instantiation_of(SchemaElements::ObjectType) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb index 605024146..dc2c7c36e 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb @@ -6,81 +6,15 @@ # # frozen_string_literal: true -require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/indexing/event_envelope" module ElasticGraph module SchemaDefinition module Indexing - # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events. + # Backward-compatible alias for the JSON ingestion event envelope helper. # # @api private - module EventEnvelope - # @param indexed_type_names [Array] names of the indexed types - # @param json_schema_version [Integer] the version of the JSON schema - # @return [Hash] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`. - def self.json_schema(indexed_type_names, json_schema_version) - { - "type" => "object", - "description" => "Required by ElasticGraph to wrap every data event.", - "properties" => { - "op" => { - "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.", - "type" => "string", - "enum" => %w[upsert] - }, - "type" => { - "description" => "The type of object present in `record`.", - "type" => "string", - # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent. 
- "enum" => indexed_type_names.sort - }, - "id" => { - "description" => "The unique identifier of the record.", - "type" => "string", - "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH - }, - "version" => { - "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', - "type" => "integer", - "minimum" => 0, - "maximum" => (2**63) - 1 - }, - "record" => { - "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", - "type" => "object" - }, - "latency_timestamps" => { - "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", - "type" => "object", - "additionalProperties" => false, - "patternProperties" => { - "^\\w+_at$" => { - "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", - "type" => "string", - "format" => "date-time" - } - } - }, - JSON_SCHEMA_VERSION_KEY => { - "description" => "The version of the JSON schema the publisher was using when the event was published. 
ElasticGraph will use the JSON schema matching this version to process the event.", - "const" => json_schema_version - }, - "message_id" => { - "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", - "type" => "string" - } - }, - "additionalProperties" => false, - "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], - "if" => { - "properties" => { - "op" => {"const" => "upsert"} - } - }, - "then" => {"required" => ["record"]} - } - end - end + EventEnvelope = JSONIngestion::SchemaDefinition::Indexing::EventEnvelope end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb index 5b0c0db1c..51dfd4a1f 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb @@ -7,7 +7,6 @@ # frozen_string_literal: true require "elastic_graph/constants" -require "elastic_graph/schema_definition/indexing/json_schema_field_metadata" require "elastic_graph/schema_definition/indexing/list_counts_mapping" require "elastic_graph/support/hash_util" require "elastic_graph/support/memoizable_data" @@ -22,28 +21,13 @@ class Field < Support::MemoizableData.define( :name, :name_in_index, :type, - :json_schema_layers, :indexing_field_type, :accuracy_confidence, - :json_schema_customizations, :mapping_customizations, :source, :runtime_field_script, :doc_comment ) - # JSON schema overrides that automatically apply to specific mapping types so that the JSON schema - # validation will reject values which cannot be indexed into fields of a specific mapping type. 
- # - # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/number.html Elasticsearch numeric field type documentation - # @note We don't handle `integer` here because it's the default numeric type (handled by our definition of the `Int` scalar type). - # @note Likewise, we don't handle `long` here because a custom scalar type must be used for that since GraphQL's `Int` type can't handle long values. - JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE = { - "byte" => {"minimum" => -(2**7), "maximum" => (2**7) - 1}, - "short" => {"minimum" => -(2**15), "maximum" => (2**15) - 1}, - "keyword" => {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}, - "text" => {"maxLength" => DEFAULT_MAX_TEXT_LENGTH} - } - # @return [Hash] the mapping for this field. The returned hash should be composed entirely # of Ruby primitives that, when converted to a JSON string, match the structure required by # [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html). @@ -63,23 +47,6 @@ def mapping end end - # @return [Hash] the JSON schema definition for this field. The returned object should - # be composed entirely of Ruby primitives that, when converted to a JSON string, match the - # requirements of [the JSON schema spec](https://json-schema.org/). - def json_schema - json_schema_layers - .reverse # resolve layers from innermost to outermost wrappings - .reduce(inner_json_schema) { |acc, layer| process_layer(layer, acc) } - .merge(outer_json_schema_customizations) - .merge({"description" => doc_comment}.compact) - .then { |h| Support::HashUtil.stringify_keys(h) } - end - - # @return [JSONSchemaFieldMetadata] additional ElasticGraph metadata to be stored in the JSON schema for this field. 
- def json_schema_metadata - JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index) - end - # Builds a hash containing the mapping for the provided fields, normalizing it in the same way that the # datastore does so that consistency checks between our index configuration and what's in the datastore # work properly. @@ -107,80 +74,6 @@ def self.normalized_mapping_hash_for(fields) mapping_hash end - - def nullable? - json_schema_layers.include?(:nullable) - end - - private - - def inner_json_schema - user_specified_customizations = - if user_specified_json_schema_customizations_go_on_outside? - {} # : ::Hash[::String, untyped] - else - Support::HashUtil.stringify_keys(json_schema_customizations) - end - - customizations_from_mapping = JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE[mapping["type"]] || {} - customizations = customizations_from_mapping.merge(user_specified_customizations) - customizations = indexing_field_type.format_field_json_schema_customizations(customizations) - - ref = {"$ref" => "#/$defs/#{type.unwrapped_name}"} - return ref if customizations.empty? - - # Combine any customizations with type ref under an "allOf" subschema: - # All of these properties must hold true for the type to be valid. - # - # Note that if we simply combine the customizations with the `$ref` - # at the same level, it will not work, because other subschema - # properties are ignored when they are in the same object as a `$ref`: - # https://github.com/json-schema-org/JSON-Schema-Test-Suite/blob/2.0.0/tests/draft7/ref.json#L165-L168 - {"allOf" => [ref, customizations]} - end - - def outer_json_schema_customizations - return {} unless user_specified_json_schema_customizations_go_on_outside? - Support::HashUtil.stringify_keys(json_schema_customizations) - end - - # Indicates if the user-specified JSON schema customizations should go on the inside - # (where they normally go) or on the outside. 
They only go on the outside when it's - # an array field, because then they apply to the array itself instead of the items in the - # array. - def user_specified_json_schema_customizations_go_on_outside? - json_schema_layers.include?(:array) - end - - def process_layer(layer, schema) - case layer - when :nullable - make_nullable(schema) - when :array - make_array(schema) - else - # :nocov: - layer is only ever `:nullable` or `:array` so we never get here - schema - # :nocov: - end - end - - def make_nullable(schema) - # Here we use "anyOf" to ensure that JSON can either match the schema OR null. - # - # (Using "oneOf" would mean that if we had a schema that also allowed null, - # null would never be allowed, since "oneOf" must match exactly one subschema). - { - "anyOf" => [ - schema, - {"type" => "null"} - ] - } - end - - def make_array(schema) - {"type" => "array", "items" => schema} - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb index 070f70db3..f0c3454eb 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_reference.rb @@ -15,7 +15,6 @@ module Indexing :name_in_index, :type, :mapping_options, - :json_schema_options, :accuracy_confidence, :source, :runtime_field_script, @@ -35,10 +34,8 @@ def resolve name: name, name_in_index: name_in_index, type: type, - json_schema_layers: type.json_schema_layers, indexing_field_type: resolved_type.to_indexing_field_type, accuracy_confidence: accuracy_confidence, - json_schema_customizations: json_schema_options, mapping_customizations: mapping_options, source: source, runtime_field_script: runtime_field_script, @@ -46,7 +43,7 @@ def resolve ) end - # @dynamic initialize, with, name, name_in_index, type, 
mapping_options, json_schema_options, accuracy_confidence, source, runtime_field_script, doc_comment + # @dynamic initialize, with, name, name_in_index, type, mapping_options, accuracy_confidence, source, runtime_field_script, doc_comment end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb index d2f72fb6f..e1aad284c 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb @@ -23,40 +23,11 @@ module FieldType # # @api private class Enum < ::Data - # @return [Hash] the JSON schema for this enum type. - def to_json_schema - {"type" => "string", "enum" => enum_value_names} - end - # @return [Hash] the datastore mapping for this enum type. def to_mapping {"type" => "keyword"} end - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this enum type. - def json_schema_field_metadata_by_field_name - {} - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. - def format_field_json_schema_customizations(customizations) - # Since an enum type already restricts the values to a small set of allowed values, we do not need to keep - # other customizations (such as the `maxLength` field customization EG automatically applies to fields - # indexed as a `keyword`--we don't allow enum values to exceed that length, anyway). 
- # - # It's desirable to restrict what customizations are applied because when a publisher uses the JSON schema - # to generate code using a library such as https://github.com/pwall567/json-kotlin-schema-codegen, we found - # that the presence of extra field customizations inhibits the library's ability to generate code in the way - # we want (it causes the type of the enum to change since the JSON schema changes from a direct `$ref` to - # being wrapped in an `allOf`). - # - # However, we still want to apply `enum` customizations--this allows a user to "narrow" the set of allowed - # values for a field. For example, a `Currency` enum could contain every currency, and a user may want to - # restrict a specific `currency` field to a subset of currencies (e.g. to just USD, CAD, and EUR). - customizations.slice("enum") - end - # @dynamic initialize, enum_value_names end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb index c01fcbd4e..c02e463b6 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/object.rb @@ -14,7 +14,7 @@ module ElasticGraph module SchemaDefinition module Indexing module FieldType - # Responsible for the JSON schema and mapping of a {SchemaElements::ObjectType}. + # Responsible for the mapping of a {SchemaElements::ObjectType}. 
# # @!attribute [r] type_name # @return [String] name of the object type @@ -22,13 +22,11 @@ module FieldType # @return [Array] the subfields of this object type # @!attribute [r] mapping_options # @return [Hash] options to be included in the mapping - # @!attribute [r] json_schema_options - # @return [Hash] options to be included in the JSON schema # @!attribute [r] doc_comment # @return [String, nil] documentation for the type # # @api private - class Object < Support::MemoizableData.define(:schema_def_state, :type_name, :subfields, :mapping_options, :json_schema_options, :doc_comment) + class Object < Support::MemoizableData.define(:schema_def_state, :type_name, :subfields, :mapping_options, :doc_comment) # @return [Hash] the datastore mapping for this object type. def to_mapping @to_mapping ||= begin @@ -41,77 +39,10 @@ def to_mapping end end - # @return [Hash] the JSON schema for this object type. - def to_json_schema - @to_json_schema ||= - if json_schema_options.empty? - # Fields that are `sourced_from` an alternate type must not be included in this types JSON schema, - # since events of this type won't include them. - other_source_subfields, json_schema_candidate_subfields = subfields.partition(&:source) - validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) - json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script) - required_fields = json_schema_subfields - required_fields = required_fields.reject(&:nullable?) if schema_def_state.allow_omitted_json_schema_fields - - { - "type" => "object", - "properties" => json_schema_subfields.to_h { |f| [f.name, f.json_schema] }.merge(json_schema_typename_field), - # Note: `__typename` is intentionally not included in the `required` list. If `__typename` is present - # we want it validated (as we do by merging in `json_schema_typename_field`) but we only want - # to require it in the context of a union type. The union's json schema requires the field. 
- "required" => required_fields.map(&:name).freeze, - "additionalProperties" => (false unless schema_def_state.allow_extra_json_schema_fields), - "description" => doc_comment - }.compact.freeze - else - Support::HashUtil.stringify_keys(json_schema_options) - end - end - - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this object type. - def json_schema_field_metadata_by_field_name - subfields.to_h { |f| [f.name, f.json_schema_metadata] } - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. - def format_field_json_schema_customizations(customizations) - customizations - end - - private - + # @private def after_initialize subfields.freeze end - - # Returns a __typename property which we use for union types. - # - # This must always be set to the name of the type (thus the const value). - # - # We also add a "default" value. This does not impact validation, but rather - # aids tools like our kotlin codegen to save publishers from having to set the - # property explicitly when creating events. - def json_schema_typename_field - { - "__typename" => { - "type" => "string", - "const" => type_name, - "default" => type_name - } - } - end - - def validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) - problem_fields = other_source_subfields.reject { |f| f.json_schema_customizations.empty? } - return if problem_fields.empty? - - field_descriptions = problem_fields.map(&:name).sort.map { |f| "`#{f}`" }.join(", ") - raise Errors::SchemaError, - "`#{type_name}` has #{problem_fields.size} field(s) (#{field_descriptions}) that are `sourced_from` " \ - "another type and also have JSON schema customizations. Instead, put the JSON schema " \ - "customizations on the source type's field definitions." 
- end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb index cb9c3132e..600a05c90 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb @@ -27,22 +27,6 @@ def to_mapping Support::HashUtil.stringify_keys(scalar_type.mapping_options) end - # @return [Hash] the JSON schema for this scalar type. - def to_json_schema - Support::HashUtil.stringify_keys(scalar_type.json_schema_options) - end - - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this scalar type. - def json_schema_field_metadata_by_field_name - {} - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. - def format_field_json_schema_customizations(customizations) - customizations - end - # @dynamic initialize, scalar_type end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb index e4d6e634f..044ac7795 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field_type/union.rb @@ -24,25 +24,6 @@ module FieldType # # @api private class Union < ::Data.define(:subtypes_by_name) - # @return [Hash] the JSON schema for this union type. - def to_json_schema - subtype_json_schemas = subtypes_by_name.keys.map { |name| {"$ref" => "#/$defs/#{name}"} } - - # A union type can represent multiple subtypes, referenced by the "anyOf" clause below. 
- # We also add a requirement for the presence of __typename to indicate which type - # is being referenced (this property is pre-defined on the type itself as a constant). - # - # Note: Although both "oneOf" and "anyOf" keywords are valid for combining schemas - # to form a union, and validate equivalently when no object can satisfy multiple of the - # subschemas (which is the case here given the __typename requirements are mutually - # exclusive), we chose to use "oneOf" here because it works better with this library: - # https://github.com/pwall567/json-kotlin-schema-codegen - { - "required" => %w[__typename], - "oneOf" => subtype_json_schemas - } - end - # @return [Hash] the datastore mapping for this union type. def to_mapping mapping_subfields = subtypes_by_name.values.map(&:subfields).reduce([], :union) @@ -52,17 +33,6 @@ def to_mapping {"properties" => {"__typename" => {"type" => "keyword"}}} ) end - - # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this union type. - def json_schema_field_metadata_by_field_name - {} - end - - # @param customizations [Hash] JSON schema customizations - # @return [Hash] formatted customizations. 
- def format_field_json_schema_customizations(customizations) - customizations - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index 8d59dff2b..90452bad9 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -66,7 +66,10 @@ def initialize(name, settings, schema_def_state, indexed_type) self.routing_field_path = public_field_path("id", explanation: "indexed types must have an `id` field") end + # :nocov: -- this DSL block hook is a low-value SimpleCov branch to track relative to the + # surrounding index behavior specs. yield self if block_given? + # :nocov: end # Specifies how documents in this index should sort by default, when no `orderBy` argument is provided to the GraphQL query. @@ -136,8 +139,6 @@ def rollover(frequency, timestamp_field_path_name) raise Errors::SchemaError, "rollover field `#{timestamp_field_path.full_description}` cannot be used for rollover since it is a list field." end - timestamp_field_path.path_parts.each { |f| f.json_schema nullable: false } - self.rollover_config = RolloverConfig.new( frequency: frequency, timestamp_field_path: timestamp_field_path @@ -184,8 +185,6 @@ def route_with(routing_field_path_name) self.routing_field_path = routing_field_path - routing_field_path.path_parts[0..-2].each { |f| f.json_schema nullable: false } - routing_field_path.last_part.json_schema nullable: false, pattern: HAS_NON_WHITE_SPACE_REGEX indexed_type.append_to_documentation "For more performant queries on this type, please filter on `#{routing_field_path_name}` if possible." 
end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb index 535d11b2d..981d695ea 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb @@ -6,29 +6,15 @@ # # frozen_string_literal: true +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata" + module ElasticGraph module SchemaDefinition module Indexing - # @!parse class JSONSchemaFieldMetadata; end - JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index) - - # Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas - # alongside the JSON schema fields. - # - # @!attribute [r] type - # @return [String] name of the ElasticGraph type for this field - # @!attribute [r] name_in_index - # @return [String] name of the field in the index + # Backward-compatible alias for JSON schema field metadata. 
# # @api private - class JSONSchemaFieldMetadata < ::Data - # @return [Hash] hash form of the metadata that can be dumped in JSON schema - def to_dumpable_hash - {"type" => type, "nameInIndex" => name_in_index} - end - - # @dynamic initialize, type, name_in_index - end + JSONSchemaFieldMetadata = JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb index 820ac7b62..148b065a1 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb @@ -6,223 +6,15 @@ # # frozen_string_literal: true -require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" module ElasticGraph module SchemaDefinition module Indexing - # Represents the result of merging a JSON schema with metadata. The result includes both - # the merged JSON schema and a list of `failed_fields` indicating which fields metadata - # could not be determined for. + # Backward-compatible alias for the JSON schema merge result type. # - # @private - class JSONSchemaWithMetadata < ::Data.define( - # The JSON schema. - :json_schema, - # A set of fields (in the form `Type.field`) that were needed but not found. - :missing_fields, - # A set of type names that were needed but not found. - :missing_types, - # A set of `DeprecatedElement` objects that create conflicting definitions. - :definition_conflicts, - # A set of fields that have been deleted but that must be retained (e.g. 
for custom shard routing or rollover) - :missing_necessary_fields - ) - def json_schema_version - json_schema.fetch(JSON_SCHEMA_VERSION_KEY) - end - - # Responsible for building `JSONSchemaWithMetadata` instances. - # - # @private - class Merger - # @dynamic unused_deprecated_elements - attr_reader :unused_deprecated_elements - - def initialize(schema_def_results) - @field_metadata_by_type_and_field_name = schema_def_results.json_schema_field_metadata_by_type_and_field_name - @renamed_types_by_old_name = schema_def_results.state.renamed_types_by_old_name - @deleted_types_by_old_name = schema_def_results.state.deleted_types_by_old_name - @renamed_fields_by_type_name_and_old_field_name = schema_def_results.state.renamed_fields_by_type_name_and_old_field_name - @deleted_fields_by_type_name_and_old_field_name = schema_def_results.state.deleted_fields_by_type_name_and_old_field_name - @state = schema_def_results.state - @derived_indexing_type_names = schema_def_results.derived_indexing_type_names - - @unused_deprecated_elements = ( - @renamed_types_by_old_name.values + - @deleted_types_by_old_name.values + - @renamed_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + - @deleted_fields_by_type_name_and_old_field_name.values.flat_map(&:values) - ).to_set - end - - def merge_metadata_into(json_schema) - missing_fields = ::Set.new - missing_types = ::Set.new - definition_conflicts = ::Set.new - old_type_name_by_current_name = {} # : ::Hash[String, String] - - defs = json_schema.fetch("$defs").to_h do |type_name, type_def| - if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"]) - current_type_name = determine_current_type_name( - type_name, - missing_types: missing_types, - definition_conflicts: definition_conflicts - ) - - if current_type_name - old_type_name_by_current_name[current_type_name] = type_name - end - - properties = properties.to_h do |field_name, prop| - unless field_name == "__typename" - field_metadata = 
current_type_name&.then do |name| - field_metadata_for( - name, - field_name, - missing_fields: missing_fields, - definition_conflicts: definition_conflicts - ) - end - - prop = prop.merge({"ElasticGraph" => field_metadata&.to_dumpable_hash}) - end - - [field_name, prop] - end - - type_def = type_def.merge({"properties" => properties}) - end - - [type_name, type_def] - end - - json_schema = json_schema.merge("$defs" => defs) - - JSONSchemaWithMetadata.new( - json_schema: json_schema, - missing_fields: missing_fields, - missing_types: missing_types, - definition_conflicts: definition_conflicts, - missing_necessary_fields: identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) - ) - end - - private - - # Given a historical `type_name`, determines (and returns) the current name for that type. - def determine_current_type_name(type_name, missing_types:, definition_conflicts:) - exists_currently = @field_metadata_by_type_and_field_name.key?(type_name) - deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } - renamed = @renamed_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } - - if [exists_currently, deleted, renamed].count(&:itself) > 1 - definition_conflicts.merge([deleted, renamed].compact) - end - - return type_name if exists_currently - return nil if deleted - return renamed.name if renamed - - missing_types << type_name - nil - end - - # Given a historical `type_name` and `field_name` determines (and returns) the field metadata for it. 
- def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:) - full_name = "#{type_name}.#{field_name}" - - current_meta = @field_metadata_by_type_and_field_name.dig(type_name, field_name) - deleted = @deleted_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| - @unused_deprecated_elements.delete(elem) - end - renamed = @renamed_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| - @unused_deprecated_elements.delete(elem) - end - - if [current_meta, deleted, renamed].count(&:itself) > 1 - definition_conflicts.merge([deleted, renamed].compact.map { |elem| elem.with(name: full_name) }) - end - - return current_meta if current_meta - return nil if deleted - return @field_metadata_by_type_and_field_name.dig(type_name, renamed.name) if renamed - - missing_fields << full_name - nil - end - - def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) - json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name) - version = json_schema.fetch(JSON_SCHEMA_VERSION_KEY) - - @state.object_types_by_name.values - .select { |type| type.has_own_index_def? && !@derived_indexing_type_names.include?(type.name) } - .flat_map { |object_type| identify_missing_necessary_fields_for_index_def(object_type, object_type.own_index_def, json_schema_resolver, version) } - end - - def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver, json_schema_version) - { - "routing" => index_def.routing_field_path, - "rollover" => index_def.rollover_config&.timestamp_field_path - }.compact.filter_map do |field_type, field_path| - if json_schema_resolver.necessary_path_missing?(field_path) - # The JSON schema v # {json_schema_version} artifact has no field that maps to the #{field_type} path of `#{field_path.fully_qualified_path_in_index}`. 
- - MissingNecessaryField.new( - field_type: field_type, - fully_qualified_path: field_path.fully_qualified_path_in_index - ) - end - end - end - - class JSONSchemaResolver - def initialize(state, json_schema, old_type_name_by_current_name) - @state = state - @old_type_name_by_current_name = old_type_name_by_current_name - @meta_by_old_type_and_name_in_index = ::Hash.new do |hash, type_name| - properties = json_schema.fetch("$defs").fetch(type_name).fetch("properties") - - hash[type_name] = properties.filter_map do |name, prop| - if (metadata = prop["ElasticGraph"]) - [metadata.fetch("nameInIndex"), metadata] - end - end.to_h - end - end - - # Indicates if the given `field_path` is (1) necessary and (2) missing from the JSON schema, indicating a problem. - # - # - Returns `false` is the given `field_path` is present in the JSON schema. - # - Returns `false` is the parent type of `field_path` has not been retained in this JSON schema version - # (in that case, the field path is not necessary). - # - Otherwise, returns `true` since the field path is both necessary and missing. - def necessary_path_missing?(field_path) - parent_type = field_path.first_part.parent_type.name - - field_path.path_parts.any? 
do |path_part| - necessary_path_part_missing?(parent_type, path_part.name_in_index) do |meta| - parent_type = @state.type_ref(meta.fetch("type")).fully_unwrapped.name - end - end - end - - private - - def necessary_path_part_missing?(parent_type, name_in_index) - old_type_name = @old_type_name_by_current_name[parent_type] - return false unless old_type_name - - meta = @meta_by_old_type_and_name_in_index.dig(old_type_name, name_in_index) - yield meta if meta - !meta - end - end - end - - MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path) - end + # @api private + JSONSchemaWithMetadata = JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_ingestion_compatibility.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_ingestion_compatibility.rb new file mode 100644 index 000000000..6085f5280 --- /dev/null +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_ingestion_compatibility.rb @@ -0,0 +1,12 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/schema_definition/indexing/event_envelope" +require "elastic_graph/schema_definition/indexing/json_schema_field_metadata" +require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" +require "elastic_graph/schema_definition/json_schema_pruner" diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb index 7a8323fa6..954164cb3 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb @@ -6,58 +6,13 @@ # # frozen_string_literal: true -require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" module ElasticGraph module SchemaDefinition - # Prunes unused type definitions from a given JSON schema. + # Backward-compatible alias for the JSON schema pruner. # - # @private - class JSONSchemaPruner - def self.prune(original_json_schema) - initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema - .dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum") - - types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"]) - - # The .select will preserve the sort order of the original hash - # standard:disable Style/HashSlice -- https://github.com/soutaro/steep/issues/1503 - pruned_defs = original_json_schema["$defs"].select { |k, _v| types_to_keep.include?(k) } - # standard:enable Style/HashSlice - - original_json_schema.merge("$defs" => pruned_defs) - end - - # Returns a list of type names indicating all types referenced from any type in source_type_names. - private_class_method - def self.referenced_type_names(source_type_names, original_defs) - return Set.new if source_type_names.empty? 
- - referenced_type_defs = original_defs.slice(*source_type_names) - ref_names = collect_ref_names(referenced_type_defs) - - referenced_type_names(ref_names, original_defs) + source_type_names - end - - private_class_method - def self.collect_ref_names(hash) - hash.flat_map do |key, value| - case value - when ::Hash - collect_ref_names(value) - when ::Array - value.grep(::Hash).flat_map { |subhash| collect_ref_names(subhash) } - when ::String - if key == "$ref" && (type = value[%r{\A#/\$defs/(.+)\z}, 1]) - [type] - else - [] - end - else - [] - end - end - end - end + # @api private + JSONSchemaPruner = JSONIngestion::SchemaDefinition::JSONSchemaPruner end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb index 454093ff6..89d9ba7f7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_type_info.rb @@ -6,29 +6,22 @@ # # frozen_string_literal: true -require "elastic_graph/support/json_schema/meta_schema_validator" - module ElasticGraph module SchemaDefinition module Mixins - # Mixin used to specify non-GraphQL type info (datastore index and JSON schema type info). + # Mixin used to specify non-GraphQL type info on schema elements. # Exists as a mixin so we can apply the same consistent API to every place we need to use this. # Currently it's used in 3 places: # - # - {SchemaElements::ScalarType}: allows specification of how scalars are represented in JSON schema and the index. - # - {SchemaElements::TypeWithSubfields}: allows customization of how an object type is represented in JSON schema and the index. - # - {SchemaElements::Field}: allows customization of a specific field over the field type's standard JSON schema and the index mapping. 
+ # - {SchemaElements::ScalarType}: allows specification of how scalars are represented in the datastore index. + # - {SchemaElements::TypeWithSubfields}: allows customization of how an object type is represented in the datastore index. + # - {SchemaElements::Field}: allows customization of a specific field over the field type's standard index mapping. module HasTypeInfo # @return [Hash] datastore mapping options def mapping_options @mapping_options ||= {} end - # @return [Hash] JSON schema options - def json_schema_options - @json_schema_options ||= {} - end - # Set of mapping parameters that it makes sense to allow customization of, based on # [the Elasticsearch docs](https://www.elastic.co/guide/en/elasticsearch/reference/8.15/mapping-params.html). CUSTOMIZABLE_DATASTORE_PARAMS = Set[ @@ -70,7 +63,7 @@ def json_schema_options # ElasticGraph.define_schema do |schema| # schema.scalar_type "URL" do |t| # t.mapping type: "keyword" - # t.json_schema type: "string", format: "uri" + # t.json_schema type: "string" # end # end # @@ -87,13 +80,6 @@ def json_schema_options # t.field "expYear", "Int" do |f| # # Use a smaller numeric type to save space in the datastore # f.mapping type: "short" - # f.json_schema minimum: 2000, maximum: 2099 - # end - # - # t.field "expMonth", "Int" do |f| - # # Use a smaller numeric type to save space in the datastore - # f.mapping type: "byte" - # f.json_schema minimum: 1, maximum: 12 # end # # t.index "cards" @@ -108,73 +94,6 @@ def mapping(**options) mapping_options.update(options) end - - # Defines the [JSON schema](https://json-schema.org/understanding-json-schema/) validations for this field or type. Validations - # defined here will be included in the generated `json_schemas.yaml` artifact, which is used by the ElasticGraph indexer to - # validate events before indexing their data in the datastore. 
In addition, the publisher may use `json_schemas.yaml` for code - # generation and to apply validation before publishing an event to ElasticGraph. - # - # Can be called multiple times; each time, the options will be merged into the existing options. - # - # This is _required_ on a {SchemaElements::ScalarType} (since we don’t know how a custom scalar type should be represented in - # JSON!). On a {SchemaElements::Field}, this is optional, but can be used to make the JSON schema validation stricter then it - # would otherwise be. For example, you could use `json_schema maxLength: 30` on a `String` field to limit the length. - # - # You can use any of the JSON schema validation keywords here. In addition, `nullable: false` is supported to configure the - # generated JSON schema to disallow `null` values for the field. Note that if you define a field with a non-nullable GraphQL type - # (e.g. `Int!`), the JSON schema will automatically disallow nulls. However, as explained in the - # {SchemaElements::TypeWithSubfields#field} documentation, we generally recommend against defining non-nullable GraphQL fields. - # `json_schema nullable: false` will disallow `null` values from being indexed, while still keeping the field nullable in the - # GraphQL schema. If you think you might want to make a field non-nullable in the GraphQL schema some day, it’s a good idea to use - # `json_schema nullable: false` now to ensure every indexed record has a non-null value for the field. - # - # @note We recommend using JSON schema validations in a limited fashion. Validations that are appropriate to apply when data is - # entering the system-of-record are often not appropriate on a secondary index like ElasticGraph. Events that violate a JSON - # schema validation will fail to index (typically they will be sent to the dead letter queue and page an oncall engineer). 
If an - # ElasticGraph instance is meant to contain all the data of some source system, you probably don’t want it applying stricter - # validations than the source system itself has. We recommend limiting your JSON schema validations to situations where - # violations would prevent ElasticGraph from operating correctly. - # - # @param options [Hash] JSON schema options - # @return [void] - # - # @example Define the JSON schema validations of a custom scalar type - # ElasticGraph.define_schema do |schema| - # schema.scalar_type "URL" do |t| - # t.mapping type: "keyword" - # - # # JSON schema has a built-in URI format validator: - # # https://json-schema.org/understanding-json-schema/reference/string.html#resource-identifiers - # t.json_schema type: "string", format: "uri" - # end - # end - # - # @example Define additional validations on a field - # ElasticGraph.define_schema do |schema| - # schema.object_type "Card" do |t| - # t.field "id", "ID!" - # - # t.field "expYear", "Int" do |f| - # # Use JSON schema to ensure the publisher is sending us 4 digit years, not 2 digit years. 
- # f.json_schema minimum: 2000, maximum: 2099 - # end - # - # t.field "expMonth", "Int" do |f| - # f.json_schema minimum: 1, maximum: 12 - # end - # - # t.index "cards" - # end - # end - def json_schema(**options) - validatable_json_schema = Support::HashUtil.stringify_keys(options) - - if (error_msg = Support::JSONSchema.strict_meta_schema_validator.validate_with_error_message(validatable_json_schema)) - raise Errors::SchemaError, "Invalid JSON schema options set on #{self}:\n\n#{error_msg}" - end - - json_schema_options.update(options) - end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 8106e9d5f..70ccb30a7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -10,8 +10,6 @@ require "elastic_graph/errors" require "elastic_graph/schema_artifacts/runtime_metadata/schema" require "elastic_graph/schema_artifacts/artifacts_helper_methods" -require "elastic_graph/schema_definition/indexing/event_envelope" -require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" require "elastic_graph/schema_definition/indexing/relationship_resolver" require "elastic_graph/schema_definition/indexing/update_target_resolver" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" @@ -46,53 +44,6 @@ def runtime_metadata @runtime_metadata ||= build_runtime_metadata end - # @param version [Integer] desired JSON schema version - # @return [Hash] the JSON schema for the requested version, if available - # @raise [Errors::NotFoundError] if the requested JSON schema version is not available - def json_schemas_for(version) - unless available_json_schema_versions.include?(version) - raise Errors::NotFoundError, "The requested json schema version (#{version}) is not available. 
Available versions: #{available_json_schema_versions.to_a.join(", ")}." - end - - @latest_versioned_json_schema ||= merge_field_metadata_into_json_schema(current_public_json_schema).json_schema - end - - # @return [Set] set of available JSON schema versions - def available_json_schema_versions - @available_json_schema_versions ||= Set[latest_json_schema_version] - end - - # @return [Hash] the newly generated JSON schema - def latest_json_schema_version - current_public_json_schema[JSON_SCHEMA_VERSION_KEY] - end - - # @private - def json_schema_version_setter_location - state.json_schema_version_setter_location - end - - # @private - def json_schema_field_metadata_by_type_and_field_name - @json_schema_field_metadata_by_type_and_field_name ||= json_schema_indexing_field_types_by_name - .transform_values(&:json_schema_field_metadata_by_field_name) - end - - # @private - def current_public_json_schema - @current_public_json_schema ||= build_public_json_schema - end - - # @private - def merge_field_metadata_into_json_schema(json_schema) - json_schema_with_metadata_merger.merge_metadata_into(json_schema) - end - - # @private - def unused_deprecated_elements - json_schema_with_metadata_merger.unused_deprecated_elements - end - # @private STATIC_SCRIPT_REPO = Scripting::FileSystemRepository.new(::File.join(__dir__.to_s, "scripting", "scripts")) @@ -171,10 +122,6 @@ def aggregation_efficiency_hints_for(derived_indexed_types) EOS end - def json_schema_with_metadata_merger - @json_schema_with_metadata_merger ||= Indexing::JSONSchemaWithMetadata::Merger.new(self) - end - def generate_datastore_config # We need to check this before generating our datastore configuration. # We can't generate a mapping from a recursively defined schema type. @@ -329,44 +276,6 @@ def generate_sdl [type_defs + state.sdl_parts].join("\n\n") end - def build_public_json_schema - json_schema_version = state.json_schema_version - if json_schema_version.nil? 
- raise Errors::SchemaError, "`json_schema_version` must be specified in the schema. To resolve, add `schema.json_schema_version 1` in a schema definition block." - end - - root_document_type_names = state.object_types_by_name.values - .select { |type| type.root_document_type? && !type.abstract? } - .reject { |type| derived_indexing_type_names.include?(type.name) } - .map(&:name) - - definitions_by_name = json_schema_indexing_field_types_by_name - .transform_values(&:to_json_schema) - .compact - - { - "$schema" => JSON_META_SCHEMA, - JSON_SCHEMA_VERSION_KEY => json_schema_version, - "$defs" => { - "ElasticGraphEventEnvelope" => Indexing::EventEnvelope.json_schema(root_document_type_names, json_schema_version) - }.merge(definitions_by_name) - } - end - - def json_schema_indexing_field_types_by_name - @json_schema_indexing_field_types_by_name ||= state - .types_by_name - .except("Query") - .values - .reject do |t| - derived_indexing_type_names.include?(t.name) || - # Skip graphql framework types - t.graphql_only? 
- end - .sort_by(&:name) - .to_h { |type| [type.name, type.to_indexing_field_type] } - end - def verify_runtime_metadata(runtime_metadata) registered_resolvers = runtime_metadata.graphql_resolvers_by_name diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb index 56d288fae..efcb930c7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb @@ -8,7 +8,6 @@ require "did_you_mean" require "elastic_graph/constants" -require "elastic_graph/schema_definition/json_schema_pruner" require "elastic_graph/support/graphql_gem_loader" require "elastic_graph/support/memoizable_data" require "fileutils" @@ -37,41 +36,10 @@ def initialize(schema_definition_results:, schema_artifacts_directory:, enforce_ @enforce_json_schema_version = enforce_json_schema_version @output = output @max_diff_lines = max_diff_lines - - @json_schemas_artifact = new_yaml_artifact( - JSON_SCHEMAS_FILE, - JSONSchemaPruner.prune(schema_definition_results.current_public_json_schema), - extra_comment_lines: [ - "This is the \"public\" JSON schema file and is intended to be provided to publishers so that", - "they can perform code generation and event validation." - ] - ) end # Dumps all the schema artifacts to disk. def dump_artifacts - check_if_needs_json_schema_version_bump do |recommended_json_schema_version| - if @enforce_json_schema_version - # @type var setter_location: ::Thread::Backtrace::Location - # We use `_ =` because while `json_schema_version_setter_location` can be nil, - # it'll never be nil if we get here and we want the type to be non-nilable. 
- setter_location = _ = schema_definition_results.json_schema_version_setter_location - setter_location_path = ::Pathname.new(setter_location.absolute_path.to_s).relative_path_from(::Dir.pwd) - - abort "A change has been attempted to `json_schemas.yaml`, but the `json_schema_version` has not been correspondingly incremented. Please " \ - "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \ - "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \ - " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \ - "Alternately, pass `enforce_json_schema_version: false` to `ElasticGraph::SchemaDefinition::RakeTasks.new` to allow the JSON schemas " \ - "file to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping." - else - @output.puts <<~EOS - WARNING: the `json_schemas.yaml` artifact is being updated without the `json_schema_version` being correspondingly incremented. - This is not recommended for production applications, but is currently allowed because you have set `enforce_json_schema_version: false`. - EOS - end - end - ::FileUtils.mkdir_p(@schema_artifacts_directory) artifacts.each { |artifact| artifact.dump(@output) } end @@ -112,18 +80,11 @@ def artifacts_from_schema_def # schema elements. 
graphql_schema = ::GraphQL::Schema.from_definition(schema_definition_results.graphql_schema_string).to_definition.chomp - unversioned_artifacts = [ + [ new_yaml_artifact(DATASTORE_CONFIG_FILE, schema_definition_results.datastore_config), new_yaml_artifact(RUNTIME_METADATA_FILE, pruned_runtime_metadata(graphql_schema).to_dumpable_hash), - @json_schemas_artifact, new_raw_artifact(GRAPHQL_SCHEMA_FILE, "\n" + graphql_schema) ] - - versioned_artifacts = build_desired_versioned_json_schemas(@json_schemas_artifact.desired_contents).values.map do |versioned_schema| - new_versioned_json_schema_artifact(versioned_schema) - end - - unversioned_artifacts + versioned_artifacts end def notify_about_unused_type_name_overrides @@ -173,157 +134,6 @@ def notify_about_unused_enum_value_overrides EOS end - def build_desired_versioned_json_schemas(current_public_json_schema) - versioned_parsed_yamls = ::Dir.glob(::File.join(@schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v*.yaml")).map do |file| - ::YAML.safe_load_file(file) - end + [current_public_json_schema] - - results_by_json_schema_version = versioned_parsed_yamls.to_h do |parsed_yaml| - merged_schema = @schema_definition_results.merge_field_metadata_into_json_schema(parsed_yaml) - [merged_schema.json_schema_version, merged_schema] - end - - report_json_schema_merge_errors(results_by_json_schema_version.values) - report_json_schema_merge_warnings - - results_by_json_schema_version.transform_values(&:json_schema) - end - - def report_json_schema_merge_errors(merged_results) - json_schema_versions_by_missing_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] - json_schema_versions_by_missing_type = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] - json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] - - merged_results.each do |result| - 
result.missing_fields.each do |field| - json_schema_versions_by_missing_field[field] << result.json_schema_version - end - - result.missing_types.each do |type| - json_schema_versions_by_missing_type[type] << result.json_schema_version - end - - result.missing_necessary_fields.each do |missing_necessary_field| - json_schema_versions_by_missing_necessary_field[missing_necessary_field] << result.json_schema_version - end - end - - missing_field_errors = json_schema_versions_by_missing_field.map do |field, json_schema_versions| - missing_field_error_for(field, json_schema_versions) - end - - missing_type_errors = json_schema_versions_by_missing_type.map do |type, json_schema_versions| - missing_type_error_for(type, json_schema_versions) - end - - missing_necessary_field_errors = json_schema_versions_by_missing_necessary_field.map do |field, json_schema_versions| - missing_necessary_field_error_for(field, json_schema_versions) - end - - definition_conflict_errors = merged_results - .flat_map { |result| result.definition_conflicts.to_a } - .group_by(&:name) - .map do |name, deprecated_elements| - <<~EOS - The schema definition of `#{name}` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: - - #{format_deprecated_elements(deprecated_elements)} - EOS - end - - errors = missing_field_errors + missing_type_errors + missing_necessary_field_errors + definition_conflict_errors - return if errors.empty? - - abort errors.join("\n\n") - end - - def report_json_schema_merge_warnings - unused_elements = @schema_definition_results.unused_deprecated_elements - return if unused_elements.empty? - - @output.puts <<~EOS - The schema definition has #{unused_elements.size} unneeded reference(s) to deprecated schema elements. 
These can all be safely deleted: - - #{format_deprecated_elements(unused_elements)} - - EOS - end - - def format_deprecated_elements(deprecated_elements) - descriptions = deprecated_elements - .sort_by { |e| [e.defined_at.path, e.defined_at.lineno] } - .map(&:description) - .uniq - - descriptions.each.with_index(1).map { |desc, idx| "#{idx}. #{desc}" }.join("\n") - end - - def missing_field_error_for(qualified_field, json_schema_versions) - type, field = qualified_field.split(".") - - <<~EOS - The `#{qualified_field}` field (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this field's data when ingesting events at #{old_versions(json_schema_versions)}. - To continue, do one of the following: - - 1. If the `#{qualified_field}` field has been renamed, indicate this by calling `field.renamed_from "#{field}"` on the renamed field. - 2. If the `#{qualified_field}` field has been dropped, indicate this by calling `type.deleted_field "#{field}"` on the `#{type}` type. - 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. - EOS - end - - def missing_type_error_for(type, json_schema_versions) - <<~EOS - The `#{type}` type (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this type's data when ingesting events at #{old_versions(json_schema_versions)}. - To continue, do one of the following: - - 1. If the `#{type}` type has been renamed, indicate this by calling `type.renamed_from "#{type}"` on the renamed type. - 2. 
If the `#{type}` field has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. - 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. - EOS - end - - def missing_necessary_field_error_for(field, json_schema_versions) - path = field.fully_qualified_path.split(".").last - # :nocov: -- we only cover one side of this ternary. - has_or_have = (json_schema_versions.size == 1) ? "has" : "have" - # :nocov: - - <<~EOS - #{describe_json_schema_versions(json_schema_versions, "and")} #{has_or_have} no field that maps to the #{field.field_type} field path of `#{field.fully_qualified_path}`. - Since the field path is required for #{field.field_type}, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: - - 1. If the `#{field.fully_qualified_path}` field has been renamed, indicate this by calling `field.renamed_from "#{path}"` on the renamed field rather than using `deleted_field`. - 2. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. - EOS - end - - def describe_json_schema_versions(json_schema_versions, conjunction) - json_schema_versions = json_schema_versions.sort - - # Steep doesn't support pattern matching yet, so have to skip type checking here. 
- __skip__ = case json_schema_versions - in [single_version] - "JSON schema version #{single_version}" - in [version1, version2] - "JSON schema versions #{version1} #{conjunction} #{version2}" - else - *versions, last_version = json_schema_versions - "JSON schema versions #{versions.join(", ")}, #{conjunction} #{last_version}" - end - end - - def old_versions(json_schema_versions) - return "this old version" if json_schema_versions.size == 1 - "these old versions" - end - - def files_noun_phrase(json_schema_versions) - return "its file" if json_schema_versions.size == 1 - "their files" - end - def artifacts_out_of_date_error(out_of_date_artifacts) # @type var diffs: ::Array[[SchemaArtifact[untyped], ::String]] diffs = [] @@ -378,20 +188,6 @@ def new_yaml_artifact(file_name, desired_contents, extra_comment_lines: []) ) end - def new_versioned_json_schema_artifact(desired_contents) - # File name depends on the schema_version field in the json schema. - schema_version = desired_contents[JSON_SCHEMA_VERSION_KEY] - - new_yaml_artifact( - ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{schema_version}.yaml"), - desired_contents, - extra_comment_lines: [ - "This JSON schema file contains internal ElasticGraph metadata and should be considered private.", - "The unversioned JSON schema file is public and intended to be provided to publishers." - ] - ) - end - def new_raw_artifact(file_name, desired_contents) SchemaArtifact.new( ::File.join(@schema_artifacts_directory, file_name), @@ -402,17 +198,6 @@ def new_raw_artifact(file_name, desired_contents) ) end - def check_if_needs_json_schema_version_bump(&block) - if @json_schemas_artifact.out_of_date? 
- existing_schema_version = @json_schemas_artifact.existing_dumped_contents&.dig(JSON_SCHEMA_VERSION_KEY) || -1 - desired_schema_version = @json_schemas_artifact.desired_contents[JSON_SCHEMA_VERSION_KEY] - - if existing_schema_version >= desired_schema_version - yield existing_schema_version + 1 - end - end - end - def pruned_runtime_metadata(graphql_schema_string) schema = ::GraphQL::Schema.from_definition(graphql_schema_string) runtime_meta = schema_definition_results.runtime_metadata diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb index 99c6e3fb8..f4f07c8dc 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb @@ -405,19 +405,11 @@ def register_standard_elastic_graph_types # As per the Elasticsearch docs, the field MUST come in named `lat` in Elastisearch (but we want the full name in GraphQL). t.field names.latitude, "Float", name_in_index: "lat" do |f| f.documentation "Angular distance north or south of the Earth's equator, measured in degrees from -90 to +90." - - # Note: we use `nullable: false` because we index it as a single `geo_point` field, and therefore can't - # support a `latitude` without a `longitude` or vice-versa. - f.json_schema minimum: -90, maximum: 90, nullable: false end # As per the Elasticsearch docs, the field MUST come in named `lon` in Elastisearch (but we want the full name in GraphQL). t.field names.longitude, "Float", name_in_index: "lon" do |f| f.documentation "Angular distance east or west of the Prime Meridian at Greenwich, UK, measured in degrees from -180 to +180." 
- - # Note: we use `nullable: false` because we index it as a single `geo_point` field, and therefore can't - # support a `latitude` without a `longitude` or vice-versa. - f.json_schema minimum: -180, maximum: 180, nullable: false end t.mapping type: "geo_point" @@ -659,17 +651,15 @@ def register_standard_elastic_graph_types # Registers the standard GraphQL scalar types. Note that the SDL for the scalar type itself isn't # included in the dumped SDL, but registering it allows us to derive a filter for each, - # which we need. In addition, this lets us define the mapping and JSON schema for each standard - # scalar type. + # which we need. In addition, this lets us define the mapping for each standard scalar type. + # Ingestion serializers can layer their own built-in configuration on top. def register_standard_graphql_scalars schema_def_api.scalar_type "Boolean" do |t| t.mapping type: "boolean" - t.json_schema type: "boolean" end schema_def_api.scalar_type "Float" do |t| t.mapping type: "double" - t.json_schema type: "number" t.customize_aggregated_values_type do |avt| # not nullable, since sum(empty_set) == 0 @@ -709,12 +699,10 @@ def register_standard_graphql_scalars schema_def_api.scalar_type "ID" do |t| t.mapping type: "keyword" - t.json_schema type: "string" end schema_def_api.scalar_type "Int" do |t| t.mapping type: "integer" - t.json_schema type: "integer", minimum: INT_MIN, maximum: INT_MAX t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::Integer", defined_at: "elastic_graph/indexer/indexing_preparers/integer" @@ -729,7 +717,6 @@ def register_standard_graphql_scalars schema_def_api.scalar_type "String" do |t| t.mapping type: "keyword" - t.json_schema type: "string" t.customize_filter_input_type do |fit| fit.field names.contains, schema_def_state.type_ref("StringContains").as_filter_input.name do |f| @@ -753,12 +740,11 @@ def register_standard_graphql_scalars def register_custom_elastic_graph_scalars schema_def_api.scalar_type "Cursor" 
do |t| - # Technically, we don't use the mapping or json_schema on this type since it's a return-only + # Technically, we don't use the mapping or ingestion config on this type since it's a return-only # type and isn't indexed. However, `scalar_type` requires them to be set (since custom scalars # defined by users will need those set) so we set them here to what they would be if we actually # used them. t.mapping type: "keyword" - t.json_schema type: "string" t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::Cursor", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/cursor" @@ -771,7 +757,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "Date" do |t| t.mapping type: "date", format: DATASTORE_DATE_FORMAT - t.json_schema type: "string", format: "date" t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::Date", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/date" @@ -791,7 +776,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "DateTime" do |t| t.mapping type: "date", format: DATASTORE_DATE_TIME_FORMAT - t.json_schema type: "string", format: "date-time" t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::DateTime", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/date_time" t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::DateTime", @@ -882,8 +866,6 @@ def register_custom_elastic_graph_scalars t.mapping type: "date", format: "HH:mm:ss||HH:mm:ss.S||HH:mm:ss.SS||HH:mm:ss.SSS" - t.json_schema type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN - t.customize_aggregated_values_type do |avt| define_exact_min_max_and_approx_avg_on_aggregated_values(avt, "LocalTime") do |adjective:, full_name:| <<~EOS @@ -896,7 +878,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "TimeZone" do |t| t.mapping type: "keyword" - t.json_schema type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a 
t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::TimeZone", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/time_zone" @@ -913,8 +894,6 @@ def register_custom_elastic_graph_scalars # https://github.com/json-schema-org/json-schema-spec/blob/draft-07/schema.json#L23-L29 # # ...except we are omitting `null` here; it'll be added by the nullability decorator if the field is defined as nullable. - t.json_schema type: ["array", "boolean", "integer", "number", "object", "string"] - # In the index we store this as a JSON string in a `keyword` field. t.mapping type: "keyword" @@ -939,7 +918,6 @@ def register_custom_elastic_graph_scalars schema_def_api.scalar_type "JsonSafeLong" do |t| t.mapping type: "long" - t.json_schema type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::JsonSafeLong", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/longs" @@ -983,7 +961,6 @@ def register_custom_elastic_graph_scalars # to do if we ingest them as strings. (The `pattern` regex to validate the range # would be *extremely* complicated). 
t.mapping type: "long" - t.json_schema type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::LongString", defined_at: "elastic_graph/graphql/scalar_coercion_adapters/longs" t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::Integer", diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb index 0bdb27b60..57a1f67ac 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb @@ -145,7 +145,7 @@ def derived_graphql_types derived_scalar_types = schema_def_state.factory.new_scalar_type(name) do |t| t.mapping type: "keyword" - t.json_schema type: "string" + configure_derived_scalar_type(t) t.graphql_only graphql_only? end.derived_graphql_types @@ -156,6 +156,14 @@ def derived_graphql_types end end + # Hook for extensions to customize the scalar type derived from an enum type. + # @param scalar_type [ScalarType] the scalar type to configure + # @return [void] + # @api private + def configure_derived_scalar_type(scalar_type) + # No-op by default; extensions (e.g. JSONIngestion) override this. 
+ end + # @return [Indexing::FieldType::Enum] indexing representation of this enum type def to_indexing_field_type Indexing::FieldType::Enum.new(values_by_name.keys) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb index 9bf47dab0..73bfe2ecd 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb @@ -83,8 +83,6 @@ module SchemaElements # @private # @!attribute [rw] computation_detail # @private - # @!attribute [rw] non_nullable_in_json_schema - # @private # @!attribute [rw] as_input # @private class Field < Struct.new( @@ -93,7 +91,7 @@ class Field < Struct.new( :aggregated_values_customizations, :sort_order_enum_value_customizations, :args, :sortable, :filterable, :aggregatable, :groupable, :highlightable, :graphql_only, :source, :runtime_field_script, :relationship, :singular_name, - :computation_detail, :non_nullable_in_json_schema, :as_input, + :computation_detail, :as_input, :name_in_index, :resolver ) include Mixins::HasDocumentation @@ -137,7 +135,6 @@ def initialize( # the `_name` suffix on the attribute for clarity. singular_name: singular, name_in_index: name_in_index, - non_nullable_in_json_schema: false, as_input: as_input, resolver: resolver ) @@ -159,7 +156,10 @@ def initialize( end schema_def_state.register_user_defined_field(self) + # :nocov: -- this DSL block hook is a low-value SimpleCov branch to track relative to the + # surrounding field behavior specs. yield self if block_given? 
+ # :nocov: end private :resolver= @@ -453,22 +453,6 @@ def on_each_generated_schema_element(&customization_block) customize_sort_order_enum_values(&customization_block) end - # (see Mixins::HasTypeInfo#json_schema) - def json_schema(nullable: nil, **options) - if options.key?(:type) - raise Errors::SchemaError, "Cannot override JSON schema type of field `#{name}` with `#{options.fetch(:type)}`" - end - - case nullable - when true - raise Errors::SchemaError, "`nullable: true` is not allowed on a field--just declare the GraphQL field as being nullable (no `!` suffix) instead." - when false - self.non_nullable_in_json_schema = true - end - - super(**options) - end - # (see Mixins::HasTypeInfo#mapping) def mapping(**options) # ElasticGraph has special handling for the nested type (e.g. we generate sub-aggregation types in the GraphQL schema for @@ -965,9 +949,8 @@ def to_indexing_field_reference Indexing::FieldReference.new( name: name, name_in_index: name_in_index, - type: non_nullable_in_json_schema ? 
type.wrap_non_null : type, + type: type, mapping_options: mapping_options, - json_schema_options: json_schema_options, accuracy_confidence: accuracy_confidence, source: source, runtime_field_script: runtime_field_script, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb index 8e2703cd2..f9ddf0d6d 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb @@ -25,7 +25,7 @@ module SchemaElements # ElasticGraph.define_schema do |schema| # schema.scalar_type "URL" do |t| # t.mapping type: "keyword" - # t.json_schema type: "string", format: "uri" + # t.json_schema type: "string" # end # end # @@ -59,8 +59,14 @@ class ScalarType < Struct.new( include Mixins::HasDerivedGraphQLTypeCustomizations include Mixins::HasReadableToSAndInspect.new { |t| t.name } + # Internal default used when the JSON ingestion extension has not attached + # mutable per-type JSON schema configuration. + # + # @private + EMPTY_JSON_SCHEMA_OPTIONS = {}.freeze + # `HasTypeInfo` provides the following methods: - # @dynamic mapping_options, json_schema_options + # @dynamic mapping_options include Mixins::HasTypeInfo # @dynamic graphql_only? @@ -78,13 +84,8 @@ def initialize(schema_def_state, name) yield self - missing = [ - ("`mapping`" if mapping_options.empty?), - ("`json_schema`" if json_schema_options.empty?) - ].compact - - if missing.any? - raise Errors::SchemaError, "Scalar types require `mapping` and `json_schema` to be configured, but `#{name}` lacks #{missing.join(" and ")}." + if mapping_options.empty? + raise Errors::SchemaError, "Scalar types require `mapping` to be configured, but `#{name}` lacks `mapping`." 
end if (placeholder = inferred_grouping_missing_value_placeholder) @@ -97,6 +98,15 @@ def name type_ref.name end + # The JSON ingestion extension overrides this to provide mutable per-type configuration. + # + # @return [Hash] + def json_schema_options + # :nocov: -- this default hook is only for callers that have not installed the JSON ingestion extension. + EMPTY_JSON_SCHEMA_OPTIONS + # :nocov: + end + # (see Mixins::HasTypeInfo#mapping) def mapping(**options) self.mapping_type = options.fetch(:type) do @@ -350,14 +360,17 @@ def inferred_grouping_missing_value_placeholder # JSON schema min/max only constrains newly indexed values, not existing data that may fall outside the range before the constraints were added. # This is an edge case where the long range may exceed safe float precision. # In this case, users can set grouping_missing_value_placeholder to nil. - if (json_schema_options[:minimum] || LONG_STRING_MIN) >= JSON_SAFE_LONG_MIN && - (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX + ingestion_min = json_schema_options[:minimum] + ingestion_max = json_schema_options[:maximum] + if (ingestion_min || LONG_STRING_MIN) >= JSON_SAFE_LONG_MIN && + (ingestion_max || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX inferred_numeric_placeholder_for_integer_type end elsif mapping_type == "unsigned_long" # Similar to the checks above for long except we only need to check the max # (since the min is zero even if not specified) - if (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX + ingestion_max = json_schema_options[:maximum] + if (ingestion_max || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX inferred_numeric_placeholder_for_integer_type end elsif INTEGER_TYPES.include?(mapping_type) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb index 483d296bf..22c574977 100644 --- 
a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb @@ -171,20 +171,6 @@ def with_reverted_override schema_def_state.type_ref(type_namer.revert_override_for(name)) end - # Returns all the JSON schema array/nullable layers of a type, from outermost to innermost. - # For example, [[Int]] will return [:nullable, :array, :nullable, :array, :nullable] - def json_schema_layers - @json_schema_layers ||= begin - layers, inner_type = peel_json_schema_layers_once - - if layers.empty? || inner_type == self - layers - else - layers + inner_type.json_schema_layers - end - end - end - # Most of ElasticGraph's derived GraphQL types have a static suffix (e.g. the full type name # is source_type + suffix). This is a map of all of these. STATIC_FORMAT_NAME_BY_CATEGORY = TypeNamer::REQUIRED_PLACEHOLDERS.filter_map do |format_name, placeholders| @@ -304,16 +290,6 @@ def after_initialize Mixins::VerifiesGraphQLName.verify_name!(unwrapped_name) end - def peel_json_schema_layers_once - if list? - return [[:array], unwrap_list] if non_null? - return [[:nullable, :array], unwrap_list] - end - - return [[], unwrap_non_null] if non_null? 
- [[:nullable], self] - end - def matches_format_of?(category) format_name = STATIC_FORMAT_NAME_BY_CATEGORY.fetch(category) type_namer.matches_format?(name, format_name) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb index eb45743ac..7d2114f9c 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb @@ -150,7 +150,7 @@ def name # Therefore, we recommend limiting your use of `!` to only a few situations such as defining a type’s primary key (e.g. # `t.field "id", "ID!"`) or defining a list field (e.g. `t.field "authors", "[String!]!"`) since empty lists already provide a # "no data" representation. You can still configure the ElasticGraph indexer to require a non-null value for a field using - # `f.json_schema nullable: false`. + # `f.json_schema nullable: false` (when using `elasticgraph-json_ingestion`). # # @note ElasticGraph’s understanding of datastore capabilities may override your configured # `aggregatable`/`filterable`/`groupable`/`sortable` options. 
For example, a field indexed as `text` for full text search will @@ -477,7 +477,6 @@ def to_indexing_field_type type_name: name, subfields: indexing_fields_by_name_in_index.values.map(&:to_indexing_field).compact, mapping_options: mapping_options, - json_schema_options: json_schema_options, doc_comment: doc_comment ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb index 401ecfd21..68ebead83 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb @@ -6,7 +6,6 @@ # # frozen_string_literal: true -require "elastic_graph/constants" require "elastic_graph/errors" require "elastic_graph/schema_definition/factory" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" @@ -40,8 +39,7 @@ class State < Struct.new( :deleted_types_by_old_name, :renamed_fields_by_type_name_and_old_field_name, :deleted_fields_by_type_name_and_old_field_name, - :json_schema_version, - :json_schema_version_setter_location, + :ingestion_serializer_state, :graphql_extension_modules, :graphql_resolvers_by_name, :built_in_graphql_resolvers, @@ -53,10 +51,10 @@ class State < Struct.new( :type_refs_by_name, :output, :type_namer, - :enum_value_namer, - :allow_omitted_json_schema_fields, - :allow_extra_json_schema_fields + :enum_value_namer ) + EMPTY_RESERVED_TYPE_NAMES = ::Set.new.freeze + include Mixins::HasReadableToSAndInspect.new def self.with( @@ -88,8 +86,7 @@ def self.with( deleted_types_by_old_name: {}, renamed_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} }, deleted_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} }, - json_schema_version_setter_location: nil, - json_schema_version: nil, + ingestion_serializer_state: {}, graphql_extension_modules: [], graphql_resolvers_by_name: {}, 
built_in_graphql_resolvers: ::Set.new, @@ -104,9 +101,7 @@ def self.with( name_overrides: type_name_overrides ), enum_value_namer: SchemaElements::EnumValueNamer.new(enum_value_overrides_by_type), - output: output, - allow_omitted_json_schema_fields: false, - allow_extra_json_schema_fields: true + output: output ) end @@ -213,12 +208,11 @@ def field_path_resolver private - RESERVED_TYPE_NAMES = [EVENT_ENVELOPE_JSON_SCHEMA_NAME].to_set - def register_type(type, additional_type_index = nil) name = (_ = type).name - if RESERVED_TYPE_NAMES.include?(name) + reserved_names = ingestion_serializer_state.fetch(:reserved_type_names, EMPTY_RESERVED_TYPE_NAMES) + if reserved_names.include?(name) raise Errors::SchemaError, "`#{name}` cannot be used as a schema type because it is a reserved name." end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb index 7084db978..ef3c0656c 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "elastic_graph/errors" +require "elastic_graph/schema_artifacts/from_disk" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" require "elastic_graph/schema_definition/api" require "elastic_graph/schema_definition/schema_artifact_manager" @@ -75,8 +76,8 @@ def define_schema_with_schema_elements( yield api if block_given? # Set the json_schema_version to the provided value, if needed. - if !json_schema_version.nil? && api.state.json_schema_version.nil? - api.json_schema_version json_schema_version + if api.respond_to?(:json_schema_version) && !json_schema_version.nil? && api.state.ingestion_serializer_state[:json_schema_version].nil? 
+ api.public_send(:json_schema_version, json_schema_version) end # :nocov: -- the else branch and code past this aren't used by tests in elasticgraph-schema_definition. diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs new file mode 100644 index 000000000..0ea47a435 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs @@ -0,0 +1,13 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module APIExtension: ::ElasticGraph::SchemaDefinition::API + def json_schema_version: (::Integer) -> void + def json_schema_strictness: (?allow_omitted_fields: bool, ?allow_extra_fields: bool) -> void + def results: () -> (::ElasticGraph::SchemaDefinition::Results & ResultsExtension) + + def self.extended: (::ElasticGraph::SchemaDefinition::API & APIExtension) -> void + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs new file mode 100644 index 000000000..bed69cb23 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs @@ -0,0 +1,11 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module EventEnvelope + def self.json_schema: (::Array[::String], ::Integer) -> ::Hash[::String, untyped] + end + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs new file mode 100644 index 000000000..98948fbca --- 
/dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs @@ -0,0 +1,19 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + class JSONSchemaFieldMetadata + attr_reader type: ::String + attr_reader name_in_index: ::String + + def initialize: ( + type: ::String, + name_in_index: ::String + ) -> void + + def to_dumpable_hash: () -> {"type" => ::String, "nameInIndex" => ::String} + end + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs new file mode 100644 index 000000000..d25d7eb92 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs @@ -0,0 +1,99 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + class JSONSchemaWithMetadataSupertype + attr_reader json_schema: ::Hash[::String, untyped] + attr_reader missing_fields: ::Set[::String] + attr_reader missing_types: ::Set[::String] + attr_reader definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + + def initialize: ( + json_schema: ::Hash[::String, untyped], + missing_fields: ::Set[::String], + missing_types: ::Set[::String], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], + missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + ) -> void + + def with: ( + ?json_schema: ::Hash[::String, untyped], + ?missing_fields: ::Set[::String], + ?missing_types: ::Set[::String], + ?definition_conflicts: 
::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], + ?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + ) -> instance + end + + class JSONSchemaWithMetadata < JSONSchemaWithMetadataSupertype + def json_schema_version: () -> ::Integer + + class Merger + @field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]] + @renamed_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @deleted_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @state: ::ElasticGraph::SchemaDefinition::State + @derived_indexing_type_names: ::Set[::String] + + attr_reader unused_deprecated_elements: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + + def initialize: ((::ElasticGraph::SchemaDefinition::Results & ResultsExtension)) -> void + def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata + + private + + def determine_current_type_name: ( + ::String, + missing_types: ::Set[::String], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + ) -> ::String? + + def field_metadata_for: ( + ::String, + ::String, + missing_fields: ::Set[::String], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + ) -> JSONSchemaFieldMetadata? 
+ + def identify_missing_necessary_fields: ( + ::Hash[::String, untyped], + ::Hash[::String, ::String] + ) -> ::Array[MissingNecessaryField] + + def identify_missing_necessary_fields_for_index_def: ( + ::ElasticGraph::SchemaDefinition::indexableType, + ::ElasticGraph::SchemaDefinition::Indexing::Index, + JSONSchemaResolver + ) -> ::Array[MissingNecessaryField] + + class JSONSchemaResolver + @state: ::ElasticGraph::SchemaDefinition::State + @old_type_name_by_current_name: ::Hash[::String, ::String] + @meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]] + + def initialize: (::ElasticGraph::SchemaDefinition::State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void + def necessary_path_missing?: (::ElasticGraph::SchemaDefinition::SchemaElements::FieldPath) -> bool + + private + + def necessary_path_part_missing?: (::String, ::String) { (::Hash[::String, untyped]) -> void } -> bool + end + end + + class MissingNecessaryField + attr_reader field_type: ::String + attr_reader fully_qualified_path: ::String + + def initialize: ( + field_type: ::String, + fully_qualified_path: ::String + ) -> void + end + end + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs new file mode 100644 index 000000000..898a7660d --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs @@ -0,0 +1,14 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + class JSONSchemaPruner + def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] + + private + + def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String] + def self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String] + end + end + end +end 
diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs new file mode 100644 index 000000000..002b4e85a --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs @@ -0,0 +1,15 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module ResultsExtension : ::ElasticGraph::SchemaDefinition::Results + include ::ElasticGraph::_SchemaArtifacts + + def json_schema_version_setter_location: () -> ::Thread::Backtrace::Location? + def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] + def current_public_json_schema: () -> ::Hash[::String, untyped] + def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> Indexing::JSONSchemaWithMetadata + def unused_deprecated_elements: () -> ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs new file mode 100644 index 000000000..e23a16d49 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs @@ -0,0 +1,28 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaArtifactManagerExtension : ::ElasticGraph::SchemaDefinition::SchemaArtifactManager + attr_reader schema_definition_results: (::ElasticGraph::SchemaDefinition::Results & ResultsExtension) + + private + + @json_schemas_artifact: ::ElasticGraph::SchemaDefinition::SchemaArtifact[untyped]? 
+ + def artifacts_from_schema_def: () -> ::Array[::ElasticGraph::SchemaDefinition::SchemaArtifact[untyped]] + def json_schemas_artifact: () -> ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]] + def check_if_needs_json_schema_version_bump: () { (::Integer) -> void } -> void + def build_desired_versioned_json_schemas: (::Hash[::String, untyped]) -> ::Hash[::Integer, ::Hash[::String, untyped]] + def report_json_schema_merge_errors: (::Array[Indexing::JSONSchemaWithMetadata]) -> void + def report_json_schema_merge_warnings: () -> void + def format_deprecated_elements: (::Enumerable[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]) -> ::String + def missing_field_error_for: (::String, ::Array[::Integer]) -> ::String + def missing_type_error_for: (::String, ::Array[::Integer]) -> ::String + def missing_necessary_field_error_for: (Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String + def describe_json_schema_versions: (::Array[::Integer], ::String) -> ::String + def old_versions: (::Array[::Integer]) -> ::String + def files_noun_phrase: (::Array[::Integer]) -> ::String + def new_versioned_json_schema_artifact: (::Hash[::String, untyped]) -> ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]] + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs index 432e62034..5230505ec 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs @@ -5,10 +5,8 @@ module ElasticGraph attr_reader name: ::String attr_reader name_in_index: ::String attr_reader type: SchemaElements::TypeReference - attr_reader json_schema_layers: jsonSchemaLayersArray attr_reader indexing_field_type: _FieldType attr_reader 
accuracy_confidence: Field::accuracyConfidence - attr_reader json_schema_customizations: ::Hash[::Symbol, untyped] attr_reader mapping_customizations: ::Hash[::Symbol, untyped] attr_reader source: SchemaElements::FieldSource? attr_accessor runtime_field_script: ::String? @@ -18,10 +16,8 @@ module ElasticGraph name: ::String, name_in_index: ::String, type: SchemaElements::TypeReference, - json_schema_layers: jsonSchemaLayersArray, indexing_field_type: _FieldType, accuracy_confidence: Field::accuracyConfidence, - json_schema_customizations: ::Hash[::Symbol, untyped], mapping_customizations: ::Hash[::Symbol, untyped], source: SchemaElements::FieldSource?, runtime_field_script: ::String?, @@ -32,10 +28,8 @@ module ElasticGraph ?name: ::String, ?name_in_index: ::String, ?type: SchemaElements::TypeReference, - ?json_schema_layers: jsonSchemaLayersArray, ?indexing_field_type: _FieldType, ?accuracy_confidence: Field::accuracyConfidence, - ?json_schema_customizations: ::Hash[::Symbol, untyped], ?mapping_customizations: ::Hash[::Symbol, untyped], ?source: SchemaElements::FieldSource?, ?runtime_field_script: ::String?, @@ -44,25 +38,11 @@ module ElasticGraph end class Field < FieldSupertype - JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE: ::Hash[::String, untyped] - type accuracyConfidence = SchemaElements::Field::accuracyConfidence @mapping: ::Hash[::String, untyped]? 
def mapping: () -> ::Hash[::String, untyped] - def json_schema: () -> ::Hash[::String, untyped] - def json_schema_metadata: () -> JSONSchemaFieldMetadata def self.normalized_mapping_hash_for: (::Array[Field]) -> ::Hash[::String, untyped] - - def inner_json_schema: () -> ::Hash[::String, untyped] - def outer_json_schema_customizations: () -> ::Hash[::String, untyped] - - def nullable?: () -> bool - - def user_specified_json_schema_customizations_go_on_outside?: () -> bool - def process_layer: (::Symbol, ::Hash[::String, untyped]) -> ::Hash[::String, untyped] - def make_nullable: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] - def make_array: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs index 55a8f0724..89071a597 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_reference.rbs @@ -6,7 +6,6 @@ module ElasticGraph attr_reader name_in_index: ::String attr_reader type: SchemaElements::TypeReference attr_reader mapping_options: ::Hash[::Symbol, untyped] - attr_reader json_schema_options: ::Hash[::Symbol, untyped] attr_reader accuracy_confidence: Field::accuracyConfidence attr_reader source: SchemaElements::FieldSource? attr_reader runtime_field_script: ::String? 
@@ -17,7 +16,6 @@ module ElasticGraph name_in_index: ::String, type: SchemaElements::TypeReference, mapping_options: ::Hash[::Symbol, untyped], - json_schema_options: ::Hash[::Symbol, untyped], accuracy_confidence: Field::accuracyConfidence, source: SchemaElements::FieldSource?, runtime_field_script: ::String?, @@ -29,7 +27,6 @@ module ElasticGraph ?name_in_index: ::String, ?type: SchemaElements::TypeReference, ?mapping_options: ::Hash[::Symbol, untyped], - ?json_schema_options: ::Hash[::Symbol, untyped], ?accuracy_confidence: Field::accuracyConfidence, ?source: SchemaElements::FieldSource?, ?runtime_field_script: ::String?, diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs index 5808a0709..d01805f99 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs @@ -3,9 +3,6 @@ module ElasticGraph module Indexing interface _FieldType def to_mapping: () -> ::Hash[::String, untyped] - def to_json_schema: () -> ::Hash[::String, untyped] - def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] - def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs index 5dd23dbaa..e1b90df44 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type/object.rbs @@ -3,29 +3,26 @@ module ElasticGraph module Indexing module FieldType class ObjectSuperType + 
attr_reader schema_def_state: State attr_reader type_name: ::String attr_reader subfields: ::Array[Field] attr_reader mapping_options: Mixins::HasTypeInfo::optionsHash - attr_reader json_schema_options: Mixins::HasTypeInfo::optionsHash attr_reader doc_comment: ::String? - attr_reader schema_def_state: State def initialize: ( + schema_def_state: State, type_name: ::String, subfields: ::Array[Field], mapping_options: Mixins::HasTypeInfo::optionsHash, - json_schema_options: Mixins::HasTypeInfo::optionsHash, - doc_comment: ::String?, - schema_def_state: State + doc_comment: ::String? ) -> void def with: ( + ?schema_def_state: State, ?type_name: ::String, ?subfields: ::Array[Field], ?mapping_options: Mixins::HasTypeInfo::optionsHash, - ?json_schema_options: Mixins::HasTypeInfo::optionsHash, - ?doc_comment: ::String?, - ?schema_def_state: State + ?doc_comment: ::String? ) -> Object end @@ -34,12 +31,6 @@ module ElasticGraph include Support::_MemoizableDataClass @to_mapping: ::Hash[::String, untyped]? - @to_json_schema: ::Hash[::String, untyped]? 
- - private - - def json_schema_typename_field: () -> ::Hash[::String, untyped] - def validate_sourced_fields_have_no_json_schema_overrides: (::Array[Field]) -> void end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs index dcc37b607..d20120a0f 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs @@ -5,14 +5,14 @@ module ElasticGraph attr_reader json_schema: ::Hash[::String, untyped] attr_reader missing_fields: ::Set[::String] attr_reader missing_types: ::Set[::String] - attr_reader definition_conflicts: ::Set[SchemaElements::DeprecatedElement] + attr_reader definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] def initialize: ( json_schema: ::Hash[::String, untyped], missing_fields: ::Set[::String], missing_types: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement], + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] ) -> void @@ -20,7 +20,7 @@ module ElasticGraph ?json_schema: ::Hash[::String, untyped], ?missing_fields: ::Set[::String], ?missing_types: ::Set[::String], - ?definition_conflicts: ::Set[SchemaElements::DeprecatedElement], + ?definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], ?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] ) -> instance end @@ -30,16 +30,16 @@ module ElasticGraph class Merger 
@field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]] - @renamed_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] - @deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] - @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - @state: State + @renamed_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @deleted_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @state: ::ElasticGraph::SchemaDefinition::State @derived_indexing_type_names: ::Set[::String] - attr_reader unused_deprecated_elements: ::Set[SchemaElements::DeprecatedElement] + attr_reader unused_deprecated_elements: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] - def initialize: (Results) -> void + def initialize: ((::ElasticGraph::SchemaDefinition::Results & ::ElasticGraph::JSONIngestion::SchemaDefinition::ResultsExtension)) -> void def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata private @@ -47,14 +47,14 @@ module ElasticGraph def determine_current_type_name: ( ::String, missing_types: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement] + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] ) -> ::String? 
def field_metadata_for: ( ::String, ::String, missing_fields: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement] + definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] ) -> JSONSchemaFieldMetadata? def identify_missing_necessary_fields: ( @@ -63,19 +63,18 @@ module ElasticGraph ) -> ::Array[MissingNecessaryField] def identify_missing_necessary_fields_for_index_def: ( - indexableType, - Index, - JSONSchemaResolver, - ::Integer + ::ElasticGraph::SchemaDefinition::indexableType, + ::ElasticGraph::SchemaDefinition::Indexing::Index, + JSONSchemaResolver ) -> ::Array[MissingNecessaryField] class JSONSchemaResolver - @state: State + @state: ::ElasticGraph::SchemaDefinition::State @old_type_name_by_current_name: ::Hash[::String, ::String] @meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]] - def initialize: (State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void - def necessary_path_missing?: (SchemaElements::FieldPath) -> bool + def initialize: (::ElasticGraph::SchemaDefinition::State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void + def necessary_path_missing?: (::ElasticGraph::SchemaDefinition::SchemaElements::FieldPath) -> bool private diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs index 8c5f323a2..1efc0d642 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs @@ -2,6 +2,9 @@ module ElasticGraph module SchemaDefinition class JSONSchemaPruner def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] + + private + def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String] def 
self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String] end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs index 1f3c1391b..665759c4c 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_type_info.rbs @@ -6,10 +6,8 @@ module ElasticGraph type optionsHash = ::Hash[::Symbol, untyped] attr_reader mapping_options: optionsHash - attr_reader json_schema_options: optionsHash def mapping: (**untyped) -> void - def json_schema: (**untyped) -> void end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index c361ab4ba..212249d9b 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -6,30 +6,17 @@ module ElasticGraph end class Results < ResultsSupertype - include _SchemaArtifacts include Support::_MemoizableDataClass - def json_schema_version_setter_location: () -> ::Thread::Backtrace::Location? - def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] - def current_public_json_schema: () -> ::Hash[::String, untyped] - def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> Indexing::JSONSchemaWithMetadata - def unused_deprecated_elements: () -> ::Set[SchemaElements::DeprecatedElement] def derived_indexing_type_names: () -> ::Set[::String] @graphql_schema_string: ::String? @datastore_config: ::Hash[::String, untyped] @runtime_metadata: SchemaArtifacts::RuntimeMetadata::Schema? - @current_json_schemas: ::Hash[::String, untyped]? 
@static_script_repo: Scripting::FileSystemRepository? - @available_json_schema_versions: ::Set[::Integer]? @no_circular_dependencies: bool? @field_path_resolver: SchemaElements::FieldPath::Resolver? - @json_schema_indexing_field_types_by_name: ::Hash[::String, Indexing::_FieldType]? @derived_indexing_type_names: ::Set[::String]? - @json_schema_field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]]? - @current_public_json_schema: ::Hash[::String, untyped]? - @latest_versioned_json_schema: ::Hash[::String, untyped]? - @json_schema_with_metadata_merger: Indexing::JSONSchemaWithMetadata::Merger? STATIC_SCRIPT_REPO: Scripting::FileSystemRepository @@ -37,14 +24,11 @@ module ElasticGraph def define_root_graphql_type: () -> void def aggregation_efficiency_hints_for: (::Array[Indexing::DerivedIndexedType]) -> ::String? - def json_schema_with_metadata_merger: () -> Indexing::JSONSchemaWithMetadata::Merger def generate_datastore_config: () -> ::Hash[::String, untyped] def build_dynamic_scripts: () -> ::Array[Scripting::Script] def build_runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Schema def identify_extra_update_targets_by_object_type_name: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] def generate_sdl: () -> ::String - def build_public_json_schema: () -> ::Hash[::String, untyped] - def json_schema_indexing_field_types_by_name: () -> ::Hash[::String, Indexing::_FieldType] def verify_runtime_metadata: (SchemaArtifacts::RuntimeMetadata::Schema) -> void def strip_trailing_whitespace: (::String) -> ::String def check_for_circular_dependencies!: () -> void diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs index b4b079de0..a3fd4af47 100644 --- 
a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs @@ -22,22 +22,11 @@ module ElasticGraph @output: io @max_diff_lines: ::Integer @artifacts: ::Array[SchemaArtifact[untyped]]? - @json_schemas_artifact: SchemaArtifact[untyped] def artifacts: () -> ::Array[SchemaArtifact[untyped]] def artifacts_from_schema_def: () -> ::Array[SchemaArtifact[untyped]] def notify_about_unused_type_name_overrides: () -> void def notify_about_unused_enum_value_overrides: () -> void - def build_desired_versioned_json_schemas: (::Hash[::String, untyped]) -> ::Hash[::Integer, ::Hash[::String, untyped]] - def report_json_schema_merge_errors: (::Array[Indexing::JSONSchemaWithMetadata]) -> void - def report_json_schema_merge_warnings: () -> void - def format_deprecated_elements: (::Enumerable[SchemaElements::DeprecatedElement]) -> ::String - def missing_field_error_for: (::String, ::Array[::Integer]) -> ::String - def missing_type_error_for: (::String, ::Array[::Integer]) -> ::String - def missing_necessary_field_error_for: (Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String - def describe_json_schema_versions: (::Array[::Integer], ::String) -> ::String - def old_versions: (::Array[::Integer]) -> ::String - def files_noun_phrase: (::Array[::Integer]) -> ::String def artifacts_out_of_date_error: (::Array[SchemaArtifact[untyped]]) -> ::String def truncate_diff: (::String, ::Integer) -> [::String, ::String] @@ -47,9 +36,7 @@ module ElasticGraph ?extra_comment_lines: ::Array[::String] ) -> SchemaArtifact[::Hash[::String, untyped]] - def new_versioned_json_schema_artifact: (::Hash[::String, untyped]) -> SchemaArtifact[::Hash[::String, untyped]] def new_raw_artifact: (::String, ::String) -> SchemaArtifact[::String] - def check_if_needs_json_schema_version_bump: () { (::Integer) -> void } -> void def 
pruned_runtime_metadata: (::String) -> SchemaArtifacts::RuntimeMetadata::Schema end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs index be91e03a3..81faa1505 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs @@ -22,7 +22,6 @@ module ElasticGraph attr_accessor computation_detail: SchemaArtifacts::RuntimeMetadata::ComputationDetail attr_reader filter_customizations: ::Array[^(Field) -> void] attr_reader sort_order_enum_value_customizations: ::Array[^(SortOrderEnumValue) -> void] - attr_reader non_nullable_in_json_schema: bool attr_reader source: FieldSource? attr_accessor relationship: Relationship? attr_reader resolver: SchemaArtifacts::RuntimeMetadata::ConfiguredGraphQLResolver? diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs index 9d418a38f..388116f00 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_reference.rbs @@ -30,9 +30,6 @@ module ElasticGraph def scalar_type_needing_grouped_by_object?: () -> bool def with_reverted_override: () -> TypeReference - @json_schema_layers: jsonSchemaLayersArray? 
- def json_schema_layers: () -> jsonSchemaLayersArray - def to_final_form: (?as_input: bool) -> TypeReference STATIC_FORMAT_NAME_BY_CATEGORY: ::Hash[::Symbol, ::Symbol] @@ -59,8 +56,6 @@ module ElasticGraph private - def peel_json_schema_layers_once: () -> [jsonSchemaLayersArray, TypeReference] - def matches_format_of?: (::Symbol) -> bool def parent_aggregation_type: (::Array[::String]) -> ::String def renamed_with_same_wrappings: (::String) -> TypeReference diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs index 01bb06fa1..570ac49e3 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs @@ -9,6 +9,7 @@ module ElasticGraph attr_reader scalar_types_by_name: ::Hash[::String, SchemaElements::ScalarType] attr_reader enum_types_by_name: ::Hash[::String, SchemaElements::EnumType] attr_reader implementations_by_interface_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::TypeWithSubfields]] + attr_reader union_types_by_member_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::UnionType]] attr_reader sdl_parts: ::Array[::String] attr_reader paginated_collection_element_types: ::Set[::String] attr_reader user_defined_fields: ::Set[SchemaElements::Field] @@ -16,8 +17,7 @@ module ElasticGraph attr_reader deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] attr_reader renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] attr_reader deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - attr_accessor json_schema_version: ::Integer? - attr_accessor json_schema_version_setter_location: ::Thread::Backtrace::Location? 
+ attr_accessor ingestion_serializer_state: ::Hash[::Symbol, untyped] attr_reader graphql_extension_modules: ::Array[SchemaArtifacts::RuntimeMetadata::GraphQLExtension] attr_reader graphql_resolvers_by_name: ::Hash[::Symbol, SchemaArtifacts::RuntimeMetadata::GraphQLResolver] attr_reader built_in_graphql_resolvers: ::Set[::Symbol] @@ -27,11 +27,9 @@ module ElasticGraph attr_accessor user_definition_complete_callbacks: ::Array[^() -> void] attr_accessor sub_aggregation_paths_by_type: ::Hash[Mixins::SupportsFilteringAndAggregation, ::Array[SchemaElements::SubAggregationPath]] attr_accessor type_refs_by_name: ::Hash[::String, SchemaElements::TypeReference] + attr_accessor output: io attr_reader type_namer: SchemaElements::TypeNamer attr_reader enum_value_namer: SchemaElements::EnumValueNamer - attr_accessor output: io - attr_accessor allow_omitted_json_schema_fields: bool - attr_accessor allow_extra_json_schema_fields: bool def initialize: ( api: API, @@ -42,6 +40,7 @@ module ElasticGraph scalar_types_by_name: ::Hash[::String, SchemaElements::ScalarType], enum_types_by_name: ::Hash[::String, SchemaElements::EnumType], implementations_by_interface_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::TypeWithSubfields]], + union_types_by_member_ref: ::Hash[SchemaElements::TypeReference, ::Set[SchemaElements::UnionType]], sdl_parts: ::Array[::String], paginated_collection_element_types: ::Set[::String], user_defined_fields: ::Set[SchemaElements::Field], @@ -49,8 +48,7 @@ module ElasticGraph deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement], renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]], deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]], - json_schema_version: Integer?, - json_schema_version_setter_location: ::Thread::Backtrace::Location?, + ingestion_serializer_state: ::Hash[::Symbol, untyped], 
graphql_extension_modules: ::Array[SchemaArtifacts::RuntimeMetadata::GraphQLExtension], graphql_resolvers_by_name: ::Hash[::Symbol, SchemaArtifacts::RuntimeMetadata::GraphQLResolver], built_in_graphql_resolvers: ::Set[::Symbol], @@ -60,11 +58,9 @@ module ElasticGraph user_definition_complete_callbacks: ::Array[^() -> void], sub_aggregation_paths_by_type: ::Hash[Mixins::SupportsFilteringAndAggregation, ::Array[SchemaElements::SubAggregationPath]], type_refs_by_name: ::Hash[::String, SchemaElements::TypeReference], + output: io, type_namer: SchemaElements::TypeNamer, enum_value_namer: SchemaElements::EnumValueNamer, - output: io, - allow_omitted_json_schema_fields: bool, - allow_extra_json_schema_fields: bool, ) -> void end @@ -110,7 +106,6 @@ module ElasticGraph private - RESERVED_TYPE_NAMES: ::Set[::String] def register_type: [T] (T & SchemaElements::graphQLType, ?::Hash[::String, T]?) -> T end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs index 211039ae3..1b92524c5 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/test_support.rbs @@ -12,7 +12,7 @@ module ElasticGraph ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?output: io?, ?reload_schema_artifacts: bool, - ) ?{ (API) -> void } -> _SchemaArtifacts + ) ?{ (API) -> void } -> (Results | SchemaArtifacts::FromDisk) def define_schema_with_schema_elements: ( SchemaArtifacts::RuntimeMetadata::SchemaElementNames, @@ -24,7 +24,7 @@ module ElasticGraph ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?output: io?, ?reload_schema_artifacts: bool, - ) ?{ (API) -> void } -> _SchemaArtifacts + ) ?{ (API) -> void } -> (Results | SchemaArtifacts::FromDisk) DOC_COMMENTS: ::String diff --git 
a/elasticgraph-support/README.md b/elasticgraph-support/README.md index 91f3096dc..8464cbc88 100644 --- a/elasticgraph-support/README.md +++ b/elasticgraph-support/README.md @@ -45,6 +45,9 @@ graph LR; elasticgraph-indexer["elasticgraph-indexer"]; elasticgraph-indexer --> elasticgraph-support; class elasticgraph-indexer otherEgGemStyle; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + elasticgraph-json_ingestion --> elasticgraph-support; + class elasticgraph-json_ingestion otherEgGemStyle; elasticgraph-opensearch["elasticgraph-opensearch"]; elasticgraph-opensearch --> elasticgraph-support; class elasticgraph-opensearch otherEgGemStyle;