From a21f41de71923acb5922fc57cf0734761bd3c884 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Fri, 6 Feb 2026 10:09:52 -0600 Subject: [PATCH] Switch to using DateRangeField for storing date ranges This can be more efficient than a long list of integers. --- README.md | 16 +- .../facet_field_presenter.rb | 38 +++- lib/blacklight_range_limit.rb | 31 ++- .../range_limit_builder.rb | 74 ++++--- .../segment_calculation.rb | 112 ++++++----- solr/conf/schema.xml | 16 ++ solr/sample_solr_documents.yml | 60 +++--- .../range_limit_action_method_spec.rb | 64 +++---- spec/features/blacklight_range_limit_spec.rb | 62 +++--- spec/features/run_through_spec.rb | 142 +++++++------- spec/fixtures/solr_documents/zero_year.yml | 2 +- .../segment_calculation_spec.rb | 8 +- spec/presenters/facet_field_presenter_spec.rb | 181 +++++++++++++----- spec/requests/bad_param_requests_spec.rb | 58 +++--- .../lib/generators/test_app_generator.rb | 46 +++-- 15 files changed, 555 insertions(+), 355 deletions(-) diff --git a/README.md b/README.md index 72c37fd0..dc6e5c9b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -BlacklightRangeLimit: integer range limiting and profiling for Blacklight applications +BlacklightRangeLimit: date range limiting and profiling for Blacklight applications ![Build Status](https://github.com/projectblacklight/blacklight/workflows/CI/badge.svg) [![Gem Version](https://badge.fury.io/rb/blacklight_range_limit.png)](http://badge.fury.io/rb/blacklight_range_limit) @@ -6,16 +6,14 @@ BlacklightRangeLimit: integer range limiting and profiling for Blacklight appli # Description -The BlacklightRangeLimit plugin provides a 'facet' or limit for integer fields, that lets the user enter range limits with a text box or a slider, and also provides area charts giving a sense of the distribution of values (with drill down). 
+The BlacklightRangeLimit plugin provides a 'facet' or limit for date fields, that lets the user enter range limits with a text box or a slider, and also provides area charts giving a sense of the distribution of values (with drill down). -The primary use case of this plugin is for 'year' data, but it should work for any integer field. - -Decimal numbers and Dates are NOT supported; they theoretically could be in the future, although it gets tricky. +The primary use case of this plugin is for 'year' data using Solr's `DateRangeField`. This field type supports both single dates (e.g. `"1998"`) and date ranges (e.g. `"[1998 TO 2005]"`), making it ideal for library catalog records that may span multiple years. # Requirements -* A Solr integer field. It might be advantageous to use an IntPointField. +* A Solr `DateRangeField`. The field should be indexed and stored (e.g. using the `*_drsi` dynamic field suffix from the sample schema). Values can be truncated ISO-8601 dates like `"1998"` (meaning the entire year), full dates like `"1998-06-15"`, or date ranges like `"[1998 TO 2005]"`. * Javascript requires you to be using either rails-importmaps or a package.json-based builder like jsbundling-rails or vite-ruby. Legacy "sprockets-only" is not supported, however propshaft or sprockets can be used as your base asset pipeline. @@ -74,14 +72,16 @@ package.json-based use will additionally need to point to the matching unreleaes You have at least one solr field you want to display as a range limit, that's why you've installed this plugin. In your CatalogController, the facet configuration should look like: ```ruby -config.add_facet_field 'pub_date', label: 'Publication Year', range: true +config.add_facet_field 'pub_date_drsi', label: 'Publication Year', range: true ``` +where `pub_date_drsi` is a Solr `DateRangeField` (the `_drsi` suffix maps to the `date_range` field type in the sample schema — stored, indexed, single-valued). + You should now get range limit display. 
More complicated configuration is available if desired, see Range Facet Configuration below. ## A note on AJAX use -In order to calculate distribution segment ranges, we need to first know the min and max boundaries. But we don't really know that until we've fetched the result set (we use the Solr Stats component to get min and max with a result set). +In order to calculate distribution segment ranges, we need to first know the min and max boundaries. But we don't really know that until we've fetched the result set (we use the Solr JSON Facet API to get min and max with a result set). So, ordinarily, after we've gotten the result set, an additional round trip to back-end and solr will happen, with min max identified, to fetch segments. diff --git a/app/presenters/blacklight_range_limit/facet_field_presenter.rb b/app/presenters/blacklight_range_limit/facet_field_presenter.rb index 62f2825f..d1aba9a8 100644 --- a/app/presenters/blacklight_range_limit/facet_field_presenter.rb +++ b/app/presenters/blacklight_range_limit/facet_field_presenter.rb @@ -61,25 +61,49 @@ def range_config private def missing - stats_for_field.fetch('missing', 0) + json_facet_stats.dig('missing', 'count') || stats_for_field.fetch('missing', 0) end + # Read range stats from the JSON Facet API response. + # The range_limit_builder stores them under the key "#{field}_range_stats" (the Solr field name followed by "_range_stats"). + def json_facet_stats + response.dig('facets', "#{facet_field.field}_range_stats") || {} + end + + # Legacy stats-component path – kept as a fallback so that the presenter + # still works when the upstream application has not yet switched its Solr + # request handler to the JSON Facet API approach. def stats_for_field response.dig('stats', 'stats_fields', facet_field.field) || {} end # type is 'min' or 'max' # Returns smallest and largest value in current result set, if available - # from stats component response. + # from the JSON Facet API response (preferred) or the stats component + # response (legacy fallback). 
def range_results_endpoint(type) - stats = stats_for_field + type_s = type.to_s + + # Try JSON Facet API first + json_stats = json_facet_stats + if json_stats.key?(type_s) + raw = json_stats[type_s] + return nil if raw.nil? - return nil unless stats.key? type - # StatsComponent returns weird min/max when there are in - # fact no values + # Check if all docs are missing a value – no meaningful min/max + missing_count = json_stats.dig('missing', 'count') || 0 + return nil if selected_range_hits == missing_count && missing_count > 0 + + year = BlacklightRangeLimit.year_from_solr_date(raw) + return year.to_s if year + end + + # Fall back to legacy stats component + stats = stats_for_field + return nil unless stats.key? type_s return nil if selected_range_hits == stats['missing'] - stats[type].to_s.gsub(/\.0+/, '') + stats[type_s].to_s.gsub(/\.0+/, '') end def selected_range_hits diff --git a/lib/blacklight_range_limit.rb b/lib/blacklight_range_limit.rb index 0189a4a8..d315924b 100644 --- a/lib/blacklight_range_limit.rb +++ b/lib/blacklight_range_limit.rb @@ -19,6 +19,33 @@ class InvalidRange < TypeError; end submit: 'submit btn btn-sm btn-secondary' } + # Extract a year integer from a Solr date value. + # Handles ISO-8601 dates like "1998-01-01T00:00:00Z", truncated dates like "1998", + # and numeric values (integers/floats). + # Returns an integer year or nil if the value cannot be parsed. + def self.year_from_solr_date(value) + return nil if value.nil? + + case value + when Integer + value + when Float + value.to_i + when String + value = value.strip + return nil if value.empty? + + # Match optional negative sign followed by digits at the start (the year portion) + ::Regexp.last_match(1).to_i if value =~ /\A(-?\d+)/ + end + end + + # Convert a year integer to a Solr-compatible date string for use with DateRangeField. + # DateRangeField accepts truncated dates like "1998" to mean the entire year. 
+ def self.year_to_solr_date(year) + year.to_s + end + def self.default_range_config { range: true, @@ -32,8 +59,8 @@ def self.default_range_config chart_segment_bg_color: 'rgba(54, 162, 235, 0.5)', chart_aspect_ratio: 2, assumed_boundaries: nil, - min_value: -2_147_483_648, # solr intfield min and max - max_value: 2_147_483_648 + min_value: 0, + max_value: 9999 }, filter_class: BlacklightRangeLimit::FilterField, presenter: BlacklightRangeLimit::FacetFieldPresenter, diff --git a/lib/blacklight_range_limit/range_limit_builder.rb b/lib/blacklight_range_limit/range_limit_builder.rb index e13066bb..58a454b6 100644 --- a/lib/blacklight_range_limit/range_limit_builder.rb +++ b/lib/blacklight_range_limit/range_limit_builder.rb @@ -15,11 +15,25 @@ def add_range_limit_params(solr_params) ranged_facet_configs = blacklight_config.facet_fields.select { |_key, config| config.range } return solr_params unless ranged_facet_configs.any? - solr_params["stats"] = "true" - solr_params["stats.field"] ||= [] + # Build JSON facet API queries for min/max/missing per field, + # replacing the stats component which does not work with DateRangeField. + json_facet = solr_params.delete('json.facet') || {} + json_facet = JSON.parse(json_facet) if json_facet.is_a?(String) ranged_facet_configs.each do |field_key, config| - solr_params["stats.field"] << config.field + solr_field = config.field + + # Use a nested JSON facet to get min, max, and missing for this field. + # We wrap them under a single key so they are easy to find in the response. 
+ json_facet["#{solr_field}_range_stats"] = { + type: 'query', + q: '*:*', + facet: { + min: "min(#{solr_field})", + max: "max(#{solr_field})", + missing: { type: 'query', q: "-#{solr_field}:[* TO *]" } + } + } range_config = config.range_config next unless range_config[:chart_js] || range_config[:textual_facets] @@ -29,13 +43,16 @@ def add_range_limit_params(solr_params) range = bl_create_selected_range_value(selected_value, config) # If we have both ends of a range - add_range_segments_to_solr!(solr_params, field_key, range.begin, range.end) if range && range.count != Float::INFINITY + if range && range.count != Float::INFINITY + add_range_segments_to_solr!(solr_params, field_key, range.begin, range.end) + end end + solr_params['json.facet'] = JSON.generate(json_facet) + solr_params end - # Another processing method, this one is NOT included in default processing chain, # it is specifically swapped in *instead of* add_range_limit_params for # certain ajax requests that only want to fetch range limit segments for @@ -49,35 +66,36 @@ def add_range_limit_params(solr_params) def fetch_specific_range_limit(solr_params) field_key = blacklight_params[:range_field] # what field to fetch for - unless blacklight_params[:range_start].present? && blacklight_params[:range_start].kind_of?(String) && - blacklight_params[:range_end].present? && blacklight_params[:range_end].kind_of?(String) + unless blacklight_params[:range_start].present? && blacklight_params[:range_start].is_a?(String) && + blacklight_params[:range_end].present? && blacklight_params[:range_end].is_a?(String) raise BlacklightRangeLimit::InvalidRange end start = blacklight_params[:range_start].to_i finish = blacklight_params[:range_end].to_i - add_range_segments_to_solr!(solr_params, field_key, start, finish ) + add_range_segments_to_solr!(solr_params, field_key, start, finish) # Remove all field faceting for efficiency, we won't be using it. 
- solr_params.delete("facet.field") - solr_params.delete("facet.field".to_sym) + solr_params.delete('facet.field') + solr_params.delete('facet.field'.to_sym) # We don't need any actual rows either solr_params[:rows] = 0 - return solr_params + solr_params rescue BlacklightRangeLimit::InvalidRange # This will make Rails return a 400 - raise ActionController::BadRequest, "invalid range_start (#{blacklight_params[:range_start]}) or range_end (#{blacklight_params[:range_end]})" + raise ActionController::BadRequest, + "invalid range_start (#{blacklight_params[:range_start]}) or range_end (#{blacklight_params[:range_end]})" end # hacky polyfill for new Blacklight behavior we need, if we don't have it yet # # https://github.com/projectblacklight/blacklight/pull/3213 # https://github.com/projectblacklight/blacklight/pull/3443 - bl_version = Gem.loaded_specs["blacklight"]&.version - if bl_version && (bl_version <= Gem::Version.new("8.6.1")) + bl_version = Gem.loaded_specs['blacklight']&.version + if bl_version && (bl_version <= Gem::Version.new('8.6.1')) def facet_value_to_fq_string(facet_field, value, use_local_params: true) facet_config = blacklight_config.facet_fields[facet_field] @@ -94,7 +112,7 @@ def facet_value_to_fq_string(facet_field, value, use_local_params: true) prefix = "{!#{local_params.join(' ')}}" unless local_params.empty? - "#{prefix}#{solr_field}:[#{value.begin || "*"} TO #{value.end || "*"}]" + "#{prefix}#{solr_field}:[#{value.begin || '*'} TO #{value.end || '*'}]" else super end @@ -108,23 +126,25 @@ def bl_create_selected_range_value(selected_value, field_config) range_config = field_config.range_config range = if selected_value.is_a? 
Range - selected_value - elsif range_config[:assumed_boundaries].is_a?(Range) - range_config[:assumed_boundaries] - elsif range_config[:assumed_boundaries] # Array of two things please - Range.new(*range_config[:assumed_boundaries]) - else - nil - end + selected_value + elsif range_config[:assumed_boundaries].is_a?(Range) + range_config[:assumed_boundaries] + elsif range_config[:assumed_boundaries] # Array of two things please + Range.new(*range_config[:assumed_boundaries]) + else + nil + end # clamp between config'd min and max min = range_config[:min_value] max = range_config[:max_value] - range = Range.new( - (range.begin.clamp(min, max) if range.begin), - (range.end.clamp(min, max) if range.end), - ) if range + if range + range = Range.new( + (range.begin.clamp(min, max) if range.begin), + (range.end.clamp(min, max) if range.end) + ) + end range end diff --git a/lib/blacklight_range_limit/segment_calculation.rb b/lib/blacklight_range_limit/segment_calculation.rb index f6451768..4aa80435 100644 --- a/lib/blacklight_range_limit/segment_calculation.rb +++ b/lib/blacklight_range_limit/segment_calculation.rb @@ -1,18 +1,21 @@ # Meant to be in a Controller, included in our ControllerOverride module. module BlacklightRangeLimit module SegmentCalculation - protected # Calculates segment facets within a given start and end on a given # field, returns request params to be added on to what's sent to # solr to get the calculated facet segments. - # Assumes solr_field is an integer, as range endpoint will be found - # by subtracting one from subsequent boundary. + # + # Uses year-level date boundaries for DateRangeField compatibility. + # The min and max are year integers; they are converted to Solr + # date-range query syntax (truncated years) so that DateRangeField + # can interpret them correctly. # # Changes solr_params passed in. 
def add_range_segments_to_solr!(solr_params, facet_field, min, max) - raise InvalidRange, "The min date must be before the max date" if min > max + raise InvalidRange, 'The min date must be before the max date' if min > max + field_config = blacklight_config.facet_fields[facet_field.to_s] return solr_params unless field_config @@ -24,9 +27,16 @@ def add_range_segments_to_solr!(solr_params, facet_field, min, max) boundaries = boundaries_for_range_facets(min, max, range_config[:num_segments] || 10) # Now make the boundaries into actual filter.queries. + # For DateRangeField compatibility the query uses bare year values, + # which Solr's DateRangeField interprets as truncated ISO-8601 dates + # (e.g. "1998" means the entire year 1998). + # + # Each segment covers [boundary_n TO boundary_(n+1) - 1] expressed + # as years so that the ranges are inclusive on both ends without + # overlapping. 0.upto(boundaries.length - 2) do |index| first = boundaries[index] - last = boundaries[index+1].to_i - 1 + last = boundaries[index + 1].to_i - 1 solr_params[:"facet.query"] << "#{field_config.field}:[#{first} TO #{last}]" end @@ -43,7 +53,8 @@ def add_range_segments_to_solr!(solr_params, facet_field, min, max) # be turned into inclusive ranges, the FINAL boundary will be one # unit more than the actual end of the last range later computed. def boundaries_for_range_facets(first, last, num_div) - raise ArgumentError, "The first date must be before the last date" if last < first + raise ArgumentError, 'The first date must be before the last date' if last < first + # arithmetic issues require last to be one more than the actual # last value included in our inclusive range last += 1 @@ -54,58 +65,55 @@ def boundaries_for_range_facets(first, last, num_div) # Don't know what most of these variables mean, just copying # from Flot. - dec = -1 * ( Math.log10(delta) ).floor - magn = (10 ** (-1 * dec)).to_f - norm = (magn == 0) ? 
delta : (delta / magn) # norm is between 1.0 and 10.0 + dec = -1 * Math.log10(delta).floor + magn = (10**(-1 * dec)).to_f + norm = magn == 0 ? delta : (delta / magn) # norm is between 1.0 and 10.0 size = 10 - if (norm < 1.5) - size = 1 - elsif (norm < 3) - size = 2; - # special case for 2.5, requires an extra decimal - if (norm > 2.25 ) - size = 2.5; - dec = dec + 1 - end - elsif (norm < 7.5) - size = 5 - end - - size = size * magn - - boundaries = [] - - start = floorInBase(first, size) - i = 0 - v = Float::MAX - prev = nil - begin - prev = v - v = start + i * size - boundaries.push(v.to_i) - i += 1 - end while ( v < last && v != prev) - - # Can create dups for small ranges, tighten up - boundaries.uniq! - - # That algorithm i don't entirely understand will sometimes - # extend past our first and last, tighten it up and make sure - # first and last are endpoints. - boundaries.delete_if {|b| b <= first || b >= last} - boundaries.unshift(first) - boundaries.push(last) - - return boundaries + if norm < 1.5 + size = 1 + elsif norm < 3 + size = 2 + # special case for 2.5, requires an extra decimal + if norm > 2.25 + size = 2.5 + dec += 1 + end + elsif norm < 7.5 + size = 5 + end + + size *= magn + + boundaries = [] + + start = floorInBase(first, size) + i = 0 + v = Float::MAX + prev = nil + begin + prev = v + v = start + i * size + boundaries.push(v.to_i) + i += 1 + end while (v < last && v != prev) + + # Can create dups for small ranges, tighten up + boundaries.uniq! + + # That algorithm i don't entirely understand will sometimes + # extend past our first and last, tighten it up and make sure + # first and last are endpoints. + boundaries.delete_if { |b| b <= first || b >= last } + boundaries.unshift(first) + boundaries.push(last) + + boundaries end # Cribbed from Flot. 
Round to nearby lower multiple of base def floorInBase(n, base) - return base * (n / base).floor + base * (n / base).floor end - - - end end diff --git a/solr/conf/schema.xml b/solr/conf/schema.xml index a1c7ab3f..7e636fbe 100644 --- a/solr/conf/schema.xml +++ b/solr/conf/schema.xml @@ -176,6 +176,17 @@ + + + + + + + + + + + @@ -239,6 +250,11 @@ + + +