Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ gem 'rest-client'
gem 'sys-proctable'

# NCBO
gem 'goo', github: 'ncbo/goo', branch: 'development'
gem 'goo', github: 'ncbo/goo', branch: 'feature/solrcloud-alias-indexing-claude'
gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'development'
gem 'ncbo_annotator', github: 'ncbo/ncbo_annotator', branch: 'develop'
gem 'ontologies_linked_data', github: 'ncbo/ontologies_linked_data', branch: 'develop'
gem 'ontologies_linked_data', github: 'ncbo/ontologies_linked_data', branch: 'feature/solrcloud-alias-indexing-claude'

group :development do
gem 'rubocop', require: false
Expand Down
10 changes: 5 additions & 5 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ GIT

GIT
remote: https://github.com/ncbo/goo.git
revision: fa850f46c94e09b6368ed47728b9cce05b7e9d54
branch: development
revision: 534648570e056c6da12e12e9f019a14459909dcf
branch: feature/solrcloud-alias-indexing-claude
specs:
goo (0.0.2)
addressable (~> 2.8)
Expand All @@ -26,7 +26,7 @@ GIT

GIT
remote: https://github.com/ncbo/ncbo_annotator.git
revision: 7755a51ce891763a0ba174813796df116abca8a9
revision: 3e9c060baa3505af13671b06b3724524277fa301
branch: develop
specs:
ncbo_annotator (0.0.1)
Expand All @@ -37,8 +37,8 @@ GIT

GIT
remote: https://github.com/ncbo/ontologies_linked_data.git
revision: a4bbd57fcaa78fa66a11b494384b1791b6e9a6d8
branch: develop
revision: eed53891f29170fc13d4925606175d53ffc2e122
branch: feature/solrcloud-alias-indexing-claude
specs:
ontologies_linked_data (0.0.1)
activesupport
Expand Down
148 changes: 94 additions & 54 deletions bin/ncbo_ontology_index
Original file line number Diff line number Diff line change
Expand Up @@ -23,63 +23,81 @@ elsif LinkedData.settings.goo_host.include? "prod"
end
puts "Running on #{platform} platform"

require 'uri'
require 'benchmark'
require 'optparse'

ALIAS_NAME = :term_search

options = {all: false}
opt_parser = OptionParser.new do |opts|
# Set a banner, displayed at the top of the help screen.
#opts.banner = "Usage: ncbo_ontology_index [options]"

opts.on('-c', '--solr-core-url URL', 'Optional URL of the Solr core to be used for indexing. Ex: http://localhost:8983/solr/core1') do |url|
options[:solr_core_url] = url
end
opts.banner = "Usage: #{$0} [options]"

options[:ontologies] = []
opts.on('-a', '--all-ontologies', 'Index all ontologies (this or -o option required).') do
options[:solr_core_url] = NcboCron.settings.search_index_all_url if (options[:solr_core_url].nil?)

LinkedData::Models::Ontology.all.each do |ont|
opts.on('-a', '--all-ontologies', 'Index all ontologies (this or -o option required, unless using --promote-only).') do
LinkedData::Models::Ontology.all.each do |ont|
ont.bring(:acronym)
options[:ontologies].push(ont.acronym)
end
options[:all] = true
end

opts.on('-o', '--ontologies ACRONYM1,ACRONYM2,ACRONYM3', 'Comma-separated list of ontologies to index (this or -a option required).') do |acronyms|
opts.on('-o', '--ontologies ACRONYM1,ACRONYM2,ACRONYM3', 'Comma-separated list of ontologies to index.') do |acronyms|
options[:ontologies] = acronyms.split(",").map {|o| o.strip}
end

opts.on('-z', '--optimize [true/false]', 'Whether to optimize the index after the indexing completion. Default: true') do |optimize|
opts.on('-c', '--collection NAME', 'Target collection name (required with -a and --promote-only).') do |name|
options[:collection] = name
end

opts.on('-p', '--promote', 'Promote the alias to the new collection after successful indexing. Use with -a -c.') do
options[:promote] = true
end

opts.on('--promote-only', 'Promote the alias to an existing collection without rebuilding. Requires -c.') do
options[:promote_only] = true
end

opts.on('-z', '--optimize [true/false]', 'Whether to optimize the index after indexing. Default: true') do |optimize|
options[:optimize] = optimize
end

options[:logfile] = STDOUT
opts.on( '-l', '--logfile FILE', "Write log to FILE (default is STDOUT)" ) do |filename|
opts.on('-l', '--logfile FILE', "Write log to FILE (default is STDOUT)") do |filename|
options[:logfile] = filename
end

#options[:verbose] = false
#opts.on( '-v', '--verbose', 'Output more information' ) do
# options[:verbose] = true
#end

# Display the help screen, all programs are assumed to have this option.
opts.on( '-h', '--help', 'Display this screen' ) do
opts.on('-h', '--help', 'Display this screen') do
puts opts
puts
puts "Examples:"
puts " Rebuild all into a new collection (no alias swap):"
puts " #{$0} -a -c term_search_rebuild"
puts
puts " Rebuild all and promote alias on success:"
puts " #{$0} -a -c term_search_20260401 -p"
puts
puts " Promote alias to an existing collection (no rebuild):"
puts " #{$0} --promote-only -c term_search_20260401"
puts
puts " Re-index specific ontologies into the live alias:"
puts " #{$0} -o SNOMEDCT,GO"
exit
end
end

# Parse the command-line. The 'parse' method simply parses ARGV, while the 'parse!' method parses ARGV and removes
# any options found there, as well as any parameters for the options.
options[:ontologies] = []
opt_parser.parse!
unless options[:ontologies]

# Validate option combinations
if options[:promote_only]
abort("Error: -c (collection name) is required with --promote-only.\n\n") if options[:collection].nil?
abort("Error: --promote-only cannot be combined with -a or -o.\n\n") if options[:all] || !options[:ontologies].empty?
elsif options[:all]
abort("Error: -c (collection name) is required when using -a.\n" \
"Example: #{$0} -a -c term_search_20260413\n\n") if options[:collection].nil?
elsif options[:ontologies].empty?
puts opt_parser.help
exit(1)
end
options[:solr_core_url] = LinkedData.settings.search_server_url if (options[:solr_core_url].nil?)

if options[:optimize].nil? || options[:optimize] != "false"
options[:optimize] = true
Expand Down Expand Up @@ -114,41 +132,55 @@ def index_ontology(indexed_ontologies, acronym, logger)
end
end

# Reports whether +url+ is a well-formed HTTP or HTTPS URL.
#
# @param url [String, nil] candidate URL (e.g. a Solr core URL from the command line)
# @return [Boolean] true if +url+ parses to an http/https URI; false for nil,
#   non-string input, unparseable strings, or any other scheme
def valid_url?(url)
  # URI.parse only accepts string-like input; anything else cannot be a URL.
  return false unless url.respond_to?(:to_str)

  # URI::HTTPS subclasses URI::HTTP, so a single check covers both schemes.
  URI.parse(url).is_a?(URI::HTTP)
rescue URI::InvalidURIError
  # Only a parse failure means "invalid"; unrelated errors are left to surface
  # instead of being swallowed by a blanket inline rescue.
  false
end
# --- Promote-only mode ---
if options[:promote_only]
logger = Logger.new(options[:logfile])
collection = options[:collection]

abort("The Solr core URL you provided is invalid. Aborting...\n\n") unless valid_url?(options[:solr_core_url])
puts "You are about to promote alias '#{ALIAS_NAME}' to point to collection '#{collection}'."
puts "Type 'yes' to continue: "
$stdout.flush
confirm = $stdin.gets
abort("Aborting...\n\n") unless confirm.strip == 'yes'

if options[:all] && options[:solr_core_url].downcase == LinkedData.settings.search_server_url.downcase
puts "WARNING: You are about to clear the main core search index on #{options[:solr_core_url]}!!! This will affect ALL searches!!! Are you sure you mean to do this?"
elsif options[:all]
puts "You are about to clear the index on #{options[:solr_core_url]} and kick off a lengthy re-indexing process. Are you sure?"
old_collection = Goo.promote_alias(ALIAS_NAME, collection)
msg = "Alias '#{ALIAS_NAME}' promoted from '#{old_collection}' to '#{collection}'."
puts msg
logger.info(msg)
exit
end

# --- Full re-index or individual ontology mode ---
if options[:all]
action = options[:promote] ? "re-index all ontologies and promote alias" : "re-index all ontologies (no alias swap)"
puts "You are about to create collection '#{options[:collection]}' and #{action}. Are you sure?"
puts "Type 'yes' to continue: "
$stdout.flush
confirm = $stdin.gets
abort("Aborting...\n\n") unless (confirm.strip == 'yes')
abort("Aborting...\n\n") unless confirm.strip == 'yes'
end

begin
logger = Logger.new(options[:logfile])
puts "Processing details are logged to #{options[:logfile] == STDOUT ? "STDOUT" : options[:logfile]}"
msg = ""

if options[:all]
msg = "Processing index for all ontologies on #{options[:solr_core_url]}"
else
msg = "Processing index for ontologies: #{options[:ontologies]} on #{options[:solr_core_url]}"
end
puts msg
logger.info(msg)
msg = "Processing full re-index for all ontologies into collection '#{options[:collection]}'"
puts msg
logger.info(msg)

Goo.configure do |conf|
conf.add_search_backend(:main, service: options[:solr_core_url])
# Create a new collection for re-indexing
logger.info("Creating reindex collection '#{options[:collection]}'...")
reindex_conn = Goo.create_reindex_connection(ALIAS_NAME, options[:collection])
target_collection = reindex_conn.collection_name

logger.info("Clearing new collection '#{target_collection}'...")
LinkedData::Models::Class.indexClear(target_collection)
LinkedData::Models::Class.indexCommit(nil, target_collection)
else
msg = "Processing index for ontologies: #{options[:ontologies]}"
puts msg
logger.info(msg)
end

indexed_ontologies = []
Expand All @@ -157,12 +189,6 @@ begin
time = Benchmark.realtime do
logger.info("There is a total of #{options[:ontologies].length} ontolog#{options[:ontologies].length > 1 ? "ies" : "y"} to index")

if options[:all]
logger.info("Clearing existing index on #{options[:solr_core_url]}")
LinkedData::Models::Class.indexClear()
LinkedData::Models::Class.indexCommit()
end

options[:ontologies].each do |acronym|
index_ontology(indexed_ontologies, acronym, logger)
remaining_ontologies -= 1
Expand All @@ -173,16 +199,30 @@ begin
logger.info("Optimizing index...")
logger.flush
t0 = Time.now
LinkedData::Models::Class.indexOptimize()
if options[:all]
LinkedData::Models::Class.indexOptimize(nil, target_collection)
else
LinkedData::Models::Class.indexOptimize()
end
logger.info("Completed optimizing index in #{Time.now - t0} sec.")
logger.flush
end

if options[:all] && options[:promote]
logger.info("Promoting alias '#{ALIAS_NAME}' to collection '#{target_collection}'...")
Goo.promote_alias(ALIAS_NAME, target_collection)
logger.info("Alias promotion complete.")
end
end

if options[:all]
msg = "Completed processing index for all ontologies on #{options[:solr_core_url]} in #{(time/60).round(1)} minutes."
if options[:promote]
msg = "Completed full re-index into '#{options[:collection]}' in #{(time/60).round(1)} minutes. Alias '#{ALIAS_NAME}' now points to '#{options[:collection]}'."
else
msg = "Completed full re-index into '#{options[:collection]}' in #{(time/60).round(1)} minutes. Alias NOT promoted — use --promote-only -c #{options[:collection]} to promote later."
end
else
msg = "Completed processing index for ontologies: #{indexed_ontologies} on #{options[:solr_core_url]} in #{(time/60).round(1)} minutes."
msg = "Completed processing index for ontologies: #{indexed_ontologies} in #{(time/60).round(1)} minutes."
end
puts msg
logger.info(msg)
Expand Down
Loading
Loading