Powered by AppSignal & Oban Pro

LOC “Offline” Replacement (via Virtuoso)

virtuoso/loc_sparql.livemd

LOC “Offline” Replacement (via Virtuoso)

# Authoritex is the only dependency declared for this notebook.
# NOTE(review): Authoritex.SKOS below calls Req directly; presumably Req is
# pulled in as a dependency of authoritex — confirm.
Mix.install([{:authoritex, "~> 2.0"}])

Generic SKOS SPARQL Authority

defmodule Authoritex.SKOS do
  @moduledoc """
  Generic SKOS fetch/search client for Authoritex, backed by a SPARQL endpoint.

  `skos:prefLabel` supplies the label and `skos:altLabel` the variants. The
  search query is written specifically for the Virtuoso RDF triple store: it
  uses the `bif:contains` free-text search predicate, so indexing
  `skos:prefLabel` and `skos:altLabel` is required.

  The endpoint is read at call time from the `:virtuoso` application's
  `:endpoint` setting and defaults to `http://localhost:8890/`.
  """

  require Logger

  # Looks up a single concept by IRI. An English prefLabel is preferred, with
  # any other prefLabel as the fallback; only English altLabels are collected.
  @fetch_query_template """
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

  SELECT ?id ?label (GROUP_CONCAT(DISTINCT ?variant; separator="|") AS ?variants)
  WHERE {
    VALUES ?id { <{{id}}> }

    OPTIONAL {
      ?id skos:prefLabel ?label_en .
      FILTER(LANGMATCHES(LANG(?label_en), "en"))
    }
    ?id skos:prefLabel ?label_any .
    BIND(COALESCE(?label_en, ?label_any) AS ?label)

    OPTIONAL {
      ?id skos:altLabel ?variant .
      FILTER(LANGMATCHES(LANG(?variant), "en"))
    }
  }
  GROUP BY ?id ?label
  LIMIT 1
  """

  # Prefix search inside one named graph. The first UNION branch matches on the
  # prefLabel, the second on an altLabel; both narrow the free-text hits with a
  # case-insensitive STRSTARTS on the whole stem.
  @search_query_template """
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

  SELECT ?id ?label (GROUP_CONCAT(DISTINCT ?variant; separator="|") AS ?variants)
  WHERE {
    GRAPH <{{graph}}> {
      {
        ?id skos:prefLabel ?label .
        ?label bif:contains "{{tokenized_stem}}" OPTION (score ?sc) .
        FILTER(STRSTARTS(LCASE(?label), "{{stem}}"))
        OPTIONAL {
          ?id skos:altLabel ?variant .
          FILTER(LANGMATCHES(LANG(?variant), "en"))
        }
      } UNION {
        ?id skos:prefLabel ?label .
        ?id skos:altLabel ?variant_match .
        ?variant_match bif:contains "{{tokenized_stem}}" OPTION (score ?sc) .
        FILTER(STRSTARTS(LCASE(?variant_match), "{{stem}}"))
        ?id skos:altLabel ?variant .
        FILTER(LANGMATCHES(LANG(?variant), "en"))
      }
    }
  }
  ORDER BY DESC(?sc)
  LIMIT {{count}}
  """

  @short_wildcard_error "Error FT370: Wildcard word needs at least 4 leading characters"

  # Characters that must be backslash-escaped inside a double-quoted SPARQL
  # string literal.
  @literal_escapes %{
    "\\" => "\\\\",
    "\"" => "\\\"",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t"
  }

  @doc """
  Fills the `{{key}}` placeholders of `template` from `replacements`.

  `replacements` is a keyword list (or any enumerable of `{key, value}` pairs);
  keys and values are converted with `to_string/1`. Placeholders without a
  matching key are left untouched. Substitution happens in a single pass, so
  placeholder-looking text inside a value is never expanded again.

  ## Examples

      iex> Authoritex.SKOS.build_query("LIMIT {{count}}", count: 5)
      "LIMIT 5"
  """
  @spec build_query(String.t(), Enumerable.t()) :: String.t()
  def build_query(template, replacements) do
    # Reversed so that, for a duplicated key, the first occurrence wins.
    values =
      replacements
      |> Enum.reverse()
      |> Map.new(fn {key, value} -> {to_string(key), to_string(value)} end)

    Regex.replace(~r/\{\{([^{}]+)\}\}/, template, fn placeholder, key ->
      Map.get(values, key, placeholder)
    end)
  end

  @doc """
  Fetches the concept identified by the IRI `id`.

  Returns a map with `:id`, `:label` and `:variants` (a list of strings), or
  `nil` when the endpoint returns no binding. An `id` that cannot be written
  as a SPARQL IRI (empty, non-binary, or containing whitespace or any of
  `<>"{}|\\^` and the backtick) returns `nil` without contacting the endpoint.
  """
  @spec fetch(String.t()) :: map() | nil
  def fetch(id) do
    if valid_iri?(id) do
      @fetch_query_template
      |> build_query(id: id)
      |> run_query()
      |> List.first()
    else
      Logger.debug("Not fetching #{inspect(id)}: not a valid IRI")
      nil
    end
  end

  @doc """
  Searches the named graph `graph` for concepts whose preferred or alternate
  label starts with `stem` (case-insensitively), returning at most `count`
  result maps.

  A `stem` with no searchable tokens returns `[]` without contacting the
  endpoint.
  """
  @spec search(String.t(), String.t(), pos_integer()) :: [map()]
  def search(graph, stem, count \\ 50) do
    case tokenize(stem) do
      [] ->
        []

      tokens ->
        @search_query_template
        |> build_query(
          graph: graph,
          stem: literal_stem(stem),
          tokenized_stem: Enum.join(tokens, " AND "),
          count: count
        )
        |> run_query()
    end
  end

  # The stem is compared against LCASE(...) in the query, so it has to be
  # lower-cased as well; leading whitespace could never match a label start.
  defp literal_stem(stem) do
    stem
    |> String.trim_leading()
    |> String.downcase()
    |> escape_literal()
  end

  defp escape_literal(text) do
    String.replace(text, Map.keys(@literal_escapes), &Map.fetch!(@literal_escapes, &1))
  end

  # Builds the single-quoted free-text tokens; only the last token gets the
  # trailing wildcard. Quote and backslash characters would terminate either
  # the token or the enclosing SPARQL literal, so they are treated as token
  # separators.
  # NOTE(review): assumes Virtuoso's free-text index also splits words on
  # these characters — confirm against the indexed data.
  defp tokenize(stem) do
    stem
    |> String.replace(["'", "\"", "\\"], " ")
    |> String.split()
    |> List.update_at(-1, &(&1 <> "*"))
    |> Enum.map(&"'#{&1}'")
  end

  defp valid_iri?(id) do
    is_binary(id) and id != "" and not String.match?(id, ~r/[\x00-\x20<>"{}|\\^`]/)
  end

  defp base_url do
    Application.get_env(:virtuoso, :endpoint, "http://localhost:8890/")
  end

  # POSTs the query to `<endpoint>/sparql` and hands the decoded body to
  # parse_response/1. JSON keys are decoded as atoms, which is what lets the
  # result maps use :id / :label / :variants.
  defp run_query(query) do
    Authoritex.HTTP.Client.new(
      base_url: base_url(),
      url: "/sparql",
      body: query,
      headers: [
        {"content-type", "application/sparql-query"},
        {"accept", "application/json"}
      ]
    )
    |> Req.post!(decode_json: [keys: :atoms])
    |> Map.get(:body)
    |> parse_response()
  end

  # Decoded SPARQL JSON results: one map per binding.
  defp parse_response(%{results: %{bindings: bindings}}) when is_list(bindings) do
    Enum.map(bindings, &parse_binding/1)
  end

  # A body that was not decoded as JSON; seen here for the FT370 free-text error.
  defp parse_response(response) when is_binary(response) do
    if String.contains?(response, @short_wildcard_error) do
      Logger.debug("Final token must be at least 4 characters")
    else
      Logger.warning("Unexpected response from server: #{response}")
    end

    []
  end

  defp parse_response(response) do
    Logger.warning("Unexpected response from server: #{inspect(response)}")
    []
  end

  # Flattens `%{var: %{value: v}}` to `%{var: v}`, splitting the "|"-joined
  # variants into a list.
  defp parse_binding(binding) do
    Map.new(binding, fn
      {:variants, %{value: ""}} -> {:variants, []}
      {:variants, %{value: value}} -> {:variants, String.split(value, "|")}
      {key, %{value: value}} -> {key, value}
    end)
  end
end

Drop-in Replacement LOC Authority using SKOS

defmodule Authoritex.LOC.SKOS.Base do
  @moduledoc """
  Shared `Authoritex` behaviour implementation for SKOS-backed authorities.

  A module that calls `use Authoritex.LOC.SKOS.Base` must first define three
  module attributes, which the injected code reads at compile time:

    * `@base_uri` - URI prefix of the authority; it is both the prefix that
      `can_resolve?/1` checks and the graph passed to `Authoritex.SKOS.search/3`
    * `@code` - value returned by `code/0`
    * `@description` - value returned by `description/0`

  NOTE(review): `fetch/1` and `search/2` return whatever `Authoritex.SKOS`
  returns (a map or `nil`, and a list); confirm that this matches the return
  shapes the `Authoritex` behaviour expects.
  """

  defmacro __using__(_opts) do
    quote do
      @moduledoc "Authoritex implementation for #{@description}"
      @behaviour Authoritex

      require Logger
      alias Authoritex.SKOS

      # Only binaries that start with "<base_uri>/" belong to this authority.
      @impl true
      def can_resolve?(id) when is_binary(id), do: String.starts_with?(id, @base_uri <> "/")
      def can_resolve?(_id), do: false

      @impl true
      def code do
        @code
      end

      @impl true
      def description do
        @description
      end

      @impl true
      def fetch(id) do
        Logger.debug("Fetching #{id} via #{__MODULE__}")

        SKOS.fetch(id)
      end

      # The authority's base URI is used as the graph to search in.
      @impl true
      def search(query, max_results \\ 20) do
        Logger.debug("Searching for #{query} via #{__MODULE__}")

        SKOS.search(@base_uri, query, max_results)
      end
    end
  end
end
# Name authority ("lcnaf"). The three attributes are read by the code that
# Authoritex.LOC.SKOS.Base injects, so they must be set before `use`.
defmodule Authoritex.LOC.SKOS.Names do
  @code "lcnaf"
  @description "Library of Congress Name Authority File (SKOS)"
  @base_uri "http://id.loc.gov/authorities/names"

  use Authoritex.LOC.SKOS.Base
end

# Subject headings authority ("lcsh"). The three attributes are read by the
# code that Authoritex.LOC.SKOS.Base injects, so they must be set before `use`.
defmodule Authoritex.LOC.SKOS.SubjectHeadings do
  @code "lcsh"
  @description "Library of Congress Subject Headings (SKOS)"
  @base_uri "http://id.loc.gov/authorities/subjects"

  use Authoritex.LOC.SKOS.Base
end

# Genre/form terms authority ("lcgft"). The three attributes are read by the
# code that Authoritex.LOC.SKOS.Base injects, so they must be set before `use`.
defmodule Authoritex.LOC.SKOS.GenreForms do
  @code "lcgft"
  @description "Library of Congress Genre/Form Terms (SKOS)"
  @base_uri "http://id.loc.gov/authorities/genreForms"

  use Authoritex.LOC.SKOS.Base
end

# MARC languages authority ("lclang"). The three attributes are read by the
# code that Authoritex.LOC.SKOS.Base injects, so they must be set before `use`.
defmodule Authoritex.LOC.SKOS.Languages do
  @code "lclang"
  @description "Library of Congress MARC List for Languages (SKOS)"
  @base_uri "http://id.loc.gov/vocabulary/languages"

  use Authoritex.LOC.SKOS.Base
end

Configure Authoritex to use LOC via SKOS

# Runtime configuration for this notebook, applied in order:
#   * the SKOS-backed LOC modules become Authoritex's :authorities list
#   * :virtuoso / :endpoint is the base URL Authoritex.SKOS sends queries to
[
  {:authoritex, :authorities,
   [
     Authoritex.LOC.SKOS.Names,
     Authoritex.LOC.SKOS.SubjectHeadings,
     Authoritex.LOC.SKOS.GenreForms,
     Authoritex.LOC.SKOS.Languages
   ]},
  {:virtuoso, :endpoint, "http://localhost:8890/"}
]
|> Enum.each(fn {app, key, value} -> Application.put_env(app, key, value) end)

Sample Queries

# NOTE(review): expected to reflect the :authorities list configured above — confirm.
Authoritex.authorities()

Fetch from NAF

# The URI starts with "http://id.loc.gov/authorities/names/", the prefix that
# Authoritex.LOC.SKOS.Names.can_resolve?/1 accepts.
Authoritex.fetch("http://id.loc.gov/authorities/names/n00006689")

Fetch from LCSH

# The URI starts with "http://id.loc.gov/authorities/subjects/", the prefix that
# Authoritex.LOC.SKOS.SubjectHeadings.can_resolve?/1 accepts.
Authoritex.fetch("http://id.loc.gov/authorities/subjects/sh85017416")

Fetch from LCGFT

# The URI starts with "http://id.loc.gov/authorities/genreForms/", the prefix
# that Authoritex.LOC.SKOS.GenreForms.can_resolve?/1 accepts.
Authoritex.fetch("http://id.loc.gov/authorities/genreForms/gf2014026114")

Search LCGFT (make sure all variants are returned on variant match)

# "lcgft" is the code declared by Authoritex.LOC.SKOS.GenreForms; "educat" is a
# partial word, so this exercises the trailing-wildcard prefix search.
Authoritex.search("lcgft", "educat")

Search LCNAF (make sure non-English label fallback works)

# "lcnaf" is the code declared by Authoritex.LOC.SKOS.Names; no result limit is
# passed in this call.
Authoritex.search("lcnaf", "lyotard")

Search variant with multi-token query

# Two-token query: the tokens are ANDed in the free-text expression and only the
# last one carries the wildcard. "scal" is exactly four characters, the minimum
# named in the FT370 error that Authoritex.SKOS checks for.
Authoritex.search("lcsh", "superficial scal", 50)

Search label with multi-token query

# Two-token query against "lcsh" (Authoritex.LOC.SKOS.SubjectHeadings); the 5 is
# presumably forwarded as max_results and ends up as the query's LIMIT — confirm.
Authoritex.search("lcsh", "great brit", 5)