From 6db8a08cf82823052075e1e86abb138c1c3439cc Mon Sep 17 00:00:00 2001
From: Jane Sandberg
Date: Thu, 27 Aug 2020 19:27:59 -0700
Subject: [PATCH] Add a traject config for oai-pmh, helps with #309, helps with #308

To use it, you can run:

traject -i xml -r Traject::OaiPmhNokogiriReader \
  -s 'oai_pmh.start_url=https://libarchive.linnbenton.edu/catalog/oai.xml?verb=ListRecords&metadataPrefix=oai_dc&set=type:OpenEducationalResource' \
  -c lib/tasks/data/config/config.rb \
  -s solr.url=[YOUR SOLR URL] \
  -s solrj_writer.commit_on_close=true \
  -c lib/tasks/data/config/oai.rb
---
 lib/tasks/data/config/oai.rb | 45 ++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 lib/tasks/data/config/oai.rb

diff --git a/lib/tasks/data/config/oai.rb b/lib/tasks/data/config/oai.rb
new file mode 100644
index 0000000..c453148
--- /dev/null
+++ b/lib/tasks/data/config/oai.rb
@@ -0,0 +1,45 @@
+require 'digest'
+require 'traject'
+
+# Traject rules that map Dublin Core records from an OAI-PMH response onto
+# index fields.
+
+settings do
+  # NOTE(review): -1 presumably disables the skipped-record limit; confirm
+  # against the traject Solr writer documentation.
+  provide "solr_writer.max_skipped", -1
+  # Namespace prefixes used by the XPath expressions in this file.
+  provide "nokogiri.namespaces", {
+    "oai" => "http://www.openarchives.org/OAI/2.0/",
+    "dc" => "http://purl.org/dc/elements/1.1/",
+    "oai_dc" => "http://www.openarchives.org/OAI/2.0/oai_dc/"
+  }
+  provide "nokogiri.each_record_xpath", "//oai:record"
+end
+
+# Use the MD5 hex digest of the OAI identifier's text as the id value.
+to_field "id", extract_xpath("/oai:record/oai:header/oai:identifier", to_text: false) do |_record, accumulator|
+  # Nodes are extracted unconverted (to_text: false), so read their text
+  # explicitly before hashing instead of relying on implicit conversion.
+  accumulator.map! { |xml_node| Digest::MD5.hexdigest(xml_node.text) }
+end
+
+to_field "abstract_display", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:description")
+to_field "abstract_t", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:description")
+
+# author_display takes only the first dc:creator; author_t takes all of them.
+to_field "author_display", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:creator[1]")
+to_field "author_t", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:creator")
+
+to_field "contributor_display", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:contributor")
+to_field "contributor_t", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:contributor")
+
+# Every record gets the literal value "Online".
+to_field "is_electronic_facet", literal("Online")
+
+to_field "subject_t", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:subject")
+to_field "subject_topic_facet", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:subject")
+
+# title_display takes only the first dc:title; title_t takes all of them.
+to_field "title_display", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:title[1]")
+to_field "title_t", extract_xpath("/oai:record/oai:metadata/oai_dc:dc/dc:title")