Skip to content

Commit

Permalink
Add a traject config for oai-pmh, helps with #309, helps with #308
Browse files Browse the repository at this point in the history
To use it, you can run:

traject -i xml -r Traject::OaiPmhNokogiriReader \
-s "oai_pmh.start_url=https://libarchive.linnbenton.edu/catalog/oai.xml?verb=ListRecords&metadataPrefix=oai_dc&set=type:OpenEducationalResource" \
-c lib/tasks/data/config/config.rb \
-s solr.url=[YOUR SOLR URL] \
-s solrj_writer.commit_on_close=true \
-c lib/tasks/data/config/oai.rb
  • Loading branch information
sandbergja committed Aug 28, 2020
1 parent e9414cf commit 6db8a08
Showing 1 changed file with 40 additions and 0 deletions.
40 changes: 40 additions & 0 deletions lib/tasks/data/config/oai.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'digest'
require 'traject'

# Indexer-wide settings for reading OAI-PMH responses that carry oai_dc metadata.
settings do
  # Namespace prefixes that the XPath expressions in this config rely on.
  oai_namespaces = {
    "oai" => "http://www.openarchives.org/OAI/2.0/",
    "dc" => "http://purl.org/dc/elements/1.1/",
    "oai_dc" => "http://www.openarchives.org/OAI/2.0/oai_dc/"
  }

  # NOTE(review): -1 presumably means "never abort because of skipped records";
  # confirm against the Solr writer's documentation.
  provide "solr_writer.max_skipped", -1
  provide "nokogiri.namespaces", oai_namespaces

  # Every <oai:record> element, at any depth, is treated as one input record.
  provide "nokogiri.each_record_xpath", "//oai:record"
end



# Document id: the MD5 hex digest of the record's OAI identifier text.
#
# `to_text: false` leaves Nokogiri nodes (not Strings) in the accumulator, so
# the node's text is read explicitly instead of depending on an implicit
# node-to-String conversion inside Digest. Each matched identifier node is
# replaced in place by its digest.
to_field "id", extract_xpath("/oai:record/oai:header/oai:identifier", to_text: false) do |_record, accumulator|
  accumulator.map! { |identifier_node| Digest::MD5.hexdigest(identifier_node.text) }
end


# Field mappings from the record's Dublin Core payload.
# Every XPath below is rooted at the oai_dc:dc element inside the record's
# metadata, so that prefix is kept in one place.
dc_root = "/oai:record/oai:metadata/oai_dc:dc"

# Description feeds both the abstract display field and the abstract text field.
to_field "abstract_display", extract_xpath("#{dc_root}/dc:description")
to_field "abstract_t", extract_xpath("#{dc_root}/dc:description")

# Only the first dc:creator goes to the display field; the text field gets all of them.
to_field "author_display", extract_xpath("#{dc_root}/dc:creator[1]")
to_field "author_t", extract_xpath("#{dc_root}/dc:creator")

to_field "contributor_display", extract_xpath("#{dc_root}/dc:contributor")
to_field "contributor_t", extract_xpath("#{dc_root}/dc:contributor")

# Constant value applied to every record handled by this config.
to_field "is_electronic_facet", literal("Online")

to_field "subject_t", extract_xpath("#{dc_root}/dc:subject")
to_field "subject_topic_facet", extract_xpath("#{dc_root}/dc:subject")

# Only the first dc:title goes to the display field; the text field gets all of them.
to_field "title_display", extract_xpath("#{dc_root}/dc:title[1]")
to_field "title_t", extract_xpath("#{dc_root}/dc:title")



0 comments on commit 6db8a08

Please sign in to comment.