Skip to content

Commit

Permalink
Beobachter - rewrite (#2777)
Browse files Browse the repository at this point in the history
Closes #2486

I got rid of the issue/volume designation as this looks like it's almost 
entirely online, with no sign of published issues on the online articles
  • Loading branch information
adam3smith authored Jun 4, 2022
1 parent 5a52ddb commit bbe5ac5
Showing 1 changed file with 108 additions and 155 deletions.
263 changes: 108 additions & 155 deletions Beobachter.js
Original file line number Diff line number Diff line change
@@ -1,180 +1,104 @@
{
"translatorID": "a571680e-6338-46c2-a740-3cd9eb80fc7f",
"label": "Beobachter",
"creator": "ibex",
"creator": "Sebastian Karcher",
"target": "^https?://((www\\.)?beobachter\\.ch/.)",
"minVersion": "2.1.9",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2021-12-27 20:41:15"
"lastUpdated": "2022-02-05 20:11:36"
}

/*
Beobachter Translator - Parses Beobachter articles and creates Zotero-based
metadata.
Copyright (C) 2011 ibex
***** BEGIN LICENSE BLOCK *****
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Copyright © 2022 Sebastian Karcher
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
GNU Affero General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
/*
Reference URLs:
Article: http://www.beobachter.ch/natur/natuerlich-leben/wohnen-freizeit/artikel/beleuchtung_es-werde-led/
Topic list: http://www.beobachter.ch/natur/forschung-wissen/
***** END LICENSE BLOCK *****
*/

/* Zotero API */
function detectWeb(doc, url) {
// Z.debug("ibex detectWeb URL = " + url);
if (doc.location.href.match(/.*\/artikel\//i) && (ZU.xpath(doc, '//div[' + containingClass('mediaarticleSingleView') + ']//h3').length > 0)) {
function detectWeb(doc, _url) {
if (doc.getElementsByClassName('article-header').length > 0) {
return "magazineArticle";
// AJAX-ified results are currently not compatible with Zotero.
// The following condition is not useful:
// http://forums.zotero.org/discussion/18518/import-citation-from-an-ajaxbased-site/
// } else if (doc.location.href.match(/\/suche\//i) && (ZU.xpath(doc, '//div[@id = "multiSerachListContainer"]') + ']').length > 0)) {
} else if (ZU.xpath(doc, '//html/body[' + containingClass('article') + ']').length > 0) {
return "multiple";
}
}

/* Zotero API */
function doWeb(doc, url) {
// Z.debug("ibex doWeb URL = " + url);
var urls = new Array();
if (detectWeb(doc, url) == "multiple") {
var items = ZU.getItemArray(doc, doc.getElementById("mainContent").getElementsByTagName('h3'), '.*');
if (!items || countObjectProperties(items) == 0) {
return true;
}
items = Z.selectItems(items, function (items) {
if (!items) return;
ZU.processDocuments(Object.keys(items), scrape);
});
} else {
scrape(doc);
else if (getSearchResults(doc, true)) {
return "multiple";
}
return false;
}

/* Zotero API */
function scrape(doc) {
// Z.debug("ibex scrape URL = " + doc.location.href);

// Fetch meta tags and fill meta tag array for associateMeta() function
var metaTags = fetchMeta(doc);
function getSearchResults(doc, checkOnly) {
var items = {};
var found = false;

var newItem = new Z.Item('magazineArticle');
newItem.url = doc.location.href;
var shortTitle = ZU.xpath(doc, '//div[' + containingClass('mediaarticleSingleView') + ']//h3');
if (shortTitle.length > 0) {
newItem.shortTitle = ZU.trimInternal(shortTitle[0].textContent);
var rows = doc.querySelectorAll('a[class*="teaser"]');
for (let row of rows) {
let href = row.href;
let title = text(row, 'span');
if (!href || !title) continue;
if (checkOnly) return true;
found = true;
items[href] = title;
}

associateMeta(newItem, metaTags, "DC.title", "title");
associateMeta(newItem, metaTags, "DC.date", "date");
associateMeta(newItem, metaTags, "publisher", "publicationTitle");
associateMeta(newItem, metaTags, "abstract", "abstractNote");
associateMeta(newItem, metaTags, "DC.Language", "language");
// Other potentially useful meta data: DC.keywords

newItem.ISSN = "1661-7444";

var authorline = ZU.xpath(doc, '//div[' + containingClass('mediaarticleSingleView') + ']//dl/dt[. = "Autor:"]');
if (authorline.length > 0) {
authorline = ZU.trimInternal(authorline[0].nextSibling.textContent);
// Assumption of authorline: "name1[, name2] [und Name3]"
var authors = authorline.split(/,|und/);
for (var i = 0; i < authors.length && authorline.length > 0; i++) {
newItem.creators.push(ZU.cleanAuthor(authors[i], "author"));
}
}

var issueDt = ZU.xpath(doc, '//div[' + containingClass('mediaarticleSingleView') + ']//dl/dt[. = "Ausgabe:"]');
if (issueDt.length > 0) {
issueArray = issueDt[0].nextSibling.textContent.split("/");
newItem.issue = ZU.trimInternal(issueArray[0]);
newItem.volume = ZU.trimInternal(issueArray[1]);
}

// A print dialog is shown to the user. The print page listens to the
// onload JavaScript event and executes window.print().
// I do not know how to disable this behaviour.
newItem.attachments.push({title: "Beobachter Article Snapshot", mimeType: "text/html", url: doc.location.href + "/print.html", snapshot: true});

newItem.complete();
return found ? items : false;
}

/*
* There is no built-in function to count object properties which often are used as associative arrays.
*
* @param {Object} obj Associative array
* @return {int} Number of object properties = length of associative array
*/
function countObjectProperties(obj) {
var size = 0;
for (var key in obj) {
if (obj.hasOwnProperty(key)) size++;
function doWeb(doc, url) {
if (detectWeb(doc, url) == "multiple") {
Zotero.selectItems(getSearchResults(doc, false), function (items) {
if (items) ZU.processDocuments(Object.keys(items), scrape);
});
}
return size;
}

/**
* Fetch meta tags and fill meta tag array for associateMeta() function
*
* @param {element} doc Document DOM
* @return {Object} Associative array (Object) of meta tags, array[name] = value
*/
function fetchMeta(doc) {
var metaTagHTML = doc.getElementsByTagName("meta");
var metaTags = new Object();
for (var i = 0 ; i < metaTagHTML.length ; i++) {
metaTags[metaTagHTML[i].getAttribute("name")] = metaTagHTML[i].getAttribute("content");
else {
scrape(doc, url);
}
return metaTags;
}

/**
* Adds an HTML meta tag to a Zotero item field.
* The meta tags array can be filled with fetchMeta() function.
*
* @param {Object} newItem The Zotero item
* @param {Object} metaTags Associative array (Object) of meta tags, array[name] = value
* @param {String} name The meta tag name
* @param {String} zoteroField The Zotero field name in the Zotero item.
* @return {null} Nothing is returned
*/
function associateMeta(newItem, metaTags, name, zoteroField) {
if (metaTags[name]) {
newItem[zoteroField] = ZU.trimInternal(ZU.unescapeHTML(metaTags[name]));
}

/**
 * Scrape a single article page into a Zotero item.
 *
 * Most of the extraction is delegated to the Embedded Metadata translator
 * (loaded by ID below). Its `itemDone` handler then post-processes the item:
 * it appends one author per `parsely-author` meta tag, strips the trailing
 * " | Beobachter" site suffix from the title, applies the `published_at`
 * date when the page provides one, sets the ISSN and completes the item.
 *
 * @param {Document} doc Article page DOM
 * @param {String} url URL of the article page, passed through to the
 *     Embedded Metadata translator's doWeb
 * @return {undefined} Nothing is returned; the item is saved via item.complete()
 */
function scrape(doc, url) {
	var authors = doc.querySelectorAll('meta[name="parsely-author"]');
	// `attr` is a helper defined outside this block; presumably it yields an
	// empty string when the selector does not match — hence the guard below.
	var date = attr(doc, 'meta[name="published_at"]', 'content');
	var translator = Zotero.loadTranslator('web');
	// Embedded Metadata
	translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48');
	// translator.setDocument(doc);

	translator.setHandler('itemDone', function (obj, item) {
		for (let author of authors) {
			item.creators.push(ZU.cleanAuthor(author.content, "author"));
		}
		// Only touch the title when one was extracted; calling .replace on a
		// missing title would throw and the item would never be completed.
		if (item.title) {
			item.title = item.title.replace(/\s*\|\s*Beobachter/, "");
		}
		// Keep whatever date Embedded Metadata found unless the page offers
		// an explicit published_at value; never overwrite it with nothing.
		if (date) {
			item.date = date;
		}
		item.ISSN = "1661-7444";
		item.complete();
	});

	translator.getTranslatorObject(function (trans) {
		// Force the item type before the delegated translator runs.
		trans.itemType = "magazineArticle";
		trans.doWeb(doc, url);
	});
}

/**
 * Builds the XPath predicate fragment that tests whether an element's
 * 'class' attribute includes the given CSS class name as a whole token.
 * For example, to match <div class='foo bar'> use
 * "//div[" + containingClass("foo") + "]".
 *
 * The class attribute is whitespace-normalized and padded with a space on
 * both sides, so the name is only matched as a complete, space-delimited word.
 *
 * Reference: http://pivotallabs.com/users/alex/blog/articles/427-xpath-css-class-matching
 *
 * @param {String} className CSS class name
 * @return {String} XPath fragment
 */
function containingClass(className) {
	return `contains(concat(' ',normalize-space(@class),' '),' ${className} ')`;
}/** BEGIN TEST CASES **/
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
Expand All @@ -183,38 +107,67 @@ var testCases = [
},
{
"type": "web",
"url": "http://www.beobachter.ch/natur/forschung-wissen/klima-wetter/artikel/blitzschlag_suche-nicht-die-buche/",
"url": "https://www.beobachter.ch/umwelt/blitze-suche-nicht-die-buche",
"items": [
{
"itemType": "magazineArticle",
"title": "Blitze: Suche nicht die Buche!",
"creators": [
{
"firstName": "Tanja",
"lastName": "Polli",
"creatorType": "author"
}
],
"notes": [],
"tags": [],
"seeAlso": [],
"date": "2013-08-16T16:28:50+02:00",
"ISSN": "1661-7444",
"abstractNote": "Acht Tipps, was man tun und lassen soll, wenn man von Blitz und Donner überrascht wird.",
"language": "de-CH",
"libraryCatalog": "www.beobachter.ch",
"shortTitle": "Blitze",
"url": "https://www.beobachter.ch/umwelt/blitze-suche-nicht-die-buche",
"attachments": [
{
"title": "Beobachter Article Snapshot",
"mimeType": "text/html",
"snapshot": true
"title": "Snapshot",
"mimeType": "text/html"
}
],
"url": "http://www.beobachter.ch/natur/forschung-wissen/klima-wetter/artikel/blitzschlag_suche-nicht-die-buche/",
"shortTitle": "Neun Tipps, was man tun und lassen soll, wenn man von Blitz und Donner überrascht wird.",
"title": "Blitze: Suche nicht die Buche!",
"publicationTitle": "Beobachter",
"abstractNote": "Neun Tipps, was man tun und lassen soll, wenn man von Blitz und Donner überrascht wird.",
"language": "de",
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.beobachter.ch/gesundheit/medizin-krankheit/immer-schlapp-wieso-fuhlen-wir-uns-standig-mude",
"items": [
{
"itemType": "magazineArticle",
"title": "Immer schlapp: Wieso fühlen wir uns ständig müde?",
"creators": [
{
"firstName": "Andreas",
"lastName": "Grote",
"creatorType": "author"
}
],
"date": "2022-01-11T09:30:00+01:00",
"ISSN": "1661-7444",
"issue": "23. August 2013, Beobachter 17",
"volume": "2013",
"libraryCatalog": "Beobachter",
"accessDate": "CURRENT_TIMESTAMP"
"abstractNote": "Wer andauernd schläfrig und erschöpft ist, leidet – und nervt andere. Ein kurzer Selbsttest zeigt, ob Ihre Müdigkeit normal ist und was dagegen helfen kann.",
"language": "de-CH",
"libraryCatalog": "www.beobachter.ch",
"shortTitle": "Immer schlapp",
"url": "https://www.beobachter.ch/gesundheit/medizin-krankheit/immer-schlapp-wieso-fuhlen-wir-uns-standig-mude",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}
Expand Down

0 comments on commit bbe5ac5

Please sign in to comment.