/**
 * Decide what kind of E-Tiller page `doc` is.
 *
 * A page counts as a single article when the URL and a page-specific element
 * both match one of the known abstract-page layouts; two of those layouts
 * additionally require a `citation_title` meta tag. Otherwise the page is
 * reported as a list when `getSearchResults` finds at least one link.
 *
 * @param {Document} doc - Page being inspected.
 * @param {string} url - Address of the page.
 * @returns {string|boolean} 'journalArticle', 'multiple', or false.
 */
function detectWeb(doc, url) {
	const hasCitationMeta = !!doc.querySelector('meta[name="citation_title"]');
	// Truthy when the URL contains `fragment` and the page has an element matching `css`.
	const pageMatches = (fragment, css) => url.includes(fragment) && doc.querySelector(css);

	const isMetaArticle = hasCitationMeta && (
		pageMatches('/article/abstract/', '.article_abstract_main .p1 > a#ExportUrl')
		|| pageMatches('/reader/view_abstract.aspx?', 'table#QueryUI table table table span#FileTitle')
	);
	if (isMetaArticle) {
		return 'journalArticle';
	}
	// Layout without citation meta tags: identified by its reference-text span.
	if (pageMatches('/reader/view_abstract.aspx?', 'table.front_table span#ReferenceText')) {
		return 'journalArticle';
	}
	return getSearchResults(doc, true) ? 'multiple' : false;
}
/**
 * Scrape one article page, choosing the strategy from the page itself:
 * pages carrying a `citation_title` meta tag go through `scrapeMeta`,
 * all others through `scrapeText`.
 *
 * @param {Document} doc - Article page to scrape.
 * @param {string} [url] - Address of the page; defaults to the document's own location.
 * @returns {Promise<void>}
 */
async function scrape(doc, url = doc.location.href) {
	const hasCitationMeta = Boolean(doc.querySelector('meta[name="citation_title"]'));
	const scraper = hasCitationMeta ? scrapeMeta : scrapeText;
	await scraper(doc, url);
}
trimAbstract(item.abstractNote); + item.pages = cleanPages(item.pages); + if (/^\d{8}$/.test(item.date)) { + item.date = `${item.date.substring(0, 4)}-${item.date.substring(4, 6)}-${item.date.substring(6, 8)}`; + } item.language = { cn: 'zh-CN', en: 'en-US' }[item.language]; - item.extra += addExtra('original-title', attr(doc, 'meta[name="citation_title"]', 'content', 1)); - let creators = doc.querySelector('meta[name="citation_author"]') + extra.set('original-title', ZU.capitalizeTitle(attr(doc, 'meta[name="citation_title"]', 'content', 1) || text(doc, '#EnTitleValue')), true); + const creators = doc.querySelector('meta[name="citation_author"]') ? Array.from(doc.querySelectorAll('meta[name="citation_author"]')).map(element => element.content) : attr(doc, 'meta[name="citation_authors"]', 'content', 0).split(/[,;,;]/); - if (creators.length) { - Z.debug(creators); - item.creators = creators - .map(creator => creator.replace(/(<.+>)?[\d,\s]+(<.+>)?$/, '')) - .filter(creator => creator) - .map(creator => ZU.cleanAuthor(creator, 'author')); - } - item.creators.forEach((creator) => { - if (/[\u4e00-\u9fa5]/.test(creator.lastName)) { - creator.lastName = creator.firstName + creator.lastName; - creator.firstName = ''; - creator.fieldMode = 1; + item.creators = creators + .map(creator => creator.replace(/(<.+>)?[\d,\s]+(<.+>)?$/, '')) + .filter(creator => creator) + .map(creator => cleanAuthor(creator)); + const enCreators = attr(doc, 'meta[name="citation_authors"]', 'content', 1).split(/[,;,;]/); + if (enCreators.length) { + const creatorsExt = JSON.parse(JSON.stringify(item.creators)); + for (let i = 0; i < item.creators.length; i++) { + creatorsExt[i].original = ZU.capitalizeName(enCreators[i]); } - }); - let enCreators = attr(doc, 'meta[name="citation_authors"]', 'content', 1).split(/[,;,;]/); - let creatorsExt = JSON.parse(JSON.stringify(item.creators)); - for (let i = 0; i < item.creators.length; i++) { - creatorsExt[i].original = enCreators[i]; + 
/**
 * Scrape an article page by reading its visible elements (#FileTitle,
 * #ReferenceText, #Author, #KeyWord, ...) instead of `citation_*` meta tags.
 *
 * Publication title, date, volume, issue and pages are parsed from the last
 * dot-delimited segment of the #ReferenceText citation string.
 *
 * @param {Document} doc - Abstract page to read.
 * @param {string} [url] - Address stored on the item unless the page has an `a#URL` link.
 * @returns {Promise<void>} Resolves after `complete()` has been called on the new item.
 */
async function scrapeText(doc, url = doc.location.href) {
	const newItem = new Z.Item('journalArticle');
	const extra = new Extra();

	// Guarded: a page without #FileTitle used to abort the whole scrape with a TypeError.
	// NOTE(review): innerHTML is kept from the original, presumably to retain inline markup in titles — confirm.
	const titleElm = doc.querySelector('#FileTitle');
	newItem.title = titleElm ? titleElm.innerHTML : '';
	extra.set('original-title', ZU.capitalizeTitle(text(doc, '#EnTitle')), true);

	// The abstract sits in the first of two, or the second of four, `td.unnamed3` cells.
	const abstractCells = doc.querySelectorAll('td.unnamed3');
	if (abstractCells.length === 2) {
		newItem.abstractNote = trimAbstract(abstractCells[0].textContent);
	}
	else if (abstractCells.length === 4) {
		newItem.abstractNote = trimAbstract(abstractCells[1].textContent);
	}

	const referenceText = text(doc, '#ReferenceText');
	Z.debug(referenceText);
	// Full-width punctuation is written as escapes so the intent survives re-encoding.
	const pubInfo = tryMatch(referenceText, /\.\s?([^.]+)\.?$/, 1).split(/[,\uFF0C]\s?/);
	Z.debug(pubInfo);
	const [publicationTitle = '', date = '', locator = ''] = pubInfo;
	newItem.publicationTitle = publicationTitle;
	newItem.volume = tryMatch(locator, /0*(\d+)[(\uFF08]/, 1);
	newItem.issue = tryMatch(locator, /[(\uFF08]([a-z\d]+)[)\uFF09]/i, 1).replace(/0*(\d+)/, '$1');
	newItem.pages = cleanPages(tryMatch(locator, /[:\uFF1A]\s?(.+)$/, 1));
	newItem.date = date;
	newItem.language = 'zh-CN';
	newItem.DOI = ZU.cleanDOI(text(doc, '#DOI'));

	newItem.url = url;
	const urlElm = doc.querySelector('a#URL');
	if (urlElm) {
		newItem.url = urlElm.href;
	}

	extra.set('view', text(doc, '#ClickNum'));
	// Fixed: the selector string itself used to be stored as the download count.
	extra.set('download', text(doc, '#PDFClickNum'));
	newItem.extra = extra.toString();

	// Prefer the author links; otherwise fall back to the text before the first
	// period of the citation. Fixed: the fallback used to pass the `pubInfo`
	// array to tryMatch (TypeError) and then treated a string as an array.
	const authorLinks = Array.from(doc.querySelectorAll('#Author td > a[href*="field=author"]'));
	let creators = authorLinks.length
		? authorLinks.map(elm => ZU.trimInternal(elm.textContent))
		: [tryMatch(referenceText, /^([^.]+)\./, 1)];
	if (creators.length === 1) {
		// A single entry may hold several delimiter-separated names.
		creators = creators[0].split(/[,;\uFF0C\uFF1B、]/);
	}
	newItem.creators = creators
		.map(name => name.trim())
		.filter(name => name)
		.map(name => cleanAuthor(name));

	const pdfLink = doc.querySelector('a[href*="create_pdf"]');
	if (pdfLink) {
		newItem.attachments.push({
			url: pdfLink.href,
			title: 'Full Text PDF',
			mimeType: 'application/pdf'
		});
	}

	let tags = Array.from(doc.querySelectorAll('#KeyWord > a')).map(elm => elm.textContent);
	if (tags.length === 1) {
		tags = tags[0].split(/[,;\uFF0C\uFF1B、]\s*/);
	}
	newItem.tags = tags.map(tag => tag.trim()).filter(tag => tag);
	newItem.complete();
}

/**
 * Ordered collection of `key: value` lines for an item's Extra field.
 * Keys are compared case-insensitively; each entry carries a `csl` flag that
 * decides whether it is printed before or after the `history` text.
 */
class Extra {
	constructor() {
		this.fields = [];
	}

	/** Append an entry without checking for an existing key. */
	push(key, val, csl = false) {
		this.fields.push({ key: key, val: val, csl: csl });
	}

	/**
	 * Update the value of an existing entry (its `csl` flag is left as is),
	 * or append a new entry when the key is not present yet.
	 */
	set(key, val, csl = false) {
		const target = this.find(key);
		if (target) {
			target.val = val;
		}
		else {
			this.push(key, val, csl);
		}
	}

	/** @returns the stored value for `key`, or '' when there is no such entry. */
	get(key) {
		const result = this.find(key);
		return result
			? result.val
			: '';
	}

	/**
	 * Look an entry up by key, ignoring case. Plain string comparison replaces
	 * the RegExp formerly built from the raw key, which mis-matched keys
	 * containing `.` and threw on keys containing an unbalanced `(`.
	 */
	find(key) {
		const wanted = String(key).toLowerCase();
		return this.fields.find(obj => String(obj.key).toLowerCase() === wanted);
	}

	/**
	 * Render as newline-separated lines: csl entries, then `history`, then the
	 * remaining entries. Entries with a falsy value are removed from
	 * `this.fields` as a side effect, as in the original implementation.
	 */
	toString(history = '') {
		this.fields = this.fields.filter(obj => obj.val);
		const render = obj => `${obj.key}: ${obj.val}`;
		return [
			this.fields.filter(obj => obj.csl).map(render).join('\n'),
			history,
			this.fields.filter(obj => !obj.csl).map(render).join('\n')
		].filter(part => part).join('\n');
	}
}

/**
 * Build an author creator from a raw name: a trailing run of digits, commas
 * and asterisks (optionally parenthesised) is removed first, and names whose
 * last-name part contains CJK characters are stored as one single-field name.
 *
 * @param {string} name - Raw author name.
 * @returns {object} Creator object produced by `ZU.cleanAuthor`.
 */
function cleanAuthor(name) {
	// https://zkxb.hnust.edu.cn/ch/reader/view_abstract.aspx?file_no=202303007&flag=1
	const creator = ZU.cleanAuthor(name.replace(/[(\uFF08]?[\d,*]*[)\uFF09]?$/, ''), 'author');
	if (/[\u4e00-\u9fff]/.test(creator.lastName)) {
		creator.lastName = creator.firstName + creator.lastName;
		creator.firstName = '';
		creator.fieldMode = 1;
	}
	return creator;
}

/**
 * Normalise a page string: tildes become hyphens, `+`/comma separators become
 * ", " and leading zeros are stripped from every number.
 *
 * @param {string} page - Raw page string; falsy input yields ''.
 * @returns {string}
 */
function cleanPages(page) {
	if (!page) return '';
	return page
		.replace(/[~\uFF5E]/g, '-')
		// Absorb surrounding blanks so "12, 15" is not turned into "12,  15".
		.replace(/\s*[+,\uFF0C]\s*/g, ', ')
		// Global flag: previously only the first number lost its leading zeros.
		.replace(/0*(\d+)/g, '$1');
}

/**
 * Cut an abstract off after the first "。" that is directly followed by a
 * semicolon, dropping the rest of that line when it is the last line.
 *
 * @param {string} abstract - Raw abstract; falsy input yields ''.
 * @returns {string}
 */
function trimAbstract(abstract) {
	if (!abstract) return '';
	// NOTE(review): the full-width semicolon is an assumed variant of the separator — confirm against real pages.
	return abstract.replace(/。[;\uFF1B].*$/, '。');
}

/**
 * Match `pattern` against `string` and return one capture group.
 *
 * @param {string} string - Text to search; empty or non-string input yields ''.
 * @param {RegExp} pattern - Pattern to apply.
 * @param {number} [index=0] - Index of the group to return.
 * @returns {string} The requested group, or '' when absent.
 */
function tryMatch(string, pattern, index = 0) {
	if (typeof string !== 'string' || !string) return '';
	const match = string.match(pattern);
	return (match && match[index])
		? match[index]
		: '';
}
"language": "zh-CN", + "libraryCatalog": "E-Tiller", + "pages": "12-16", + "publicationTitle": "航空发动机", + "url": "http://hkfdj.ijournals.cn/hkfdj/ch/reader/view_abstract.aspx?file_no=20160403&flag=1", + "volume": "42", + "attachments": [ + { + "title": "Full Text PDF", + "mimeType": "application/pdf" + } + ], + "tags": [ + { + "tag": "模型" + }, + { + "tag": "研发体系" + }, + { + "tag": "系统工程" + }, + { + "tag": "航空发动机" + } + ], + "notes": [], + "seeAlso": [] + } + ] + }, { "type": "web", "url": "http://www.stae.com.cn/jsygc/home",