-
Notifications
You must be signed in to change notification settings - Fork 525
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ad57ad7
commit 79e772d
Showing
1 changed file
with
246 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,21 @@ | ||
{ | ||
"translatorID": "d611008a-850d-4860-b607-54e1ecbcc592", | ||
"label": "E-Tiller", | ||
"creator": "jiaojiaodubai23", | ||
"target": "^https?://.*(/ch/)?.*(\\.aspx)?", | ||
"creator": "jiaojiaodubai", | ||
"target": "", | ||
"minVersion": "5.0", | ||
"maxVersion": "", | ||
"priority": 200, | ||
"inRepository": true, | ||
"translatorType": 4, | ||
"browserSupport": "gcsibv", | ||
"lastUpdated": "2024-06-20 16:34:51" | ||
"lastUpdated": "2024-09-30 10:33:44" | ||
} | ||
|
||
/* | ||
***** BEGIN LICENSE BLOCK ***** | ||
Copyright © 2022 [email protected] | ||
Copyright © 2022 jiaojiaodubai<[email protected]> | ||
This file is part of Zotero. | ||
|
@@ -35,37 +35,34 @@ | |
***** END LICENSE BLOCK ***** | ||
*/ | ||
|
||
const paths = [ | ||
'reader/view_abstract.aspx?file_no=', | ||
'/article/abstract/' | ||
]; | ||
|
||
/**
 * Decide whether the current page is a single article, a list of articles,
 * or a page this translator does not handle.
 *
 * @param {Document} doc - Page being inspected.
 * @param {string} url - Address of the page.
 * @returns {string|false} 'journalArticle', 'multiple', or false.
 */
function detectWeb(doc, url) {
	const haveMeta = !!doc.querySelector('meta[name="citation_title"]');
	// True when the URL contains `path` AND the page contains `selector`.
	const detect = (path, selector) => url.includes(path) && !!doc.querySelector(selector);
	// Abstract pages that also expose a citation_title meta tag.
	if (haveMeta && (
		detect('/article/abstract/', '.article_abstract_main .p1 > a#ExportUrl')
		|| detect('/reader/view_abstract.aspx?', 'table#QueryUI table table table span#FileTitle')
	)) {
		return 'journalArticle';
	}
	// Abstract pages recognised by their reference text alone; no meta tag is
	// required for this layout.
	if (detect('/reader/view_abstract.aspx?', 'table.front_table span#ReferenceText')) {
		return 'journalArticle';
	}
	if (getSearchResults(doc, true)) {
		return 'multiple';
	}
	return false;
}
|
||
function getSearchResults(doc, checkOnly) { | ||
var items = {}; | ||
var found = false; | ||
var rows = Array.from(doc.querySelectorAll(paths.map(path => `a[href*="${path}"]`).join(','))) | ||
const items = {}; | ||
let found = false; | ||
const rows = Array.from(doc.querySelectorAll('a[href*="article/abstract/"],a[href*="reader/view_abstract.aspx?"]')) | ||
.filter(element => !(/^[[【〔]?\s*摘要/.test(ZU.trimInternal(element.textContent)))); | ||
for (let row of rows) { | ||
let href = row.href; | ||
let title = ZU.trimInternal(row.textContent); | ||
for (const row of rows) { | ||
const href = row.href; | ||
const title = ZU.trimInternal(row.textContent); | ||
if (!href || !title) continue; | ||
if (checkOnly) return true; | ||
found = true; | ||
|
@@ -76,11 +73,9 @@ function getSearchResults(doc, checkOnly) { | |
|
||
async function doWeb(doc, url) { | ||
if (detectWeb(doc, url) == 'multiple') { | ||
let items = await Zotero.selectItems(getSearchResults(doc, false)); | ||
Z.debug('selected items:'); | ||
Z.debug(items); | ||
const items = await Zotero.selectItems(getSearchResults(doc, false)); | ||
if (!items) return; | ||
for (let url of Object.keys(items)) { | ||
for (const url of Object.keys(items)) { | ||
await scrape(await requestDocument(url)); | ||
} | ||
} | ||
|
@@ -90,48 +85,55 @@ async function doWeb(doc, url) { | |
} | ||
|
||
/**
 * Scrape one article page, picking the strategy from what the page offers:
 * pages with a citation_title meta tag go to scrapeMeta, all others go to
 * scrapeText.
 *
 * @param {Document} doc - Article page.
 * @param {string} [url] - Page address; defaults to doc.location.href.
 * @returns {Promise<void>}
 */
async function scrape(doc, url = doc.location.href) {
	if (doc.querySelector('meta[name="citation_title"]')) {
		await scrapeMeta(doc, url);
	}
	else {
		await scrapeText(doc, url);
	}
}
|
||
async function scrapeMeta(doc, url = doc.location.href) { | ||
const translator = Zotero.loadTranslator('web'); | ||
// Embedded Metadata | ||
translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48'); | ||
translator.setDocument(doc); | ||
translator.setHandler('itemDone', (_obj, item) => { | ||
item.extra = ''; | ||
const extra = new Extra(); | ||
if (item.title.includes('(网络首发、推荐阅读)')) { | ||
item.extra += addExtra('Status', 'advance online publication'); | ||
extra.set('Status', 'advance online publication', true); | ||
item.title = item.title.replace(/(网络首发、推荐阅读)$/, ''); | ||
} | ||
item.abstractNote = trimAbstract(item.abstractNote); | ||
item.pages = cleanPages(item.pages); | ||
if (/^\d{8}$/.test(item.date)) { | ||
item.date = `${item.date.substring(0, 4)}-${item.date.substring(4, 6)}-${item.date.substring(6, 8)}`; | ||
} | ||
item.language = { | ||
cn: 'zh-CN', | ||
en: 'en-US' | ||
}[item.language]; | ||
item.extra += addExtra('original-title', attr(doc, 'meta[name="citation_title"]', 'content', 1)); | ||
let creators = doc.querySelector('meta[name="citation_author"]') | ||
extra.set('original-title', ZU.capitalizeTitle(attr(doc, 'meta[name="citation_title"]', 'content', 1) || text(doc, '#EnTitleValue')), true); | ||
const creators = doc.querySelector('meta[name="citation_author"]') | ||
? Array.from(doc.querySelectorAll('meta[name="citation_author"]')).map(element => element.content) | ||
: attr(doc, 'meta[name="citation_authors"]', 'content', 0).split(/[,;,;]/); | ||
if (creators.length) { | ||
Z.debug(creators); | ||
item.creators = creators | ||
.map(creator => creator.replace(/(<.+>)?[\d,\s]+(<.+>)?$/, '')) | ||
.filter(creator => creator) | ||
.map(creator => ZU.cleanAuthor(creator, 'author')); | ||
} | ||
item.creators.forEach((creator) => { | ||
if (/[\u4e00-\u9fa5]/.test(creator.lastName)) { | ||
creator.lastName = creator.firstName + creator.lastName; | ||
creator.firstName = ''; | ||
creator.fieldMode = 1; | ||
item.creators = creators | ||
.map(creator => creator.replace(/(<.+>)?[\d,\s]+(<.+>)?$/, '')) | ||
.filter(creator => creator) | ||
.map(creator => cleanAuthor(creator)); | ||
const enCreators = attr(doc, 'meta[name="citation_authors"]', 'content', 1).split(/[,;,;]/); | ||
if (enCreators.length) { | ||
const creatorsExt = JSON.parse(JSON.stringify(item.creators)); | ||
for (let i = 0; i < item.creators.length; i++) { | ||
creatorsExt[i].original = ZU.capitalizeName(enCreators[i]); | ||
} | ||
}); | ||
let enCreators = attr(doc, 'meta[name="citation_authors"]', 'content', 1).split(/[,;,;]/); | ||
let creatorsExt = JSON.parse(JSON.stringify(item.creators)); | ||
for (let i = 0; i < item.creators.length; i++) { | ||
creatorsExt[i].original = enCreators[i]; | ||
extra.set('creatorsExt', JSON.stringify(creatorsExt)); | ||
} | ||
item.extra = extra.toString(); | ||
if (item.tags.length == 1) { | ||
item.tags = item.tags[0].split(/[;;]\s*/).map(tag => ({ tag: tag })); | ||
item.tags = item.tags[0].split(/[,;,;、]\s*/).map(tag => ({ tag: tag })); | ||
} | ||
item.extra += addExtra('creatorsExt', JSON.stringify(creatorsExt)); | ||
let pdfLink = doc.querySelector('a[href*="create_pdf"]'); | ||
const pdfLink = doc.querySelector('a[href*="create_pdf"],h1 > a[href*="/pdf/"]'); | ||
if (pdfLink && !item.attachments.some(attachment => attachment.mimeType == 'application/pdf')) { | ||
item.attachments.push({ | ||
url: pdfLink.href, | ||
|
@@ -142,14 +144,127 @@ async function scrape(doc, url = doc.location.href) { | |
item.complete(); | ||
}); | ||
|
||
let em = await translator.getTranslatorObject(); | ||
const em = await translator.getTranslatorObject(); | ||
em.itemType = 'journalArticle'; | ||
await em.doWeb(doc, url); | ||
} | ||
|
||
function addExtra(key, value) { | ||
return value | ||
? `${key}: ${value}\n` | ||
/**
 * Build a journalArticle item from the visible elements of an abstract page
 * (title, abstract cells, reference text, author links, keywords) and
 * complete it.
 *
 * @param {Document} doc - Abstract page.
 * @param {string} [url] - Page address; defaults to doc.location.href.
 * @returns {Promise<void>}
 */
async function scrapeText(doc, url = doc.location.href) {
	const newItem = new Z.Item('journalArticle');
	const extra = new Extra();
	// innerHTML is kept from the original code.
	// NOTE(review): presumably this preserves inline markup such as <sub>/<sup>
	// in titles - confirm before switching to textContent.
	const titleElm = doc.querySelector('#FileTitle');
	newItem.title = titleElm ? titleElm.innerHTML : '';
	extra.set('original-title', ZU.capitalizeTitle(text(doc, '#EnTitle')), true);
	// Which td.unnamed3 cell holds the abstract depends on how many cells exist.
	const abstractElm = doc.querySelectorAll('td.unnamed3');
	if (abstractElm.length === 2) {
		newItem.abstractNote = trimAbstract(abstractElm.item(0).textContent);
	}
	else if (abstractElm.length === 4) {
		newItem.abstractNote = trimAbstract(abstractElm.item(1).textContent);
	}
	const referenceText = text(doc, '#ReferenceText');
	Z.debug(referenceText);
	// Last period-delimited segment of the reference, split on commas:
	// [publication, date, "volume(issue):pages"].
	const pubInfo = tryMatch(referenceText, /\.\s?([^.]+)\.?$/, 1).split(/[,,]\s?/);
	Z.debug(pubInfo);
	newItem.publicationTitle = pubInfo[0];
	newItem.volume = tryMatch(pubInfo[2], /0*(\d+)[((]/, 1);
	newItem.issue = tryMatch(pubInfo[2], /\(([a-z\d]+)\)/i, 1).replace(/0*(\d+)/, '$1');
	newItem.pages = cleanPages(tryMatch(pubInfo[2], /:\s?(.+)$/, 1));
	newItem.date = pubInfo[1];
	newItem.language = 'zh-CN';
	newItem.DOI = ZU.cleanDOI(text(doc, '#DOI'));
	newItem.url = url;
	const urlElm = doc.querySelector('a#URL');
	if (urlElm) {
		newItem.url = urlElm.href;
	}
	extra.set('view', text(doc, '#ClickNum'));
	// Store the counter shown on the page, not the selector string itself.
	// NOTE(review): the bundled test case that expects the literal
	// "download: #PDFClickNum" must be regenerated after this fix.
	extra.set('download', text(doc, '#PDFClickNum'));
	newItem.extra = extra.toString();
	const authorLinks = Array.from(doc.querySelectorAll('#Author td > a[href*="field=author"]'));
	// Prefer the author links. Otherwise fall back to the part of the reference
	// text before its first period (the original passed the pubInfo array to
	// tryMatch here, which cannot be matched against a pattern).
	let creators = authorLinks.length
		? authorLinks.map(elm => ZU.trimInternal(elm.textContent))
		: [tryMatch(referenceText, /^([^.]+)\./, 1)];
	if (creators.length === 1) {
		creators = creators[0].split(/[,;;,、]/);
	}
	newItem.creators = creators
		.map(name => name.trim())
		.filter(name => name)
		.map(name => cleanAuthor(name));
	const pdfLink = doc.querySelector('a[href*="create_pdf"]');
	if (pdfLink) {
		newItem.attachments.push({
			url: pdfLink.href,
			title: 'Full Text PDF',
			mimeType: 'application/pdf'
		});
	}
	let tags = Array.from(doc.querySelectorAll('#KeyWord > a')).map(elm => elm.textContent);
	if (tags.length === 1) {
		tags = tags[0].split(/[,;,;、]\s*/);
	}
	newItem.tags = tags.filter(tag => tag);
	newItem.complete();
}
|
||
/**
 * Ordered collection of "key: value" lines for an item's Extra field.
 * Each entry is { key, val, csl }; entries flagged `csl` are printed first
 * by toString().
 */
class Extra {
	constructor() {
		this.fields = [];
	}

	/**
	 * Append an entry without checking for an existing key.
	 *
	 * @param {string} key
	 * @param {*} val
	 * @param {boolean} [csl=false] - Print this entry in the leading group.
	 */
	push(key, val, csl = false) {
		this.fields.push({ key: key, val: val, csl: csl });
	}

	/**
	 * Update the first entry whose key equals `key` (ignoring case), or append
	 * a new entry. `csl` is only applied when a new entry is created.
	 *
	 * Keys are compared as plain strings. Building a RegExp from the key would
	 * throw or mismatch for keys containing characters such as "+", "(" or ".".
	 */
	set(key, val, csl = false) {
		const wanted = String(key).toLowerCase();
		const target = this.fields.find(obj => String(obj.key).toLowerCase() === wanted);
		if (target) {
			target.val = val;
		}
		else {
			this.push(key, val, csl);
		}
	}

	/**
	 * @param {string} key
	 * @returns {*} Value of the first entry whose key equals `key` (ignoring
	 *   case), or '' when there is none.
	 */
	get(key) {
		const wanted = String(key).toLowerCase();
		const result = this.fields.find(obj => String(obj.key).toLowerCase() === wanted);
		return result
			? result.val
			: '';
	}

	/**
	 * Serialise to newline-separated "key: val" lines in this order: csl
	 * entries, then `history`, then the remaining entries.
	 * Entries with a falsy value are dropped from this.fields as a side effect.
	 *
	 * @param {string} [history=''] - Existing text to keep between the groups.
	 * @returns {string}
	 */
	toString(history = '') {
		this.fields = this.fields.filter(obj => obj.val);
		const format = obj => `${obj.key}: ${obj.val}`;
		return [
			this.fields.filter(obj => obj.csl).map(format).join('\n'),
			history,
			this.fields.filter(obj => !obj.csl).map(format).join('\n')
		].filter(part => part).join('\n');
	}
}
|
||
/**
 * Turn a raw author string into a creator object of type 'author'.
 * Trailing affiliation markers (digits, commas, asterisks, optionally in
 * parentheses) are removed first. Names whose last name contains CJK
 * ideographs are merged into a single-field name (fieldMode 1).
 *
 * @param {string} name - Raw author text.
 * @returns {Object} Creator as returned by ZU.cleanAuthor, possibly merged.
 */
function cleanAuthor(name) {
	// https://zkxb.hnust.edu.cn/ch/reader/view_abstract.aspx?file_no=202303007&flag=1
	const creator = ZU.cleanAuthor((name || '').replace(/[((]?[\d,*]*[))]?$/, ''), 'author');
	if (/[\u4e00-\u9fff]/.test(creator.lastName)) {
		// `|| ''` keeps a missing firstName from being concatenated as "undefined".
		creator.lastName = (creator.firstName || '') + creator.lastName;
		creator.firstName = '';
		creator.fieldMode = 1;
	}
	return creator;
}
|
||
/**
 * Normalise a page string: "~" becomes "-", "+" and "," separators become
 * ", ", and leading zeros are removed from every number.
 *
 * @param {string} page - Raw page text.
 * @returns {string} Normalised pages, or '' for empty input.
 */
function cleanPages(page) {
	if (!page) return '';
	return page.replace(/~/g, '-')
		// Absorb surrounding whitespace so "12, 15" does not become "12,  15".
		.replace(/\s*[+,]\s*/g, ', ')
		// Global flag: strip leading zeros from every number, not only the first.
		.replace(/0*(\d+)/g, '$1');
}
|
||
/**
 * Cut an abstract at the first "。;" sequence from which the rest of the
 * text contains no line break, keeping the full stop and dropping what
 * follows it.
 *
 * @param {string} abstract - Raw abstract text.
 * @returns {string} Trimmed abstract, or '' for empty or non-string input.
 */
function trimAbstract(abstract) {
	if (!abstract || typeof abstract !== 'string') return '';
	return abstract.replace(/。;.*$/, '。');
}
|
||
/**
 * Match `string` against `pattern` and return one capture group, never
 * throwing on missing or non-string input.
 *
 * @param {string} string - Text to search.
 * @param {RegExp} pattern - Pattern to apply.
 * @param {number} [index=0] - Capture group to return (0 = whole match).
 * @returns {string} The requested group, or '' when the input is empty or
 *   not a string, there is no match, or the group is empty.
 */
function tryMatch(string, pattern, index = 0) {
	// Arrays and other non-strings have no usable .match(); treat as "no match".
	if (!string || typeof string !== 'string') return '';
	const match = string.match(pattern);
	return (match && match[index])
		? match[index]
		: '';
}
|
||
|
@@ -274,8 +389,9 @@ var testCases = [ | |
"fieldMode": 1 | ||
} | ||
], | ||
"date": "20231211", | ||
"date": "2023-12-11", | ||
"DOI": "10.13506/j.cnki.jpr.2023.11.005", | ||
"abstractNote": "目的 采用正交设计综合加权评分和层次分析法相结合的方法,优化黄芩微波酒制工艺。方法 采用单因素试验优选加酒量、闷润时间、微波功率、微波时间,然后以微波酒制工艺中的加酒量、闷润时间、微波功率、微波时间为考察因素,采用层次分析法确定各指标权重系数,以黄芩苷、汉黄芩苷、黄芩素、汉黄芩素含量进行综合加权评分为评价指标,优化黄芩微波酒制工艺。结果 取生黄芩饮片适量,加入10%辅料酒,置密闭容器内闷润60 min,在300 W功率下微波5 min,经三次验证实验,各指标成分的平均含量分别为15.65%,5.94%,3.00%,0.33%,平均评分98.24分,RSD为0.99%(n=3)。结论 优化所得微波炮制酒工艺稳定,重复性良好,可应用于微波酒黄芩的炮制。", | ||
"extra": "creatorsExt: [{\"firstName\":\"\",\"lastName\":\"李利华\",\"creatorType\":\"author\",\"fieldMode\":1,\"original\":\"\"},{\"firstName\":\"\",\"lastName\":\"王巍\",\"creatorType\":\"author\",\"fieldMode\":1},{\"firstName\":\"\",\"lastName\":\"张一美\",\"creatorType\":\"author\",\"fieldMode\":1},{\"firstName\":\"\",\"lastName\":\"赵梦辉\",\"creatorType\":\"author\",\"fieldMode\":1},{\"firstName\":\"\",\"lastName\":\"鞠成国\",\"creatorType\":\"author\",\"fieldMode\":1}]", | ||
"issue": "11", | ||
"language": "zh-CN", | ||
|
@@ -316,6 +432,74 @@ var testCases = [ | |
} | ||
] | ||
}, | ||
{ | ||
"type": "web", | ||
"url": "http://hkfdj.ijournals.cn/hkfdj/ch/reader/view_abstract.aspx?file_no=20160403&flag=1", | ||
"items": [ | ||
{ | ||
"itemType": "journalArticle", | ||
"title": "基于模型的系统工程概述", | ||
"creators": [ | ||
{ | ||
"firstName": "", | ||
"lastName": "朱静", | ||
"creatorType": "author", | ||
"fieldMode": 1 | ||
}, | ||
{ | ||
"firstName": "", | ||
"lastName": "杨晖", | ||
"creatorType": "author", | ||
"fieldMode": 1 | ||
}, | ||
{ | ||
"firstName": "", | ||
"lastName": "高亚辉", | ||
"creatorType": "author", | ||
"fieldMode": 1 | ||
}, | ||
{ | ||
"firstName": "", | ||
"lastName": "姚太克", | ||
"creatorType": "author", | ||
"fieldMode": 1 | ||
} | ||
], | ||
"date": "2016", | ||
"abstractNote": "由于航空领域涉及的系统日益高度复杂,为更好推进基于模型的系统工程(Model Based System Engineering ,MBSE)研发体系,通过从当前遇到的问题、推行基于模型的系统工程的必要性、优势、未来的挑战等方面进行了较为详细地阐述。基于模型的系统工程研发体系具有知识表示的无二义、系统设计的一体化、沟通交流的高效率等优势,是未来发展的大趋势。", | ||
"extra": "original-title: Summary of Model Based System Engineering\nview: 9490\ndownload: #PDFClickNum", | ||
"issue": "4", | ||
"language": "zh-CN", | ||
"libraryCatalog": "E-Tiller", | ||
"pages": "12-16", | ||
"publicationTitle": "航空发动机", | ||
"url": "http://hkfdj.ijournals.cn/hkfdj/ch/reader/view_abstract.aspx?file_no=20160403&flag=1", | ||
"volume": "42", | ||
"attachments": [ | ||
{ | ||
"title": "Full Text PDF", | ||
"mimeType": "application/pdf" | ||
} | ||
], | ||
"tags": [ | ||
{ | ||
"tag": "模型" | ||
}, | ||
{ | ||
"tag": "研发体系" | ||
}, | ||
{ | ||
"tag": "系统工程" | ||
}, | ||
{ | ||
"tag": "航空发动机" | ||
} | ||
], | ||
"notes": [], | ||
"seeAlso": [] | ||
} | ||
] | ||
}, | ||
{ | ||
"type": "web", | ||
"url": "http://www.stae.com.cn/jsygc/home", | ||
|