Skip to content

Commit

Permalink
close #365
Browse files Browse the repository at this point in the history
  • Loading branch information
jiaojiaodubai committed Sep 30, 2024
1 parent ad57ad7 commit 79e772d
Showing 1 changed file with 246 additions and 62 deletions.
308 changes: 246 additions & 62 deletions E-Tiller.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
{
"translatorID": "d611008a-850d-4860-b607-54e1ecbcc592",
"label": "E-Tiller",
"creator": "jiaojiaodubai23",
"target": "^https?://.*(/ch/)?.*(\\.aspx)?",
"creator": "jiaojiaodubai",
"target": "",
"minVersion": "5.0",
"maxVersion": "",
"priority": 200,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2024-06-20 16:34:51"
"lastUpdated": "2024-09-30 10:33:44"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2022 [email protected]
Copyright © 2022 jiaojiaodubai<[email protected]>
This file is part of Zotero.
Expand All @@ -35,37 +35,34 @@
***** END LICENSE BLOCK *****
*/

const paths = [
'reader/view_abstract.aspx?file_no=',
'/article/abstract/'
];

function detectWeb(doc, url) {
let insite = Array.from(doc.querySelectorAll(`div[class*="foot"], div[id*="foot"]`))
.some(foot => (//.test(foot.textContent)));
Z.debug(`incite: ${insite}`);
if (!insite) return false;
for (let path of paths) {
if (url.includes(path) && doc.querySelector('meta[name="citation_title"]')) {
Z.debug(`match path: ${path}`);
return 'journalArticle';
}
else if (doc.querySelector('meta[name]') && getSearchResults(doc, true)) {
Z.debug(`match path: ${path}`);
return 'multiple';
}
const haveMeta = !!doc.querySelector('meta[name="citation_title"]');
function detect(path, selctor) {
return url.includes(path) && doc.querySelector(selctor);
}
if (haveMeta && (
detect('/article/abstract/', '.article_abstract_main .p1 > a#ExportUrl')
|| detect('/reader/view_abstract.aspx?', 'table#QueryUI table table table span#FileTitle')
)) {
return 'journalArticle';
}
else if (detect('/reader/view_abstract.aspx?', 'table.front_table span#ReferenceText')) {
return 'journalArticle';
}
if (getSearchResults(doc, true)) {
return 'multiple';
}
return false;
}

function getSearchResults(doc, checkOnly) {
var items = {};
var found = false;
var rows = Array.from(doc.querySelectorAll(paths.map(path => `a[href*="${path}"]`).join(',')))
const items = {};
let found = false;
const rows = Array.from(doc.querySelectorAll('a[href*="article/abstract/"],a[href*="reader/view_abstract.aspx?"]'))
.filter(element => !(/^[[]?\s*/.test(ZU.trimInternal(element.textContent))));
for (let row of rows) {
let href = row.href;
let title = ZU.trimInternal(row.textContent);
for (const row of rows) {
const href = row.href;
const title = ZU.trimInternal(row.textContent);
if (!href || !title) continue;
if (checkOnly) return true;
found = true;
Expand All @@ -76,11 +73,9 @@ function getSearchResults(doc, checkOnly) {

async function doWeb(doc, url) {
if (detectWeb(doc, url) == 'multiple') {
let items = await Zotero.selectItems(getSearchResults(doc, false));
Z.debug('selected items:');
Z.debug(items);
const items = await Zotero.selectItems(getSearchResults(doc, false));
if (!items) return;
for (let url of Object.keys(items)) {
for (const url of Object.keys(items)) {
await scrape(await requestDocument(url));
}
}
Expand All @@ -90,48 +85,55 @@ async function doWeb(doc, url) {
}

/**
 * Scrape a single article page, choosing the strategy from the page content.
 *
 * Pages that carry a `citation_title` meta tag are handed to `scrapeMeta`;
 * every other page is handed to `scrapeText`.
 *
 * @param {Document} doc - Article page to scrape.
 * @param {string} [url] - Page URL; defaults to `doc.location.href`.
 * @returns {Promise<void>} Resolves once the chosen scraper has finished.
 */
async function scrape(doc, url = doc.location.href) {
	// No translator object is created here: the previous local was never
	// used by this dispatcher.
	if (doc.querySelector('meta[name="citation_title"]')) {
		await scrapeMeta(doc, url);
	}
	else {
		await scrapeText(doc, url);
	}
}

async function scrapeMeta(doc, url = doc.location.href) {
const translator = Zotero.loadTranslator('web');
// Embedded Metadata
translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48');
translator.setDocument(doc);
translator.setHandler('itemDone', (_obj, item) => {
item.extra = '';
const extra = new Extra();
if (item.title.includes('(网络首发、推荐阅读)')) {
item.extra += addExtra('Status', 'advance online publication');
extra.set('Status', 'advance online publication', true);
item.title = item.title.replace(/$/, '');
}
item.abstractNote = trimAbstract(item.abstractNote);
item.pages = cleanPages(item.pages);
if (/^\d{8}$/.test(item.date)) {
item.date = `${item.date.substring(0, 4)}-${item.date.substring(4, 6)}-${item.date.substring(6, 8)}`;
}
item.language = {
cn: 'zh-CN',
en: 'en-US'
}[item.language];
item.extra += addExtra('original-title', attr(doc, 'meta[name="citation_title"]', 'content', 1));
let creators = doc.querySelector('meta[name="citation_author"]')
extra.set('original-title', ZU.capitalizeTitle(attr(doc, 'meta[name="citation_title"]', 'content', 1) || text(doc, '#EnTitleValue')), true);
const creators = doc.querySelector('meta[name="citation_author"]')
? Array.from(doc.querySelectorAll('meta[name="citation_author"]')).map(element => element.content)
: attr(doc, 'meta[name="citation_authors"]', 'content', 0).split(/[,;]/);
if (creators.length) {
Z.debug(creators);
item.creators = creators
.map(creator => creator.replace(/(<.+>)?[\d,\s]+(<.+>)?$/, ''))
.filter(creator => creator)
.map(creator => ZU.cleanAuthor(creator, 'author'));
}
item.creators.forEach((creator) => {
if (/[\u4e00-\u9fa5]/.test(creator.lastName)) {
creator.lastName = creator.firstName + creator.lastName;
creator.firstName = '';
creator.fieldMode = 1;
item.creators = creators
.map(creator => creator.replace(/(<.+>)?[\d,\s]+(<.+>)?$/, ''))
.filter(creator => creator)
.map(creator => cleanAuthor(creator));
const enCreators = attr(doc, 'meta[name="citation_authors"]', 'content', 1).split(/[,;]/);
if (enCreators.length) {
const creatorsExt = JSON.parse(JSON.stringify(item.creators));
for (let i = 0; i < item.creators.length; i++) {
creatorsExt[i].original = ZU.capitalizeName(enCreators[i]);
}
});
let enCreators = attr(doc, 'meta[name="citation_authors"]', 'content', 1).split(/[,;]/);
let creatorsExt = JSON.parse(JSON.stringify(item.creators));
for (let i = 0; i < item.creators.length; i++) {
creatorsExt[i].original = enCreators[i];
extra.set('creatorsExt', JSON.stringify(creatorsExt));
}
item.extra = extra.toString();
if (item.tags.length == 1) {
item.tags = item.tags[0].split(/[;]\s*/).map(tag => ({ tag: tag }));
item.tags = item.tags[0].split(/[,;]\s*/).map(tag => ({ tag: tag }));
}
item.extra += addExtra('creatorsExt', JSON.stringify(creatorsExt));
let pdfLink = doc.querySelector('a[href*="create_pdf"]');
const pdfLink = doc.querySelector('a[href*="create_pdf"],h1 > a[href*="/pdf/"]');
if (pdfLink && !item.attachments.some(attachment => attachment.mimeType == 'application/pdf')) {
item.attachments.push({
url: pdfLink.href,
Expand All @@ -142,14 +144,127 @@ async function scrape(doc, url = doc.location.href) {
item.complete();
});

let em = await translator.getTranslatorObject();
const em = await translator.getTranslatorObject();
em.itemType = 'journalArticle';
await em.doWeb(doc, url);
}

function addExtra(key, value) {
return value
? `${key}: ${value}\n`
/**
 * Scrape an article page by reading its visible elements directly (used
 * when the page has no `citation_title` meta tag).
 *
 * Builds a `journalArticle` item from `#FileTitle`, `#EnTitle`,
 * `td.unnamed3`, `#ReferenceText`, `#DOI`, `#Author`, `#KeyWord` and the
 * click counters, then calls `complete()` on it.
 *
 * @param {Document} doc - Article page to scrape.
 * @param {string} [url] - Page URL; defaults to `doc.location.href`.
 * @returns {Promise<void>} Resolves after the item has been completed.
 */
async function scrapeText(doc, url = doc.location.href) {
	const newItem = new Z.Item('journalArticle');
	const extra = new Extra();

	// innerHTML (not textContent) is retained from the original code, so any
	// inline markup inside the title element is kept. Guard against a
	// missing element instead of throwing.
	const titleElm = doc.querySelector('#FileTitle');
	newItem.title = titleElm ? titleElm.innerHTML : '';
	extra.set('original-title', ZU.capitalizeTitle(text(doc, '#EnTitle')), true);

	// The abstract cell index depends on how many `td.unnamed3` cells exist.
	const abstractElm = doc.querySelectorAll('td.unnamed3');
	if (abstractElm.length == 2) {
		newItem.abstractNote = trimAbstract(abstractElm.item(0).textContent);
	}
	else if (abstractElm.length == 4) {
		newItem.abstractNote = trimAbstract(abstractElm.item(1).textContent);
	}

	// The last dot-delimited segment of the reference text is split on
	// commas into [publication, date, "volume(issue):pages"].
	const referenceText = text(doc, '#ReferenceText');
	Z.debug(referenceText);
	const pubInfo = tryMatch(referenceText, /\.\s?([^.]+)\.?$/, 1).split(/[,]\s?/);
	Z.debug(pubInfo);
	newItem.publicationTitle = pubInfo[0];
	newItem.volume = tryMatch(pubInfo[2], /0*(\d+)[(]/, 1);
	newItem.issue = tryMatch(pubInfo[2], /\(([a-z\d]+)\)/i, 1).replace(/0*(\d+)/, '$1');
	newItem.pages = cleanPages(tryMatch(pubInfo[2], /:\s?(.+)$/, 1));
	newItem.date = pubInfo[1];
	newItem.language = 'zh-CN';
	newItem.DOI = ZU.cleanDOI(text(doc, '#DOI'));

	// Prefer the explicit link on the page over the URL we were given.
	newItem.url = url;
	const urlElm = doc.querySelector('a#URL');
	if (urlElm) {
		newItem.url = urlElm.href;
	}

	extra.set('view', text(doc, '#ClickNum'));
	// Store the counter's text, not the selector string itself.
	// NOTE: test fixtures that expect the literal "#PDFClickNum" in `extra`
	// were recorded against the old behaviour and need regenerating.
	extra.set('download', text(doc, '#PDFClickNum'));
	newItem.extra = extra.toString();

	// Authors come from the author links when present; otherwise from the
	// leading segment (up to the first dot) of the reference text.
	const authorLinks = Array.from(doc.querySelectorAll('#Author td > a[href*="field=author"]'));
	let creators = authorLinks.length
		? authorLinks.map(elm => ZU.trimInternal(elm.textContent))
		: [tryMatch(referenceText, /^([^.]+)\./, 1)];
	// A single entry may hold several delimiter-separated names.
	if (creators.length == 1) {
		creators = creators[0].split(/[,;]/);
	}
	newItem.creators = creators
		.map(name => name.trim())
		.filter(name => name)
		.map(name => cleanAuthor(name));

	const pdfLink = doc.querySelector('a[href*="create_pdf"]');
	if (pdfLink) {
		newItem.attachments.push({
			url: pdfLink.href,
			title: 'Full Text PDF',
			mimeType: 'application/pdf'
		});
	}

	// A single keyword link may hold several delimiter-separated keywords.
	let tags = Array.from(doc.querySelectorAll('#KeyWord > a')).map(elm => elm.textContent);
	if (tags.length == 1) {
		tags = tags[0].split(/[,;]\s*/);
	}
	newItem.tags = tags;
	newItem.complete();
}

/**
 * Ordered collection of `key: value` entries used to build an item's
 * `extra` field. Each entry carries a `csl` flag; `toString()` emits the
 * flagged entries first, then any pre-existing text, then the rest.
 */
class Extra {
	constructor() {
		// Entries in insertion order: { key, val, csl }.
		this.fields = [];
	}

	/**
	 * Look up an entry by key, ignoring letter case.
	 *
	 * Uses plain string comparison rather than a RegExp built from the key,
	 * so keys containing regex metacharacters (".", "+", "(" …) are matched
	 * literally and can never raise a SyntaxError.
	 *
	 * @param {string} key - Key to look for.
	 * @returns {{key: string, val: *, csl: boolean}|undefined} The first matching entry, if any.
	 */
	findField(key) {
		const wanted = String(key).toLowerCase();
		return this.fields.find(obj => String(obj.key).toLowerCase() === wanted);
	}

	/**
	 * Append an entry unconditionally (duplicates are allowed).
	 *
	 * @param {string} key - Entry key.
	 * @param {*} val - Entry value.
	 * @param {boolean} [csl=false] - Whether the entry belongs in the leading group.
	 */
	push(key, val, csl = false) {
		this.fields.push({ key: key, val: val, csl: csl });
	}

	/**
	 * Update the value of an existing entry (case-insensitive key match) or
	 * append a new one. The `csl` flag is only applied to new entries.
	 *
	 * @param {string} key - Entry key.
	 * @param {*} val - Entry value.
	 * @param {boolean} [csl=false] - Flag for a newly created entry.
	 */
	set(key, val, csl = false) {
		const target = this.findField(key);
		if (target) {
			target.val = val;
		}
		else {
			this.push(key, val, csl);
		}
	}

	/**
	 * Read the value stored under `key` (case-insensitive).
	 *
	 * @param {string} key - Entry key.
	 * @returns {*} The stored value, or '' when no entry matches.
	 */
	get(key) {
		const result = this.findField(key);
		return result
			? result.val
			: '';
	}

	/**
	 * Serialize to newline-separated `key: value` lines.
	 *
	 * Entries with a falsy value are omitted from the output but, unlike
	 * before, are no longer deleted from `this.fields`: serializing is a
	 * read-only operation.
	 *
	 * @param {string} [history=''] - Existing text placed between the flagged and unflagged groups.
	 * @returns {string} The combined text, with empty sections dropped.
	 */
	toString(history = '') {
		const filled = this.fields.filter(obj => obj.val);
		const format = obj => `${obj.key}: ${obj.val}`;
		return [
			filled.filter(obj => obj.csl).map(format).join('\n'),
			history,
			filled.filter(obj => !obj.csl).map(format).join('\n')
		].filter(part => part).join('\n');
	}
}

/**
 * Turn a raw author string into a creator object.
 *
 * A trailing run of digits, commas and asterisks (optionally wrapped in
 * parentheses) is removed before the name is passed to `ZU.cleanAuthor`.
 * When the resulting last name contains a character in the U+4E00–U+9FFF
 * range, the two name parts are merged into `lastName` and the creator is
 * marked with `fieldMode = 1`.
 *
 * Sample page: https://zkxb.hnust.edu.cn/ch/reader/view_abstract.aspx?file_no=202303007&flag=1
 *
 * @param {string} name - Raw author text.
 * @returns {Object} The creator object returned by `ZU.cleanAuthor`, possibly adjusted.
 */
function cleanAuthor(name) {
	const bareName = name.replace(/[(]?[\d,*]*[)]?$/, '');
	const creator = ZU.cleanAuthor(bareName, 'author');
	const hasIdeograph = /[\u4e00-\u9fff]/.test(creator.lastName);
	if (!hasIdeograph) {
		return creator;
	}
	return Object.assign(creator, {
		lastName: creator.firstName + creator.lastName,
		firstName: '',
		fieldMode: 1
	});
}

/**
 * Normalize a page string.
 *
 * "~" becomes "-", "+" becomes ", ", and leading zeros are stripped from
 * every number in the string. Previously only the first number was
 * stripped, so a range such as "0012-0016" came out as "12-0016".
 *
 * @param {string} page - Raw page text; falsy input yields ''.
 * @returns {string} The normalized page string.
 */
function cleanPages(page) {
	if (!page) return '';
	return page.replace(/~/g, '-')
		.replace(/[+]/g, ', ')
		// Global flag: normalize the start page, the end page and any
		// further page numbers alike. A lone "0" is left as "0".
		.replace(/0*(\d+)/g, '$1');
}

/**
 * Tidy the tail of an abstract.
 *
 * A ';' that is followed only by non-newline characters up to the end of
 * the string is replaced, together with those characters, by '。'.
 *
 * @param {string} abstract - Raw abstract text; falsy input yields ''.
 * @returns {string} The adjusted abstract.
 */
function trimAbstract(abstract) {
	return abstract
		? abstract.replace(/;.*$/, '。')
		: '';
}

/**
 * Match `string` against `pattern` and return one element of the result
 * without ever throwing on a miss.
 *
 * @param {string} string - Text to search; falsy input yields ''.
 * @param {RegExp} pattern - Pattern passed to `String.prototype.match`.
 * @param {number} [index=0] - Index into the match array (0 is the whole match).
 * @returns {string} The requested element, or '' when there is no match or the element is empty/undefined.
 */
function tryMatch(string, pattern, index = 0) {
	const found = string ? string.match(pattern) : null;
	if (!found) {
		return '';
	}
	return found[index] || '';
}

Expand Down Expand Up @@ -274,8 +389,9 @@ var testCases = [
"fieldMode": 1
}
],
"date": "20231211",
"date": "2023-12-11",
"DOI": "10.13506/j.cnki.jpr.2023.11.005",
"abstractNote": "目的 采用正交设计综合加权评分和层次分析法相结合的方法,优化黄芩微波酒制工艺。方法 采用单因素试验优选加酒量、闷润时间、微波功率、微波时间,然后以微波酒制工艺中的加酒量、闷润时间、微波功率、微波时间为考察因素,采用层次分析法确定各指标权重系数,以黄芩苷、汉黄芩苷、黄芩素、汉黄芩素含量进行综合加权评分为评价指标,优化黄芩微波酒制工艺。结果 取生黄芩饮片适量,加入10%辅料酒,置密闭容器内闷润60 min,在300 W功率下微波5 min,经三次验证实验,各指标成分的平均含量分别为15.65%,5.94%,3.00%,0.33%,平均评分98.24分,RSD为0.99%(n=3)。结论 优化所得微波炮制酒工艺稳定,重复性良好,可应用于微波酒黄芩的炮制。",
"extra": "creatorsExt: [{\"firstName\":\"\",\"lastName\":\"李利华\",\"creatorType\":\"author\",\"fieldMode\":1,\"original\":\"\"},{\"firstName\":\"\",\"lastName\":\"王巍\",\"creatorType\":\"author\",\"fieldMode\":1},{\"firstName\":\"\",\"lastName\":\"张一美\",\"creatorType\":\"author\",\"fieldMode\":1},{\"firstName\":\"\",\"lastName\":\"赵梦辉\",\"creatorType\":\"author\",\"fieldMode\":1},{\"firstName\":\"\",\"lastName\":\"鞠成国\",\"creatorType\":\"author\",\"fieldMode\":1}]",
"issue": "11",
"language": "zh-CN",
Expand Down Expand Up @@ -316,6 +432,74 @@ var testCases = [
}
]
},
{
"type": "web",
"url": "http://hkfdj.ijournals.cn/hkfdj/ch/reader/view_abstract.aspx?file_no=20160403&flag=1",
"items": [
{
"itemType": "journalArticle",
"title": "基于模型的系统工程概述",
"creators": [
{
"firstName": "",
"lastName": "朱静",
"creatorType": "author",
"fieldMode": 1
},
{
"firstName": "",
"lastName": "杨晖",
"creatorType": "author",
"fieldMode": 1
},
{
"firstName": "",
"lastName": "高亚辉",
"creatorType": "author",
"fieldMode": 1
},
{
"firstName": "",
"lastName": "姚太克",
"creatorType": "author",
"fieldMode": 1
}
],
"date": "2016",
"abstractNote": "由于航空领域涉及的系统日益高度复杂,为更好推进基于模型的系统工程(Model Based System Engineering ,MBSE)研发体系,通过从当前遇到的问题、推行基于模型的系统工程的必要性、优势、未来的挑战等方面进行了较为详细地阐述。基于模型的系统工程研发体系具有知识表示的无二义、系统设计的一体化、沟通交流的高效率等优势,是未来发展的大趋势。",
"extra": "original-title: Summary of Model Based System Engineering\nview: 9490\ndownload: #PDFClickNum",
"issue": "4",
"language": "zh-CN",
"libraryCatalog": "E-Tiller",
"pages": "12-16",
"publicationTitle": "航空发动机",
"url": "http://hkfdj.ijournals.cn/hkfdj/ch/reader/view_abstract.aspx?file_no=20160403&flag=1",
"volume": "42",
"attachments": [
{
"title": "Full Text PDF",
"mimeType": "application/pdf"
}
],
"tags": [
{
"tag": "模型"
},
{
"tag": "研发体系"
},
{
"tag": "系统工程"
},
{
"tag": "航空发动机"
}
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.stae.com.cn/jsygc/home",
Expand Down

0 comments on commit 79e772d

Please sign in to comment.