Skip to content

Commit

Permalink
reset author
Browse files Browse the repository at this point in the history
  • Loading branch information
schradert committed Feb 15, 2021
1 parent 9797756 commit 32caee7
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 87 deletions.
3 changes: 2 additions & 1 deletion cogs/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .lookup import Lookup
from .lookup import Lookup
from .etymology import Etymology
181 changes: 95 additions & 86 deletions cogs/etymology.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,115 +7,124 @@
import disputils
import ety
from bs4 import BeautifulSoup
from wiktionaryparser import WiktionaryParser

# Default resource keys, used by the `ety` command when no `-r` flag is given.
RESOURCES = ['wiki', 'etym', 'mec', 'bostol']
# Per-resource scraping configuration, keyed by the short names in RESOURCES.
#   'name' - human-readable name of the resource (shown as the embed author).
#   'list' - the search-results page: 'url' is a template with one `{}` slot
#            for the looked-up word; 'el' / 'class_' are the tag name and CSS
#            class used to select each result entry on that page.
#   'item' - optional per-entry page: 'url' is the base the entry id is
#            appended to; 'el' / 'class_' select the etymology element there.
# NOTE(review): this is a plain dict, but scrape_fields and embed_paginate read
# it with attribute access (`tags.list.url`, `props[resource].name`), which a
# dict does not support - confirm and switch those reads to subscripting.
# NOTE(review): the 'class_' values look like site-generated CSS class names
# and may change when the sites are redeployed - confirm they still match.
props = {
# Etymonline has no 'item' section: the etymology text is read straight from
# the search-results entry.
'etym': {
'name': 'Etymonline',
'list': {
'url': "https://www.etymonline.com/search?q={}",
'el': 'div',
'class_': 'word--C9UPa word_4pc--2SZw8'}},
'mec': {
'name': 'Middle English Compendium',
'list': {
'url': "https://quod.lib.umich.edu/m/middle-english-dictionary/dictionary?utf8=%E2%9C%93&search_field=anywhere&q={}",
'el': 'h3',
'class_': 'document-title-heading'},
'item': {
'url': "https://quod.lib.umich.edu",
'el': 'span',
'class_': 'ETYM'}},
'bostol': {
'name': 'Bosworth Toller',
'list': {
'url': "https://bosworthtoller.com/search?q={}",
'el': 'header',
'class_': 'btd--search-entry-header'},
'item': {
'url': "https://bosworthtoller.com/",
'el': 'section',
'class_': 'btd--entry-etymology'}},
# Wiktionary is fetched through WiktionaryParser rather than scraped, so only
# the link template is needed here.
'wiki': {
'name': 'Wiktionary',
'list': {
'url': "https://en.wiktionary.org/wiki/{}#English"}}
}

class Etymology(commands.Cog):

async def __int__(self, bot):
def __init__(self, bot=None):
    """Store the bot instance this cog is attached to.

    This method was previously spelled ``__int__`` (the integer-conversion
    hook), so it never ran on construction and ``self.bot`` was never set.

    Args:
        bot: The bot object the cog belongs to. Optional, so the cog can be
            instantiated without one (as the test module does).
    """
    self.bot = bot

def _get_embed(**kwargs):
return discord.Embed.from_dict({
'color': 0xDD0000,
'title': kwargs.word,
'author': { 'name': kwargs.name, 'icon_url': kwargs.ctx.author.avatar_url },
'url': kwargs.url.format(kwargs.word),
'fields': kwargs.fields
})

def _wiktionary(self, ctx, word, is_soft):
return self._get_embed({
'ctx': ctx,
'word': word,
'url': "https://en.wiktionary.org/wiki/{}#English",
'name': 'Wiktionary',
'fields': [{ 'value': ety.tree(word).__str__() }]
})
@aioify
def _wiktionaryparser(self, word):
    """Fetch *word* through WiktionaryParser and return its etymologies.

    Returns a list of ``{'value': <etymology>}`` dicts, one for every fetched
    entry that carries an 'etymology' key; entries without it are skipped.
    """
    # NOTE(review): presumably wrapped with aioify so the blocking fetch can
    # be awaited by the caller - confirm against the aioify documentation.
    entries = WiktionaryParser().fetch(word)
    return [
        {'value': entry['etymology']}
        for entry in entries
        if 'etymology' in entry
    ]

async def _etymonline(self, ctx, word, is_soft):
url = "https://www.etymonline.com/search?q={}"
async with aiohttp.ClientSession() as sess:
async with sess.get(url.format(word)) as resp:
soup = BeautifulSoup(await resp.text())
results = soup.find_all('div', class_='word--C9UPa word_4pc--2SZw8')
fields = []
for result in results:
entry_word, word_class = result.div.find('a').text.split(' ')
if not is_soft and entry_word != word:
continue
value = result.div.section.find('p').text
fields.append({ 'name': f'{entry_word} {word_class}', 'value': value })

return self._get_embed({
'ctx': ctx, 'word': word, 'url': url,
'name': 'Etymonline', 'fields': fields
})
def parse_entry(self, result, resource):
    """Extract the headword, word class and entry id from one search result.

    Args:
        result: Parsed element for a single search hit, navigated with
            BeautifulSoup-style access (``.div``, ``.find(...)``,
            ``['href']``).
        resource: Resource key: 'etym', 'mec' or 'bostol'.

    Returns:
        A dict with the keys 'word', 'class_' and 'id' ('id' is None for
        'etym'), or an empty dict for any other resource key.
    """
    if resource == 'etym':
        # The link text is "<headword> <class>", e.g. "book (n.)". Split on
        # the last space only, so a multi-word headword no longer breaks the
        # two-name unpacking.
        label = result.div.find('a').text.strip()
        word, _, class_ = label.rpartition(' ')
        if not word:
            # No space at all: the whole label is the headword.
            word, class_ = class_, ''
        return {'word': word, 'class_': class_, 'id': None}
    if resource == 'mec':
        # Looked up outside the f-string: reusing the enclosing quote
        # character inside a replacement field is a SyntaxError before
        # Python 3.12.
        pos = result.h3.find('span', class_='index-pos').text
        return {
            'word': result.a.text.strip(),
            'class_': f'({pos})',
            # NOTE(review): the first href character (presumably '/') is
            # dropped, yet the 'mec' item base URL in props has no trailing
            # slash - confirm the joined URL is well-formed.
            'id': result.h3.find('a')['href'][1:],
        }
    if resource == 'bostol':
        link = result.h3.find('a')
        return {
            'word': link.text.strip(),
            'class_': result.find('div').text.strip(),
            # First href character is dropped; the 'bostol' item base URL
            # already ends with '/'.
            'id': link['href'][1:],
        }
    return {}

async def _middle_english_compendium(self, ctx, word, is_soft):
url = "https://quod.lib.umich.edu/m/middle-english-dictionary/dictionary?utf8=%E2%9C%93&search_field=anywhere&q={}"
async def scrape_fields(self, word, resource, is_soft=False):
if resource == 'wiki':
return self._wiktionaryparser(word)
tags = props[resource]
url = tags.list.url
url_item = tags.item.url
async with aiohttp.ClientSession() as sess:
async with sess.get(url.format(word)) as resp1:
soup1 = BeautifulSoup(await resp1.text())
results = soup1.find_all('h3', class_='document-title-heading')
results = soup1.find_all(tags.list.el, class_=tags.list.class_)
fields = []
for result in results:
entry_word = result.a.text.strip()
if not is_soft and entry_word != word:
entry = self.parse_entry(result, resource)
if not is_soft and entry.word != word:
continue
word_id = result.h3.find('a')['href'][1:]
word_class = f'({result.h3.find('span', class_='index-pos').text})'
async with sess.get("https://quod.lib.umich.edu" + word_id) as resp2:
soup2 = BeautifulSoup(await resp2.text())
value = soup2.find('span', class_='ETYM').text
fields.append({ 'name': f'{entry_word} {word_class}', 'value': value })

return self._get_embed({
'ctx': ctx, 'word': word, 'url': url,
'name': 'Middle English Compendium', 'fields': fields
})

async def _bosworth_toller(self, ctx, word, is_soft):
url = "https://bosworthtoller.com/search?q={}"
async with aiohttp.ClientSession() as sess:
async with sess.get(url.format(word)) as resp1:
soup1 = BeautifulSoup(await resp1.text())
results = soup1.find_all('header', class_='btd--search-entry-header')
fields = []
for result in results:
entry_word = result.h3.find('a').text.strip()
if not is_soft and entry_word != word:
continue
word_id = result.h3.find('a')['href'][1:]
word_class = result.find('div').text.strip()
async with sess.get("https://bosworthtoller.com/" + word_id) as resp2:
soup2 = BeautifulSoup(await resp2.text())
value = soup2.find('section', class_='btd--entry-etymology').text
fields.append({ 'name': f'{entry_word} {word_class}', 'value': value })

return self._get_embed({
'ctx': ctx, 'word': word, 'url': url,
'name': 'Bosworth Toller', 'fields': fields
})
if url_item:
async with sess.get(url_item + entry.id) as resp2:
soup2 = BeautifulSoup(await resp2.text())
try:
value = soup2.find(tags.item.el, class_=tags.item.class_).text
except (AttributeError, KeyError) as error:
print(error)
continue
else:
try:
value = result.div.section.find('p').text # ETYMONLINE
except (AttributeError, KeyError) as error:
print(error)
continue
fields.append({ 'name': f'{entry.word} {entry.class_}', 'value': value })
return fields

@commands.command()
async def ety(self, ctx, word, *flags):
is_soft = '-soft' in flags
resources_ = flags[flags.index('-r') + 1:] if '-r' in flags else RESOURCES
resources = [res.replace(',', '').strip() for res in resources_]

resource_searcher_switch = {
'wiki': self._wiktionary,
'etym': self._etymonline,
'mec': self._middle_english_compendium,
'bostol': self._bosworth_toller}
def embed_paginate(fields, resource):
embed = discord.Embed.from_dict({
'color': 0xDD0000,
'title': word,
'author': { 'name': props[resource].name, 'icon_url': ctx.author.avatar_url },
'url': props[resource].list.url.format(word),
'fields': fields
})
paginator = disputils.BotEmbedPaginator(ctx, embed)
self.bot.loop.create_task(paginator.run())

async with ctx.typing():
embed_paginate([{ 'value': ety.tree(word).__str__() }], 'wiki')
for resource in resources:
embed = await resource_searcher_switch.get(resource)(ctx, word, is_soft)
paginator = disputils.BotEmbedPaginator(ctx, embed)
self.bot.loop.create_task(paginator.run())
fields = await self.scrape_fields(word, resource, is_soft)
embed_paginate(fields, resource)




2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
aiohttp
aioify
asyncio
async_timeout
bs4
Expand All @@ -7,3 +8,4 @@ disputils
ety
gspread_asyncio
oauth2client
wiktionaryparser
21 changes: 21 additions & 0 deletions tests/test_etymology.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import unittest
import asyncio
from ..cogs import Etymology

class TestEtymology(unittest.TestCase):
    """Checks Etymology.scrape_fields against expected etymology fields.

    scrape_fields fetches the search pages over HTTP, so these assertions
    depend on the live sites and their current wording.
    """

    # Shared cog instance, built without a bot.
    ety = Etymology()

    def test_scrape_fields(self):
        # `ety` is a class attribute, so it has to be reached through `self`;
        # the bare name raised NameError inside the method.
        self.assertEqual(asyncio.run(self.ety.scrape_fields('book', 'etym')), [
            { 'name': 'book (n.)',
              'value': 'Old English boc "book, writing, written document," generally referred (despite phonetic difficulties) to Proto-Germanic *bōk(ō)-, from *bokiz "beech" (source also of German Buch "book" Buche "beech;" see beech), the notion being of beechwood tablets on which runes were inscribed; but it may be from the tree itself (people still carve initials in them).' },
            { 'name': 'book (v.)',
              'value': 'Old English bocian "to grant or assign by charter," from book (n.). Meaning "to enter into a book, record" is early 13c. Meaning "to register a name for a seat or place; issue (railway) tickets" is from 1841; "to engage a performer as a guest" is from 1872. U.S. student slang meaning "to depart hastily, go fast" is by 1977, of uncertain signification. Related: Booked; booking.'}])
        # NOTE(review): scrape_fields appends dicts carrying both 'name' and
        # 'value'; this expectation has no 'name' key - confirm it is intended.
        self.assertEqual(asyncio.run(self.ety.scrape_fields('drinken', 'mec')), [
            { 'value': 'OE drincan; sg. 3 drincþ, drinceþ; p. dranc, dronc; pl. druncon; ppl. druncen & gedrincan.' }])
        self.assertEqual(asyncio.run(self.ety.scrape_fields('boga', 'bostol')), [
            { 'name': 'boga (n.)',
              'value': '[Wyc. bowe, bouwe: Laym. boȝe, bowe; O. Sax. bogo, m: Frs. boage: O. Frs. boga, m: Dut. boog, m: Ger. boge, bogen, m: M. H. Ger. boge, m: O. H. Ger. bogo, m: Dan. bue, c: Swed. båge, m: Icel. bogi, m. arcus.]'}])


if __name__ == '__main__':
unittest.main()

0 comments on commit 32caee7

Please sign in to comment.