-
Notifications
You must be signed in to change notification settings - Fork 180
/
setup.py
89 lines (74 loc) · 3.28 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import math
import logging
import os
import requests
from setuptools import setup
from setuptools import find_packages
from setuptools.command.install import install
from tqdm import tqdm
import urllib.parse
# Configure the root logger for the setup script: messages at INFO level and
# above, each prefixed with a timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s : %(levelname)s : %(message)s",
)
class Word2vecModelDownload(install):
    """Install command that first fetches the pretrained Word2vec model files.

    ``run`` downloads every file listed in ``_MODEL_FILES`` from
    ``_MODELS_URL`` into ``_DOWNLOAD_LOCATION`` and only then delegates to the
    standard setuptools ``install`` command. A file that is already on disk
    with the size reported by the server is not downloaded again.
    """

    # Base URL that each model file name is joined onto.
    _MODELS_URL = 'https://storage.googleapis.com/mat2vec/'
    # Names of the files to fetch from the base URL.
    _MODEL_FILES = [
        'pretrained_embeddings',
        'pretrained_embeddings.wv.vectors.npy',
        'pretrained_embeddings.trainables.syn1neg.npy',
    ]
    # Directory the files are written to (relative to the working directory).
    _DOWNLOAD_LOCATION = 'mat2vec/training/models'

    def run(self):
        """Downloads any missing model files, then runs the regular install."""
        # Make sure the target directory exists so opening the output file
        # cannot fail with FileNotFoundError.
        os.makedirs(self._DOWNLOAD_LOCATION, exist_ok=True)
        for model_file in self._MODEL_FILES:
            self._download_model_file(model_file)
        install.run(self)

    def _download_model_file(self, model_file):
        """Downloads a single model file unless a same-sized copy exists.

        Failures (HTTP error status, size mismatch) are logged as errors and
        do not abort the installation.

        Args:
            model_file: The name of the file, relative to both the models URL
                and the download location.
        """
        file_url = urllib.parse.urljoin(self._MODELS_URL, model_file)
        final_location = os.path.join(self._DOWNLOAD_LOCATION, model_file)

        # The context manager releases the streamed connection on every path,
        # including the early returns below.
        with requests.get(file_url, stream=True) as response:
            if not response.ok:
                # Do not write an HTTP error page to disk as a model file.
                logging.error(
                    "Could not download %s (HTTP status %s). Please retry.",
                    model_file, response.status_code)
                return

            total_size = int(response.headers.get('content-length', 0))
            if self._file_exists_correct_size(model_file, total_size):
                logging.info(
                    "%s already present, skipping download.", model_file)
                return  # If the file is already there, skip downloading it.

            logging.info('Starting download for %s', model_file)
            block_size, wrote = 1024, 0
            with open(final_location, 'wb') as downloaded_file:
                # True division so a trailing partial block is counted in the
                # progress bar total.
                for data in tqdm(response.iter_content(block_size),
                                 total=math.ceil(total_size / block_size),
                                 unit='KB',
                                 unit_scale=True):
                    wrote += len(data)
                    downloaded_file.write(data)

        # A total size of 0 means the server sent no content-length header, so
        # there is nothing to compare against.
        if total_size != 0 and wrote != total_size:
            logging.error(
                "Something went wrong during the download "
                "of %s, the size of the file is not correct. "
                "Please retry.", model_file)
        else:
            logging.info("%s successfully downloaded.", model_file)

    def _file_exists_correct_size(self, filename, expected_size):
        """Checks if the file exists in the download location and has the correct size.

        Args:
            filename: The name of the file in the download location.
            expected_size: The expected size in bytes.

        Returns:
            True if the file exists and has the expected size, False otherwise.
        """
        full_file_path = os.path.join(self._DOWNLOAD_LOCATION, filename)
        return (os.path.exists(full_file_path) and
                os.path.getsize(full_file_path) == expected_size)
# The README text is passed to setup() as the package's long description.
with open('README.md', encoding="utf-8") as readme_file:
    readme = readme_file.read()

# Package metadata; the custom 'install' command downloads the pretrained
# models before performing the standard installation.
setup(
    name='mat2vec',
    version='0.2',
    author='Authors of Tshitoyan et al. Nature (2019)',
    description='Word2vec training and text processing code for Tshitoyan et al. Nature (2019).',
    long_description=readme,
    packages=find_packages(),
    include_package_data=True,
    cmdclass={'install': Word2vecModelDownload},
)