diff --git a/LICENSE b/LICENSE index f49a4e16..d4c9a648 100755 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2019 The AmpliGraph Authors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 12dd1f14..937aa887 100644 --- a/README.md +++ b/README.md @@ -70,21 +70,21 @@ Install from pip or conda: **CPU-only** ``` -pip install tensorflow==1.12.0 +pip install tensorflow==1.13.1 or -conda install tensorflow=1.12.0 +conda install tensorflow=1.13.1 ``` **GPU support** ``` -pip install tensorflow-gpu==1.12.0 +pip install tensorflow-gpu==1.13.1 or -conda install tensorflow-gpu=1.12.0 +conda install tensorflow-gpu=1.13.1 ``` @@ -114,7 +114,7 @@ pip install -e . ```python >> import ampligraph >> ampligraph.__version__ -'1.0.2' +'1.0.3' ``` diff --git a/ampligraph/__init__.py b/ampligraph/__init__.py index 93698016..a8540591 100644 --- a/ampligraph/__init__.py +++ b/ampligraph/__init__.py @@ -1,8 +1,15 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# """Explainable Link Prediction is a library for relational learning on knowledge graphs.""" import logging.config import pkg_resources -__version__ = '1.0.2' +__version__ = '1.0.3' __all__ = ['datasets', 'latent_features', 'evaluation'] logging.config.fileConfig(pkg_resources.resource_filename(__name__, 'logger.conf'), disable_existing_loggers=False) diff --git a/ampligraph/datasets/__init__.py b/ampligraph/datasets/__init__.py index beee2305..69643c67 100644 --- a/ampligraph/datasets/__init__.py +++ b/ampligraph/datasets/__init__.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# """Helper functions to load knowledge graphs.""" from .datasets import load_from_csv, load_from_rdf, load_fb15k, load_wn18, load_fb15k_237, load_from_ntriples, \ diff --git a/ampligraph/datasets/datasets.py b/ampligraph/datasets/datasets.py index a63aa626..d96619cd 100644 --- a/ampligraph/datasets/datasets.py +++ b/ampligraph/datasets/datasets.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import pandas as pd import os import numpy as np @@ -293,7 +300,7 @@ def _load_dataset(dataset_metadata, data_home=None, check_md5hash=False): The location to save the dataset to. Defaults to None. check_md5hash : boolean - If true check the md4hash of the files after they are downloaded. + If True check the md5hash of the files after they are downloaded. Defaults to False. """ if dataset_metadata.dataset_name is None: @@ -336,6 +343,10 @@ def load_wn18(check_md5hash=False): The dataset includes a large number of inverse relations, and its use in experiments has been deprecated. Use WN18RR instead. + Parameters + ---------- + check_md5hash : bool + If ``True`` check the md5hash of the files. Defaults to ``False``. 
Returns ------- @@ -394,6 +405,9 @@ def load_wn18rr(check_md5hash=False, clean_unseen=True): clean_unseen : bool If ``True``, filters triples in validation and test sets that include entities not present in the training set. + check_md5hash : bool + If ``True`` check the md5hash of the dataset files. Defaults to ``False``. + Returns ------- @@ -448,6 +462,12 @@ def load_fb15k(check_md5hash=False): The dataset includes a large number of inverse relations, and its use in experiments has been deprecated. Use FB15k-237 instead. + Parameters + ---------- + check_md5hash : boolean + If ``True`` check the md5hash of the files. Defaults to ``False``. + + Returns ------- @@ -475,7 +495,7 @@ def load_fb15k(check_md5hash=False): train_checksum='5a87195e68d7797af00e137a7f6929f2', valid_checksum='275835062bb86a86477a3c402d20b814', test_checksum='71098693b0efcfb8ac6cd61cf3a3b505') - return _load_dataset(FB15K, data_home=None, check_md5hash=False) + return _load_dataset(FB15K, data_home=None, check_md5hash=check_md5hash) def load_fb15k_237(check_md5hash=False, clean_unseen=True): @@ -506,6 +526,9 @@ def load_fb15k_237(check_md5hash=False, clean_unseen=True): Parameters ---------- + check_md5hash : boolean + If ``True`` check the md5hash of the files. Defaults to ``False``. + clean_unseen : bool If ``True``, filters triples in validation and test sets that include entities not present in the training set. @@ -559,6 +582,13 @@ def load_yago3_10(check_md5hash=False, clean_unseen = True): YAGO3-10 1,079,040 5,000 5,000 123,182 37 ========= ========= ======= ======= ============ =========== + Parameters + ---------- + check_md5hash : boolean + If ``True`` check the md5hash of the files. Defaults to ``False``. + + clean_unseen : bool + If ``True``, filters triples in validation and test sets that include entities not present in the training set. Returns ------- diff --git a/ampligraph/evaluation/__init__.py b/ampligraph/evaluation/__init__.py index 8a743117..5d2a569b 100644 --- a/ampligraph/evaluation/__init__.py +++ b/ampligraph/evaluation/__init__.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# """The module includes performance metrics for neural graph embeddings models, along with model selection routines, negatives generation, and an implementation of the learning-to-rank-based evaluation protocol used in literature.""" diff --git a/ampligraph/evaluation/metrics.py b/ampligraph/evaluation/metrics.py index 909750ae..f98b5db8 100644 --- a/ampligraph/evaluation/metrics.py +++ b/ampligraph/evaluation/metrics.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np import logging diff --git a/ampligraph/evaluation/protocol.py b/ampligraph/evaluation/protocol.py index 2d517ee5..aa2cd310 100644 --- a/ampligraph/evaluation/protocol.py +++ b/ampligraph/evaluation/protocol.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0.
+# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np from tqdm import tqdm @@ -10,7 +17,7 @@ logger.setLevel(logging.DEBUG) -def train_test_split_no_unseen(X, test_size=5000, seed=0, allow_duplication=False): +def train_test_split_no_unseen(X, test_size=100, seed=0, allow_duplication=False): """Split into train and test sets. This function carves out a test set that contains only entities @@ -116,10 +123,14 @@ def train_test_split_no_unseen(X, test_size=5000, seed=0, allow_duplication=Fals # in case can't find solution if loop_count == tolerance: if allow_duplication: - raise Exception("Not possible to split the dataset...") + raise Exception("Cannot create a test split of the desired size. " + "Some entities will not occur in both training and test set. " + "Change seed values, or set test_size to a smaller value.") else: - raise Exception("Not possible to split the dataset. \ - Maybe set allow_duplication = True can help...") + raise Exception("Cannot create a test split of the desired size. " + "Some entities will not occur in both training and test set. " + "Set allow_duplication=True, or " + "change seed values, or set test_size to a smaller value.") logger.debug('Completed random search.') @@ -129,6 +140,7 @@ def train_test_split_no_unseen(X, test_size=5000, seed=0, allow_duplication=Fals return X[idx_train, :], X[idx_test, :] + def _create_unique_mappings(unique_obj, unique_rel): obj_count = len(unique_obj) rel_count = len(unique_rel) @@ -462,9 +474,11 @@ def evaluate_performance(X, model, filter_triples=None, verbose=False, strict=Tr Run the relational learning evaluation protocol defined in :cite:`bordes2013translating`. - It computes the ranks of each positive triple against all possible negatives created in compliance with - the local closed world assumption (LCWA), as described in :cite:`nickel2016review`. - + It computes the rank of each positive triple against a number of negatives generated on the fly. + Such negatives are compliant with the local closed world assumption (LCWA), + as described in :cite:`nickel2016review`. In practice, that means only one side of the triple is corrupted + (i.e. either the subject or the object). + .. note:: When *filtered* mode is enabled (i.e. `filtered_triples` is not ``None``), to speed up the procedure, we adopt a hashing-based strategy to handle the set difference problem. @@ -496,7 +510,7 @@ def evaluate_performance(X, model, filter_triples=None, verbose=False, strict=Tr .. hint:: When ``rank_against_ent=None``, the method will use all distinct entities in the knowledge graph ``X`` - to generate negatives to rank against. If ``X`` includes more than 1 million unique + to generate negatives to rank against. If ``X`` includes more than 2.5 million unique entities and relations, the method will return a runtime error. To solve the problem, it is recommended to pass the desired entities to use to generate corruptions to ``rank_against_ent``. Besides, trying to rank a positive against an extremely large number of negatives @@ -524,37 +538,57 @@ def evaluate_performance(X, model, filter_triples=None, verbose=False, strict=Tr - 's': corrupt only subject. - 'o': corrupt only object - - 's+o': corrupt both subject and object + - 's+o': corrupt both subject and object. The same behaviour is obtained with ``use_default_protocol=True``. + + .. note:: + If ``corrupt_side='s+o'`` the function will return 2*n ranks. 
+ If ``corrupt_side='s'`` or ``corrupt_side='o'``, it will return n ranks, where n is the + number of statements in X. + The first n elements of ranks are obtained against subject corruptions. The ranks from n+1 to 2n are obtained + against object corruptions. + use_default_protocol: bool - Flag to indicate whether to evaluate head and tail corruptions separately (default: True). - If this is set to true, it will also ignore the ``corrupt_side`` argument and corrupt both head and tail - separately and rank triples. + Flag to indicate whether to use the standard protocol used in literature defined in + :cite:`bordes2013translating` (default: True). + If set to ``True`` it is equivalent to ``corrupt_side='s+o'``. + This corresponds to the evaluation protocol used in literature, where head and tail corruptions + are evaluated separately. + + .. note:: + When ``use_default_protocol=True`` the function will return 2*n ranks. + The first n elements of ranks are obtained against subject corruptions. The ranks from n+1 to 2n are obtained + against object corruptions. Returns ------- - ranks : ndarray, shape [n] + ranks : ndarray, shape [n] or [2*n] An array of ranks of positive test triples. - + When ``use_default_protocol=True`` or ``corrupt_side='s+o'``, the function returns 2*n ranks instead of n. + In that case the first n elements of ranks are obtained against subject corruptions. The ranks from n+1 to 2n + are obtained against object corruptions. Examples -------- >>> import numpy as np >>> from ampligraph.datasets import load_wn18 >>> from ampligraph.latent_features import ComplEx - >>> from ampligraph.evaluation import evaluate_performance + >>> from ampligraph.evaluation import evaluate_performance, mrr_score, hits_at_n_score >>> >>> X = load_wn18() - >>> model = ComplEx(batches_count=10, seed=0, epochs=1, k=150, eta=10, - >>> loss='pairwise', optimizer='adagrad') + >>> model = ComplEx(batches_count=10, seed=0, epochs=10, k=150, eta=1, + >>> loss='nll', optimizer='adam') >>> model.fit(np.concatenate((X['train'], X['valid']))) >>> >>> filter = np.concatenate((X['train'], X['valid'], X['test'])) - >>> ranks = evaluate_performance(X['test'][:5], model=model, filter_triples=filter) + >>> ranks = evaluate_performance(X['test'][:5], model=model, + filter_triples=filter, + corrupt_side='s+o', + use_default_protocol=False) >>> ranks - array([ 2, 4, 1, 1, 28550], dtype=int32) + [1, 582, 543, 6, 31] >>> mrr_score(ranks) - 0.55000700525394053 + 0.24049691297347323 >>> hits_at_n_score(ranks, n=10) - 0.8 + 0.4 """ logger.debug('Evaluating the performance of the embedding model.') diff --git a/ampligraph/latent_features/__init__.py b/ampligraph/latent_features/__init__.py index f7875a45..088d90ff 100644 --- a/ampligraph/latent_features/__init__.py +++ b/ampligraph/latent_features/__init__.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# """This module includes neural graph embedding models and support functions.
Knowledge graph embedding models are neural architectures that encode concepts from a knowledge graph diff --git a/ampligraph/latent_features/loss_functions.py b/ampligraph/latent_features/loss_functions.py index 1a5af57b..d6d52e74 100644 --- a/ampligraph/latent_features/loss_functions.py +++ b/ampligraph/latent_features/loss_functions.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import tensorflow as tf import abc import logging diff --git a/ampligraph/latent_features/misc.py b/ampligraph/latent_features/misc.py index 34d2a58e..ccb10e0e 100644 --- a/ampligraph/latent_features/misc.py +++ b/ampligraph/latent_features/misc.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np import logging diff --git a/ampligraph/latent_features/models.py b/ampligraph/latent_features/models.py index a4028eb3..001375b7 100644 --- a/ampligraph/latent_features/models.py +++ b/ampligraph/latent_features/models.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np import tensorflow as tf from sklearn.utils import check_random_state @@ -346,6 +353,10 @@ def _load_model_from_trained_params(self): def get_embeddings(self, entities, embedding_type='entity'): """Get the embeddings of entities or relations. + + .. Note :: + Use :meth:`ampligraph.utils.create_tensorboard_visualizations` to visualize the embeddings with TensorBoard. + Parameters ---------- entities : array-like, dtype=int, shape=[n] @@ -1053,6 +1064,9 @@ def _fn(e_s, e_p, e_o): def get_embeddings(self, entities, type='entity'): """Get the embeddings of entities or relations. + .. Note :: + Use :meth:`ampligraph.utils.create_tensorboard_visualizations` to visualize the embeddings with TensorBoard. + Parameters ---------- entities : array-like, dtype=int, shape=[n] @@ -1113,7 +1127,7 @@ def predict(self, X, from_idx=False, get_ranks=False): positive_scores = self.rnd.uniform(low=0, high=1, size=len(X)).tolist() if get_ranks: corruption_entities = self.eval_config.get('corruption_entities', DEFAULT_CORRUPTION_ENTITIES) - if corruption_entities is None: + if corruption_entities == "all": corruption_length = len(self.ent_to_idx) else: corruption_length = len(corruption_entities) diff --git a/ampligraph/latent_features/pool_functions.py b/ampligraph/latent_features/pool_functions.py index 9b78da17..777e4fc1 100755 --- a/ampligraph/latent_features/pool_functions.py +++ b/ampligraph/latent_features/pool_functions.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. 
+# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import tensorflow as tf import logging diff --git a/ampligraph/latent_features/regularizers.py b/ampligraph/latent_features/regularizers.py index 0837583c..8945aa35 100644 --- a/ampligraph/latent_features/regularizers.py +++ b/ampligraph/latent_features/regularizers.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import tensorflow as tf import numpy as np import abc diff --git a/ampligraph/utils/__init__.py b/ampligraph/utils/__init__.py index d010a1f2..84545e53 100644 --- a/ampligraph/utils/__init__.py +++ b/ampligraph/utils/__init__.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# """This module contains utility functions for neural knowledge graph embedding models. """ diff --git a/ampligraph/utils/model_utils.py b/ampligraph/utils/model_utils.py index babe7981..f97fe459 100644 --- a/ampligraph/utils/model_utils.py +++ b/ampligraph/utils/model_utils.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import os import pickle import importlib @@ -7,6 +14,7 @@ import tensorflow as tf from tensorflow.contrib.tensorboard.plugins import projector +import numpy as np import pandas as pd """This module contains utility functions for neural knowledge graph embedding models. @@ -140,20 +148,64 @@ def restore_model(model_name_path=None): return model -def create_tensorboard_visualizations(model, loc, labels=None): - """ Create Tensorboard visualization files. +def create_tensorboard_visualizations(model, loc, labels=None, write_metadata=True, export_tsv_embeddings=True): + """ Export embeddings to Tensorboard. - Note: this will create all the files required by Tensorboard to visualize embeddings, - but you must run Tensorboard yourself. + This function exports embeddings to disk in a format used by + `TensorBoard `_ and + `TensorBoard Embedding Projector `_. + The function exports: + + * A number of checkpoint and graph embedding files in the provided location that will allow + you to visualize embeddings using Tensorboard. This is generally for use with a + `local Tensorboard instance `_. + * a tab-separated file of embeddings ``embeddings_projector.tsv``. This is generally used to + visualize embeddings by uploading to `TensorBoard Embedding Projector `_. + * embeddings metadata (i.e. the embeddings labels from the original knowledge graph), saved to ``metadata.tsv``. + Such file can be used in TensorBoard or uploaded to TensorBoard Embedding Projector. + + The content of ``loc`` will look like: :: + + tensorboard_files/ + ├── checkpoint + ├── embeddings_projector.tsv + ├── graph_embedding.ckpt.data-00000-of-00001 + ├── graph_embedding.ckpt.index + ├── graph_embedding.ckpt.meta + ├── metadata.tsv + └── projector_config.pbtxt + + .. Note :: + A TensorBoard guide is available at `this address `_. + + .. 
Note :: + Uploading ``embeddings_projector.tsv`` and ``metadata.tsv`` to + `TensorBoard Embedding Projector `_ will give a result + similar to the picture below: + + .. image:: ../img/embeddings_projector.png Examples -------- - >>> from ampligraph.utils import create_tensorboard_visualizations, restore_model >>> import numpy as np - >>> example_name = 'helloworld.pkl' - >>> restored_model = restore_model(model_name_path = example_name) - >>> output_path = 'model_tensorboard/' - >>> create_tensorboard_visualizations(restored_model, output_path) + >>> from ampligraph.latent_features import TransE + >>> from ampligraph.utils import create_tensorboard_visualizations + >>> + >>> X = np.array([['a', 'y', 'b'], + >>> ['b', 'y', 'a'], + >>> ['a', 'y', 'c'], + >>> ['c', 'y', 'a'], + >>> ['a', 'y', 'd'], + >>> ['c', 'y', 'd'], + >>> ['b', 'y', 'c'], + >>> ['f', 'y', 'e']]) + >>> + >>> model = TransE(batches_count=1, seed=555, epochs=20, k=10, loss='pairwise', + >>> loss_params={'margin':5}) + >>> model.fit(X) + >>> + >>> create_tensorboard_visualizations(model, 'tensorboard_files') + Parameters ---------- @@ -165,11 +217,17 @@ def create_tensorboard_visualizations(model, loc, labels=None): labels: pd.DataFrame Label(s) for each embedding point in the Tensorboard visualization. Default behaviour is to use the embeddings labels included in the model. + export_tsv_embeddings: bool (Default: True) + If True, will generate a tab-separated file of embeddings at the given path. This is generally used to + visualize embeddings by uploading to `TensorBoard Embedding Projector `_. + write_metadata: bool (Default: True) + If True, will write a file named 'metadata.tsv' in the same directory as ``loc``. """ # Create loc if it doesn't exist if not os.path.exists(loc): + logger.debug('Creating Tensorboard visualization directory: %s' % loc) os.mkdir(loc) if not model.is_fitted: @@ -177,12 +235,21 @@ def create_tensorboard_visualizations(model, loc, labels=None): # If no label data supplied, use model ent_to_idx keys as labels if labels is None: + + logger.info('Using model entity dictionary to create Tensorboard metadata.tsv') labels = list(model.ent_to_idx.keys()) else: if len(labels) != len(model.ent_to_idx): raise ValueError('Label data rows must equal number of embeddings.') - write_metadata_tsv(loc, labels) + if write_metadata: + logger.debug('Writing metadata.tsv to: %s' % loc) + write_metadata_tsv(loc, labels) + + if export_tsv_embeddings: + tsv_filename = "embeddings_projector.tsv" + logger.info('Writing embeddings tsv to: %s' % os.path.join(loc, tsv_filename)) + np.savetxt(os.path.join(loc, tsv_filename), model.trained_model_params[0], delimiter='\t') checkpoint_path = os.path.join(loc, 'graph_embedding.ckpt') diff --git a/docs/changelog.md b/docs/changelog.md index b3ca24ea..4f3f6c66 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,11 @@ # Changelog +## 1.0.3 +- Fixed regression in RandomBaseline (#94) +- Added TensorBoard Embedding Projector support (#86) +- Minor bugfixing (#87, #47) + + ## 1.0.2 - Added multiclass loss (#24 and #22) - Updated the negative generation to speed up evaluation for default protocol.(#74) diff --git a/docs/img/embeddings_projector.png b/docs/img/embeddings_projector.png new file mode 100644 index 00000000..74053dcb Binary files /dev/null and b/docs/img/embeddings_projector.png differ diff --git a/docs/install.md b/docs/install.md index 357997d1..e7a449e3 100644 --- a/docs/install.md +++ b/docs/install.md @@ -23,21 +23,21 @@ Install from pip or conda:
**CPU-only** ``` -pip install tensorflow==1.12.0 +pip install tensorflow==1.13.1 or -conda install tensorflow=1.12.0 +conda install tensorflow=1.13.1 ``` **GPU support** ``` -pip install tensorflow-gpu==1.12.0 +pip install tensorflow-gpu==1.13.1 or -conda install tensorflow-gpu=1.12.0 +conda install tensorflow-gpu=1.13.1 ``` @@ -66,5 +66,5 @@ pip install -e . ```python >> import ampligraph >> ampligraph.__version__ -'1.0.2' +'1.0.3' ``` diff --git a/experiments/predictive_performance.py b/experiments/predictive_performance.py index 75ae43db..943eb066 100644 --- a/experiments/predictive_performance.py +++ b/experiments/predictive_performance.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import ampligraph.datasets import ampligraph.latent_features from ampligraph.evaluation import hits_at_n_score, mr_score, evaluate_performance, mrr_score @@ -181,10 +188,10 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-d", "--dataset", - type=str, + type=str.lower, choices=SUPPORT_DATASETS) parser.add_argument("-m", "--model", - type=str, + type=str.lower, choices=SUPPOORT_MODELS) args = parser.parse_args() diff --git a/jenkins.sh b/jenkins.sh index 5f6e7d4d..a5ec4b93 100644 --- a/jenkins.sh +++ b/jenkins.sh @@ -11,11 +11,11 @@ source activate ampligraph # Install library if [[ $# -eq 0 ]] ; then echo "install tensorflow CPU mode" - pip install tensorflow==1.12.0 + pip install tensorflow==1.13.1 else if [[ $1 == "gpu" ]] ; then echo "install tensorflow GPU mode" - conda install tensorflow-gpu==1.12.0 + conda install tensorflow-gpu==1.13.1 fi fi diff --git a/setup.py b/setup.py index c71fbfbb..7a245d53 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# from setuptools import setup, find_packages from ampligraph import __version__ as version @@ -6,7 +13,7 @@ description='A Python library for relational learning on knowledge graphs.', url='https://github.com/Accenture/AmpliGraph/', author='Accenture Dublin Labs', - author_email='luca.costabello@accenture.com', + author_email='about@ampligraph.org', license='Apache 2.0', packages=find_packages(exclude=('tests', 'docs')), include_package_data=True, diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..64c3c185 100755 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# \ No newline at end of file diff --git a/tests/ampligraph/__init__.py b/tests/ampligraph/__init__.py index e69de29b..64c3c185 100755 --- a/tests/ampligraph/__init__.py +++ b/tests/ampligraph/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. 
+# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# \ No newline at end of file diff --git a/tests/ampligraph/datasets/__init__.py b/tests/ampligraph/datasets/__init__.py index e69de29b..64c3c185 100755 --- a/tests/ampligraph/datasets/__init__.py +++ b/tests/ampligraph/datasets/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# \ No newline at end of file diff --git a/tests/ampligraph/datasets/test_datasets.py b/tests/ampligraph/datasets/test_datasets.py index 72550cd7..7d3a0f8e 100644 --- a/tests/ampligraph/datasets/test_datasets.py +++ b/tests/ampligraph/datasets/test_datasets.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# from ampligraph.datasets import load_wn18, load_fb15k, load_fb15k_237, load_yago3_10, load_wn18rr import numpy as np import pytest diff --git a/tests/ampligraph/evaluation/__init__.py b/tests/ampligraph/evaluation/__init__.py index e69de29b..64c3c185 100755 --- a/tests/ampligraph/evaluation/__init__.py +++ b/tests/ampligraph/evaluation/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# \ No newline at end of file diff --git a/tests/ampligraph/evaluation/test_metrics.py b/tests/ampligraph/evaluation/test_metrics.py index 647d53fa..319271b7 100644 --- a/tests/ampligraph/evaluation/test_metrics.py +++ b/tests/ampligraph/evaluation/test_metrics.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np from ampligraph.evaluation.metrics import rank_score, mrr_score, hits_at_n_score, mr_score diff --git a/tests/ampligraph/evaluation/test_protocol.py b/tests/ampligraph/evaluation/test_protocol.py index 50c44395..a7fa799e 100644 --- a/tests/ampligraph/evaluation/test_protocol.py +++ b/tests/ampligraph/evaluation/test_protocol.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np import pytest from ampligraph.latent_features import TransE, DistMult, ComplEx diff --git a/tests/ampligraph/latent_features/__init__.py b/tests/ampligraph/latent_features/__init__.py index e69de29b..64c3c185 100755 --- a/tests/ampligraph/latent_features/__init__.py +++ b/tests/ampligraph/latent_features/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. 
+# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# \ No newline at end of file diff --git a/tests/ampligraph/latent_features/test_misc.py b/tests/ampligraph/latent_features/test_misc.py index 8864a3a8..c1cb0d39 100755 --- a/tests/ampligraph/latent_features/test_misc.py +++ b/tests/ampligraph/latent_features/test_misc.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np from ampligraph.latent_features.misc import get_entity_triples diff --git a/tests/ampligraph/latent_features/test_models.py b/tests/ampligraph/latent_features/test_models.py index 3c92f0e6..e47b1bb3 100644 --- a/tests/ampligraph/latent_features/test_models.py +++ b/tests/ampligraph/latent_features/test_models.py @@ -1,9 +1,16 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np import pytest -from ampligraph.latent_features import TransE, DistMult, ComplEx, HolE +from ampligraph.latent_features import TransE, DistMult, ComplEx, HolE, RandomBaseline from ampligraph.datasets import load_wn18 - +from ampligraph.evaluation import evaluate_performance, hits_at_n_score def test_fit_predict_TransE_early_stopping_with_filter(): X = load_wn18() @@ -34,7 +41,19 @@ def test_fit_predict_TransE_early_stopping_without_filter(): y, _ = model.predict(X['test'][:1], get_ranks=True) print(y) - +def test_evaluate_RandomBaseline(): + model = RandomBaseline(seed=0) + X = load_wn18() + model.fit(X["train"]) + ranks = evaluate_performance(X["test"], + model=model, + use_default_protocol=False, + corrupt_side='s+o', + verbose=False) + hits10 = hits_at_n_score(ranks, n=10) + hits1 = hits_at_n_score(ranks, n=1) + assert(hits10==0.0002 and hits1==0.0) + def test_fit_predict_transE(): model = TransE(batches_count=1, seed=555, epochs=20, k=10, loss='pairwise', loss_params={'margin': 5}, optimizer='adagrad', optimizer_params={'lr':0.1}) diff --git a/tests/ampligraph/latent_features/test_regularizers.py b/tests/ampligraph/latent_features/test_regularizers.py index b63606d8..e761cee6 100644 --- a/tests/ampligraph/latent_features/test_regularizers.py +++ b/tests/ampligraph/latent_features/test_regularizers.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import numpy as np import tensorflow as tf from ampligraph.latent_features import REGULARIZER_REGISTRY diff --git a/tests/ampligraph/utils/__init__.py b/tests/ampligraph/utils/__init__.py index e69de29b..64c3c185 100644 --- a/tests/ampligraph/utils/__init__.py +++ b/tests/ampligraph/utils/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. 
+# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# \ No newline at end of file diff --git a/tests/ampligraph/utils/test_model_utils.py b/tests/ampligraph/utils/test_model_utils.py index 77104cf9..d5beda90 100644 --- a/tests/ampligraph/utils/test_model_utils.py +++ b/tests/ampligraph/utils/test_model_utils.py @@ -1,3 +1,10 @@ +# Copyright 2019 The AmpliGraph Authors. All Rights Reserved. +# +# This file is Licensed under the Apache License, Version 2.0. +# A copy of the Licence is available in LICENCE, or at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# import os import importlib import numpy as np
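
As a complement to the updated ``evaluate_performance`` docstring above, here is a minimal sketch of how the 2*n ranks returned under the default protocol can be split into subject-side and object-side ranks. It is illustrative only and not part of the patch: the hyperparameters mirror the docstring example, while the 100-triple test slice and the variable names are assumptions chosen to keep the run short.

```python
# Illustrative sketch (not part of this patch): splitting the 2*n ranks returned by
# evaluate_performance when use_default_protocol=True (equivalent to corrupt_side='s+o').
# Hyperparameters follow the docstring example; the 100-triple slice is an arbitrary assumption.
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.latent_features import ComplEx
from ampligraph.evaluation import evaluate_performance, mrr_score, hits_at_n_score

X = load_wn18()
model = ComplEx(batches_count=10, seed=0, epochs=10, k=150, eta=1,
                loss='nll', optimizer='adam')
model.fit(np.concatenate((X['train'], X['valid'])))

filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
X_test = X['test'][:100]
ranks = evaluate_performance(X_test, model=model, filter_triples=filter_triples,
                             use_default_protocol=True, verbose=False)

# The first n ranks are computed against subject corruptions, the last n against object corruptions.
n = len(X_test)
subject_ranks, object_ranks = ranks[:n], ranks[n:]
print('MRR (subject corruptions):', mrr_score(subject_ranks))
print('MRR (object corruptions): ', mrr_score(object_ranks))
print('Hits@10 (both sides):     ', hits_at_n_score(ranks, n=10))
```

Per-side figures make it easier to spot models that rank well only when one side of a triple is corrupted, whereas the numbers in the docstring examples aggregate both sides. For the new TensorBoard export, the ``create_tensorboard_visualizations`` docstring example above already shows the call itself; the resulting directory can then be served by a local TensorBoard instance (e.g. ``tensorboard --logdir tensorboard_files``), or ``embeddings_projector.tsv`` and ``metadata.tsv`` can be uploaded to the Embedding Projector.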