license
shitao committed Aug 3, 2023
1 parent 1287b57 commit c086741
Showing 24 changed files with 132 additions and 159 deletions.
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 staoxiao

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
4 changes: 2 additions & 2 deletions README.md
@@ -169,6 +169,6 @@ You can easily finetune your model with it.



## Citing & Authors

<!--- Describe where people can find more information -->


4 changes: 2 additions & 2 deletions benchmark/C_MTEB/Classification.py
@@ -1,5 +1,6 @@
from mteb import AbsTaskClassification


class TNews(AbsTaskClassification):
@property
def description(self):
@@ -52,7 +53,6 @@ def description(self):
}



class JDReview(AbsTaskClassification):
@property
def description(self):
@@ -98,4 +98,4 @@ def description(self):
'eval_langs': ['zh'],
'main_score': 'accuracy',
'samples_per_label': 32,
}
}
2 changes: 0 additions & 2 deletions benchmark/C_MTEB/Clustering.py
@@ -19,7 +19,6 @@ def description(self):
}



class CLSClusteringP2P(AbsTaskClustering):
@property
def description(self):
@@ -38,7 +37,6 @@ def description(self):
}



class ThuNewsClusteringS2S(AbsTaskClustering):
@property
def description(self):
11 changes: 6 additions & 5 deletions benchmark/C_MTEB/Reranking.py
@@ -1,8 +1,7 @@
from mteb import AbsTask, RerankingEvaluator, AbsTaskReranking
import logging

import numpy as np

from mteb import RerankingEvaluator, AbsTaskReranking

logger = logging.getLogger(__name__)

@@ -45,7 +44,8 @@ def compute_metrics_batched(self, model):
# In case the query is a list of strings, we get the most similar embedding to any of the queries
all_query_flattened = [q for sample in self.samples for q in sample["query"]]
if hasattr(model, 'encode_queries'):
all_query_embs = model.encode_queries(all_query_flattened, convert_to_tensor=True, batch_size=self.batch_size)
all_query_embs = model.encode_queries(all_query_flattened, convert_to_tensor=True,
batch_size=self.batch_size)
else:
all_query_embs = model.encode(all_query_flattened, convert_to_tensor=True, batch_size=self.batch_size)
else:
@@ -64,12 +64,12 @@ def compute_metrics_batched(self, model):
query_idx, docs_idx = 0, 0
for instance in self.samples:
num_subqueries = len(instance["query"]) if isinstance(instance["query"], list) else 1
query_emb = all_query_embs[query_idx : query_idx + num_subqueries]
query_emb = all_query_embs[query_idx: query_idx + num_subqueries]
query_idx += num_subqueries

num_pos = len(instance["positive"])
num_neg = len(instance["negative"])
docs_emb = all_docs_embs[docs_idx : docs_idx + num_pos + num_neg]
docs_emb = all_docs_embs[docs_idx: docs_idx + num_pos + num_neg]
docs_idx += num_pos + num_neg

if num_pos == 0 or num_neg == 0:
@@ -98,6 +98,7 @@ def evaluate(self, model, split="test", **kwargs):

return dict(scores)


AbsTaskReranking.evaluate = evaluate


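A minimal, self-contained sketch of the slicing bookkeeping the hunk above reformats. The sample contents and embedding shapes are fabricated for illustration; this is not the repository's code:

```python
import numpy as np

# Two toy samples: a multi-subquery sample and a single-query sample.
samples = [
    {"query": ["q1a", "q1b"], "positive": ["p1"], "negative": ["n1", "n2"]},
    {"query": "q2", "positive": ["p2"], "negative": ["n3"]},
]

# Stand-ins for the flattened query/doc embeddings (3 queries, 5 docs).
all_query_embs = np.random.rand(3, 4)
all_docs_embs = np.random.rand(5, 4)

query_idx, docs_idx = 0, 0
for instance in samples:
    # A sample may carry several subqueries; slice them out as one block.
    num_subqueries = len(instance["query"]) if isinstance(instance["query"], list) else 1
    query_emb = all_query_embs[query_idx: query_idx + num_subqueries]
    query_idx += num_subqueries

    # Positives and negatives were encoded contiguously for this sample.
    num_pos, num_neg = len(instance["positive"]), len(instance["negative"])
    docs_emb = all_docs_embs[docs_idx: docs_idx + num_pos + num_neg]
    docs_idx += num_pos + num_neg

    print(query_emb.shape, docs_emb.shape)
```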
12 changes: 6 additions & 6 deletions benchmark/C_MTEB/Retrieval.py
@@ -1,4 +1,5 @@
from collections import defaultdict

from datasets import load_dataset, DatasetDict
from mteb import AbsTaskRetrieval

@@ -14,9 +15,9 @@ def load_retrieval_data(hf_hub_name, eval_splits):
for e in qrels:
relevant_docs[e['qid']][e['pid']] = e['score']

corpus = DatasetDict({eval_split:corpus})
queries = DatasetDict({eval_split:queries})
relevant_docs = DatasetDict({eval_split:relevant_docs})
corpus = DatasetDict({eval_split: corpus})
queries = DatasetDict({eval_split: queries})
relevant_docs = DatasetDict({eval_split: relevant_docs})
return corpus, queries, relevant_docs


@@ -116,7 +117,6 @@ def load_data(self, **kwargs):
self.data_loaded = True



class CmedqaRetrieval(AbsTaskRetrieval):
@property
def description(self):
@@ -208,6 +208,6 @@ def load_data(self, **kwargs):
if self.data_loaded:
return

self.corpus, self.queries, self.relevant_docs = load_retrieval_data(self.description['hf_hub_name'], self.description['eval_splits'])
self.corpus, self.queries, self.relevant_docs = load_retrieval_data(self.description['hf_hub_name'],
self.description['eval_splits'])
self.data_loaded = True
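For context, a toy version of the qrels-to-nested-dict conversion inside `load_retrieval_data`, as visible in the hunk above. The qrels rows here are fabricated; in the real function they come from the Hugging Face dataset:

```python
from collections import defaultdict

# Fabricated qrels rows standing in for the HF dataset split.
qrels = [
    {'qid': 'q1', 'pid': 'd3', 'score': 1},
    {'qid': 'q1', 'pid': 'd7', 'score': 1},
    {'qid': 'q2', 'pid': 'd1', 'score': 1},
]

relevant_docs = defaultdict(dict)
for e in qrels:
    relevant_docs[e['qid']][e['pid']] = e['score']

print(dict(relevant_docs))
# {'q1': {'d3': 1, 'd7': 1}, 'q2': {'d1': 1}}
```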

3 changes: 0 additions & 3 deletions benchmark/C_MTEB/STS.py
@@ -17,7 +17,6 @@ def description(self):
}



class BQ(AbsTaskSTS):
@property
def description(self):
@@ -50,7 +49,6 @@ def description(self):
}



class PAWSX(AbsTaskSTS):
@property
def description(self):
@@ -99,7 +97,6 @@ def description(self):
}



class QBQTC(AbsTaskSTS):
@property
def description(self):
9 changes: 4 additions & 5 deletions benchmark/C_MTEB/__init__.py
@@ -1,15 +1,14 @@
from .Classification import *
from .Clustering import *
from .Reranking import *
from .PairClassification import *
from .Reranking import *
from .Retrieval import *
from .STS import *
from .Classification import *

ChineseTaskList = ['TNews', 'IFlyTek', 'MultilingualSentiment', 'JDReview', 'OnlineShopping', 'Waimai',
'CLSClusteringS2S', 'CLSClusteringP2P', 'ThuNewsClusteringS2S', 'ThuNewsClusteringP2P',
'Ocnli', 'Cmnli',
'T2Reranking', 'MmarcoReranking', 'CMedQAv1', 'CMedQAv2',
'T2Retrieval', 'MMarcoRetrieval', 'DuRetrieval', 'CovidRetrieval', 'CmedqaRetrieval', 'EcomRetrieval', 'MedicalRetrieval', 'VideoRetrieval',
'T2Retrieval', 'MMarcoRetrieval', 'DuRetrieval', 'CovidRetrieval', 'CmedqaRetrieval',
'EcomRetrieval', 'MedicalRetrieval', 'VideoRetrieval',
'ATEC', 'BQ', 'LCQMC', 'PAWSX', 'STSB', 'AFQMC', 'QBQTC']


2 changes: 0 additions & 2 deletions benchmark/README.md
@@ -192,6 +192,4 @@ In retrieval task, we sample 100,000 candidates (including the ground truths) fr
This work is inspired by [Massive Text Embedding Benchmark](https://github.com/embeddings-benchmark/mteb),
which lacks evaluation for Chinese text.

## Citing & Authors

<!--- Describe where people can find more information -->
10 changes: 2 additions & 8 deletions benchmark/eval_C-MTEB.py
@@ -1,11 +1,9 @@
import argparse

from mteb import MTEB
from models import UniversalModel
from C_MTEB import *
from C_MTEB import ChineseTaskList


from models import UniversalModel
from mteb import MTEB

query_instruction_for_retrieval_dict = {
"BAAI/baai-general-embedding-large-zh-instruction": "为这个句子生成表示以用于检索相关文章:",
@@ -20,7 +18,6 @@ def get_args():
return parser.parse_args()



if __name__ == '__main__':
args = get_args()

@@ -44,6 +41,3 @@ def get_args():

evaluation = MTEB(tasks=[task], task_langs=['zh'])
evaluation.run(model, output_folder=f"zh_results/{args.model_name_or_path.split('/')[-1]}")



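A hedged sketch of the loop this script drives, built only from calls visible in this diff. `UniversalModel`'s constructor arguments beyond the model path are an assumption, as is the task slice:

```python
from mteb import MTEB

from C_MTEB import ChineseTaskList
from models import UniversalModel

# Assumed constructor signature: the diff shows only the class name.
model = UniversalModel('BAAI/baai-general-embedding-large-zh-instruction')

for task in ChineseTaskList[:2]:  # e.g. 'TNews', 'IFlyTek'
    evaluation = MTEB(tasks=[task], task_langs=['zh'])
    evaluation.run(model, output_folder='zh_results/baai-general-embedding-large-zh-instruction')
```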
6 changes: 1 addition & 5 deletions benchmark/eval_MTEB.py
@@ -1,8 +1,7 @@
import argparse

from mteb import MTEB
from models import UniversalModel

from mteb import MTEB

query_instruction_for_retrieval_dict = {
"BAAI/baai-general-embedding-large-en-instruction": "Represent this sentence for searching relevant passages: ",
@@ -39,6 +38,3 @@ def get_args():

evaluation = MTEB(tasks=[task], task_langs=['zh'])
evaluation.run(model, output_folder=f"en_results/{args.model_name_or_path.split('/')[-1]}")



17 changes: 4 additions & 13 deletions benchmark/models.py
@@ -1,8 +1,9 @@
import numpy as np
from typing import cast, List, Dict

import numpy as np
import torch
from tqdm import tqdm
from mteb import DRESModel
from tqdm import tqdm


class UniversalModel(DRESModel):
@@ -33,7 +34,6 @@ def __init__(
if num_gpus > 1:
self.model = torch.nn.DataParallel(self.model)


def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
'''
encode queries for retrieval task
@@ -45,7 +45,6 @@ def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
input_texts = queries
return self.encode(input_texts)


def encode_corpus(self, corpus: List[Dict[str, str]], **kwargs) -> np.ndarray:
'''
encode corpus for retrieval task
@@ -54,15 +53,14 @@ def encode_corpus(self, corpus: List[Dict[str, str]], **kwargs) -> np.ndarray:
input_texts = ['{} {}'.format(doc.get('title', ''), doc['text']).strip() for doc in corpus]
return self.encode(input_texts)


@torch.no_grad()
def encode(self, sentences: List[str], batch_size: int = 256, **kwargs) -> np.ndarray:

batch_size = min(batch_size, 256) * self.num_gpus
self.model.eval()

all_embeddings = []
for start_index in tqdm(range(0, len(sentences), batch_size), desc="Batches", disable=len(sentences)<256):
for start_index in tqdm(range(0, len(sentences), batch_size), desc="Batches", disable=len(sentences) < 256):
sentences_batch = sentences[start_index:start_index + batch_size]
inputs = self.tokenizer(
sentences_batch,
@@ -79,10 +77,3 @@ def encode(self, sentences: List[str], batch_size: int = 256, **kwargs) -> np.nd
all_embeddings.append(embeddings.cpu().numpy())

return np.concatenate(all_embeddings, axis=0)







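A standalone sketch of the batched `encode` pattern shown above. The CLS pooling and L2 normalization are assumptions, since the hunk does not show how `embeddings` is derived from the model outputs, and `bert-base-chinese` is only a placeholder checkpoint:

```python
from typing import List

import numpy as np
import torch
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer


@torch.no_grad()
def encode(sentences: List[str], model_name: str = 'bert-base-chinese',
           batch_size: int = 256) -> np.ndarray:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).eval()

    all_embeddings = []
    # Mirror the diff: tqdm over batch starts, hidden for short inputs.
    for start in tqdm(range(0, len(sentences), batch_size),
                      desc="Batches", disable=len(sentences) < 256):
        batch = sentences[start:start + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True,
                           max_length=512, return_tensors='pt')
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state[:, 0]  # CLS pooling (assumed)
        embeddings = torch.nn.functional.normalize(embeddings, dim=-1)
        all_embeddings.append(embeddings.cpu().numpy())
    return np.concatenate(all_embeddings, axis=0)
```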
29 changes: 12 additions & 17 deletions benchmark/summarize_results.py
@@ -1,10 +1,10 @@
import argparse
from collections import defaultdict
import os
import json
import os
from collections import defaultdict

from mteb import MTEB
from C_MTEB import *
from mteb import MTEB


def read_results(task_types, except_tasks, args):
@@ -58,8 +58,8 @@ def output_markdown(tasks_results, model_names, save_file):
for task_name in type_results.keys():
first_line += f" {task_name} |"
second_line += ":--------:|"
f.write(first_line+' Avg | \n')
f.write(second_line+':--------:| \n')
f.write(first_line + ' Avg | \n')
f.write(second_line + ':--------:| \n')

for model in model_names:
write_line = f"| {model} |"
@@ -72,12 +72,11 @@ def output_markdown(tasks_results, model_names, save_file):
write_line += f" |"

if len(all_res) == len(type_results.keys()):
write_line += f" {round(sum(all_res)/len(all_res), 2)} |"
write_line += f" {round(sum(all_res) / len(all_res), 2)} |"
task_type_res[t_type][model] = all_res
else:
write_line += f" |"
f.write(write_line+' \n')

f.write(write_line + ' \n')

f.write(f'Overall \n')
first_line = "| Model |"
@@ -93,7 +92,7 @@ def output_markdown(tasks_results, model_names, save_file):
all_res = []
for type_name, results in task_type_res.items():
if model in results:
write_line += f" {round(sum(results[model])/len(results[model]), 2)} |"
write_line += f" {round(sum(results[model]) / len(results[model]), 2)} |"
all_res.extend(results[model])
else:
write_line += f" |"
@@ -104,8 +103,6 @@ def output_markdown(tasks_results, model_names, save_file):
f.write(write_line + ' \n')




def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('--results_dir', default="./zh_results", type=str)
@@ -120,14 +117,12 @@ def get_args():
task_types = ["Retrieval", "STS", "PairClassification", "Classification", "Reranking", "Clustering"]
except_tasks = ['AmazonReviewsClassification', 'STS22']
elif args.lang == 'en':
task_types = ["Retrieval", "STS", "Summarization", "PairClassification", "Classification", "Reranking", "Clustering"]
task_types = ["Retrieval", "STS", "Summarization", "PairClassification", "Classification", "Reranking",
"Clustering"]
except_tasks = []
else:
raise NotImplementedError(f"args.lang must be zh or en, but got {args.lang}")


task_results, model_dirs = read_results(task_types, except_tasks, args=args)
output_markdown(task_results, model_dirs.keys(), save_file=os.path.join(args.results_dir, f'{args.lang}_results.md'))



output_markdown(task_results, model_dirs.keys(),
save_file=os.path.join(args.results_dir, f'{args.lang}_results.md'))
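A toy rendering of the per-row averaging in `output_markdown`; the scores and model name are fabricated:

```python
# Fabricated per-task scores for one model within one task type.
type_results = {'T2Retrieval': 71.2, 'DuRetrieval': 84.6}

all_res = list(type_results.values())
row = "| my-model |" + "".join(f" {v} |" for v in all_res)
row += f" {round(sum(all_res) / len(all_res), 2)} |"
print(row)  # | my-model | 71.2 | 84.6 | 77.9 |
```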
5 changes: 3 additions & 2 deletions examples/search_demo/arguments.py
@@ -4,12 +4,13 @@
@dataclass
class ModelArguments:
model_name_or_path: str = field(
default='BAAI/baai-general-embedding-large-zh', metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
default='BAAI/baai-general-embedding-large-zh',
metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
)


@dataclass
class DataArguments:
data_path: str = field(
default='./data', metadata={"help": "Path to wikipedia-22-12"}
)
)