diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index d642edc..633f9e0 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1,6 +1,7 @@
 /Users/berenz/Downloads/Template of Abstract in Latex.tex="A4C7846D"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/controls.R="5BC637B7"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_annoy.R="684202BA"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_hnsw.R="A4FAA5A3"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_nnd.R="87049873"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/methods.R="B7F84C4B"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/reclin2_pair_ann.R="1D89EE3E"
@@ -10,7 +11,9 @@
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_blocking.R="DABEA252"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_data.R="9D1011B0"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_hnsw.R="2E19A832"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_mlpack.R="51D2EAA1"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_print.R="AA7835F7"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_reclin2.R="E3E08D07"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/tests/tinytest.R="D6BBCDC1"
 /Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v1-deduplication.Rmd="9D34DD44"
+/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v2-reclin.Rmd="289A4D2F"
diff --git a/R/blocking.R b/R/blocking.R
index 7dbdd1b..69d9a62 100644
--- a/R/blocking.R
+++ b/R/blocking.R
@@ -12,7 +12,7 @@
 #' @importFrom utils combn
 #'
 #'
-#' @title Main function for blocking records given text data
+#' @title Block records based on text data.
 #'
 #' @author Maciej Beręsewicz
 #'
@@ -271,22 +271,47 @@ blocking <- function(x,
   ## if true are given
   if (!is.null(true_blocks)) {
 
-    ## Graph metrics
-    eval_g1 <- igraph::graph_from_data_frame(x_df[, c("x", "y")], directed = FALSE)
-    eval_g2 <- igraph::graph_from_data_frame(true_blocks[, c("x", "y")], directed = FALSE)
+    setDT(true_blocks)
 
-    eval_g1_cl <- igraph::make_clusters(eval_g1, membership = igraph::components(eval_g1, "weak")$membership)
-    eval_g2_cl <- igraph::make_clusters(eval_g2, membership = igraph::components(eval_g2, "weak")$membership)
+    pairs_to_eval <- x_df[y %in% true_blocks$y, c("x", "y", "block")]
+    pairs_to_eval[true_blocks, on = c("x", "y"), both := TRUE]
+    pairs_to_eval[is.na(both), both := FALSE]
 
-    eval_metrics <- base::sapply(c("vi", "nmi", "split.join", "rand", "adjusted.rand"),
-                                 igraph::compare, comm1=eval_g1_cl, comm2=eval_g2_cl)
+    true_blocks[pairs_to_eval, on = c("x", "y"), both := TRUE]
+    true_blocks[is.na(both), both := FALSE]
+    true_blocks[, block:=block+max(pairs_to_eval$block)]
+
+    pairs_to_eval <- rbind(pairs_to_eval, true_blocks[both == FALSE, .(x,y,block)], fill = TRUE)
+
+    if (!deduplication) {
+
+      pairs_to_eval[, x2:=x+max(y)]
+      pairs_to_eval_long <- melt(pairs_to_eval[, .(y, x2, block, both)], id.vars = c("block", "both"))
+      pairs_to_eval_long[!is.na(both), block_id := .GRP, block]
+      block_id_max <- max(pairs_to_eval_long$block_id, na.rm = T)
+      pairs_to_eval_long[is.na(both), block_id:=block_id_max + rleid(block)]
+      pairs_to_eval_long[both == TRUE | is.na(both), true_id := .GRP, block]
+      true_id_max <- max(pairs_to_eval_long$true_id, na.rm = T)
+      pairs_to_eval_long[both==FALSE, true_id := true_id_max+rleid(block)]
+
+    } else {
+
+      pairs_to_eval_long <- melt(pairs_to_eval[, .(y, x, block, both)], id.vars = c("block", "both"))
+      pairs_to_eval_long[!is.na(both), block_id := .GRP, block]
+      block_id_max <- max(pairs_to_eval_long$block_id, na.rm = T)
+      pairs_to_eval_long[is.na(both), block_id:=block_id_max + rleid(block)]
+      pairs_to_eval_long[both == TRUE | is.na(both), true_id := .GRP, block]
+      true_id_max <- max(pairs_to_eval_long$true_id, na.rm = T)
+      pairs_to_eval_long[both==FALSE, true_id := true_id_max+rleid(block)]
+
+    }
+
+    ## consider using RcppAlgos::comboGeneral(nrow(pairs_to_eval_long), 2,  nThreads=n_threads)
+    candidate_pairs <- utils::combn(nrow(pairs_to_eval_long), 2)
+
+    same_block <- pairs_to_eval_long$block_id[candidate_pairs[1, ]] == pairs_to_eval_long$block_id[candidate_pairs[2, ]]
+    same_truth <- pairs_to_eval_long$true_id[candidate_pairs[1, ]] == pairs_to_eval_long$true_id[candidate_pairs[2, ]]
 
-    ## standard metrics based on klsh::confusion.from.blocking
-    block_ids <- eval_g1_cl$membership
-    true_ids <- eval_g2_cl$membership
-    candidate_pairs <- utils::combn(length(block_ids), 2)
-    same_block <- block_ids[candidate_pairs[1, ]] == block_ids[candidate_pairs[2, ]]
-    same_truth <- true_ids[candidate_pairs[1, ]] == true_ids[candidate_pairs[2, ]]
     confusion <- table(same_block, same_truth)
 
     fp <- confusion[2, 1]
@@ -295,11 +320,11 @@ blocking <- function(x,
     tn <- confusion[1, 1]
     recall <- tp/(fn + tp)
 
-    eval_metrics2 <- c(recall = tp/(fn + tp), precision = tp/(tp + fp),
-                       fpr = fp/(fp + tn), fnr = fn/(fn + tp),
-                       accuracy = (tp + tn)/(tp + tn + fn + fp),
-                       specificity = tn/(tn + fp))
-    eval_metrics <- c(eval_metrics, eval_metrics2)
+    eval_metrics <- c(recall = tp / (fn + tp), precision = tp / (tp + fp),
+                      fpr = fp / (fp + tn), fnr = fn / (fn + tp),
+                      accuracy = (tp + tn) / (tp + tn + fn + fp),
+                      specificity = tn / (tn + fp))
+
   }
 
   setorderv(x_df, c("x", "y", "block"))
diff --git a/R/methods.R b/R/methods.R
index 32e677c..45017ad 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -2,13 +2,8 @@
 #' @exportS3Method
 print.blocking <- function(x,...) {
 
-  block_ids <- x$result$block
-
-  if (x$deduplication) {
-    blocks_tab <- table(block_ids)
-    block_ids <- rep(as.numeric(names(blocks_tab)), blocks_tab+1)
-  }
-
+  blocks_tab <- table(x$result$block)
+  block_ids <- rep(as.numeric(names(blocks_tab)), blocks_tab+1)
 
   rr <- 1 - sum(choose(table(block_ids), 2))/choose(length(block_ids), 2)
   cat("========================================================\n")
@@ -25,9 +20,7 @@ print.blocking <- function(x,...) {
   if (!is.null(x$metrics)) {
     cat("========================================================\n")
     cat("Evaluation metrics (standard):\n" )
-    print(x$metrics[6:11])
-    cat("\nEvaluation metrics (graph-based):\n" )
-    print(x$metrics[1:5])
+    print(round(x$metrics*100, 4))
 
   }
   invisible(x)
diff --git a/R/reclin2_pair_ann.R b/R/reclin2_pair_ann.R
index dbcf6fe..6770c51 100644
--- a/R/reclin2_pair_ann.R
+++ b/R/reclin2_pair_ann.R
@@ -6,7 +6,7 @@
 #' @author Maciej Beręsewicz
 #'
 #' @description
-#' Function for the integration with the reclin2 package. The function is based on [reclin2::pair_minsim()] and reuses some of its source code.
+#' Function for the integration with the `reclin2` package. The function is based on [reclin2::pair_minsim()] and reuses some of its source code.
 #'
 #' @param x reference data (a data.frame or a data.table),
 #' @param y query data  (a data.frame or a data.table, default NULL),
diff --git a/README.Rmd b/README.Rmd
index 7180a69..60898c5 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -20,16 +20,16 @@ knitr::opts_chunk$set(
 
 ## Description
 
-An R package that aims to block records for data deduplication and record linkage (a.k.a. entity resolution) based on [approximate nearest neighbours algorithms (ANN)](https://en.wikipedia.org/wiki/Nearest_neighbor_search) and graphs (via the `igraph` package).
+This R package is designed to block records for data deduplication and record linkage (also known as entity resolution) using [approximate nearest neighbours algorithms (ANN)](https://en.wikipedia.org/wiki/Nearest_neighbor_search) and graphs (via the `igraph` package).
 
-Currently supports the following R packages that binds to specific ANN algorithms:
+It supports the following R packages that bind to specific ANN algorithms:
 
 + [rnndescent](https://cran.r-project.org/package=rnndescent) (default, very powerful, supports sparse matrices),
 + [RcppHNSW](https://cran.r-project.org/package=RcppHNSW) (powerful but does not support sparse matrices), 
 + [RcppAnnoy](https://cran.r-project.org/package=RcppAnnoy), 
 + [mlpack](https://cran.r-project.org/package=RcppAnnoy) (see `mlpack::lsh` and `mlpack::knn`).
 
-The package also supports integration with the [reclin2](https://cran.r-project.org/package=reclin2) package via `blocking::pair_ann` function.
+The package can be used with the [reclin2](https://cran.r-project.org/package=reclin2) package via the `blocking::pair_ann` function.
 
 ## Funding
 
@@ -37,7 +37,7 @@ Work on this package is supported by the National Science Centre, OPUS 22 grant
 
 ## Installation
 
-You can install the development version of the `blocking` package from GitHub with:
+Install the GitHub blocking package with:
 
 ```{r, eval=FALSE}
 # install.packages("remotes") # uncomment if needed
@@ -53,7 +53,7 @@ library(blocking)
 library(reclin2)
 ```
 
-Generate simple data with two groups (`df_example`) and reference data (`df_base`).
+Generate simple data with three groups (`df_example`) and reference data (`df_base`).
 
 ```{r}
 df_example <- data.frame(txt = c(
@@ -73,7 +73,7 @@ df_example
 df_base
 ```
 
-Deduplication using `blocking` function. Output contains information about:
+Deduplication using the `blocking` function. Output contains information:
 
   + the method used (where `nnd` which refers to the NN descent algorithm), 
   + number of blocks created (here 2 blocks),
@@ -86,9 +86,9 @@ blocking_result <- blocking(x = df_example$txt)
 blocking_result
 ```
 
-Table with blocking which contains:
+Table with blocking results contains:
 
-+ row numbers from the original data
++ row numbers from the original data,
 + block number (integers),
 + distance (from the ANN algorithm).
 
@@ -96,7 +96,7 @@ Table with blocking which contains:
 blocking_result$result
 ```
 
-Deduplication using the `pair_ann` function for integration with the `reclin2` package. Here I use the pipeline that can be used with the `reclin2` package.
+Deduplication using the `pair_ann` function for integration with the `reclin2` package. Use the pipeline with the `reclin2` package.
 
 ```{r}
 pair_ann(x = df_example, on = "txt") |>
@@ -105,7 +105,8 @@ pair_ann(x = df_example, on = "txt") |>
   select_threshold("threshold", score = "score", threshold = 0.55) |>
   link(selection = "threshold")
 ```
-Record linkage using the same function where `df_base` is the "register" and `df_example` is the reference (query data).
+
+Linking records using the same function where `df_base` is the "register" and `df_example` is the reference (data).
 
 ```{r}
 pair_ann(x = df_base, y = df_example, on = "txt", deduplication = FALSE) |>
diff --git a/README.md b/README.md
index bf98fac..38757de 100644
--- a/README.md
+++ b/README.md
@@ -11,13 +11,13 @@ coverage](https://codecov.io/gh/ncn-foreigners/blocking/branch/main/graph/badge.
 
 ## Description
 
-An R package that aims to block records for data deduplication and
-record linkage (a.k.a. entity resolution) based on [approximate nearest
-neighbours algorithms
+This R package is designed to block records for data deduplication and
+record linkage (also known as entity resolution) using [approximate
+nearest neighbours algorithms
 (ANN)](https://en.wikipedia.org/wiki/Nearest_neighbor_search) and graphs
 (via the `igraph` package).
 
-Currently supports the following R packages that binds to specific ANN
+It supports the following R packages that bind to specific ANN
 algorithms:
 
 - [rnndescent](https://cran.r-project.org/package=rnndescent) (default,
@@ -28,8 +28,8 @@ algorithms:
 - [mlpack](https://cran.r-project.org/package=RcppAnnoy) (see
   `mlpack::lsh` and `mlpack::knn`).
 
-The package also supports integration with the
-[reclin2](https://cran.r-project.org/package=reclin2) package via
+The package can be used with the
+[reclin2](https://cran.r-project.org/package=reclin2) package via the
 `blocking::pair_ann` function.
 
 ## Funding
@@ -39,8 +39,7 @@ Work on this package is supported by the National Science Centre, OPUS
 
 ## Installation
 
-You can install the development version of the `blocking` package from
-GitHub with:
+Install the GitHub blocking package with:
 
 ``` r
 # install.packages("remotes") # uncomment if needed
@@ -57,7 +56,7 @@ library(reclin2)
 #> Loading required package: data.table
 ```
 
-Generate simple data with two groups (`df_example`) and reference data
+Generate simple data with three groups (`df_example`) and reference data
 (`df_base`).
 
 ``` r
@@ -91,8 +90,8 @@ df_base
 #> 3       other
 ```
 
-Deduplication using `blocking` function. Output contains information
-about:
+Deduplication using the `blocking` function. Output contains
+information:
 
 - the method used (where `nnd` which refers to the NN descent
   algorithm),
@@ -117,9 +116,9 @@ blocking_result
 #> 2
 ```
 
-Table with blocking which contains:
+Table with blocking results contains:
 
-- row numbers from the original data
+- row numbers from the original data,
 - block number (integers),
 - distance (from the ANN algorithm).
 
@@ -136,8 +135,7 @@ blocking_result$result
 ```
 
 Deduplication using the `pair_ann` function for integration with the
-`reclin2` package. Here I use the pipeline that can be used with the
-`reclin2` package.
+`reclin2` package. Use the pipeline with the `reclin2` package.
 
 ``` r
 pair_ann(x = df_example, on = "txt") |>
@@ -158,8 +156,8 @@ pair_ann(x = df_example, on = "txt") |>
 #> 6:     8     5 montypython           monty
 ```
 
-Record linkage using the same function where `df_base` is the “register”
-and `df_example` is the reference (query data).
+Linking records using the same function where `df_base` is the
+“register” and `df_example` is the reference (data).
 
 ``` r
 pair_ann(x = df_base, y = df_example, on = "txt", deduplication = FALSE) |>
diff --git a/inst/tinytest/test_blocking.R b/inst/tinytest/test_blocking.R
index 7016a42..ac1fba9 100644
--- a/inst/tinytest/test_blocking.R
+++ b/inst/tinytest/test_blocking.R
@@ -59,18 +59,16 @@ expect_error(
 expect_equal(
   blocking(x = df_example$txt,
            true_blocks = result$result[, c("x", "y", "block")])$metrics,
-  c(vi = 0, nmi = 1, split.join = 0, rand = 1, adjusted.rand = 1,
-    recall = 1, precision = 1, fpr = 0, fnr = 0, accuracy = 1, specificity = 1
-  )
+  c(recall = 1, precision = 1, fpr = 0, fnr = 0, accuracy = 1, specificity = 1)
 )
 
 # check if true_block is a vector
 
-expect_silent(
-  blocking(x = df_example$txt,
-           #true_blocks = result$result$block)
-           true_blocks = result$result[, c("x", "y", "block")])
-)
+# expect_silent(
+#   blocking(x = df_example$txt,
+#            #true_blocks = result$result$block)
+#            true_blocks = result$result[, c("x", "y", "block")])
+# )
 
 
 ## printing
diff --git a/man/blocking.Rd b/man/blocking.Rd
index e512a70..12ad080 100644
--- a/man/blocking.Rd
+++ b/man/blocking.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/blocking.R
 \name{blocking}
 \alias{blocking}
-\title{Main function for blocking records given text data}
+\title{Block records based on text data.}
 \usage{
 blocking(
   x,
diff --git a/man/pair_ann.Rd b/man/pair_ann.Rd
index 2ed1122..f5e240b 100644
--- a/man/pair_ann.Rd
+++ b/man/pair_ann.Rd
@@ -36,7 +36,7 @@ pair_ann(
 Returns a \link{data.table} with two columns \code{.x} and \code{.y}. Columns \code{.x} and \code{.y} are row numbers from data.frames x and y respectively. Returning data.table is also of a class \code{pairs} which allows for integration with the \code{\link[reclin2:compare_pairs]{reclin2::compare_pairs()}} package.
 }
 \description{
-Function for the integration with the reclin2 package. The function is based on \code{\link[reclin2:pair_minsim]{reclin2::pair_minsim()}} and reuses some of its source code.
+Function for the integration with the \code{reclin2} package. The function is based on \code{\link[reclin2:pair_minsim]{reclin2::pair_minsim()}} and reuses some of its source code.
 }
 \details{
 Imports
diff --git a/vignettes/v1-deduplication.Rmd b/vignettes/v1-deduplication.Rmd
index ff3d1b9..b4346f5 100644
--- a/vignettes/v1-deduplication.Rmd
+++ b/vignettes/v1-deduplication.Rmd
@@ -33,7 +33,7 @@ library(reclin2)
 library(data.table)
 ```
 
-Read the `RLdata500` data used in the [RecordLinkage](https://CRAN.R-project.org/package=RecordLinkage) package from the [dblink](https://github.com/cleanzr/dblink) Github repository.
+Read the `RLdata500` data from the [RecordLinkage](https://CRAN.R-project.org/package=RecordLinkage) package from the [dblink](https://github.com/cleanzr/dblink) Github repository.
 
 ```{r}
 df <- read.csv("https://raw.githubusercontent.com/cleanzr/dblink/dc3dd0daf55f8a303863423817a0f0042b3c275a/examples/RLdata500.csv")
diff --git a/vignettes/v2-reclin.Rmd b/vignettes/v2-reclin.Rmd
index 21373e4..511a151 100644
--- a/vignettes/v2-reclin.Rmd
+++ b/vignettes/v2-reclin.Rmd
@@ -21,6 +21,151 @@ knitr::opts_chunk$set(
 )
 ```
 
+# Setup
+
+Read required packages
+
 ```{r setup}
 library(blocking)
+library(reclin2)
+library(data.table)
+```
+
+# Data
+
+Read the example data from the tutorial on [the `reclin` package on the URos 2021 Conference](https://github.com/djvanderlaan/tutorial-reclin-uros2021). The data sets are from ESSnet on Data Integration as stated in the repository:
+
+```
+These totally fictional data sets are supposed to have captured details of
+persons up to the date 31 December 2011.  Any years of birth captured as 2012
+are therefore in error.  Note that in the fictional Census data set, dates of
+birth between 27 March 2011 and 31 December 2011 are not necessarily in error.
+
+Census: A fictional data set to represent some observations from a
+        decennial Census
+CIS: Fictional observations from Customer Information System, which is
+        combined administrative data from the tax and benefit systems
+
+In the dataset census all records contain a person_id. For some of the records
+in cis the person_id is also available. This information can be used to
+evaluate the linkage (assuming these records from the cis are representable 
+all records in the cis). 
+```
+
+```{r}
+census <- read.csv("https://raw.githubusercontent.com/djvanderlaan/tutorial-reclin-uros2021/main/data/census.csv")
+cis <- read.csv("https://raw.githubusercontent.com/djvanderlaan/tutorial-reclin-uros2021/main/data/cis.csv")
+setDT(census)
+setDT(cis)
+```
+
++ `census` object has `r nrow(census)` rows and `r ncol(census)`,
++ `cis` object has `r nrow(census)` rows and `r ncol(census)`.
+
+Census data
+
+```{r}
+head(census)
 ```
+CIS data
+
+```{r}
+head(cis)
+```
+
+We need to create new columns that concatanates variables from `pername1` to `enumpc`. In the first step we replace `NA`s with `''`. 
+
+```{r}
+census[is.na(dob_day), dob_day := ""]
+census[is.na(dob_mon), dob_mon := ""]
+census[is.na(dob_year), dob_year := ""]
+cis[is.na(dob_day), dob_day := ""]
+cis[is.na(dob_mon), dob_mon := ""]
+cis[is.na(dob_year), dob_year := ""]
+
+census[, txt:=paste0(pername1, pername2, sex, dob_day, dob_mon, dob_year, enumcap, enumpc)]
+cis[, txt:=paste0(pername1, pername2, sex, dob_day, dob_mon, dob_year, enumcap, enumpc)]
+```
+
+
+
+# Linking datasets
+
+## Using basic functionalities of `blocking` package
+
+The goal of this exercise is to link units from the CIS dataset to the CENSUS dataset. 
+
+```{r}
+result1 <- blocking(x = census$txt, y = cis$txt, verbose = 1, seed = 2024)
+```
+
+Distribution of distances for pairs
+
+```{r}
+hist(result1$result$dist, main = "Distribution of distances between pairs", xlab = "Distances")
+```
+
+Example pairs
+
+```{r}
+head(result1$result, n= 10)
+```
+Let's look at the first pair. Clearly there is a typo on the `pername1` but all other variables are the same so it seems that this is a match.
+
+```{r}
+census[1, ]
+cis[8152, ]
+```
+
+Now, let's look at the 7th pair with the largest distance from the first 10 rows. This seems to be a non-match because only `pername2` and `sex` are the same.
+
+```{r}
+census[8, ]
+cis[3901, ]
+```
+
+
+## Assessing the quality
+
+For some records we have information on the correct linkage. We can use this information to assess our approach.
+
+```{r}
+matches <- merge(x = census[, .(x=1:.N, person_id)],
+                 y = cis[, .(y = 1:.N, person_id)],
+                 by = "person_id")
+matches[, block:=1:.N]
+head(matches)
+```
+So in our example we have `r nrow(matches)` pairs.
+
+```{r}
+result2 <- blocking(x = census$txt, y = cis$txt, verbose = 1, seed = 2024,
+                    true_blocks = matches[, .(x, y, block)], n_threads = 4)
+```
+
+Let's see how our approach handled this problem.
+
+```{r}
+result2
+```
+
+It seems that default parameters of the NND method result in FNR of 16% which is quite large. Let's compare to HNSW algorithm.
+
+```{r}
+result3 <- blocking(x = census$txt, y = cis$txt, seed = 2024, verbose = 1, 
+                    true_blocks = matches[, .(x, y, block)], n_threads = 4, 
+                    ann = "hnsw")
+```
+
+```{r}
+result3
+```
+
+It seems that the HNSW algorithm performed better with 0.62% FNR. This however comes with cost, in particupar computational cost:
+
+1. the HNSW does not handle sparse matrices so sparse matrix of tokens is converted to dense.
+2. HNSW algorithm is slower than NND. 
+
+Computational times are: 16 seconds for NND and about 60 HNSW (on M2 MacBook AIR).
+
+