Skip to content

Commit

Permalink
Use the weighting engine to choose which terms and meta to vectorize
Browse files Browse the repository at this point in the history
  • Loading branch information
felipeelia committed Jan 24, 2025
1 parent 9eb7413 commit 6b7f17c
Showing 1 changed file with 52 additions and 28 deletions.
80 changes: 52 additions & 28 deletions includes/classes/Feature/VectorEmbeddings/Indexables/Post.php
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,46 @@ public function get_post_chunks( int $post_id ): array {

$chunks = $this->feature->chunk_content( $main_content );

$post_terms_str = $this->get_post_terms( $post );
if ( $post_terms_str ) {
$chunks = [ ...$chunks, ...$this->feature->chunk_content( $post_terms_str ) ];
$search_feature = \ElasticPress\Features::factory()->get_registered_feature( 'search' );
$weighting = $search_feature->weighting->get_weighting_configuration_with_defaults();
if ( empty( $weighting[ $post->post_type ] ) ) {
return $chunks;
}

$post_meta_str = $this->get_post_meta( $post );
if ( $post_meta_str ) {
$chunks = [ ...$chunks, ...$this->feature->chunk_content( $post_meta_str ) ];
$post_type_weighting = $weighting[ $post->post_type ];

$taxonomies = array_reduce(
array_keys( $post_type_weighting ),
function ( $acc, $field ) use ( $post_type_weighting ) {
if ( $post_type_weighting[ $field ]['enabled'] && preg_match( '/terms\.(.*)\.name/', $field, $matches ) ) {
$acc[] = $matches[1];
}
return $acc;
},
[]
);
if ( $taxonomies ) {
$post_terms_str = $this->get_post_terms( $post, $taxonomies );
if ( $post_terms_str ) {
$chunks = [ ...$chunks, ...$this->feature->chunk_content( $post_terms_str ) ];

Check failure on line 135 in includes/classes/Feature/VectorEmbeddings/Indexables/Post.php

View workflow job for this annotation

GitHub Actions / PHP Lint

Array unpacking within array declarations using the spread operator is not supported in PHP 7.3 or earlier. Found: ...$chunks

Check failure on line 135 in includes/classes/Feature/VectorEmbeddings/Indexables/Post.php

View workflow job for this annotation

GitHub Actions / PHP Lint

Array unpacking within array declarations using the spread operator is not supported in PHP 7.3 or earlier. Found: ...$this
}
}

$meta_fields = array_reduce(
array_keys( $post_type_weighting ),
function ( $acc, $field ) use ( $post_type_weighting ) {
if ( $post_type_weighting[ $field ]['enabled'] && preg_match( '/meta\.(.*)\.value/', $field, $matches ) ) {
$acc[] = $matches[1];
}
return $acc;
},
[]
);
if ( $meta_fields ) {
$post_meta_str = $this->get_post_meta( $post, $meta_fields );
if ( $post_meta_str ) {
$chunks = [ ...$chunks, ...$this->feature->chunk_content( $post_meta_str ) ];

Check failure on line 152 in includes/classes/Feature/VectorEmbeddings/Indexables/Post.php

View workflow job for this annotation

GitHub Actions / PHP Lint

Array unpacking within array declarations using the spread operator is not supported in PHP 7.3 or earlier. Found: ...$chunks

Check failure on line 152 in includes/classes/Feature/VectorEmbeddings/Indexables/Post.php

View workflow job for this annotation

GitHub Actions / PHP Lint

Array unpacking within array declarations using the spread operator is not supported in PHP 7.3 or earlier. Found: ...$this
}
}

return $chunks;
Expand All @@ -127,27 +159,24 @@ public function get_post_chunks( int $post_id ): array {
/**
* Get the representation of the post terms.
*
* @param \WP_Post $post The post object
* @param \WP_Post $post The post object
* @param array $taxonomies Taxonomies to be added.
* @return string
*/
protected function get_post_terms( $post ): string {
$post_terms_str = '';
$post_terms = [];
$indexable = \ElasticPress\Indexables::factory()->get( 'post' );
$indexable_taxonomies = $indexable->get_indexable_post_taxonomies( $post );
$taxonomy_by_names = wp_list_pluck( $indexable_taxonomies, 'label', 'name' );
foreach ( $taxonomy_by_names as $tax_name => $tax_label ) {
protected function get_post_terms( $post, $taxonomies ): string {
$post_terms_str = '';
$post_terms = [];
foreach ( $taxonomies as $tax_name ) {
$terms = get_the_terms( $post, $tax_name );
if ( is_array( $terms ) ) {
$post_terms[ $tax_label ] = array_map(
$post_terms[ $tax_name ] = array_map(
function ( $term ) {
return $term->name;
},
$terms
);
}
}

if ( ! empty( $post_terms ) ) {
$post_terms_str .= "# Taxonomy Terms\n";
foreach ( $post_terms as $tax_label => $terms ) {
Expand All @@ -162,20 +191,15 @@ function ( $term ) {
/**
* Get the representation of the post meta.
*
* @param \WP_Post $post The post object
* @param \WP_Post $post The post object
* @param array $meta_fields List of meta field keys to read from the post.
* @return string
*/
protected function get_post_meta( $post ): string {
$meta_str = '';
$meta_to_index = [
'footnotes',
'searchwp_content_pdf_metadata',
];
$values = [];
if ( ! empty( $meta_to_index ) ) {
foreach ( $meta_to_index as $meta_field ) {
$values[ $meta_field ] = get_post_meta( $post->ID, $meta_field, true );
}
protected function get_post_meta( $post, $meta_fields ): string {
$meta_str = '';
$values = [];
foreach ( $meta_fields as $meta_field ) {
$values[ $meta_field ] = get_post_meta( $post->ID, $meta_field, true );
}
$values = array_filter( $values );

Expand Down

0 comments on commit 6b7f17c

Please sign in to comment.