Skip to content

Commit

Permalink
Fixes for parsing PHPS Luke responses (#1116)
Browse files Browse the repository at this point in the history
  • Loading branch information
thomascorthals authored Dec 7, 2023
1 parent 7a7232f commit 13995b6
Show file tree
Hide file tree
Showing 16 changed files with 494 additions and 144 deletions.
14 changes: 14 additions & 0 deletions src/Core/Client/Response.php
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ public function getStatusMessage(): string
return $this->statusMessage;
}

/**
* Set body data.
*
* @param string $body
*
* @return self Provides fluent interface
*/
public function setBody(string $body): self
{
$this->body = $body;

return $this;
}

/**
* Set headers.
*
Expand Down
14 changes: 10 additions & 4 deletions src/Core/Query/Result/Result.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,23 @@ public function getData(): array
switch ($this->query->getResponseWriter()) {
case AbstractQuery::WT_PHPS:
$this->data = unserialize($this->response->getBody(), ['allowed_classes' => false]);

if (false === $this->data) {
throw new UnexpectedValueException('Solr PHPS response could not be unserialized');
}

break;
case AbstractQuery::WT_JSON:
$this->data = json_decode($this->response->getBody(), true);

if (null === $this->data) {
throw new UnexpectedValueException('Solr JSON response could not be decoded');
}

break;
default:
throw new RuntimeException(sprintf('Responseparser cannot handle %s', $this->query->getResponseWriter()));
}

if (null === $this->data) {
throw new UnexpectedValueException('Solr JSON response could not be decoded');
}
}

return $this->data;
Expand Down
104 changes: 77 additions & 27 deletions src/QueryType/Luke/ResponseParser/Doc.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,15 @@ public function parse(ResultInterface $result): array
{
$this->result = $result;

$data = parent::parse($result);
$query = $result->getQuery();

// 'lucene' can contain the same key multiple times for multiValued fields.
// A SimpleOrderedMap in Solr, it isn't represented as a flat array in JSON
// and unlike a NamedList its output format can't be controlled with json.nl.
// We can't rely on json_decode() if we don't want data loss for these fields.
$data['doc']['lucene'] = JsonMachineItems::fromString(
$result->getResponse()->getBody(),
[
// like json_decode('...', true), ExtJsonDecoder(true) returns arrays instead of objects
'decoder' => new ExtJsonDecoder(true),
'pointer' => '/doc/lucene',
]
);
if ($query::WT_PHPS === $query->getResponseWriter()) {
// workaround for https://github.com/apache/solr/pull/2114
$response = $result->getResponse();
$response->setBody(str_replace('}s:4:"solr";i:0;a:', '}s:4:"solr";a:', $response->getBody()));
}

$data = parent::parse($result);

// parse 'info' first so 'doc' can use it for flag lookups
$data['infoResult'] = $this->parseInfo($data['info']);
Expand All @@ -73,7 +68,28 @@ protected function parseDoc(array $docData): DocInfo
{
$docInfo = new DocInfo($docData['docId']);

$docInfo->setLucene($this->parseLucene($docData['lucene']));
$query = $this->result->getQuery();

// 'lucene' can contain the same key multiple times for multiValued fields.
// How this is represented in the response body depends on Solr's ResponseWriter.
if ($query::WT_JSON === $query->getResponseWriter()) {
// A SimpleOrderedMap in Solr isn't represented as a flat array in JSON
// and unlike a NamedList its output format can't be controlled with json.nl.
// We can't rely on json_decode() if we don't want data loss for these fields.
$docData['lucene'] = JsonMachineItems::fromString(
$this->result->getResponse()->getBody(),
[
// like json_decode('...', true), ExtJsonDecoder(true) returns arrays instead of objects
'decoder' => new ExtJsonDecoder(true),
'pointer' => '/doc/lucene',
]
);

$docInfo->setLucene($this->parseLuceneJson($docData['lucene']));
} else {
$docInfo->setLucene($this->parseLucenePhps($docData['lucene']));
}

$docInfo->setSolr($this->parseSolr($docData['solr']));

return $docInfo;
Expand All @@ -84,28 +100,62 @@ protected function parseDoc(array $docData): DocInfo
*
* @return DocFieldInfo[]
*/
protected function parseLucene(JsonMachineItems $luceneData): array
protected function parseLuceneJson(JsonMachineItems $luceneData): array
{
$lucene = [];

foreach ($luceneData as $name => $details) {
$fieldInfo = new DocFieldInfo($name);

$fieldInfo->setType($details['type']);
$fieldInfo->setSchema(new FlagList($details['schema'], $this->key));
$fieldInfo->setFlags(new FlagList($details['flags'], $this->key));
$fieldInfo->setValue($details['value']);
$fieldInfo->setInternal($details['internal']);
$fieldInfo->setBinary($details['binary'] ?? null);
$fieldInfo->setDocFreq($details['docFreq'] ?? null);
$fieldInfo->setTermVector($details['termVector'] ?? null);

$lucene[] = $fieldInfo;
$lucene[] = $this->parseLuceneField($name, $details);
}

return $lucene;
}

/**
* @param array $luceneData
*
* @return DocFieldInfo[]
*/
protected function parseLucenePhps(array $luceneData): array
{
$lucene = [];
$prevName = '';

foreach ($luceneData as $name => $details) {
if (str_starts_with($name, $prevName.' ')) {
// field name was mangled by Solr's ResponseWriter to avoid repeats in the output
$name = $prevName;
}

$lucene[] = $this->parseLuceneField($name, $details);
$prevName = $name;
}

return $lucene;
}

/**
* @param string $name
* @param array $details
*
* @return DocFieldInfo
*/
protected function parseLuceneField(string $name, array $details): DocFieldInfo
{
$fieldInfo = new DocFieldInfo($name);

$fieldInfo->setType($details['type']);
$fieldInfo->setSchema(new FlagList($details['schema'], $this->key));
$fieldInfo->setFlags(new FlagList($details['flags'], $this->key));
$fieldInfo->setValue($details['value']);
$fieldInfo->setInternal($details['internal']);
$fieldInfo->setBinary($details['binary'] ?? null);
$fieldInfo->setDocFreq($details['docFreq'] ?? null);
$fieldInfo->setTermVector($details['termVector'] ?? null);

return $fieldInfo;
}

/**
* @param array $solrData
*
Expand Down
20 changes: 18 additions & 2 deletions src/QueryType/Luke/ResponseParser/Fields.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ class Fields extends Index
{
use InfoTrait;

/**
* @var ResultInterface
*/
protected $result;

/**
* Get result data for the response.
*
Expand All @@ -30,6 +35,8 @@ class Fields extends Index
*/
public function parse(ResultInterface $result): array
{
$this->result = $result;

$data = parent::parse($result);

// parse 'info' first so 'fields' can use it for flag lookups
Expand All @@ -46,6 +53,7 @@ public function parse(ResultInterface $result): array
*/
protected function parseFields(array $fieldsData): array
{
$query = $this->result->getQuery();
$fields = [];

foreach ($fieldsData as $name => $info) {
Expand All @@ -71,11 +79,19 @@ protected function parseFields(array $fieldsData): array
$field->setDistinct($info['distinct'] ?? null);

if (isset($info['topTerms'])) {
$field->setTopTerms($this->convertToKeyValueArray($info['topTerms']));
if ($query::WT_JSON === $query->getResponseWriter()) {
$info['topTerms'] = $this->convertToKeyValueArray($info['topTerms']);
}

$field->setTopTerms($info['topTerms']);
}

if (isset($info['histogram'])) {
$field->setHistogram($this->convertToKeyValueArray($info['histogram']));
if ($query::WT_JSON === $query->getResponseWriter()) {
$info['histogram'] = $this->convertToKeyValueArray($info['histogram']);
}

$field->setHistogram($info['histogram']);
}

$fields[$name] = $field;
Expand Down
16 changes: 16 additions & 0 deletions tests/Core/Client/ClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,22 @@ public function testCreateRealtimeGet()
$observer->createRealtimeGet($options);
}

public function testCreateLuke()
{
$options = ['optionA' => 1, 'optionB' => 2];

$observer = $this->getMockBuilder(Client::class)
->onlyMethods(['createQuery'])
->setConstructorArgs([new MyAdapter(), new EventDispatcher()])
->getMock();
$observer->expects($this->once())
->method('createQuery')
->with($this->equalTo(Client::QUERY_LUKE), $this->equalTo($options))
->willReturn(new LukeQuery());

$observer->createLuke($options);
}

public function testCreateCoreAdmin()
{
$options = ['optionA' => 1, 'optionB' => 2];
Expand Down
6 changes: 6 additions & 0 deletions tests/Core/Client/ResponseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ public function testGetBody()
$this->assertSame($this->data, $this->response->getBody());
}

public function testSetBody()
{
$this->response->setBody('test body');
$this->assertSame('test body', $this->response->getBody());
}

public function testMissingStatusCode()
{
$headers = ['dummy'];
Expand Down
22 changes: 21 additions & 1 deletion tests/Core/Query/Result/ResultTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,10 @@ public function testGetDataWithUnkownResponseWriter()
$result->getData();
}

public function testGetInvalidData()
public function testGetInvalidJsonData()
{
$this->query->setResponseWriter($this->query::WT_JSON);

$data = 'invalid';
$this->response = new Response($data, $this->headers);
$this->result = new Result($this->query, $this->response);
Expand All @@ -132,6 +134,24 @@ public function testGetInvalidData()
$this->result->getData();
}

public function testGetInvalidPhpsData()
{
set_error_handler(static function (int $errno, string $errstr): void {
// ignore E_NOTICE from unserialize() to check that we throw an exception
}, \E_NOTICE);

$this->query->setResponseWriter($this->query::WT_PHPS);

$data = 'invalid';
$this->response = new Response($data, $this->headers);
$this->result = new Result($this->query, $this->response);

$this->expectException(UnexpectedValueException::class);
$this->result->getData();

restore_error_handler();
}

public function testJsonSerialize()
{
$this->assertJsonStringEqualsJsonString($this->data, json_encode($this->result));
Expand Down
5 changes: 4 additions & 1 deletion tests/Integration/AbstractTechproductsTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -4450,11 +4450,14 @@ public function testExtractInvalidFile()
* not the entire index. That makes some expected values unpredictable. We'd also
* need to know the correct shard to query for a specific document.
*
* @dataProvider responseWriterProvider
*
* @group skip_for_solr_cloud
*/
public function testLuke()
public function testLuke(string $responseWriter)
{
$luke = self::$client->createLuke();
$luke->setResponseWriter($responseWriter);

// SHOW_INDEX has the simplest response, which is also included in all other responses
$luke->setShow(LukeQuery::SHOW_INDEX);
Expand Down
33 changes: 29 additions & 4 deletions tests/QueryType/Luke/ResponseParser/DocDataTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ trait DocDataTrait
*
* The $.doc.lucene node of a Luke response contains duplicate keys for
* multiValued fields. We can't fully represent these in the array returned
* by {@see getDocData()}.
* by {@see getDocJsonData()}.
*
* @return string
*/
public function getRawDocData(): string
public function getRawDocJsonData(): string
{
return <<<'JSON'
{
Expand Down Expand Up @@ -98,8 +98,33 @@ public function getRawDocData(): string
JSON;
}

public function getDocData(): array
public function getDocJsonData(): array
{
return json_decode($this->getRawDocData(), true);
return json_decode($this->getRawDocJsonData(), true);
}

/**
* Returns a raw PHPS string as it would be output by Solr.
*
* This string contains a syntax error just like Solr's actual output does.
*
* @see https://github.com/apache/solr/pull/2114
*
* @return string
*/
public function getRawDocPhpsData(): string
{
$phpsData = 'a:3:{s:5:"docId";i:1701;s:6:"lucene";a:7:{'.
's:2:"id";a:6:{s:4:"type";s:6:"string";s:6:"schema";s:18:"I-S-U-----OF-----l";s:5:"flags";s:18:"ITS-------OF------";s:5:"value";s:8:"NCC-1701";s:8:"internal";s:8:"NCC-1701";s:7:"docFreq";i:1;}'.
's:4:"name";a:7:{s:4:"type";s:12:"text_general";s:6:"schema";s:18:"ITS-U-------------";s:5:"flags";s:18:"ITS---------------";s:5:"value";s:19:"Enterprise document";s:8:"internal";s:19:"Enterprise document";s:7:"docFreq";i:0;s:10:"termVector";a:2:{s:10:"enterprise";i:2;s:8:"document";i:1;}}'.
's:3:"cat";a:6:{s:4:"type";s:6:"string";s:6:"schema";s:18:"I-S-UM----OF-----l";s:5:"flags";s:18:"ITS-------OF------";s:5:"value";s:12:"Constitution";s:8:"internal";s:12:"Constitution";s:7:"docFreq";i:12;}'.
's:5:"cat 1";a:6:{s:4:"type";s:6:"string";s:6:"schema";s:18:"I-S-UM----OF-----l";s:5:"flags";s:18:"ITS-------OF------";s:5:"value";s:6:"Galaxy";s:8:"internal";s:6:"Galaxy";s:7:"docFreq";i:6;}'.
's:5:"price";a:5:{s:4:"type";s:6:"pfloat";s:6:"schema";s:18:"I-SDU-----OF------";s:5:"flags";s:18:"-TS---------------";s:5:"value";s:4:"92.0";s:8:"internal";s:4:"92.0";}'.
's:8:"flagship";a:6:{s:4:"type";s:7:"boolean";s:6:"schema";s:18:"I-S-U-----OF-----l";s:5:"flags";s:18:"ITS-------OF------";s:5:"value";s:4:"true";s:8:"internal";s:1:"T";s:7:"docFreq";i:1;}'.
's:8:"insignia";a:7:{s:4:"type";s:6:"binary";s:6:"schema";s:18:"I-S-U------F------";s:5:"flags";s:18:"-TS------------B--";s:5:"value";s:8:"PS9cPQ==";s:8:"internal";N;s:6:"binary";s:8:"PS9cPQ==";s:7:"docFreq";i:0;}'.
'}s:4:"solr";i:0;a:6:{s:2:"id";s:8:"NCC-1701";s:4:"name";s:19:"Enterprise document";s:3:"cat";a:2:{i:0;s:12:"Constitution";i:1;s:6:"Galaxy";}s:5:"price";d:3.59;s:8:"flagship";b:1;s:8:"insignia";s:8:"PS9cPQ==";}}';
// the i:0; here ^^^^ is superfluous data that causes unserialize() to fail as discussed in https://github.com/apache/solr/pull/2114

return $phpsData;
}
}
Loading

0 comments on commit 13995b6

Please sign in to comment.