Skip to content
This repository has been archived by the owner on Dec 9, 2022. It is now read-only.

Tests for TPCDS queries join ordering #21

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions presto-tpcds/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,17 @@
<artifactId>log-manager</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>joni</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>bytecode</artifactId>
<scope>test</scope>
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting, what are these for?

</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.facebook.presto.sql.planner.optimizations;

import com.facebook.presto.Session;
import com.facebook.presto.testing.LocalQueryRunner;
import com.facebook.presto.tpcds.TpcdsConnectorFactory;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;
import com.google.common.io.Resources;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE;
import static com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY;
import static com.facebook.presto.sql.analyzer.FeaturesConfig.JoinDistributionType.AUTOMATIC;
import static com.facebook.presto.sql.analyzer.FeaturesConfig.JoinReorderingStrategy.COST_BASED;
import static com.google.common.base.Charsets.UTF_8;
import static java.lang.String.format;
import static java.util.stream.Collectors.toList;
import static org.testng.Assert.assertEquals;

/**
* This class tests cost-based optimization rules related to joins. It contains unmodified TPCH queries.
* This class is using TPCH connector configured in way to mock Hive connector with unpartitioned TPCH tables.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update

*/
public class TestTpcdsJoinReordering
extends BaseJoinReorderingTest
{
/*
* CAUTION: The expected plans here are not necessarily optimal yet. Their role is to prevent
* inadvertent regressions. A conscious improvement to the planner may require changing some
* of the expected plans, but any such change should be verified on an actual cluster with
* large amount of data.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update (assuming the purpose/usage of this test class is different than the purpose of the class dedicated to tpch).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if the purpose is different. Practically it works in the same way.

*/

public TestTpcdsJoinReordering()
{
super(
"sf3000.0",
ImmutableMap.of(
JOIN_REORDERING_STRATEGY, COST_BASED.name(),
JOIN_DISTRIBUTION_TYPE, AUTOMATIC.name()));
}

@Override
protected LocalQueryRunner createQueryRunner(Session session)
{
return LocalQueryRunner.queryRunnerWithFakeNodeCountForStats(session, 8);
}

@Override
protected void createTpchCatalog(LocalQueryRunner queryRunner)
{
queryRunner.createCatalog(queryRunner.getDefaultSession().getCatalog().get(),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fmt

new TpcdsConnectorFactory(1),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method is called createTpchCatalog ...

ImmutableMap.of());
}

@DataProvider
public Object[][] getTpcdsQueryIds()
{
return IntStream.range(1, 100)
.boxed()
.flatMap(i -> {
if (i < 10) {
return Stream.of("0" + i);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use format("%02d", i) below and remove this if.

}
if (i == 14 || i == 23 || i == 24 || i == 39) {
return Stream.of(i + "_1", i + "_2");
}
return Stream.of(i.toString());
})
.map(i -> new Object[]{i})
.collect(toList())
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Directly .toArray(Object[][]::new);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

refactored

.toArray(new Object[][]{});
}

@Test(dataProvider = "getTpcdsQueryIds")
public void test(String queryId)
throws IOException
{
assertEquals(joinOrderString(tpcdsQuery(queryId)), read(getExpectedJoinOrderingFile(queryId)));
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very nice and functional.

However, if something goes wrong (or one wants to run this with a debugger), this is no longer so nice.

}

private String read(File file)
throws IOException
{
return Joiner.on("\n").join(Files.readLines(file, UTF_8)) + "\n";
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use Path instead of File and JDK instead of Guava:

return java.nio.file.Files.lines(file).collect(joining("\n"));

}

private static String tpcdsQuery(String queryId)
{
try {
String queryPath = format("/tpcds/queries/q%s.sql", queryId);
URL resourceUrl = Resources.getResource(TpcdsConnectorFactory.class, queryPath);
return Resources.toString(resourceUrl, StandardCharsets.UTF_8);
}
catch (IOException e) {
throw new RuntimeException(e);
}
}

@Test(dataProvider = "getTpcdsQueryIds", enabled = false)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, calling this requires temporarily removing enabled = false. What about making this a main method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then it has to be copied to both classes ; /

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then it has to be copied to both classes ; /

this i don't understand

public void update(String queryId)
throws IOException
{
Files.write(joinOrderString(tpcdsQuery(queryId)), getExpectedJoinOrderingFile(queryId), UTF_8);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very nice and functional.

However, if something goes wrong (or one wants to run this with a debugger), this is no longer so nice.

}

private static File getExpectedJoinOrderingFile(String queryId)
{
return new File(String.format("src/test/resources/join_ordering/%s.join_ordering", queryId));
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rnn queryId

static import format

use Path

end file name with e.g. .txt

}
}