Commit

refactor(Fixed merge conflict): Merged dev into branch and fixed conflicts
br648 committed Nov 8, 2023
2 parents 1b6cc2f + bde0936 commit 46f16fd
Showing 23 changed files with 534 additions and 270 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/maven.yml
@@ -29,7 +29,7 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: 1.8
- # Install node 14+ for running maven-semantic-release.
+ # Install node 18+ for running maven-semantic-release.
- name: Use Node.js 18.X
uses: actions/setup-node@v1
with:
6 changes: 6 additions & 0 deletions README.md
@@ -1,4 +1,6 @@
# gtfs-lib [![Build Status](https://travis-ci.org/conveyal/gtfs-lib.svg?branch=master)](https://travis-ci.org/conveyal/gtfs-lib)
+ [![Join the chat at https://matrix.to/#/#transit-data-tools:gitter.im](https://badges.gitter.im/repo.png)](https://matrix.to/#/#gtfs-lib:gitter.im)


A library for loading and saving GTFS feeds of arbitrary size with disk-backed storage.

@@ -14,6 +16,10 @@ The main design goals are:

A gtfs-lib GTFSFeed object should faithfully represent the contents of a single GTFS feed file. At least during the initial load, no heuristics are applied to clean up the data. Basic syntax is verified, and any problems encountered are logged in detail. At this stage, fields or entities may be missing, and the data may be nonsensical. Then in an optional post-load validation phase, semantic checks are performed and problems are optionally repaired.

+ ## Getting in touch
+ 
+ We have a [Gitter space](https://matrix.to/#/#transit-data-tools:gitter.im) for the full TRANSIT-Data-Tools project where you can post questions and comments. This includes a room dedicated to GTFS-lib discussions.

## Usage

gtfs-lib can be used as a Java library or run via the command line. If using this library with PostgreSQL for persistence, you must use at least version 9.6 of PostgreSQL.
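As a minimal sketch of the library usage the README describes, assuming `GTFSFeed.fromFile` is the MapDB-backed entry point (the loader and writer calls in GTFSFeed.java further down suggest this shape, but check the class for the exact API):

```java
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.model.Trip;

public class FeedSketch {
    public static void main(String[] args) {
        // Load a feed zip into disk-backed (MapDB) storage; per the README,
        // no cleanup heuristics are applied at this stage.
        GTFSFeed feed = GTFSFeed.fromFile("gtfs.zip");
        // Entities are exposed as maps, e.g. trips keyed on trip_id.
        for (Trip trip : feed.trips.values()) {
            System.out.println(trip.trip_id);
        }
        // Write the feed back out to a zip (see toFile in GTFSFeed.java below).
        feed.toFile("gtfs-out.zip");
    }
}
```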
8 changes: 4 additions & 4 deletions pom.xml
@@ -241,7 +241,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>30.0-jre</version>
+ <version>32.0.0-jre</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
@@ -314,7 +314,7 @@
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
- <version>42.2.25</version>
+ <version>42.4.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
@@ -331,13 +331,13 @@
<dependency>
<groupId>com.graphql-java</groupId>
<artifactId>graphql-java</artifactId>
- <version>11.0</version>
+ <version>17.4</version>
</dependency>
<!-- Contains special utils for things like converting escaped strings to unescaped strings for logging. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
- <version>1.6</version>
+ <version>1.10.0</version>
</dependency>
</dependencies>
</project>
12 changes: 7 additions & 5 deletions src/main/java/com/conveyal/gtfs/GTFS.java
@@ -46,10 +46,9 @@ public abstract class GTFS {
/**
* Export a feed ID from the database to a zipped GTFS file in the specified export directory.
*/
- public static FeedLoadResult export (String feedId, String outFile, DataSource dataSource, boolean fromEditor) {
-     JdbcGtfsExporter exporter = new JdbcGtfsExporter(feedId, outFile, dataSource, fromEditor);
-     FeedLoadResult result = exporter.exportTables();
-     return result;
+ public static FeedLoadResult export (String feedId, String outFile, DataSource dataSource, boolean fromEditor, boolean publishProprietaryFiles) {
+     JdbcGtfsExporter exporter = new JdbcGtfsExporter(feedId, outFile, dataSource, fromEditor, publishProprietaryFiles);
+     return exporter.exportTables();
}

/**
@@ -299,13 +298,16 @@ public static void main (String[] args) throws IOException {

if (cmd.hasOption("export")) {
String namespaceToExport = cmd.getOptionValue("export");
+ boolean exportProprietaryFiles = (cmd.getOptionValue("exportProprietaryFiles") != null)
+     && Boolean.parseBoolean(cmd.getOptionValue("exportProprietaryFiles"));

String outFile = cmd.getOptionValue("outFile");
if (namespaceToExport == null && loadResult != null) {
namespaceToExport = loadResult.uniqueIdentifier;
}
if (namespaceToExport != null) {
LOG.info("Exporting feed with unique identifier {}", namespaceToExport);
- FeedLoadResult exportResult = export(namespaceToExport, outFile, dataSource, true);
+ export(namespaceToExport, outFile, dataSource, true, exportProprietaryFiles);
LOG.info("Done exporting.");
} else {
LOG.error("No feed to export. Specify one, or load a feed in the same command.");
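A hedged sketch of the updated export call: the new fifth argument, publishProprietaryFiles, comes straight from this diff, while the PGSimpleDataSource setup and the FeedLoadResult import path are assumptions standing in for however the application obtains its PostgreSQL DataSource.

```java
import com.conveyal.gtfs.GTFS;
import com.conveyal.gtfs.loader.FeedLoadResult;
import org.postgresql.ds.PGSimpleDataSource;

public class ExportSketch {
    public static void main(String[] args) {
        // Stand-in DataSource; the postgresql driver is already a dependency
        // (see the version bump in pom.xml above).
        PGSimpleDataSource dataSource = new PGSimpleDataSource();
        dataSource.setUrl("jdbc:postgresql://localhost:5432/gtfs");
        // "abcd_1234" is a placeholder namespace, e.g. a loadResult.uniqueIdentifier.
        // The final argument is the new publishProprietaryFiles flag.
        FeedLoadResult result = GTFS.export("abcd_1234", "/tmp/out.zip", dataSource, true, true);
        System.out.println("Exported namespace: " + result.uniqueIdentifier);
    }
}
```

On the command line, the equivalent appears to be supplying exportProprietaryFiles as an option value, per the Boolean.parseBoolean wiring above.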
23 changes: 2 additions & 21 deletions src/main/java/com/conveyal/gtfs/GTFSFeed.java
@@ -169,6 +169,7 @@ else if (feedId == null || feedId.isEmpty()) {
this.fares.putAll(fares);
fares = null; // free memory

+ new Pattern.Loader(this).loadTable(zip);
new Route.Loader(this).loadTable(zip);
new ShapePoint.Loader(this).loadTable(zip);
new Stop.Loader(this).loadTable(zip);
@@ -216,6 +217,7 @@ public void toFile (String file) {
new Transfer.Writer(this).writeTable(zip);
new Trip.Writer(this).writeTable(zip);
new StopTime.Writer(this).writeTable(zip);
+ new Pattern.Writer(this).writeTable(zip);

zip.close();

@@ -343,27 +345,6 @@ public Shape getShape (String shape_id) {
return shape.shape_dist_traveled.length > 0 ? shape : null;
}

- /**
-  * MapDB-based implementation to find patterns.
-  *
-  * FIXME: Remove and make pattern finding happen during validation? We want to share the pattern finder between the
-  * two implementations (MapDB and RDBMS), apply the same validation process to both kinds of storage, and produce
-  * Patterns in the same way in both cases, during validation. This prevents us from iterating over every stopTime
-  * twice, since we're already iterating over all of them in validation. However, in this case it might not be costly
-  * to simply retrieve the stop times from the stop_times map.
-  */
- public void findPatterns () {
-     PatternFinder patternFinder = new PatternFinder();
-     // Iterate over trips and process each trip and its stop times.
-     for (Trip trip : this.trips.values()) {
-         Iterable<StopTime> orderedStopTimesForTrip = this.getOrderedStopTimesForTrip(trip.trip_id);
-         patternFinder.processTrip(trip, orderedStopTimesForTrip);
-     }
-     Map<TripPatternKey, Pattern> patternObjects = patternFinder.createPatternObjects(this.stops, null);
-     this.patterns.putAll(patternObjects.values().stream()
-         .collect(Collectors.toMap(Pattern::getId, pattern -> pattern)));
- }

/**
* For the given trip ID, fetch all the stop times in order, and interpolate stop-to-stop travel times.
*/
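With findPatterns() removed, pattern generation is shared between storage backends: PatternFinder collects patterns from trips, exactly as the deleted loop did, and the new PatternBuilder below persists them. A rough sketch of that wiring, with the surrounding plumbing (trip iteration, stop lookup, feed handle) supplied by the caller and package locations inferred from this diff:

```java
import com.conveyal.gtfs.PatternBuilder;
import com.conveyal.gtfs.PatternFinder;
import com.conveyal.gtfs.TripPatternKey;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.model.Pattern;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.model.StopTime;
import com.conveyal.gtfs.model.Trip;

import java.sql.SQLException;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

class PatternFlowSketch {
    // Find patterns from trips, then persist them with the new PatternBuilder.
    // In the library this happens inside the validator; here callers supply the
    // trips, each trip's ordered stop times, the stops, and the IDs of any
    // patterns already loaded from a patterns.txt file.
    static void buildPatterns(
        Feed feed,
        Iterable<Trip> trips,
        Function<Trip, Iterable<StopTime>> orderedStopTimes,
        Map<String, Stop> stopById,
        Set<String> patternIdsLoadedFromFile
    ) throws SQLException {
        PatternFinder patternFinder = new PatternFinder();
        for (Trip trip : trips) {
            patternFinder.processTrip(trip, orderedStopTimes.apply(trip));
        }
        Map<TripPatternKey, Pattern> patterns = patternFinder.createPatternObjects(stopById, null);
        new PatternBuilder(feed).create(patterns, patternIdsLoadedFromFile);
    }
}
```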
235 changes: 235 additions & 0 deletions src/main/java/com/conveyal/gtfs/PatternBuilder.java
@@ -0,0 +1,235 @@
package com.conveyal.gtfs;

import com.conveyal.gtfs.loader.BatchTracker;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.loader.Requirement;
import com.conveyal.gtfs.loader.Table;
import com.conveyal.gtfs.model.Pattern;
import com.conveyal.gtfs.model.PatternStop;
import org.apache.commons.dbutils.DbUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Files;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;
import java.util.Set;

import static com.conveyal.gtfs.loader.JdbcGtfsLoader.copyFromFile;
import static com.conveyal.gtfs.model.Entity.INT_MISSING;
import static com.conveyal.gtfs.model.Entity.setDoubleParameter;
import static com.conveyal.gtfs.model.Entity.setIntParameter;

public class PatternBuilder {

private static final Logger LOG = LoggerFactory.getLogger(PatternBuilder.class);

private final Feed feed;
private static final String TEMP_FILE_NAME = "pattern_for_trips";

private final Connection connection;
public PatternBuilder(Feed feed) throws SQLException {
this.feed = feed;
connection = feed.getConnection();
}

public void create(Map<TripPatternKey, Pattern> patterns, Set<String> patternIdsLoadedFromFile) {
String patternsTableName = feed.getTableNameWithSchemaPrefix("patterns");
String tripsTableName = feed.getTableNameWithSchemaPrefix("trips");
String patternStopsTableName = feed.getTableNameWithSchemaPrefix("pattern_stops");

Table patternsTable = new Table(patternsTableName, Pattern.class, Requirement.EDITOR, Table.PATTERNS.fields);
Table patternStopsTable = new Table(patternStopsTableName, PatternStop.class, Requirement.EDITOR, Table.PATTERN_STOP.fields);

try {
File tempPatternForTripsTextFile = File.createTempFile(TEMP_FILE_NAME, "text");
LOG.info("Creating pattern and pattern stops tables.");
Statement statement = connection.createStatement();
statement.execute(String.format("alter table %s add column pattern_id varchar", tripsTableName));
if (patternIdsLoadedFromFile.isEmpty()) {
// No patterns were loaded from file so the pattern table has not previously been created.
patternsTable.createSqlTable(connection, null, true);
}
patternStopsTable.createSqlTable(connection, null, true);
try (PrintStream patternForTripsFileStream = createTempPatternForTripsTable(tempPatternForTripsTextFile, statement)) {
processPatternAndPatternStops(patternsTable, patternStopsTable, patternForTripsFileStream, patterns, patternIdsLoadedFromFile);
}
updateTripPatternIds(tempPatternForTripsTextFile, statement, tripsTableName);
createIndexes(statement, patternsTableName, patternStopsTableName, tripsTableName);
connection.commit();
} catch (SQLException | IOException e) {
// Rollback transaction if failure occurs on creating patterns.
DbUtils.rollbackAndCloseQuietly(connection);
// This exception will be stored as a validator failure.
throw new RuntimeException(e);
} finally {
// Close transaction finally.
if (connection != null) DbUtils.closeQuietly(connection);
}
}

private void processPatternAndPatternStops(
Table patternsTable,
Table patternStopsTable,
PrintStream patternForTripsFileStream,
Map<TripPatternKey, Pattern> patterns,
Set<String> patternIdsLoadedFromFile
) throws SQLException {
// Generate prepared statements for inserts.
String insertPatternSql = patternsTable.generateInsertSql(true);
PreparedStatement insertPatternStatement = connection.prepareStatement(insertPatternSql);
BatchTracker patternTracker = new BatchTracker("pattern", insertPatternStatement);
LOG.info("Storing patterns and pattern stops.");
for (Map.Entry<TripPatternKey, Pattern> entry : patterns.entrySet()) {
Pattern pattern = entry.getValue();
LOG.debug("Batching pattern {}", pattern.pattern_id);
if (!patternIdsLoadedFromFile.contains(pattern.pattern_id)) {
// Only insert the pattern if it has not already been imported from file.
pattern.setStatementParameters(insertPatternStatement, true);
patternTracker.addBatch();
}
createPatternStops(entry.getKey(), pattern.pattern_id, patternStopsTable);
updateTripPatternReferences(patternForTripsFileStream, pattern);
}
// Send any remaining prepared statement calls to the database backend.
patternTracker.executeRemaining();
LOG.info("Done storing patterns and pattern stops.");
}

/**
* Create temp table for updating trips with pattern IDs to be dropped at the end of the transaction.
* NOTE: temp table name must NOT be prefixed with schema because temp tables are prefixed with their own
* connection-unique schema.
*/
private PrintStream createTempPatternForTripsTable(
File tempPatternForTripsTextFile,
Statement statement
) throws SQLException, IOException {
LOG.info("Loading via temporary text file at {}.", tempPatternForTripsTextFile.getAbsolutePath());
String createTempSql = String.format("create temp table %s(trip_id varchar, pattern_id varchar) on commit drop", TEMP_FILE_NAME);
LOG.info(createTempSql);
statement.execute(createTempSql);
return new PrintStream(new BufferedOutputStream(Files.newOutputStream(tempPatternForTripsTextFile.toPath())));
}

/**
* Update all trips on this pattern to reference this pattern's ID.
*/
private void updateTripPatternReferences(PrintStream patternForTripsFileStream, Pattern pattern) {
// Prepare each trip in pattern to update trips table.
for (String tripId : pattern.associatedTrips) {
// Add line to temp csv file if using postgres.
// No need to worry about null trip IDs because the trips have already been processed.
String[] strings = new String[]{tripId, pattern.pattern_id};
// Print a new line in the standard postgres text format:
// https://www.postgresql.org/docs/9.1/static/sql-copy.html#AEN64380
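// e.g. a trip "t1" assigned to pattern "p1" becomes the line "t1\tp1".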
patternForTripsFileStream.println(String.join("\t", strings));
}
}

/**
* Copy the pattern for trips text file into a table, create an index on trip IDs, and update the trips
* table.
*/
private void updateTripPatternIds(
File tempPatternForTripsTextFile,
Statement statement,
String tripsTableName
) throws SQLException, IOException {
LOG.info("Updating trips with pattern IDs.");
// Copy file contents into temp pattern for trips table.
copyFromFile(connection, tempPatternForTripsTextFile, TEMP_FILE_NAME);
// Before updating the trips with pattern IDs, index the table on trip_id.
String patternForTripsIndexSql = String.format(
"create index temp_trips_pattern_id_idx on %s (trip_id)",
TEMP_FILE_NAME
);
LOG.info(patternForTripsIndexSql);
statement.execute(patternForTripsIndexSql);
// Finally, execute the update statement.
String updateTripsSql = String.format(
"update %s set pattern_id = %s.pattern_id from %s where %s.trip_id = %s.trip_id",
tripsTableName,
TEMP_FILE_NAME,
TEMP_FILE_NAME,
tripsTableName,
TEMP_FILE_NAME
);
LOG.info(updateTripsSql);
statement.executeUpdate(updateTripsSql);
// Delete temp file. Temp table will be dropped after the transaction is committed.
Files.delete(tempPatternForTripsTextFile.toPath());
LOG.info("Updating trips complete.");
}

private void createIndexes(
Statement statement,
String patternsTableName,
String patternStopsTableName,
String tripsTableName
) throws SQLException {
LOG.info("Creating index on patterns.");
statement.executeUpdate(String.format("alter table %s add primary key (pattern_id)", patternsTableName));
LOG.info("Creating index on pattern stops.");
statement.executeUpdate(String.format("alter table %s add primary key (pattern_id, stop_sequence)", patternStopsTableName));
// Index new pattern_id column on trips. The other tables are already indexed because they have primary keys.
LOG.info("Indexing trips on pattern id.");
statement.execute(String.format("create index trips_pattern_id_idx on %s (pattern_id)", tripsTableName));
LOG.info("Done indexing.");
}

/**
* Construct pattern stops based on values in trip pattern key.
*/
private void createPatternStops(TripPatternKey key, String patternId, Table patternStopsTable) throws SQLException {
String insertPatternStopSql = patternStopsTable.generateInsertSql(true);
PreparedStatement insertPatternStopStatement = connection.prepareStatement(insertPatternStopSql);
BatchTracker patternStopTracker = new BatchTracker("pattern stop", insertPatternStopStatement);

int lastValidDeparture = key.departureTimes.get(0);
for (int i = 0; i < key.stops.size(); i++) {
int travelTime = 0;
String stopId = key.stops.get(i);
int arrival = key.arrivalTimes.get(i);
if (i > 0) {
int prevDeparture = key.departureTimes.get(i - 1);
// Set travel time for all stops except the first.
if (prevDeparture != INT_MISSING) {
// Use the previous departure if it's not missing. Otherwise, base the travel
// time on the most recent valid departure.
lastValidDeparture = prevDeparture;
}
travelTime = arrival == INT_MISSING || lastValidDeparture == INT_MISSING
? INT_MISSING
: arrival - lastValidDeparture;
}
int departure = key.departureTimes.get(i);
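// Example: arrival 32400 (09:00:00) and departure 32460 (09:01:00) give a
// dwell time of 60 seconds; a missing arrival or departure leaves it at INT_MISSING.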
int dwellTime = arrival == INT_MISSING || departure == INT_MISSING
? INT_MISSING
: departure - arrival;

insertPatternStopStatement.setString(1, patternId);
// Stop sequence is zero-based.
setIntParameter(insertPatternStopStatement, 2, i);
insertPatternStopStatement.setString(3, stopId);
insertPatternStopStatement.setString(4, key.stopHeadsigns.get(i));
setIntParameter(insertPatternStopStatement,5, travelTime);
setIntParameter(insertPatternStopStatement,6, dwellTime);
setIntParameter(insertPatternStopStatement,7, key.dropoffTypes.get(i));
setIntParameter(insertPatternStopStatement,8, key.pickupTypes.get(i));
setDoubleParameter(insertPatternStopStatement, 9, key.shapeDistances.get(i));
setIntParameter(insertPatternStopStatement,10, key.timepoints.get(i));
setIntParameter(insertPatternStopStatement,11, key.continuous_pickup.get(i));
setIntParameter(insertPatternStopStatement,12, key.continuous_drop_off.get(i));
patternStopTracker.addBatch();
}
patternStopTracker.executeRemaining();
}
}
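The travel-time rule in createPatternStops is the subtlest arithmetic above: when a previous departure is missing, travel time is measured from the most recent valid departure instead. A self-contained sketch of that rule, with Integer.MIN_VALUE standing in for gtfs-lib's INT_MISSING sentinel (an assumption; the library defines its own constant in Entity):

```java
public class TravelTimeSketch {
    // Assumption: stand-in for com.conveyal.gtfs.model.Entity.INT_MISSING.
    static final int INT_MISSING = Integer.MIN_VALUE;

    public static void main(String[] args) {
        // Times in seconds since midnight; index 2 simulates a stop with no times.
        int[] arrivals   = {32400, 32700, INT_MISSING, 33300};
        int[] departures = {32460, INT_MISSING, INT_MISSING, 33360};
        int lastValidDeparture = departures[0];
        for (int i = 1; i < arrivals.length; i++) {
            int prevDeparture = departures[i - 1];
            // Carry the most recent valid departure forward past missing values.
            if (prevDeparture != INT_MISSING) lastValidDeparture = prevDeparture;
            int travelTime = (arrivals[i] == INT_MISSING || lastValidDeparture == INT_MISSING)
                    ? INT_MISSING
                    : arrivals[i] - lastValidDeparture;
            System.out.println("stop " + i + " travel time: " + travelTime);
        }
        // Prints 240 for stop 1, the INT_MISSING sentinel for stop 2, and 840 for
        // stop 3 (arrival 33300 minus the last valid departure, 32460).
    }
}
```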