Skip to content

Commit

Permalink
Merge pull request #5 from nlnwa/stream_harvester_response
Browse files Browse the repository at this point in the history
Stream harvester response
  • Loading branch information
maeb authored Apr 17, 2018
2 parents 9afc694 + f479f3d commit 47f08d9
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 9 deletions.
1 change: 1 addition & 0 deletions protobuf/contentwriter.proto
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ message WriteResponseMeta {
string storage_ref = 4;
string block_digest = 5;
string payload_digest = 6;
string warc_refers_to = 7;
}

map<int32, RecordMeta> record_meta = 1;
Expand Down
15 changes: 6 additions & 9 deletions protobuf/harvester.proto
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@ import "google/protobuf/empty.proto";
// Service for communicating with harvester.
// NOTE(review): HarvestPage is declared twice below, once with a unary reply
// and once with a streamed reply. This text looks like a rendered diff, so
// presumably the unary declaration and CleanupExecution are the removed lines
// and the streamed declaration is the current one -- confirm before use.
service Harvester {
// Harvest a page
rpc HarvestPage (HarvestPageRequest) returns (HarvestPageReply) {}

// Request cleanup of caches after all pages for a seed are harvested.
rpc CleanupExecution (CleanupExecutionRequest) returns (google.protobuf.Empty) {}
// Harvest a page. The reply is delivered as a server-side stream of
// HarvestPageReply messages.
rpc HarvestPage (HarvestPageRequest) returns (stream HarvestPageReply) {}
}

// A request telling which page to harvest.
Expand All @@ -24,13 +21,13 @@ message HarvestPageRequest {
CrawlConfig crawl_config = 2;
}

// Response from harvesting a page. The response is a stream in which the first
// object should include uri_count and bytes_downloaded. Subsequent objects
// should each contain only an outlink, until all outlinks have been sent.
message HarvestPageReply {
// The outlinks found in the harvested page
// NOTE(review): the two declarations below both use field number 1, which a
// single message cannot do. Presumably the repeated form is the removed line
// of a diff and the singular form replaces it -- confirm which is current.
repeated QueuedUri outlinks = 1;
QueuedUri outlink = 1;
// The number of URIs downloaded: the requested URI plus embedded resources.
int32 uri_count = 2;
// Byte count for the resources downloaded. Includes embedded resources.
int64 bytes_downloaded = 3;
}

// Request message taken by the CleanupExecution rpc.
message CleanupExecutionRequest {
  // Identifier of an execution; presumably the one whose caches should be
  // cleaned up -- confirm against the harvester implementation.
  string execution_id = 1;
}
1 change: 1 addition & 0 deletions protobuf/messages.proto
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ message CrawlHostGroup {
// Error descriptor made up of a numeric code, a message and a detail string.
message Error {
  // Numeric error code.
  int32 code = 1;
  // Error message text.
  string msg = 2;
  // Additional detail about the error.
  string detail = 3;
}

message QueuedUri {
Expand Down

0 comments on commit 47f08d9

Please sign in to comment.