Skip to content

Commit

Permalink
Merge pull request #32 from nlnwa/refactor_frontier
Browse files Browse the repository at this point in the history
Refactor frontier
  • Loading branch information
maeb authored Sep 25, 2020
2 parents 570c474 + ba26ff5 commit d38b8d6
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 3 deletions.
9 changes: 7 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>

<io.grpc.version>1.24.1</io.grpc.version>
<com.google.protobuf.protoc.version>3.10.0</com.google.protobuf.protoc.version>
<io.grpc.version>1.31.1</io.grpc.version>
<com.google.protobuf.protoc.version>3.13.0</com.google.protobuf.protoc.version>
</properties>

<dependencies>
Expand All @@ -42,6 +42,11 @@
<artifactId>grpc-stub</artifactId>
<version>${io.grpc.version}</version>
</dependency>
<!-- grpc-services artifact from the io.grpc group; its version follows the shared io.grpc.version property -->
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-services</artifactId>
<version>${io.grpc.version}</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
Expand Down
13 changes: 12 additions & 1 deletion protobuf/controller/v1/controller.proto
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ syntax = "proto3";
package veidemann.api.controller.v1;

import "controller/v1/resources.proto";
import "frontier/v1/frontier.proto";
import "frontier/v1/resources.proto";
import "config/v1/resources.proto";
import "google/protobuf/empty.proto";
Expand Down Expand Up @@ -32,6 +33,12 @@ service Controller {
// Unpause the crawler (per the RPC name). Neither the request nor the
// response carries a payload: both are google.protobuf.Empty.
rpc UnPauseCrawler(google.protobuf.Empty) returns (google.protobuf.Empty);

// Report the crawler status as a CrawlerStatus message.
// The request carries no payload (google.protobuf.Empty).
rpc Status(google.protobuf.Empty) returns (CrawlerStatus);

// Number of queued URIs for a CrawlExecution.
// The CrawlExecution is identified by a frontier CrawlExecutionId and the
// result is returned as a frontier CountResponse.
rpc QueueCountForCrawlExecution(veidemann.api.frontier.v1.CrawlExecutionId)
    returns (veidemann.api.frontier.v1.CountResponse);

// Number of queued URIs for a CrawlHostGroup.
// The request is a frontier CrawlHostGroup message and the result is
// returned as a frontier CountResponse.
rpc QueueCountForCrawlHostGroup(veidemann.api.frontier.v1.CrawlHostGroup)
    returns (veidemann.api.frontier.v1.CountResponse);
}

// Kick off a crawl job immediately
Expand Down Expand Up @@ -63,4 +70,8 @@ enum RunStatus {

// Status of the crawler, as returned by the Controller service's Status RPC.
message CrawlerStatus {
  // Run status of the crawler; one of the RunStatus enum values.
  RunStatus runStatus = 1;
  // The number of busy CrawlHostGroups, which essentially is the number of
  // web pages currently being downloaded.
  int64 busyCrawlHostGroupCount = 2;
  // Total number of queued URIs.
  int64 queueSize = 3;
}
18 changes: 18 additions & 0 deletions protobuf/frontier/v1/frontier.proto
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package veidemann.api.frontier.v1;
import "commons/v1/resources.proto";
import "config/v1/resources.proto";
import "frontier/v1/resources.proto";
import "google/protobuf/empty.proto";

option go_package = "github.com/nlnwa/veidemann-api-go/frontier/v1;frontier";
option java_multiple_files = true;
Expand All @@ -13,11 +14,24 @@ option java_outer_classname = "FrontierService";

// Frontier is the service used for controlling the frontier.
service Frontier {
  // Start crawling a seed.
  // Takes a CrawlSeedRequest and returns a CrawlExecutionId.
  rpc CrawlSeed(CrawlSeedRequest) returns (CrawlExecutionId);

  // Request a URI from the Frontier's queue.
  // Used by a Harvester to fetch a new page. Both directions are streamed:
  // the caller sends PageHarvest messages and receives PageHarvestSpec
  // messages.
  rpc GetNextPage(stream PageHarvest) returns (stream PageHarvestSpec);

  // The number of busy CrawlHostGroups, which essentially is the number of
  // web pages currently being downloaded.
  rpc BusyCrawlHostGroupCount(google.protobuf.Empty) returns (CountResponse);

  // Total number of queued URIs.
  rpc QueueCountTotal(google.protobuf.Empty) returns (CountResponse);

  // Number of queued URIs for the CrawlExecution identified by the given
  // CrawlExecutionId.
  rpc QueueCountForCrawlExecution(CrawlExecutionId) returns (CountResponse);

  // Number of queued URIs for the given CrawlHostGroup.
  rpc QueueCountForCrawlHostGroup(CrawlHostGroup) returns (CountResponse);
}

message CrawlSeedRequest {
Expand Down Expand Up @@ -64,3 +78,7 @@ message PageHarvestSpec {
// The configuration for the fetch
veidemann.api.config.v1.ConfigObject crawl_config = 2;
}

// Response carrying a single count.
// Used as the return type of the counting RPCs of the Frontier service.
message CountResponse {
  // The counted value.
  int64 count = 1;
}

0 comments on commit d38b8d6

Please sign in to comment.