From a16d3cf9a73f30c3b722e61ed254e1265d7282fb Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Thu, 6 Jun 2024 11:29:18 +0200
Subject: [PATCH] test: add Github workflow to build the branch 'cc'

---
 .github/workflows/cc-build.yml | 68 +++++++++++++++++++++++++++++++++++++++++
 README.md                      |  6 ++--
 2 files changed, 71 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/cc-build.yml

diff --git a/.github/workflows/cc-build.yml b/.github/workflows/cc-build.yml
new file mode 100644
index 0000000000..80a29b4c76
--- /dev/null
+++ b/.github/workflows/cc-build.yml
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Builds and tests the branch 'cc' on every push to it and whenever a pull
+# request targeting it is opened, updated or reopened.
+name: cc ci
+
+on:
+  push:
+    branches: [cc]
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches: [cc]
+
+# The job only reads the checked-out sources: restrict the GITHUB_TOKEN.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    strategy:
+      matrix:
+        # Quoted so that the Java version stays a string.
+        java: ['11']
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v4
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+      - name: Install CLD2
+        run: |
+          sudo apt-get update
+          # -y: never wait for a confirmation prompt on the CI runner
+          sudo apt-get install -y libcld2-0 libcld2-dev
+      - name: Install language-detection-cld2
+        run: |
+          git clone https://github.com/commoncrawl/language-detection-cld2.git
+          cd language-detection-cld2/
+          mvn install
+      - name: Install crawler-commons development version
+        run: |
+          git clone https://github.com/crawler-commons/crawler-commons.git
+          cd crawler-commons/
+          mvn install
+      - name: Install recent public suffix list
+        run: |
+          # -f: fail on HTTP errors instead of saving the error page as the
+          # suffix list; -sS: no progress meter, but still print errors;
+          # -L: follow redirects
+          curl -fsSL https://publicsuffix.org/list/public_suffix_list.dat -o conf/effective_tld_names.dat
+      - name: Test
+        run: ant clean test -buildfile build.xml
diff --git a/README.md b/README.md
index 265e9451be..902d87ceee 100644
--- a/README.md
+++ b/README.md
@@ -12,17 +12,17 @@ Notable additions in Common Crawl's fork of Nutch (not yet pushed to upstream Nu
 How to install additional requirements to build this fork of Nutch:
 - [crawler-commons](/crawler-commons/crawler-commons) development snapshot package:
   ```
-  git clone git@github.com:crawler-commons/crawler-commons.git
+  git clone https://github.com/crawler-commons/crawler-commons.git
   cd crawler-commons/
   mvn install
   ```
-- install the latest public suffix list into `conf/` to ensure that it is definitely used (see #17):
+- install the latest public suffix list into `conf/` to ensure that it is definitely used (see [#17](https://github.com/commoncrawl/nutch/issues/17)):
   ```
   wget https://publicsuffix.org/list/public_suffix_list.dat -O conf/effective_tld_names.dat
   ```
 - [Java wrapper for CLD2 language detection](/commoncrawl/language-detection-cld2)
   ```
-  git clone git@github.com:commoncrawl/language-detection-cld2.git
+  git clone https://github.com/commoncrawl/language-detection-cld2.git
   cd language-detection-cld2/
   mvn install
   ```