Skip to content

Commit

Permalink
fix small bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
axif0 committed Jan 19, 2025
1 parent cfc2777 commit e302a9b
Show file tree
Hide file tree
Showing 12 changed files with 1,158 additions and 11 deletions.
133 changes: 133 additions & 0 deletions .github/workflows/missing_form_check&update.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Monthly workflow: regenerates the SPARQL query files for forms that are
# missing from the language data extraction queries and proposes the result
# as a pull request.
name: Create Automated PR

on:
  schedule:
    - cron: '0 0 1 * *' # Runs at 00:00 UTC on the first day of every month
  # Allow manual trigger
  workflow_dispatch:

jobs:
  # Gate job: the PR job below only runs when this job reports that the
  # workflow is executing in scribe-org/Scribe-Data.
  check-repository:
    runs-on: ubuntu-latest
    outputs:
      is_correct_repo: ${{ steps.check.outputs.is_correct_repo }}
    steps:
      - name: Check repository
        id: check
        run: |
          if [ "$GITHUB_REPOSITORY" = "scribe-org/Scribe-Data" ]; then
            echo "is_correct_repo=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_correct_repo=false" >> "$GITHUB_OUTPUT"
            echo "::warning::This workflow should only run in scribe-org/Scribe-Data repository."
          fi

  create-pull-request:
    needs: check-repository
    if: needs.check-repository.outputs.is_correct_repo == 'true'
    runs-on: ubuntu-latest
    # The job pushes a branch and opens a PR with GITHUB_TOKEN, so request the
    # scopes explicitly instead of relying on the repository default (which
    # may be read-only).
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install rich requests tqdm
          pip install -e .

      - name: Generate Missing Features Data
        run: |
          # Set up paths. download_wd.py is expected to print a line of the
          # form DOWNLOAD_PATH=<path>; everything after '=' is the dump path.
          DUMP_PATH=$(PYTHONPATH=$PYTHONPATH:$(pwd)/src python src/scribe_data/check/check_missing_forms/download_wd.py | grep "DOWNLOAD_PATH=" | cut -d'=' -f2)
          QUERY_DIR="$(pwd)/src/scribe_data/wikidata/language_data_extraction"
          echo "Dump path: ${DUMP_PATH}"
          echo "Query directory: ${QUERY_DIR}"
          # Check if paths exist
          if [ -n "${DUMP_PATH}" ] && [ -d "${QUERY_DIR}" ]; then
            # Generate the missing features data with all keys processing
            PYTHONPATH=$PYTHONPATH:$(pwd)/src python src/scribe_data/check/check_missing_forms/check_missing_forms.py "${DUMP_PATH}" "${QUERY_DIR}" --process-all-keys
          else
            echo "Required paths not found:"
            echo "Dump path exists: $([ -n "${DUMP_PATH}" ] && echo "Yes" || echo "No")"
            echo "Query directory exists: $([ -d "${QUERY_DIR}" ] && echo "Yes" || echo "No")"
            exit 1
          fi

      # Debug steps to understand the state
      - name: Debug Info
        run: |
          echo "Current branch: $(git branch --show-current)"
          echo "List of changes:"
          git status

      - name: Make changes
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"
          # The pathspec is quoted so that git, not bash, resolves it: without
          # globstar bash treats '**' like '*' and would only reach one
          # directory level. With :(glob), '**/' matches any directory depth.
          git add -- ':(glob)src/scribe_data/wikidata/language_data_extraction/**/*.sparql'

      - name: Debug Missing Features Data
        if: always()
        run: |
          # Print the contents of the missing features JSON file if it exists
          if [ -f missing_features.json ]; then
            echo "Contents of missing_features.json:"
            cat missing_features.json
          else
            echo "missing_features.json not found"
          fi

      - name: Generate PR Body
        id: pr-body
        run: |
          # Run the pr_body.py script with the missing features data
          PR_BODY_CONTENT=$(python src/scribe_data/check/check_missing_forms/pr_body.py missing_features.json)
          # Debug output
          echo "PR Body Content:"
          echo "$PR_BODY_CONTENT"
          # Use a random heredoc delimiter so a line in the generated body can
          # never terminate the multi-line output early.
          delimiter="EOF_$(head -c 16 /dev/urandom | od -An -tx1 | tr -d ' \n')"
          {
            echo "body<<${delimiter}"
            echo "$PR_BODY_CONTENT"
            echo "${delimiter}"
          } >> "$GITHUB_OUTPUT"

      - name: Debug PR Body Output
        env:
          # Each step gets its own GITHUB_OUTPUT file, so the previous step's
          # value has to be read back through the steps context.
          PR_BODY: ${{ steps.pr-body.outputs.body }}
        run: |
          # Print the PR body content from the output
          echo "PR Body from GITHUB_OUTPUT:"
          printf '%s\n' "$PR_BODY"

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          title: 'Automated PR: Updated Language Data Files'
          body: ${{ steps.pr-body.outputs.body }}
          # NOTE(review): confirm that 'master' is the branch PRs should target.
          base: master
          branch: automated-missing-forms-pr
          delete-branch: true
          draft: false
          commit-message: '[create-pull-request] automated change'
          committer: GitHub <noreply@github.com>
          author: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

      # Debug step to verify PR creation attempt
      - name: Check PR Creation
        run: |
          echo "Checking if PR was created..."
          gh pr list
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
150 changes: 150 additions & 0 deletions .github/workflows/update_emojis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Monthly workflow: downloads the CLDR emoji annotation files from
# unicode-org/cldr-json, compares them with the copies under
# src/scribe_data/unicode, and opens a pull request when anything differs.
name: Check and Update Emoji Data

on:
  schedule:
    - cron: '0 0 1 * *' # Runs at 00:00 UTC on the first day of every month
  # Allow manual trigger
  workflow_dispatch:

jobs:
  # Gate job: the update job below only runs when this job reports that the
  # workflow is executing in scribe-org/Scribe-Data.
  check-repository:
    runs-on: ubuntu-latest
    outputs:
      is_correct_repo: ${{ steps.check.outputs.is_correct_repo }}
    steps:
      - name: Check repository
        id: check
        run: |
          if [ "$GITHUB_REPOSITORY" = "scribe-org/Scribe-Data" ]; then
            echo "is_correct_repo=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_correct_repo=false" >> "$GITHUB_OUTPUT"
            echo "::warning::This workflow should only run in scribe-org/Scribe-Data repository."
          fi

  check-and-update:
    needs: check-repository
    if: needs.check-repository.outputs.is_correct_repo == 'true'
    runs-on: ubuntu-latest
    # The job pushes a branch and opens a PR with GITHUB_TOKEN, so request the
    # scopes explicitly instead of relying on the repository default (which
    # may be read-only).
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests
          # -y keeps apt non-interactive; without it the install aborts when
          # confirmation would be required.
          sudo apt-get install -y jq

      - name: Get language list
        id: get-langs
        env:
          # Authenticated API calls get a much higher rate limit than the
          # anonymous per-IP quota shared by hosted runners.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Fail the step if curl, jq or the validation below fails, instead of
          # continuing with an empty or partial language list.
          set -o pipefail
          API_BASE="https://api.github.com/repos/unicode-org/cldr-json/contents/cldr-json"
          # Prints the entry names of one directory listing, one per line.
          fetch_names() {
            curl -fsSL \
              -H "Accept: application/vnd.github+json" \
              -H "Authorization: Bearer ${GITHUB_TOKEN}" \
              "$1" | jq -r '.[].name'
          }
          # Fetch language list from GitHub API
          DERIVED_LANGS=$(fetch_names "${API_BASE}/cldr-annotations-derived-full/annotationsDerived")
          FULL_LANGS=$(fetch_names "${API_BASE}/cldr-annotations-full/annotations")
          # Combine and deduplicate language lists. Names come from a
          # third-party repository and are later used in paths and URLs, so
          # only plain locale-style identifiers are let through.
          LANG_LIST=$(echo "$DERIVED_LANGS $FULL_LANGS" | tr ' ' '\n' | grep -E '^[A-Za-z0-9_-]+$' | sort -u | tr '\n' ' ')
          echo "lang_list=${LANG_LIST}" >> "$GITHUB_OUTPUT"
          echo "Detected languages: ${LANG_LIST}"

      - name: Download and check emoji data
        id: check-updates
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the value can never be parsed as shell code.
          LANG_LIST: ${{ steps.get-langs.outputs.lang_list }}
        run: |
          DERIVED_DIR="src/scribe_data/unicode/cldr-annotations-derived-full/annotationsDerived"
          FULL_DIR="src/scribe_data/unicode/cldr-annotations-full/annotations"
          RAW_BASE="https://raw.githubusercontent.com/unicode-org/cldr-json/main/cldr-json"
          # Create directories if they don't exist
          mkdir -p "$DERIVED_DIR" "$FULL_DIR"
          CHANGES_EXIST=false
          CHANGE_SUMMARY="| Language | Derived Changes | Full Changes |\n|----------|-----------------|--------------|"
          # Use dynamic language list from previous step
          for lang in $LANG_LIST; do
            DERIVED_CHANGED="No"
            FULL_CHANGED="No"
            # Download latest data for each language. The list is the union of
            # both directories, so a language may exist in only one of them;
            # -f makes curl fail on HTTP errors so an error page is never
            # stored as annotations.json.
            if ! curl -fsSL "$RAW_BASE/cldr-annotations-derived-full/annotationsDerived/$lang/annotations.json" -o "new_derived_$lang.json"; then
              rm -f "new_derived_$lang.json"
              echo "::warning::No derived annotations downloaded for $lang"
            fi
            if ! curl -fsSL "$RAW_BASE/cldr-annotations-full/annotations/$lang/annotations.json" -o "new_full_$lang.json"; then
              rm -f "new_full_$lang.json"
              echo "::warning::No full annotations downloaded for $lang"
            fi
            # Check derived annotations (only when a file was downloaded)
            if [ -f "new_derived_$lang.json" ]; then
              if [ -f "$DERIVED_DIR/$lang/annotations.json" ]; then
                if ! cmp -s "new_derived_$lang.json" "$DERIVED_DIR/$lang/annotations.json"; then
                  CHANGES_EXIST=true
                  DERIVED_CHANGED="Yes"
                fi
              else
                CHANGES_EXIST=true
                DERIVED_CHANGED="New"
              fi
            fi
            # Check full annotations (only when a file was downloaded)
            if [ -f "new_full_$lang.json" ]; then
              if [ -f "$FULL_DIR/$lang/annotations.json" ]; then
                if ! cmp -s "new_full_$lang.json" "$FULL_DIR/$lang/annotations.json"; then
                  CHANGES_EXIST=true
                  FULL_CHANGED="Yes"
                fi
              else
                CHANGES_EXIST=true
                FULL_CHANGED="New"
              fi
            fi
            # Only add to summary if there are changes
            if [ "$DERIVED_CHANGED" != "No" ] || [ "$FULL_CHANGED" != "No" ]; then
              CHANGE_SUMMARY="$CHANGE_SUMMARY\n| $lang | $DERIVED_CHANGED | $FULL_CHANGED |"
            fi
          done
          {
            echo "changes_exist=${CHANGES_EXIST}"
            echo "change_summary<<EOF"
            # -e expands the literal \n separators collected above.
            echo -e "$CHANGE_SUMMARY"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Update files if changed
        if: steps.check-updates.outputs.changes_exist == 'true'
        env:
          LANG_LIST: ${{ steps.get-langs.outputs.lang_list }}
        run: |
          DERIVED_DIR="src/scribe_data/unicode/cldr-annotations-derived-full/annotationsDerived"
          FULL_DIR="src/scribe_data/unicode/cldr-annotations-full/annotations"
          # Use dynamic language list; move only files that were actually
          # downloaded by the previous step.
          for lang in $LANG_LIST; do
            if [ -f "new_derived_$lang.json" ]; then
              mkdir -p "$DERIVED_DIR/$lang"
              mv "new_derived_$lang.json" "$DERIVED_DIR/$lang/annotations.json"
            fi
            if [ -f "new_full_$lang.json" ]; then
              mkdir -p "$FULL_DIR/$lang"
              mv "new_full_$lang.json" "$FULL_DIR/$lang/annotations.json"
            fi
          done
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      - name: Create Pull Request
        if: steps.check-updates.outputs.changes_exist == 'true'
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          title: 'chore: Update emoji annotations data'
          # Blank lines separate the Markdown blocks so the closing sentence is
          # not rendered as part of the last legend item.
          body: |
            This PR updates the emoji annotations data from CLDR.

            ## Changes Summary

            ${{ steps.check-updates.outputs.change_summary }}

            ### Legend:

            - Yes: File was updated
            - New: File was newly added
            - No: No changes

            This is an automated PR created by the emoji data update workflow.
          branch: update-emoji-data # Branch name
          delete-branch: true
          commit-message: 'chore: Update emoji annotations data'
          labels: |
            automated pr
            emoji-data
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ scribe_data_wikidata_dumps_export/*
# MARK: Wiki Dumps

*.json.bz2

# MARK: GitHub Actions

# Report file that the missing-forms workflow reads from the repository root
# (it is printed for debugging and passed to pr_body.py); ignored so it is
# never committed.
missing_features.json
Loading

0 comments on commit e302a9b

Please sign in to comment.