fix small bugs

scribe-org · Jan 19, 2025 · e302a9b · e302a9b
1 parent cfc2777
commit e302a9b
Show file tree

Hide file tree

Showing 12 changed files with 1,158 additions and 11 deletions.
diff --git a/.github/workflows/missing_form_check&update.yaml b/.github/workflows/missing_form_check&update.yaml
@@ -0,0 +1,133 @@
+name: Create Automated PR
+on:
+  schedule:
+    - cron: '0 0 1 * *'  # Runs at 00:00 UTC on the first day of every month
+  # Allow manual trigger
+  workflow_dispatch:
+
+# peter-evans/create-pull-request pushes a branch and opens a pull request with
+# GITHUB_TOKEN, which needs write access to contents and pull requests. Declare
+# this explicitly so the workflow does not depend on the repository's default
+# token permissions.
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  # Gate job: publishes whether this run happens in scribe-org/Scribe-Data so
+  # that the job depending on it is skipped in any other repository.
+  check-repository:
+    runs-on: ubuntu-latest
+    outputs:
+      is_correct_repo: ${{ steps.check.outputs.is_correct_repo }}
+    steps:
+      - name: Check repository
+        id: check
+        run: |
+          # Start from "false"; only the expected repository flips it to "true".
+          is_correct_repo=false
+          if [ "$GITHUB_REPOSITORY" = "scribe-org/Scribe-Data" ]; then
+            is_correct_repo=true
+          else
+            echo "::warning::This workflow should only run in scribe-org/Scribe-Data repository."
+          fi
+          echo "is_correct_repo=${is_correct_repo}" >> "$GITHUB_OUTPUT"
+
+  # Runs only when check-repository reported is_correct_repo == 'true'.
+  create-pull-request:
+    needs: check-repository
+    if: needs.check-repository.outputs.is_correct_repo == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      # fetch-depth: 0 fetches the full history instead of a shallow clone.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      # Upgrades pip, installs requirements.txt plus rich/requests/tqdm, then
+      # installs the project itself in editable mode.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install rich requests tqdm
+          pip install -e .
+
+      # Runs download_wd.py, captures the dump path it prints, then runs
+      # check_missing_forms.py against that dump and the query directory.
+      - name: Generate Missing Features Data
+        run: |
+          # Set up paths
+          # DUMP_PATH is the second "="-separated field of the download_wd.py
+          # output line(s) containing "DOWNLOAD_PATH=".
+          DUMP_PATH=$(PYTHONPATH=$PYTHONPATH:$(pwd)/src python src/scribe_data/check/check_missing_forms/download_wd.py | grep "DOWNLOAD_PATH=" | cut -d'=' -f2)
+          QUERY_DIR="$(pwd)/src/scribe_data/wikidata/language_data_extraction"
+
+          echo "Dump path: ${DUMP_PATH}"
+          echo "Query directory: ${QUERY_DIR}"
+
+          # Check if paths exist
+          # NOTE(review): the dump check only tests that DUMP_PATH is non-empty,
+          # not that the path exists on disk; QUERY_DIR must be a directory.
+          if [ -n "${DUMP_PATH}" ] && [ -d "${QUERY_DIR}" ]; then
+            # Generate the missing features data with all keys processing
+            PYTHONPATH=$PYTHONPATH:$(pwd)/src python src/scribe_data/check/check_missing_forms/check_missing_forms.py "${DUMP_PATH}" "${QUERY_DIR}" --process-all-keys
+          else
+            # Report which of the two checks failed, then fail the step
+            echo "Required paths not found:"
+            echo "Dump path exists: $([ -n "${DUMP_PATH}" ] && echo "Yes" || echo "No")"
+            echo "Query directory exists: $([ -d "${QUERY_DIR}" ] && echo "Yes" || echo "No")"
+            exit 1
+          fi
+
+      # Diagnostic step: logs the current branch and the working tree status
+      - name: Debug Info
+        run: |
+          printf 'Current branch: %s\n' "$(git branch --show-current)"
+          printf '%s\n' "List of changes:"
+          git status
+
+      # Stages the .sparql files under language_data_extraction and sets the
+      # git identity to the github-actions bot.
+      - name: Make changes
+        run: |
+          # Quote the pathspec and use git's ":(glob)" magic so that git, not
+          # bash, expands "**". Bash's globstar option is off by default, so an
+          # unquoted "**" behaves like "*" and only matches files exactly one
+          # directory below language_data_extraction.
+          git add -- ':(glob)src/scribe_data/wikidata/language_data_extraction/**/*.sparql'
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+
+      # Runs even when an earlier step failed (if: always()) so the log shows
+      # whether missing_features.json was produced.
+      - name: Debug Missing Features Data
+        if: always()
+        run: |
+          # Dump the missing features JSON file, or report that it is absent
+          if [ ! -f missing_features.json ]; then
+            echo "missing_features.json not found"
+          else
+            echo "Contents of missing_features.json:"
+            cat missing_features.json
+          fi
+
+      # Builds the pull request description from missing_features.json and
+      # exposes it as the multiline step output "body".
+      - name: Generate PR Body
+        id: pr-body
+        run: |
+          # Run the pr_body.py script with the missing features data
+          PR_BODY_CONTENT=$(python src/scribe_data/check/check_missing_forms/pr_body.py missing_features.json)
+
+          # Debug output
+          echo "PR Body Content:"
+          echo "$PR_BODY_CONTENT"
+
+          # Write the multiline output with a random delimiter, so that no line
+          # of the generated body (for example a literal "EOF") can terminate
+          # the value early.
+          DELIMITER="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64 | tr -d '+/=')"
+          {
+            echo "body<<${DELIMITER}"
+            echo "$PR_BODY_CONTENT"
+            echo "${DELIMITER}"
+          } >> "$GITHUB_OUTPUT"
+
+      # GITHUB_OUTPUT points to a new, empty file in every step, so reading it
+      # here would print nothing. The pr-body step's output is read through the
+      # steps context instead, and handed over via env so that its content is
+      # never templated into the script text.
+      - name: Debug PR Body Output
+        env:
+          PR_BODY: ${{ steps.pr-body.outputs.body }}
+        run: |
+          # Print the PR body content from the output
+          echo "PR Body from GITHUB_OUTPUT:"
+          echo "$PR_BODY"
+
+      # Commits the working tree changes to the automated-missing-forms-pr
+      # branch and opens a pull request using the generated body.
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v5
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          title: 'Automated PR: Updated Language Data Files'
+          body: ${{ steps.pr-body.outputs.body }}
+          # NOTE(review): confirm that 'master' is the branch pull requests
+          # should target in this repository.
+          base: master
+          branch: automated-missing-forms-pr
+          delete-branch: true
+          draft: false
+          commit-message: '[create-pull-request] automated change'
+          # Address restored from an obfuscated "[email protected]" placeholder
+          committer: GitHub <noreply@github.com>
+          author: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
+
+      # Diagnostic step: lists pull requests so the log shows the creation result
+      - name: Check PR Creation
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          printf '%s\n' "Checking if PR was created..."
+          gh pr list
diff --git a/.github/workflows/update_emojis.yaml b/.github/workflows/update_emojis.yaml
@@ -0,0 +1,150 @@
+name: Check and Update Emoji Data
+on:
+  # Manual trigger
+  workflow_dispatch:
+  # Monthly trigger: 00:00 UTC on the first day of every month
+  schedule:
+    - cron: '0 0 1 * *'
+
+jobs:
+  # Gate job: publishes whether this run happens in scribe-org/Scribe-Data so
+  # that the job depending on it is skipped in any other repository.
+  check-repository:
+    runs-on: ubuntu-latest
+    outputs:
+      is_correct_repo: ${{ steps.check.outputs.is_correct_repo }}
+    steps:
+      - name: Check repository
+        id: check
+        run: |
+          # Exact match on the repository slug decides the output value
+          case "$GITHUB_REPOSITORY" in
+            "scribe-org/Scribe-Data")
+              echo "is_correct_repo=true" >> "$GITHUB_OUTPUT"
+              ;;
+            *)
+              echo "is_correct_repo=false" >> "$GITHUB_OUTPUT"
+              echo "::warning::This workflow should only run in scribe-org/Scribe-Data repository."
+              ;;
+          esac
+
+  # Runs only when check-repository reported is_correct_repo == 'true'.
+  check-and-update:
+    needs: check-repository
+    if: needs.check-repository.outputs.is_correct_repo == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      # fetch-depth: 0 fetches the full history instead of a shallow clone.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      # Installs the Python "requests" package and the jq command-line tool
+      # (jq is used by the language list step to parse API responses).
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+          # -y answers apt-get's confirmation prompt; there is no terminal to
+          # answer it on a CI runner.
+          sudo apt-get install -y jq
+
+      # Lists the language directories of both CLDR annotation sets in
+      # unicode-org/cldr-json and exposes their deduplicated union as the
+      # space-separated step output "lang_list".
+      - name: Get language list
+        id: get-langs
+        env:
+          # Authenticated API requests have a higher rate limit than anonymous ones
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Let a curl failure fail the whole pipeline instead of being hidden by jq
+          set -o pipefail
+
+          API_BASE="https://api.github.com/repos/unicode-org/cldr-json/contents/cldr-json"
+
+          # Print the entry names of directory $1 (relative to API_BASE).
+          # -f makes curl fail on HTTP errors instead of handing an error
+          # document to jq.
+          list_names() {
+            curl -fsSL \
+              -H "Authorization: Bearer ${GH_TOKEN}" \
+              -H "Accept: application/vnd.github+json" \
+              "${API_BASE}/$1" | jq -r '.[].name'
+          }
+
+          # Fetch language list from GitHub API
+          DERIVED_LANGS=$(list_names "cldr-annotations-derived-full/annotationsDerived")
+          FULL_LANGS=$(list_names "cldr-annotations-full/annotations")
+
+          # Combine and deduplicate language lists
+          LANG_LIST=$(echo "$DERIVED_LANGS $FULL_LANGS" | tr ' ' '\n' | sort -u | tr '\n' ' ')
+
+          # Stop here rather than let later steps loop over an empty list
+          if [ -z "${LANG_LIST// /}" ]; then
+            echo "::error::No languages were returned by the GitHub API."
+            exit 1
+          fi
+
+          echo "lang_list=${LANG_LIST}" >> "$GITHUB_OUTPUT"
+          echo "Detected languages: ${LANG_LIST}"
+
+      # Downloads the latest annotation files for every language, compares them
+      # with the tracked copies and publishes two outputs: "changes_exist"
+      # (true/false) and "change_summary" (a Markdown table of changed languages).
+      - name: Download and check emoji data
+        id: check-updates
+        env:
+          # Passed through the environment instead of being templated into the
+          # script: the names come from a third-party repository listing and
+          # must never be parsed as shell code.
+          LANG_LIST: ${{ steps.get-langs.outputs.lang_list }}
+        run: |
+          BASE_URL="https://raw.githubusercontent.com/unicode-org/cldr-json/main/cldr-json"
+          DERIVED_DIR="src/scribe_data/unicode/cldr-annotations-derived-full/annotationsDerived"
+          FULL_DIR="src/scribe_data/unicode/cldr-annotations-full/annotations"
+          # Downloads are staged outside the checkout so that leftover files can
+          # never end up in the pull request
+          DOWNLOAD_DIR="${RUNNER_TEMP}/emoji-updates"
+
+          # Create directories if they don't exist
+          mkdir -p "$DERIVED_DIR" "$FULL_DIR" "$DOWNLOAD_DIR"
+
+          # Download URL $1 to file $2. -f makes curl fail on HTTP errors (for
+          # example a language that exists in only one of the two data sets), so
+          # an error page is never stored as annotation data. A failed download
+          # leaves no file behind.
+          fetch() {
+            if ! curl -fsSL "$1" -o "$2"; then
+              rm -f "$2"
+              echo "No data downloaded from $1"
+            fi
+          }
+
+          # Print the status of downloaded file $1 against tracked file $2:
+          # "No" (nothing downloaded or identical), "New" (no tracked copy yet)
+          # or "Yes" (content differs).
+          change_status() {
+            if [ ! -f "$1" ]; then
+              echo "No"
+            elif [ ! -f "$2" ]; then
+              echo "New"
+            elif cmp -s "$1" "$2"; then
+              echo "No"
+            else
+              echo "Yes"
+            fi
+          }
+
+          CHANGES_EXIST=false
+          CHANGE_SUMMARY="| Language | Derived Changes | Full Changes |\n|----------|-----------------|--------------|"
+
+          # Use dynamic language list from previous step
+          # (unquoted on purpose: the list is split on whitespace)
+          for lang in $LANG_LIST; do
+            NEW_DERIVED="$DOWNLOAD_DIR/derived_$lang.json"
+            NEW_FULL="$DOWNLOAD_DIR/full_$lang.json"
+
+            # Download latest data for each language
+            fetch "$BASE_URL/cldr-annotations-derived-full/annotationsDerived/$lang/annotations.json" "$NEW_DERIVED"
+            fetch "$BASE_URL/cldr-annotations-full/annotations/$lang/annotations.json" "$NEW_FULL"
+
+            # Check derived and full annotations
+            DERIVED_CHANGED=$(change_status "$NEW_DERIVED" "$DERIVED_DIR/$lang/annotations.json")
+            FULL_CHANGED=$(change_status "$NEW_FULL" "$FULL_DIR/$lang/annotations.json")
+
+            # Only add to summary if there are changes
+            if [ "$DERIVED_CHANGED" != "No" ] || [ "$FULL_CHANGED" != "No" ]; then
+              CHANGES_EXIST=true
+              CHANGE_SUMMARY="$CHANGE_SUMMARY\n| $lang | $DERIVED_CHANGED | $FULL_CHANGED |"
+            fi
+          done
+
+          echo "changes_exist=${CHANGES_EXIST}" >> "$GITHUB_OUTPUT"
+          {
+            echo "change_summary<<EOF"
+            echo -e "$CHANGE_SUMMARY"
+            echo "EOF"
+          } >> "$GITHUB_OUTPUT"
+
+      # Moves every successfully downloaded file into the source tree and sets
+      # the git identity to the github-actions bot.
+      - name: Update files if changed
+        if: steps.check-updates.outputs.changes_exist == 'true'
+        env:
+          LANG_LIST: ${{ steps.get-langs.outputs.lang_list }}
+        run: |
+          DERIVED_DIR="src/scribe_data/unicode/cldr-annotations-derived-full/annotationsDerived"
+          FULL_DIR="src/scribe_data/unicode/cldr-annotations-full/annotations"
+          # Same staging directory the check step downloaded into
+          DOWNLOAD_DIR="${RUNNER_TEMP}/emoji-updates"
+
+          # Use dynamic language list
+          for lang in $LANG_LIST; do
+            # Only a successful download left a staged file to move
+            if [ -f "$DOWNLOAD_DIR/derived_$lang.json" ]; then
+              mkdir -p "$DERIVED_DIR/$lang"
+              mv "$DOWNLOAD_DIR/derived_$lang.json" "$DERIVED_DIR/$lang/annotations.json"
+            fi
+            if [ -f "$DOWNLOAD_DIR/full_$lang.json" ]; then
+              mkdir -p "$FULL_DIR/$lang"
+              mv "$DOWNLOAD_DIR/full_$lang.json" "$FULL_DIR/$lang/annotations.json"
+            fi
+          done
+
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+
+      # Opens the pull request only when the check step reported changes
+      - name: Create Pull Request
+        if: steps.check-updates.outputs.changes_exist == 'true'
+        uses: peter-evans/create-pull-request@v5
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          # Head branch that carries the update commit
+          branch: update-emoji-data
+          delete-branch: true
+          commit-message: 'chore: Update emoji annotations data'
+          title: 'chore: Update emoji annotations data'
+          labels: |
+            automated pr
+            emoji-data
+          body: |
+            This PR updates the emoji annotations data from CLDR.
+
+            ## Changes Summary
+            ${{ steps.check-updates.outputs.change_summary }}
+
+            ### Legend:
+            - Yes: File was updated
+            - New: File was newly added
+            - No: No changes
+
+            This is an automated PR created by the emoji data update workflow.
diff --git a/.gitignore b/.gitignore
@@ -46,3 +46,7 @@ scribe_data_wikidata_dumps_export/*
 # MARK: Wiki Dumps
 
 *.json.bz2
+
+# MARK: GitHub Actions
+
+missing_features.json