Merge pull request #1 from sudo-jarvis/create-lexer

Create the Json Schema lexer using pygments
python-jsonschema · Feb 28, 2024 · 6d7330b · 6d7330b
2 parents 4ac2d45 + 1c300be
commit 6d7330b
Show file tree

Hide file tree

Showing 6 changed files with 340 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,154 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dirhtml/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# Editor vomit
+.idea/
+.vscode/
+
+# User defined
+_cache
+_templates
diff --git a/README.rst b/README.rst
@@ -1,6 +1,6 @@
-====================
+=======================
 ``jsonschema-lexer``
-====================
+=======================
 
 |PyPI| |Pythons| |CI|
 
@@ -15,3 +15,49 @@
 .. |CI| image:: https://github.com/python-jsonschema/jsonschema-lexer/workflows/CI/badge.svg
   :alt: Build status
   :target: https://github.com/python-jsonschema/jsonschema-lexer/actions?query=workflow%3ACI
+
+Introduction
+------------
+
+``jsonschema-lexer`` is a Python package that provides a JSON Schema lexer for syntax highlighting JSON Schema documents based on the 2020-12 dialect.
+It utilizes Pygments, a syntax highlighting library, to tokenize JSON Schema documents according to the JSON Schema specification.
+
+Usage
+-----
+
+Once installed, you can use it in your Python code to highlight JSON Schema documents.
+
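+Here's a simple example; it renders the output with ``rich``, which is not a dependency of this package and must be installed separately: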
+
+.. code-block:: python
+
+  # Import the JSONSchemaLexer class from the package
+  from jsonschema_lexer.lexer import JSONSchemaLexer
+
+  from rich.console import Console
+  from rich.syntax import Syntax
+
+  console = Console()
+
+  code = """
+  {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "$id": "https://example.com/product.schema.json",
+    "title": "Product",
+    "description": "A product from Acme's catalog",
+    "type": "object",
+    "properties": {
+      "productId": {
+        "description": "The unique identifier for a product",
+        "type": "integer"
+      },
+      "productName": {
+        "description": "Name of the product",
+        "type": "string"
+      }
+    }
+  }
+  """
+
+  syntax = Syntax(code, lexer=JSONSchemaLexer(), background_color="default", word_wrap=True)
+  console.print(syntax)
diff --git a/jsonschema_lexer/__init__.py b/jsonschema_lexer/__init__.py
@@ -1,3 +1,3 @@
 """
-Fill me in!
+Provides the JSONSchema Lexer.
 """
diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py
@@ -0,0 +1,126 @@
+"""
+Contains the main functionality of the JSONSchemaLexer.
+"""
+
+from typing import ClassVar
+
+from pygments.lexers.data import (  # type: ignore[reportMissingTypeStubs]
+    JsonLexer,
+)
+from pygments.token import Token
+
+
+class JSONSchemaLexer(JsonLexer):
+    """
+    For JSONSchema.
+    """
+
+    name = "JSON Schema Lexer"
+
+    data_types: ClassVar[list[str]] = [
+        "object",
+        "integer",
+        "string",
+        "number",
+        "array",
+        "boolean",
+        "null",
+    ]
+    core_keywords: ClassVar[list[str]] = [
+        "$schema",
+        "$id",
+        "$ref",
+        "$defs",
+        "$comment",
+        "$dynamicAnchor",
+        "$dynamicRef",
+        "$anchor",
+        "$vocabulary",
+    ]
+    applicator_keywords: ClassVar[list[str]] = [
+        "oneOf",
+        "allOf",
+        "anyOf",
+        "if",
+        "then",
+        "else",
+        "not",
+        "properties",
+        "patternProperties",
+        "additionalProperties",
+        "dependentSchemas",
+        "propertyNames",
+        "prefixItems",
+        "contains",
+        "items",
+    ]
+    meta_data_keywords: ClassVar[list[str]] = [
+        "title",
+        "description",
+        "default",
+        "deprecated",
+        "examples",
+        "readOnly",
+        "writeOnly",
+    ]
+    validation_keywords: ClassVar[list[str]] = [
+        "type",
+        "enum",
+        "const",
+        "minLength",
+        "maxLength",
+        "pattern",
+        "maximum",
+        "exclusiveMinimum",
+        "multipleOf",
+        "exclusiveMaximum",
+        "minimum",
+        "dependentRequired",
+        "minProperties",
+        "maxProperties",
+        "required",
+        "minItems",
+        "maxItems",
+        "minContains",
+        "maxContains",
+        "uniqueItems",
+    ]
+    other_keywords: ClassVar[list[str]] = [
+        "format",
+        "unevaluatedItems",
+        "unevaluatedProperties",
+        "contentEncoding",
+        "contentMediaType",
+        "contentSchema",
+        "format_assertion",
+    ]
+
+    parsed_keywords: ClassVar[list[str]] = [
+        '"%s"' % keyword
+        for keyword in (
+            core_keywords
+            + applicator_keywords
+            + meta_data_keywords
+            + validation_keywords
+            + other_keywords
+        )
+    ]
+
+    parsed_data_types: ClassVar[list[str]] = [
+        '"%s"' % data_type for data_type in data_types
+    ]
+
+    def get_tokens_unprocessed(self, text: str):  # type: ignore[reportUnknownParameterType]
+        """
+        Add token classes to it according to JSON Schema.
+        """
+        for start, token, value in super().get_tokens_unprocessed(text):  # type: ignore[reportUnknownVariableType]
+            if token is Token.Name.Tag and value in self.parsed_keywords:
+                yield start, Token.Keyword, value
+            elif (
+                token is Token.String.Double
+                and value in self.parsed_data_types
+            ):
+                yield start, Token.Name.Decorator, value
+            else:
+                yield start, token, value
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,7 +29,9 @@ classifiers = [
 
 ]
 dynamic = ["version"]
-
+dependencies = [
+  "Pygments==2.17.2"
+]
 
 [project.urls]
 Issues = "https://github.com/python-jsonschema/jsonschema-lexer/issues/"

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,8 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+#    pip-compile --strip-extras pyproject.toml
+#
+pygments==2.17.2
+    # via jsonschema_lexer (pyproject.toml)