diff --git a/.github/workflows/python-package.yaml b/.github/workflows/python-package.yaml index 1402546..4992809 100644 --- a/.github/workflows/python-package.yaml +++ b/.github/workflows/python-package.yaml @@ -57,7 +57,7 @@ jobs: pip install . PACKAGE_DIR=`pip show jsf | grep "Location" | sed 's/^.*: //'` cd $PACKAGE_DIR/jsf - pip install pytest + pip install pytest pyjwt pytest - name: Upload coverage uses: codecov/codecov-action@v1 diff --git a/jsf/README.md b/jsf/README.md index 7595309..64443fd 100644 --- a/jsf/README.md +++ b/jsf/README.md @@ -147,6 +147,10 @@ Navigate to [http://127.0.0.1:8000](http://127.0.0.1:8000) and check out your en +### Partially supported features + +- string `contentMediaType` - only a subset of these are supported, however they can be expanded within [this file](jsf/schema_types/string_utils/content_type/__init__.py) + ## Credits - This repository is a Python port of [json-schema-faker](https://github.com/json-schema-faker/json-schema-faker) with some minor differences in implementation. diff --git a/jsf/schema_types/string.py b/jsf/schema_types/string.py index e1a021e..8cf5185 100644 --- a/jsf/schema_types/string.py +++ b/jsf/schema_types/string.py @@ -1,16 +1,15 @@ -import base64 import logging -import quopri import random import re from datetime import timezone -from enum import Enum from typing import Any, Callable, Dict, Optional import rstr from faker import Faker from jsf.schema_types.base import BaseSchema, ProviderNotSetException +from jsf.schema_types.string_utils import content_encoding, content_type +from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence logger = logging.getLogger() faker = Faker() @@ -19,11 +18,6 @@ URI_PATTERN = f"https?://{{hostname}}(?:{FRAGMENT})+" PARAM_PATTERN = "(?:\\?([a-z]{1,7}(=\\w{1,5})?&){0,3})?" -LOREM = """Lorem ipsum dolor sit amet consectetur adipisicing elit. -Hic molestias, esse veniam placeat officiis nobis architecto modi -possimus reiciendis accusantium exercitationem quas illum libero odit magnam, -reprehenderit ipsum, repellendus culpa!""".split() - def temporal_duration( positive: bool = True, @@ -123,100 +117,35 @@ def fake_duration(): } -def random_fixed_length_sentence(_min: int, _max: int) -> str: - output = "" - while len(output) < _max: - remaining = _max - len(output) - valid_words = list(filter(lambda s: len(s) < remaining, LOREM)) - if len(valid_words) == 0: - break - output += random.choice(valid_words) + " " - if len(output) > _min and random.uniform(0, 1) > 0.9: - break - return output.strip() - - -class ContentEncoding(str, Enum): - SEVEN_BIT = "7-bit" - EIGHT_BIT = "8-bit" - BINARY = "binary" - QUOTED_PRINTABLE = "quoted-printable" - BASE16 = "base-16" - BASE32 = "base-32" - BASE64 = "base-64" - - -def binary_encoder(string: str) -> str: - return "".join(format(x, "b") for x in bytearray(string, "utf-8")) - - -def bytes_str_repr(b: bytes) -> str: - return repr(b)[2:-1] - - -def seven_bit_encoder(string: str) -> str: - return bytes_str_repr(string.encode("utf-7")) - - -def eight_bit_encoder(string: str) -> str: - return bytes_str_repr(string.encode("utf-8")) - - -def quoted_printable_encoder(string: str) -> str: - return bytes_str_repr(quopri.encodestring(string.encode("utf-8"))) - - -def b16_encoder(string: str) -> str: - return bytes_str_repr(base64.b16encode(string.encode("utf-8"))) - - -def b32_encoder(string: str) -> str: - return bytes_str_repr(base64.b32encode(string.encode("utf-8"))) - - -def b64_encoder(string: str) -> str: - return bytes_str_repr(base64.b64encode(string.encode("utf-8"))) - - -Encoder = { - ContentEncoding.SEVEN_BIT: seven_bit_encoder, - ContentEncoding.EIGHT_BIT: eight_bit_encoder, - ContentEncoding.BINARY: binary_encoder, - ContentEncoding.QUOTED_PRINTABLE: quoted_printable_encoder, - ContentEncoding.BASE16: b16_encoder, - ContentEncoding.BASE32: b32_encoder, - ContentEncoding.BASE64: b64_encoder, -} - - -def encode(string: str, encoding: Optional[ContentEncoding]) -> str: - return Encoder.get(encoding, lambda s: s)(string) - - class String(BaseSchema): minLength: Optional[float] = 0 maxLength: Optional[float] = 50 pattern: Optional[str] = None format: Optional[str] = None # enum: Optional[List[Union[str, int, float]]] = None # NOTE: Not used - enums go to enum class - # contentMediaType: Optional[str] = None # TODO: Long list, need to document which ones will be supported and how to extend - contentEncoding: Optional[ContentEncoding] - # contentSchema # No docs detailing this yet... + contentMediaType: Optional[str] = None + contentEncoding: Optional[content_encoding.ContentEncoding] + # contentSchema # Doesnt help with generation def generate(self, context: Dict[str, Any]) -> Optional[str]: try: s = super().generate(context) - return str(encode(s, self.contentEncoding)) if s else s + return str(content_encoding.encode(s, self.contentEncoding)) if s else s except ProviderNotSetException: format_map["regex"] = lambda: rstr.xeger(self.pattern) format_map["relative-json-pointer"] = lambda: random.choice( context["state"]["__all_json_paths__"] ) if format_map.get(self.format) is not None: - return encode(format_map[self.format](), self.contentEncoding) + return content_encoding.encode(format_map[self.format](), self.contentEncoding) if self.pattern is not None: - return encode(rstr.xeger(self.pattern), self.contentEncoding) - return encode( + return content_encoding.encode(rstr.xeger(self.pattern), self.contentEncoding) + if self.contentMediaType is not None: + return content_encoding.encode( + content_type.generate(self.contentMediaType, self.minLength, self.maxLength), + self.contentEncoding, + ) + return content_encoding.encode( random_fixed_length_sentence(self.minLength, self.maxLength), self.contentEncoding ) diff --git a/jsf/schema_types/string_utils/BUILD b/jsf/schema_types/string_utils/BUILD new file mode 100644 index 0000000..c1ffc1a --- /dev/null +++ b/jsf/schema_types/string_utils/BUILD @@ -0,0 +1 @@ +python_sources(name="src") \ No newline at end of file diff --git a/jsf/schema_types/string_utils/content_encoding.py b/jsf/schema_types/string_utils/content_encoding.py new file mode 100644 index 0000000..3b7e84a --- /dev/null +++ b/jsf/schema_types/string_utils/content_encoding.py @@ -0,0 +1,61 @@ +import base64 +import quopri +from enum import Enum +from typing import Optional + + +class ContentEncoding(str, Enum): + SEVEN_BIT = "7-bit" + EIGHT_BIT = "8-bit" + BINARY = "binary" + QUOTED_PRINTABLE = "quoted-printable" + BASE16 = "base-16" + BASE32 = "base-32" + BASE64 = "base-64" + + +def binary_encoder(string: str) -> str: + return "".join(format(x, "b") for x in bytearray(string, "utf-8")) + + +def bytes_str_repr(b: bytes) -> str: + return repr(b)[2:-1] + + +def seven_bit_encoder(string: str) -> str: + return bytes_str_repr(string.encode("utf-7")) + + +def eight_bit_encoder(string: str) -> str: + return bytes_str_repr(string.encode("utf-8")) + + +def quoted_printable_encoder(string: str) -> str: + return bytes_str_repr(quopri.encodestring(string.encode("utf-8"))) + + +def b16_encoder(string: str) -> str: + return bytes_str_repr(base64.b16encode(string.encode("utf-8"))) + + +def b32_encoder(string: str) -> str: + return bytes_str_repr(base64.b32encode(string.encode("utf-8"))) + + +def b64_encoder(string: str) -> str: + return bytes_str_repr(base64.b64encode(string.encode("utf-8"))) + + +Encoder = { + ContentEncoding.SEVEN_BIT: seven_bit_encoder, + ContentEncoding.EIGHT_BIT: eight_bit_encoder, + ContentEncoding.BINARY: binary_encoder, + ContentEncoding.QUOTED_PRINTABLE: quoted_printable_encoder, + ContentEncoding.BASE16: b16_encoder, + ContentEncoding.BASE32: b32_encoder, + ContentEncoding.BASE64: b64_encoder, +} + + +def encode(string: str, encoding: Optional[ContentEncoding]) -> str: + return Encoder.get(encoding, lambda s: s)(string) diff --git a/jsf/schema_types/string_utils/content_type/BUILD b/jsf/schema_types/string_utils/content_type/BUILD new file mode 100644 index 0000000..c1ffc1a --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/BUILD @@ -0,0 +1 @@ +python_sources(name="src") \ No newline at end of file diff --git a/jsf/schema_types/string_utils/content_type/__init__.py b/jsf/schema_types/string_utils/content_type/__init__.py new file mode 100644 index 0000000..630837b --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/__init__.py @@ -0,0 +1,38 @@ +from jsf.schema_types.string_utils.content_type.application__gzip import create_random_gzip +from jsf.schema_types.string_utils.content_type.application__jwt import create_random_jwt +from jsf.schema_types.string_utils.content_type.application__zip import create_random_zip +from jsf.schema_types.string_utils.content_type.image__jpeg import random_jpg +from jsf.schema_types.string_utils.content_type.image__webp import random_webp +from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence + + +def not_implemented(*args, **kwargs): + raise NotImplementedError() + + +ContentTypeGenerator = { + "application/jwt": create_random_jwt, + # "text/html": not_implemented, + # "application/xml": not_implemented, # To implement: Port code from https://onlinerandomtools.com/generate-random-xml + # "image/bmp": not_implemented, # To implement: request jpg and convert to bmp + # "text/css": not_implemented, + # "text/csv": not_implemented, + # "image/gif": not_implemented, # To implement: request jpg and convert to gif + "image/jpeg": random_jpg, + # "application/json": not_implemented, # To implement: Port code from https://onlinerandomtools.com/generate-random-xml + # "text/javascript": not_implemented, + # "image/png": not_implemented, # To implement: request jpg and convert to png + # "image/tiff": not_implemented, # To implement: request jpg and convert to tiff + "text/plain": random_fixed_length_sentence, + "image/webp": random_webp, + "application/zip": create_random_zip, + "application/gzip": create_random_gzip, + # "application/x-bzip": not_implemented, # To implement: create in memory random files using text/plain then zip + # "application/x-bzip2": not_implemented, # To implement: create in memory random files using text/plain then zip + # "application/pdf": not_implemented, # To implement: request jpg and convert to pdf and/or make pdf using python package + # "text/calendar": not_implemented, +} + + +def generate(content_type: str, min_length: int, max_length: int) -> str: + return ContentTypeGenerator.get(content_type, not_implemented)(min_length, max_length) diff --git a/jsf/schema_types/string_utils/content_type/application__gzip.py b/jsf/schema_types/string_utils/content_type/application__gzip.py new file mode 100644 index 0000000..00facc7 --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/application__gzip.py @@ -0,0 +1,16 @@ +import gzip +import io + +from jsf.schema_types.string_utils.content_encoding import bytes_str_repr +from jsf.schema_types.string_utils.content_type.application__zip import create_random_file_name +from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence + + +def create_random_gzip(*args, **kwargs) -> str: + fgz = io.BytesIO() + gzip_obj = gzip.GzipFile(filename=create_random_file_name(), mode="wb", fileobj=fgz) + gzip_obj.write(random_fixed_length_sentence().encode("utf-8")) + gzip_obj.close() + + fgz.seek(0) + return bytes_str_repr(fgz.getvalue()) diff --git a/jsf/schema_types/string_utils/content_type/application__jwt.py b/jsf/schema_types/string_utils/content_type/application__jwt.py new file mode 100644 index 0000000..0d2f60a --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/application__jwt.py @@ -0,0 +1,48 @@ +import base64 +import hashlib +import hmac +import json +import secrets +from datetime import timezone + +from faker import Faker + +faker = Faker() + + +def base64url_encode(input: bytes): + return base64.urlsafe_b64encode(input).decode("utf-8").replace("=", "") + + +def jwt(api_key, expiry, api_sec): + + segments = [] + + header = {"typ": "JWT", "alg": "HS256"} + payload = {"iss": api_key, "exp": expiry} + + json_header = json.dumps(header, separators=(",", ":")).encode() + json_payload = json.dumps(payload, separators=(",", ":")).encode() + + segments.append(base64url_encode(json_header)) + segments.append(base64url_encode(json_payload)) + + signing_input = ".".join(segments).encode() + key = api_sec.encode() + signature = hmac.new(key, signing_input, hashlib.sha256).digest() + + segments.append(base64url_encode(signature)) + + encoded_string = ".".join(segments) + + return encoded_string + + +def create_random_jwt(*args, **kwargs): + + api_key = secrets.token_urlsafe(16) + api_sec = secrets.token_urlsafe(16) + + expiry = int(faker.date_time(timezone.utc).timestamp()) + + return jwt(api_key, expiry, api_sec) diff --git a/jsf/schema_types/string_utils/content_type/application__zip.py b/jsf/schema_types/string_utils/content_type/application__zip.py new file mode 100644 index 0000000..9a8c0a8 --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/application__zip.py @@ -0,0 +1,27 @@ +import io +import random +import zipfile +from typing import Tuple + +import rstr + +from jsf.schema_types.string_utils.content_encoding import bytes_str_repr +from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence + + +def create_random_file_name() -> str: + return rstr.xeger(r"[a-zA-Z0-9]+\.txt") + + +def create_random_file() -> Tuple[str, io.BytesIO]: + return (create_random_file_name(), io.BytesIO(random_fixed_length_sentence().encode("utf-8"))) + + +def create_random_zip(*args, **kwargs) -> str: + zip_buffer = io.BytesIO() + + with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file: + for file_name, data in [create_random_file() for _ in range(random.randint(1, 10))]: + zip_file.writestr(file_name, data.getvalue()) + + return bytes_str_repr(zip_buffer.getvalue()) diff --git a/jsf/schema_types/string_utils/content_type/image__jpeg.py b/jsf/schema_types/string_utils/content_type/image__jpeg.py new file mode 100644 index 0000000..b90d82f --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/image__jpeg.py @@ -0,0 +1,13 @@ +import random + +import requests + +from jsf.schema_types.string_utils.content_encoding import bytes_str_repr + + +def random_jpg(*args, **kwargs) -> str: + return bytes_str_repr( + requests.get( + f"https://picsum.photos/{random.randint(1,50)*10}/{random.randint(1,50)*10}.jpg" + ).content + ) diff --git a/jsf/schema_types/string_utils/content_type/image__webp.py b/jsf/schema_types/string_utils/content_type/image__webp.py new file mode 100644 index 0000000..26dff32 --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/image__webp.py @@ -0,0 +1,13 @@ +import random + +import requests + +from jsf.schema_types.string_utils.content_encoding import bytes_str_repr + + +def random_webp(*args, **kwargs) -> str: + return bytes_str_repr( + requests.get( + f"https://picsum.photos/{random.randint(1,50)*10}/{random.randint(1,50)*10}.webp" + ).content + ) diff --git a/jsf/schema_types/string_utils/content_type/text__plain.py b/jsf/schema_types/string_utils/content_type/text__plain.py new file mode 100644 index 0000000..9c7f5d0 --- /dev/null +++ b/jsf/schema_types/string_utils/content_type/text__plain.py @@ -0,0 +1,19 @@ +import random + +LOREM = """Lorem ipsum dolor sit amet consectetur adipisicing elit. +Hic molestias, esse veniam placeat officiis nobis architecto modi +possimus reiciendis accusantium exercitationem quas illum libero odit magnam, +reprehenderit ipsum, repellendus culpa!""".split() + + +def random_fixed_length_sentence(_min: int = 0, _max: int = 50) -> str: + output = "" + while len(output) < _max: + remaining = _max - len(output) + valid_words = list(filter(lambda s: len(s) < remaining, LOREM)) + if len(valid_words) == 0: + break + output += random.choice(valid_words) + " " + if len(output) > _min and random.uniform(0, 1) > 0.9: + break + return output.strip() diff --git a/jsf/tests/data/string-content-type.json b/jsf/tests/data/string-content-type.json new file mode 100644 index 0000000..60dfa1c --- /dev/null +++ b/jsf/tests/data/string-content-type.json @@ -0,0 +1,39 @@ +{ + "type": "object", + "properties": { + "application/jwt": { + "type": "string", + "contentMediaType": "application/jwt" + }, + "application/zip": { + "type": "string", + "contentMediaType": "application/zip" + }, + "application/gzip": { + "type": "string", + "contentMediaType": "application/gzip" + }, + "text/plain": { + "type": "string", + "contentMediaType": "text/plain", + "minLength": 5, + "maxLength": 10 + }, + "image/jpeg": { + "type": "string", + "contentMediaType": "image/jpeg" + }, + "image/webp": { + "type": "string", + "contentMediaType": "image/webp" + } + }, + "required": [ + "application/jwt", + "application/zip", + "application/gzip", + "text/plain", + "image/jpeg", + "image/webp" + ] +} diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py index 795a1ba..78daa13 100644 --- a/jsf/tests/test_default_fake.py +++ b/jsf/tests/test_default_fake.py @@ -1,6 +1,7 @@ import json import re +import jwt from jsf.parser import JSF @@ -116,6 +117,22 @@ def test_fake_string_content_encoding(TestData): fake_data = [p.generate() for _ in range(100)] for d in fake_data: assert set(d["binary"]) - {"1", "0"} == set() + # TODO: Test other encodings are working as expected + + +def test_fake_string_content_type(TestData): + with open(TestData / "string-content-type.json", "r") as file: + schema = json.load(file) + p = JSF(schema) + assert isinstance(p.generate(), dict) + fake_data = [p.generate() for _ in range(10)] # Reducing for rate limiting of external requests + for d in fake_data: + assert len(d["text/plain"]) >= 5 and len(d["text/plain"]) <= 10 + + decoded_jwt = jwt.decode(d["application/jwt"], options={"verify_signature": False}) + assert set(decoded_jwt.keys()) == {"exp", "iss"} + assert isinstance(decoded_jwt["exp"], int) + assert isinstance(decoded_jwt["iss"], str) def test_fake_null(TestData): diff --git a/pants.toml b/pants.toml index 7039ccd..b5ca28d 100644 --- a/pants.toml +++ b/pants.toml @@ -22,6 +22,7 @@ use_coverage = true [pytest] extra_requirements = [ "typer>=0.7.0", + "pyjwt", "pytest-cov" ] lockfile = ""