Skip to content

Commit

Permalink
Feature/content type enhancement (#58)
Browse files Browse the repository at this point in the history
* Adding JWT,  plain, zip, gzip, jpg, webp generation for content type
  • Loading branch information
ghandic authored Jan 8, 2023
1 parent 7ffd0dd commit 665284f
Show file tree
Hide file tree
Showing 16 changed files with 313 additions and 86 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
pip install .
PACKAGE_DIR=`pip show jsf | grep "Location" | sed 's/^.*: //'`
cd $PACKAGE_DIR/jsf
pip install pytest
pip install pytest pyjwt
pytest
- name: Upload coverage
uses: codecov/codecov-action@v1
Expand Down
4 changes: 4 additions & 0 deletions jsf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ Navigate to [http://127.0.0.1:8000](http://127.0.0.1:8000) and check out your en

</div>

### Partially supported features

- string `contentMediaType` - only a subset of media types is supported; more can be added in [this file](jsf/schema_types/string_utils/content_type/__init__.py)

## Credits

- This repository is a Python port of [json-schema-faker](https://github.com/json-schema-faker/json-schema-faker) with some minor differences in implementation.
Expand Down
99 changes: 14 additions & 85 deletions jsf/schema_types/string.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import base64
import logging
import quopri
import random
import re
from datetime import timezone
from enum import Enum
from typing import Any, Callable, Dict, Optional

import rstr
from faker import Faker

from jsf.schema_types.base import BaseSchema, ProviderNotSetException
from jsf.schema_types.string_utils import content_encoding, content_type
from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence

logger = logging.getLogger()
faker = Faker()
Expand All @@ -19,11 +18,6 @@
URI_PATTERN = f"https?://{{hostname}}(?:{FRAGMENT})+"
PARAM_PATTERN = "(?:\\?([a-z]{1,7}(=\\w{1,5})?&){0,3})?"

# Word pool for placeholder sentences: the lorem-ipsum passage below is split on
# whitespace, so punctuation stays attached to the word it follows.
LOREM = """Lorem ipsum dolor sit amet consectetur adipisicing elit.
Hic molestias, esse veniam placeat officiis nobis architecto modi
possimus reiciendis accusantium exercitationem quas illum libero odit magnam,
reprehenderit ipsum, repellendus culpa!""".split()


def temporal_duration(
positive: bool = True,
Expand Down Expand Up @@ -123,100 +117,35 @@ def fake_duration():
}


def random_fixed_length_sentence(_min: int, _max: int) -> str:
    """Build a space-separated run of random ``LOREM`` words.

    Words are appended (each counted with one trailing space) while the running
    length stays below ``_max``. Once the running length exceeds ``_min``, each
    iteration stops early when a uniform draw from [0, 1] is above 0.9. The loop
    also stops as soon as no word is short enough to fit, so the result can be
    shorter than ``_min``.
    """
    words = []
    used = 0  # characters consumed so far, counting one trailing space per word
    while used < _max:
        room = _max - used
        candidates = [word for word in LOREM if len(word) < room]
        if not candidates:
            break
        picked = random.choice(candidates)
        words.append(picked)
        used += len(picked) + 1
        if used > _min and random.uniform(0, 1) > 0.9:
            break
    # LOREM words contain no whitespace, so joining equals "w1 w2 ... ".strip().
    return " ".join(words)


class ContentEncoding(str, Enum):
    """Content encodings that generated strings can be rendered in.

    The member values keep this module's hyphenated spellings (``"base-64"``,
    ``"7-bit"``, ...). Lookups by value additionally accept the same names
    without hyphens and in any letter case (``"base64"``, ``"7bit"``), which
    are the spellings used by RFC 2045 / RFC 4648.
    """

    SEVEN_BIT = "7-bit"
    EIGHT_BIT = "8-bit"
    BINARY = "binary"
    QUOTED_PRINTABLE = "quoted-printable"
    BASE16 = "base-16"
    BASE32 = "base-32"
    BASE64 = "base-64"

    @classmethod
    def _missing_(cls, value):
        """Resolve hyphen-less or differently-cased spellings to a member.

        ``Enum`` calls this only when ``value`` matches no member exactly.
        Returning ``None`` lets ``Enum`` raise its usual ``ValueError``.
        """
        if not isinstance(value, str):
            return None
        wanted = value.replace("-", "").lower()
        for member in cls:
            if member.value.replace("-", "") == wanted:
                return member
        return None


def binary_encoder(string: str) -> str:
    """Render the UTF-8 bytes of ``string`` as a string of ``0``/``1`` digits.

    Every byte is zero-padded to eight digits. Without padding the per-byte
    digit groups have varying widths, so byte boundaries are lost on
    concatenation and the result cannot be decoded again.
    """
    return "".join(format(byte, "08b") for byte in string.encode("utf-8"))


def bytes_str_repr(b: bytes) -> str:
    """Return ``repr(b)`` without the leading ``b`` + quote and the closing quote."""
    literal = repr(b)
    prefix_len = 2  # the "b" marker plus the opening quote character
    return literal[prefix_len:-1]


def seven_bit_encoder(string: str) -> str:
    """Encode ``string`` with the ``utf-7`` codec and return those bytes as escaped text."""
    encoded = string.encode("utf-7")
    return bytes_str_repr(encoded)


def eight_bit_encoder(string: str) -> str:
    """Encode ``string`` with the ``utf-8`` codec and return those bytes as escaped text."""
    encoded = string.encode("utf-8")
    return bytes_str_repr(encoded)


def quoted_printable_encoder(string: str) -> str:
    """Quoted-printable encode the UTF-8 bytes of ``string``; return them as escaped text."""
    raw = string.encode("utf-8")
    encoded = quopri.encodestring(raw)
    return bytes_str_repr(encoded)


def b16_encoder(string: str) -> str:
    """Return the base16 (upper-case hex) text of the UTF-8 bytes of ``string``."""
    raw = string.encode("utf-8")
    # The base16 alphabet has no quote or backslash characters, so an ASCII
    # decode yields the same text as stripping the bytes repr.
    return base64.b16encode(raw).decode("ascii")


def b32_encoder(string: str) -> str:
    """Return the base32 text of the UTF-8 bytes of ``string``."""
    raw = string.encode("utf-8")
    # The base32 alphabet has no quote or backslash characters, so an ASCII
    # decode yields the same text as stripping the bytes repr.
    return base64.b32encode(raw).decode("ascii")


def b64_encoder(string: str) -> str:
    """Return the base64 text of the UTF-8 bytes of ``string``."""
    raw = string.encode("utf-8")
    # The base64 alphabet has no quote or backslash characters, so an ASCII
    # decode yields the same text as stripping the bytes repr.
    return base64.b64encode(raw).decode("ascii")


# Dispatch table: maps each ContentEncoding member to the function that applies it.
Encoder = {
    ContentEncoding.SEVEN_BIT: seven_bit_encoder,
    ContentEncoding.EIGHT_BIT: eight_bit_encoder,
    ContentEncoding.BINARY: binary_encoder,
    ContentEncoding.QUOTED_PRINTABLE: quoted_printable_encoder,
    ContentEncoding.BASE16: b16_encoder,
    ContentEncoding.BASE32: b32_encoder,
    ContentEncoding.BASE64: b64_encoder,
}


def encode(string: str, encoding: Optional[ContentEncoding]) -> str:
    """Apply the encoder registered for ``encoding``.

    ``string`` is returned unchanged when ``Encoder`` has no entry for
    ``encoding`` (for example when it is ``None``).
    """
    encoder = Encoder.get(encoding)
    if encoder is None:
        return string
    return encoder(string)


class String(BaseSchema):
minLength: Optional[float] = 0
maxLength: Optional[float] = 50
pattern: Optional[str] = None
format: Optional[str] = None
# enum: Optional[List[Union[str, int, float]]] = None # NOTE: Not used - enums go to enum class
# contentMediaType: Optional[str] = None # TODO: Long list, need to document which ones will be supported and how to extend
contentEncoding: Optional[ContentEncoding]
# contentSchema # No docs detailing this yet...
contentMediaType: Optional[str] = None
contentEncoding: Optional[content_encoding.ContentEncoding]
# contentSchema # Doesn't help with generation

def generate(self, context: Dict[str, Any]) -> Optional[str]:
try:
s = super().generate(context)
return str(encode(s, self.contentEncoding)) if s else s
return str(content_encoding.encode(s, self.contentEncoding)) if s else s
except ProviderNotSetException:
format_map["regex"] = lambda: rstr.xeger(self.pattern)
format_map["relative-json-pointer"] = lambda: random.choice(
context["state"]["__all_json_paths__"]
)
if format_map.get(self.format) is not None:
return encode(format_map[self.format](), self.contentEncoding)
return content_encoding.encode(format_map[self.format](), self.contentEncoding)
if self.pattern is not None:
return encode(rstr.xeger(self.pattern), self.contentEncoding)
return encode(
return content_encoding.encode(rstr.xeger(self.pattern), self.contentEncoding)
if self.contentMediaType is not None:
return content_encoding.encode(
content_type.generate(self.contentMediaType, self.minLength, self.maxLength),
self.contentEncoding,
)
return content_encoding.encode(
random_fixed_length_sentence(self.minLength, self.maxLength), self.contentEncoding
)

Expand Down
1 change: 1 addition & 0 deletions jsf/schema_types/string_utils/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python_sources(name="src")
61 changes: 61 additions & 0 deletions jsf/schema_types/string_utils/content_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import base64
import quopri
from enum import Enum
from typing import Optional


class ContentEncoding(str, Enum):
    """Content encodings that generated strings can be rendered in.

    The member values keep this module's hyphenated spellings (``"base-64"``,
    ``"7-bit"``, ...). Lookups by value additionally accept the same names
    without hyphens and in any letter case (``"base64"``, ``"7bit"``), which
    are the spellings used by RFC 2045 / RFC 4648.
    """

    SEVEN_BIT = "7-bit"
    EIGHT_BIT = "8-bit"
    BINARY = "binary"
    QUOTED_PRINTABLE = "quoted-printable"
    BASE16 = "base-16"
    BASE32 = "base-32"
    BASE64 = "base-64"

    @classmethod
    def _missing_(cls, value):
        """Resolve hyphen-less or differently-cased spellings to a member.

        ``Enum`` calls this only when ``value`` matches no member exactly.
        Returning ``None`` lets ``Enum`` raise its usual ``ValueError``.
        """
        if not isinstance(value, str):
            return None
        wanted = value.replace("-", "").lower()
        for member in cls:
            if member.value.replace("-", "") == wanted:
                return member
        return None


def binary_encoder(string: str) -> str:
    """Render the UTF-8 bytes of ``string`` as a string of ``0``/``1`` digits.

    Every byte is zero-padded to eight digits. Without padding the per-byte
    digit groups have varying widths, so byte boundaries are lost on
    concatenation and the result cannot be decoded again.
    """
    return "".join(format(byte, "08b") for byte in string.encode("utf-8"))


def bytes_str_repr(b: bytes) -> str:
    """Return ``repr(b)`` without the leading ``b`` + quote and the closing quote."""
    literal = repr(b)
    prefix_len = 2  # the "b" marker plus the opening quote character
    return literal[prefix_len:-1]


def seven_bit_encoder(string: str) -> str:
    """Encode ``string`` with the ``utf-7`` codec and return those bytes as escaped text."""
    encoded = string.encode("utf-7")
    return bytes_str_repr(encoded)


def eight_bit_encoder(string: str) -> str:
    """Encode ``string`` with the ``utf-8`` codec and return those bytes as escaped text."""
    encoded = string.encode("utf-8")
    return bytes_str_repr(encoded)


def quoted_printable_encoder(string: str) -> str:
    """Quoted-printable encode the UTF-8 bytes of ``string``; return them as escaped text."""
    raw = string.encode("utf-8")
    encoded = quopri.encodestring(raw)
    return bytes_str_repr(encoded)


def b16_encoder(string: str) -> str:
    """Return the base16 (upper-case hex) text of the UTF-8 bytes of ``string``."""
    raw = string.encode("utf-8")
    # The base16 alphabet has no quote or backslash characters, so an ASCII
    # decode yields the same text as stripping the bytes repr.
    return base64.b16encode(raw).decode("ascii")


def b32_encoder(string: str) -> str:
    """Return the base32 text of the UTF-8 bytes of ``string``."""
    raw = string.encode("utf-8")
    # The base32 alphabet has no quote or backslash characters, so an ASCII
    # decode yields the same text as stripping the bytes repr.
    return base64.b32encode(raw).decode("ascii")


def b64_encoder(string: str) -> str:
    """Return the base64 text of the UTF-8 bytes of ``string``."""
    raw = string.encode("utf-8")
    # The base64 alphabet has no quote or backslash characters, so an ASCII
    # decode yields the same text as stripping the bytes repr.
    return base64.b64encode(raw).decode("ascii")


# Dispatch table: maps each ContentEncoding member to the function that applies it.
Encoder = {
    ContentEncoding.SEVEN_BIT: seven_bit_encoder,
    ContentEncoding.EIGHT_BIT: eight_bit_encoder,
    ContentEncoding.BINARY: binary_encoder,
    ContentEncoding.QUOTED_PRINTABLE: quoted_printable_encoder,
    ContentEncoding.BASE16: b16_encoder,
    ContentEncoding.BASE32: b32_encoder,
    ContentEncoding.BASE64: b64_encoder,
}


def encode(string: str, encoding: Optional[ContentEncoding]) -> str:
    """Apply the encoder registered for ``encoding``.

    ``string`` is returned unchanged when ``Encoder`` has no entry for
    ``encoding`` (for example when it is ``None``).
    """
    encoder = Encoder.get(encoding)
    if encoder is None:
        return string
    return encoder(string)
1 change: 1 addition & 0 deletions jsf/schema_types/string_utils/content_type/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python_sources(name="src")
38 changes: 38 additions & 0 deletions jsf/schema_types/string_utils/content_type/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from jsf.schema_types.string_utils.content_type.application__gzip import create_random_gzip
from jsf.schema_types.string_utils.content_type.application__jwt import create_random_jwt
from jsf.schema_types.string_utils.content_type.application__zip import create_random_zip
from jsf.schema_types.string_utils.content_type.image__jpeg import random_jpg
from jsf.schema_types.string_utils.content_type.image__webp import random_webp
from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence


def not_implemented(*args, **kwargs):
    """Fallback generator: accept any arguments and always raise ``NotImplementedError``."""
    raise NotImplementedError


# Maps a `contentMediaType` value to the callable that produces content for it.
# `generate` calls the selected callable as `fn(min_length, max_length)`.
# Commented-out entries are media types that have no generator yet.
ContentTypeGenerator = {
    "application/jwt": create_random_jwt,
    # "text/html": not_implemented,
    # "application/xml": not_implemented, # To implement: Port code from https://onlinerandomtools.com/generate-random-xml
    # "image/bmp": not_implemented, # To implement: request jpg and convert to bmp
    # "text/css": not_implemented,
    # "text/csv": not_implemented,
    # "image/gif": not_implemented, # To implement: request jpg and convert to gif
    "image/jpeg": random_jpg,
    # NOTE(review): the JSON entry below links to the XML generator page - possibly a copy/paste; confirm.
    # "application/json": not_implemented, # To implement: Port code from https://onlinerandomtools.com/generate-random-xml
    # "text/javascript": not_implemented,
    # "image/png": not_implemented, # To implement: request jpg and convert to png
    # "image/tiff": not_implemented, # To implement: request jpg and convert to tiff
    "text/plain": random_fixed_length_sentence,
    "image/webp": random_webp,
    "application/zip": create_random_zip,
    "application/gzip": create_random_gzip,
    # "application/x-bzip": not_implemented, # To implement: create in memory random files using text/plain then zip
    # "application/x-bzip2": not_implemented, # To implement: create in memory random files using text/plain then zip
    # "application/pdf": not_implemented, # To implement: request jpg and convert to pdf and/or make pdf using python package
    # "text/calendar": not_implemented,
}


def generate(content_type: str, min_length: int, max_length: int) -> str:
    """Generate content for ``content_type`` using its registered generator.

    Args:
        content_type: Media type to look up in ``ContentTypeGenerator``.
        min_length: Passed through to the generator as its first argument.
        max_length: Passed through to the generator as its second argument.

    Returns:
        Whatever the selected generator returns.

    Raises:
        NotImplementedError: If no generator is registered for ``content_type``.
    """
    generator = ContentTypeGenerator.get(content_type)
    if generator is None:
        # Name the offending media type and the alternatives so a schema author
        # can tell which property failed without reading a bare traceback.
        supported = ", ".join(sorted(ContentTypeGenerator))
        raise NotImplementedError(
            f"contentMediaType {content_type!r} is not supported; supported types: {supported}"
        )
    return generator(min_length, max_length)
16 changes: 16 additions & 0 deletions jsf/schema_types/string_utils/content_type/application__gzip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import gzip
import io

from jsf.schema_types.string_utils.content_encoding import bytes_str_repr
from jsf.schema_types.string_utils.content_type.application__zip import create_random_file_name
from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence


def create_random_gzip(*args, **kwargs) -> str:
    """Return a gzip archive of one random sentence, as escaped text.

    The archive member is named by ``create_random_file_name()`` and holds the
    UTF-8 bytes of ``random_fixed_length_sentence()``. Positional and keyword
    arguments are accepted and ignored.
    """
    buffer = io.BytesIO()
    # The context manager closes the GzipFile (writing the trailer) even if the
    # write raises; closing a GzipFile leaves the caller-supplied fileobj open.
    with gzip.GzipFile(filename=create_random_file_name(), mode="wb", fileobj=buffer) as gz:
        gz.write(random_fixed_length_sentence().encode("utf-8"))

    # getvalue() returns the whole buffer regardless of the stream position.
    return bytes_str_repr(buffer.getvalue())
48 changes: 48 additions & 0 deletions jsf/schema_types/string_utils/content_type/application__jwt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import base64
import hashlib
import hmac
import json
import secrets
from datetime import timezone

from faker import Faker

faker = Faker()


def base64url_encode(input: bytes):
    """Return the URL-safe base64 text of ``input`` with the ``=`` padding removed."""
    encoded = base64.urlsafe_b64encode(input)
    # "=" occurs only as trailing padding, so stripping the right end removes all of it.
    return encoded.decode("utf-8").rstrip("=")


def jwt(api_key, expiry, api_sec):
    """Build a compact JWT string signed with HMAC-SHA256.

    The header is ``{"typ": "JWT", "alg": "HS256"}`` and the payload is
    ``{"iss": api_key, "exp": expiry}``. Both are serialised as compact JSON and
    base64url-encoded; the signature covers ``header.payload`` and is keyed with
    the encoded bytes of ``api_sec``.

    Returns:
        The three base64url segments joined with ``.``.
    """

    def _segment(obj):
        # Compact separators keep the JSON free of insignificant whitespace.
        compact = json.dumps(obj, separators=(",", ":"))
        return base64url_encode(compact.encode())

    header_segment = _segment({"typ": "JWT", "alg": "HS256"})
    payload_segment = _segment({"iss": api_key, "exp": expiry})

    signing_input = f"{header_segment}.{payload_segment}"
    digest = hmac.new(api_sec.encode(), signing_input.encode(), hashlib.sha256).digest()

    return f"{signing_input}.{base64url_encode(digest)}"


def create_random_jwt(*args, **kwargs):
    """Return a JWT with a random issuer, a random signing secret and a random expiry.

    Issuer and secret come from ``secrets.token_urlsafe(16)``; the expiry is the
    integer timestamp of ``faker.date_time(timezone.utc)``. Positional and
    keyword arguments are accepted and ignored.
    """
    issuer = secrets.token_urlsafe(16)
    signing_secret = secrets.token_urlsafe(16)
    expires_at = faker.date_time(timezone.utc)
    return jwt(issuer, int(expires_at.timestamp()), signing_secret)
27 changes: 27 additions & 0 deletions jsf/schema_types/string_utils/content_type/application__zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import io
import random
import zipfile
from typing import Tuple

import rstr

from jsf.schema_types.string_utils.content_encoding import bytes_str_repr
from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence


def create_random_file_name() -> str:
    """Return a random name: one or more ASCII letters/digits followed by ``.txt``."""
    name_pattern = r"[a-zA-Z0-9]+\.txt"
    return rstr.xeger(name_pattern)


def create_random_file() -> Tuple[str, io.BytesIO]:
    """Return ``(name, contents)``: a random file name and a buffer of random sentence bytes."""
    name = create_random_file_name()
    payload = random_fixed_length_sentence().encode("utf-8")
    return name, io.BytesIO(payload)


def create_random_zip(*args, **kwargs) -> str:
    """Return a deflate-compressed zip of 1-10 random text files, as escaped text.

    Positional and keyword arguments are accepted and ignored.
    """
    buffer = io.BytesIO()

    with zipfile.ZipFile(
        buffer, mode="a", compression=zipfile.ZIP_DEFLATED, allowZip64=False
    ) as archive:
        file_count = random.randint(1, 10)
        # Create every file before writing any, matching the original call order.
        entries = [create_random_file() for _ in range(file_count)]
        for name, contents in entries:
            archive.writestr(name, contents.getvalue())

    return bytes_str_repr(buffer.getvalue())
13 changes: 13 additions & 0 deletions jsf/schema_types/string_utils/content_type/image__jpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import random

import requests

from jsf.schema_types.string_utils.content_encoding import bytes_str_repr


def random_jpg(*args, **kwargs) -> str:
    """Download a random JPEG from picsum.photos and return its bytes as escaped text.

    Width and height are each a random multiple of 10 between 10 and 500.
    Positional and keyword arguments are accepted and ignored.

    Raises:
        requests.RequestException: On connection failure, timeout, or a 4xx/5xx response.
    """
    width = random.randint(1, 50) * 10
    height = random.randint(1, 50) * 10
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(f"https://picsum.photos/{width}/{height}.jpg", timeout=10)
    # Fail loudly rather than returning an error page as if it were image data.
    response.raise_for_status()
    return bytes_str_repr(response.content)
13 changes: 13 additions & 0 deletions jsf/schema_types/string_utils/content_type/image__webp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import random

import requests

from jsf.schema_types.string_utils.content_encoding import bytes_str_repr


def random_webp(*args, **kwargs) -> str:
    """Download a random WebP image from picsum.photos and return its bytes as escaped text.

    Width and height are each a random multiple of 10 between 10 and 500.
    Positional and keyword arguments are accepted and ignored.

    Raises:
        requests.RequestException: On connection failure, timeout, or a 4xx/5xx response.
    """
    width = random.randint(1, 50) * 10
    height = random.randint(1, 50) * 10
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(f"https://picsum.photos/{width}/{height}.webp", timeout=10)
    # Fail loudly rather than returning an error page as if it were image data.
    response.raise_for_status()
    return bytes_str_repr(response.content)
Loading

0 comments on commit 665284f

Please sign in to comment.