Feature/content type enhancement (#58)

* Adding JWT, plain, zip, gzip, jpg, webp generation for content type
ghandic · Jan 8, 2023 · 665284f · 665284f
1 parent 7ffd0dd
commit 665284f
Show file tree

Hide file tree

Showing 16 changed files with 313 additions and 86 deletions.
diff --git a/.github/workflows/python-package.yaml b/.github/workflows/python-package.yaml
@@ -57,7 +57,7 @@ jobs:
           pip install .
           PACKAGE_DIR=`pip show jsf | grep "Location" | sed 's/^.*: //'`
           cd $PACKAGE_DIR/jsf
-          pip install pytest
+          pip install pytest pyjwt
           pytest
       - name: Upload coverage
         uses: codecov/codecov-action@v1

diff --git a/jsf/README.md b/jsf/README.md
@@ -147,6 +147,10 @@ Navigate to [http://127.0.0.1:8000](http://127.0.0.1:8000) and check out your en
 
 </div>
 
+### Partially supported features
+
+- string `contentMediaType` - only a subset of media types is supported; however, the list can be extended in [this file](jsf/schema_types/string_utils/content_type/__init__.py)
+
 ## Credits
 
 - This repository is a Python port of [json-schema-faker](https://github.com/json-schema-faker/json-schema-faker) with some minor differences in implementation.

diff --git a/jsf/schema_types/string.py b/jsf/schema_types/string.py
@@ -1,16 +1,15 @@
-import base64
 import logging
-import quopri
 import random
 import re
 from datetime import timezone
-from enum import Enum
 from typing import Any, Callable, Dict, Optional
 
 import rstr
 from faker import Faker
 
 from jsf.schema_types.base import BaseSchema, ProviderNotSetException
+from jsf.schema_types.string_utils import content_encoding, content_type
+from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence
 
 logger = logging.getLogger()
 faker = Faker()
@@ -19,11 +18,6 @@
 URI_PATTERN = f"https?://{{hostname}}(?:{FRAGMENT})+"
 PARAM_PATTERN = "(?:\\?([a-z]{1,7}(=\\w{1,5})?&){0,3})?"
 
-LOREM = """Lorem ipsum dolor sit amet consectetur adipisicing elit.
-Hic molestias, esse veniam placeat officiis nobis architecto modi
-possimus reiciendis accusantium exercitationem quas illum libero odit magnam,
-reprehenderit ipsum, repellendus culpa!""".split()
-
 
 def temporal_duration(
     positive: bool = True,
@@ -123,100 +117,35 @@ def fake_duration():
 }
 
 
-def random_fixed_length_sentence(_min: int, _max: int) -> str:
-    output = ""
-    while len(output) < _max:
-        remaining = _max - len(output)
-        valid_words = list(filter(lambda s: len(s) < remaining, LOREM))
-        if len(valid_words) == 0:
-            break
-        output += random.choice(valid_words) + " "
-        if len(output) > _min and random.uniform(0, 1) > 0.9:
-            break
-    return output.strip()
-
-
-class ContentEncoding(str, Enum):
-    SEVEN_BIT = "7-bit"
-    EIGHT_BIT = "8-bit"
-    BINARY = "binary"
-    QUOTED_PRINTABLE = "quoted-printable"
-    BASE16 = "base-16"
-    BASE32 = "base-32"
-    BASE64 = "base-64"
-
-
-def binary_encoder(string: str) -> str:
-    return "".join(format(x, "b") for x in bytearray(string, "utf-8"))
-
-
-def bytes_str_repr(b: bytes) -> str:
-    return repr(b)[2:-1]
-
-
-def seven_bit_encoder(string: str) -> str:
-    return bytes_str_repr(string.encode("utf-7"))
-
-
-def eight_bit_encoder(string: str) -> str:
-    return bytes_str_repr(string.encode("utf-8"))
-
-
-def quoted_printable_encoder(string: str) -> str:
-    return bytes_str_repr(quopri.encodestring(string.encode("utf-8")))
-
-
-def b16_encoder(string: str) -> str:
-    return bytes_str_repr(base64.b16encode(string.encode("utf-8")))
-
-
-def b32_encoder(string: str) -> str:
-    return bytes_str_repr(base64.b32encode(string.encode("utf-8")))
-
-
-def b64_encoder(string: str) -> str:
-    return bytes_str_repr(base64.b64encode(string.encode("utf-8")))
-
-
-Encoder = {
-    ContentEncoding.SEVEN_BIT: seven_bit_encoder,
-    ContentEncoding.EIGHT_BIT: eight_bit_encoder,
-    ContentEncoding.BINARY: binary_encoder,
-    ContentEncoding.QUOTED_PRINTABLE: quoted_printable_encoder,
-    ContentEncoding.BASE16: b16_encoder,
-    ContentEncoding.BASE32: b32_encoder,
-    ContentEncoding.BASE64: b64_encoder,
-}
-
-
-def encode(string: str, encoding: Optional[ContentEncoding]) -> str:
-    return Encoder.get(encoding, lambda s: s)(string)
-
-
 class String(BaseSchema):
     minLength: Optional[float] = 0
     maxLength: Optional[float] = 50
     pattern: Optional[str] = None
     format: Optional[str] = None
     # enum: Optional[List[Union[str, int, float]]] = None  # NOTE: Not used - enums go to enum class
-    # contentMediaType: Optional[str] = None  # TODO: Long list, need to document which ones will be supported and how to extend
-    contentEncoding: Optional[ContentEncoding]
-    # contentSchema # No docs detailing this yet...
+    contentMediaType: Optional[str] = None
+    contentEncoding: Optional[content_encoding.ContentEncoding]
+    # contentSchema # Doesn't help with generation
 
     def generate(self, context: Dict[str, Any]) -> Optional[str]:
         try:
             s = super().generate(context)
-            return str(encode(s, self.contentEncoding)) if s else s
+            return str(content_encoding.encode(s, self.contentEncoding)) if s else s
         except ProviderNotSetException:
             format_map["regex"] = lambda: rstr.xeger(self.pattern)
             format_map["relative-json-pointer"] = lambda: random.choice(
                 context["state"]["__all_json_paths__"]
             )
             if format_map.get(self.format) is not None:
-                return encode(format_map[self.format](), self.contentEncoding)
+                return content_encoding.encode(format_map[self.format](), self.contentEncoding)
             if self.pattern is not None:
-                return encode(rstr.xeger(self.pattern), self.contentEncoding)
-            return encode(
+                return content_encoding.encode(rstr.xeger(self.pattern), self.contentEncoding)
+            if self.contentMediaType is not None:
+                return content_encoding.encode(
+                    content_type.generate(self.contentMediaType, self.minLength, self.maxLength),
+                    self.contentEncoding,
+                )
+            return content_encoding.encode(
                 random_fixed_length_sentence(self.minLength, self.maxLength), self.contentEncoding
             )
 

diff --git a/jsf/schema_types/string_utils/BUILD b/jsf/schema_types/string_utils/BUILD
@@ -0,0 +1 @@
+python_sources(name="src")
diff --git a/jsf/schema_types/string_utils/content_encoding.py b/jsf/schema_types/string_utils/content_encoding.py
@@ -0,0 +1,61 @@
+import base64
+import quopri
+from enum import Enum
+from typing import Optional
+
+
+class ContentEncoding(str, Enum):
+    SEVEN_BIT = "7-bit"
+    EIGHT_BIT = "8-bit"
+    BINARY = "binary"
+    QUOTED_PRINTABLE = "quoted-printable"
+    BASE16 = "base-16"
+    BASE32 = "base-32"
+    BASE64 = "base-64"
+
+
+def binary_encoder(string: str) -> str:
+    return "".join(format(x, "b") for x in bytearray(string, "utf-8"))
+
+
+def bytes_str_repr(b: bytes) -> str:
+    return repr(b)[2:-1]
+
+
+def seven_bit_encoder(string: str) -> str:
+    return bytes_str_repr(string.encode("utf-7"))
+
+
+def eight_bit_encoder(string: str) -> str:
+    return bytes_str_repr(string.encode("utf-8"))
+
+
+def quoted_printable_encoder(string: str) -> str:
+    return bytes_str_repr(quopri.encodestring(string.encode("utf-8")))
+
+
+def b16_encoder(string: str) -> str:
+    return bytes_str_repr(base64.b16encode(string.encode("utf-8")))
+
+
+def b32_encoder(string: str) -> str:
+    return bytes_str_repr(base64.b32encode(string.encode("utf-8")))
+
+
+def b64_encoder(string: str) -> str:
+    return bytes_str_repr(base64.b64encode(string.encode("utf-8")))
+
+
+Encoder = {
+    ContentEncoding.SEVEN_BIT: seven_bit_encoder,
+    ContentEncoding.EIGHT_BIT: eight_bit_encoder,
+    ContentEncoding.BINARY: binary_encoder,
+    ContentEncoding.QUOTED_PRINTABLE: quoted_printable_encoder,
+    ContentEncoding.BASE16: b16_encoder,
+    ContentEncoding.BASE32: b32_encoder,
+    ContentEncoding.BASE64: b64_encoder,
+}
+
+
+def encode(string: str, encoding: Optional[ContentEncoding]) -> str:
+    return Encoder.get(encoding, lambda s: s)(string)
diff --git a/jsf/schema_types/string_utils/content_type/BUILD b/jsf/schema_types/string_utils/content_type/BUILD
@@ -0,0 +1 @@
+python_sources(name="src")
diff --git a/jsf/schema_types/string_utils/content_type/__init__.py b/jsf/schema_types/string_utils/content_type/__init__.py
@@ -0,0 +1,38 @@
+from jsf.schema_types.string_utils.content_type.application__gzip import create_random_gzip
+from jsf.schema_types.string_utils.content_type.application__jwt import create_random_jwt
+from jsf.schema_types.string_utils.content_type.application__zip import create_random_zip
+from jsf.schema_types.string_utils.content_type.image__jpeg import random_jpg
+from jsf.schema_types.string_utils.content_type.image__webp import random_webp
+from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence
+
+
+def not_implemented(*args, **kwargs):
+    raise NotImplementedError()
+
+
+ContentTypeGenerator = {
+    "application/jwt": create_random_jwt,
+    # "text/html": not_implemented,
+    # "application/xml": not_implemented, # To implement: Port code from https://onlinerandomtools.com/generate-random-xml
+    # "image/bmp": not_implemented, # To implement: request jpg and convert to bmp
+    # "text/css": not_implemented,
+    # "text/csv": not_implemented,
+    # "image/gif": not_implemented, # To implement: request jpg and convert to gif
+    "image/jpeg": random_jpg,
+    # "application/json": not_implemented, # To implement: Port code from https://onlinerandomtools.com/generate-random-xml
+    # "text/javascript": not_implemented,
+    # "image/png": not_implemented, # To implement: request jpg and convert to png
+    # "image/tiff": not_implemented, # To implement: request jpg and convert to tiff
+    "text/plain": random_fixed_length_sentence,
+    "image/webp": random_webp,
+    "application/zip": create_random_zip,
+    "application/gzip": create_random_gzip,
+    # "application/x-bzip": not_implemented,  # To implement: create in memory random files using text/plain then zip
+    # "application/x-bzip2": not_implemented,  # To implement: create in memory random files using text/plain then zip
+    # "application/pdf": not_implemented, # To implement: request jpg and convert to pdf and/or make pdf using python package
+    # "text/calendar": not_implemented,
+}
+
+
+def generate(content_type: str, min_length: int, max_length: int) -> str:
+    return ContentTypeGenerator.get(content_type, not_implemented)(min_length, max_length)
diff --git a/jsf/schema_types/string_utils/content_type/application__gzip.py b/jsf/schema_types/string_utils/content_type/application__gzip.py
@@ -0,0 +1,16 @@
+import gzip
+import io
+
+from jsf.schema_types.string_utils.content_encoding import bytes_str_repr
+from jsf.schema_types.string_utils.content_type.application__zip import create_random_file_name
+from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence
+
+
+def create_random_gzip(*args, **kwargs) -> str:
+    fgz = io.BytesIO()
+    gzip_obj = gzip.GzipFile(filename=create_random_file_name(), mode="wb", fileobj=fgz)
+    gzip_obj.write(random_fixed_length_sentence().encode("utf-8"))
+    gzip_obj.close()
+
+    fgz.seek(0)
+    return bytes_str_repr(fgz.getvalue())
diff --git a/jsf/schema_types/string_utils/content_type/application__jwt.py b/jsf/schema_types/string_utils/content_type/application__jwt.py
@@ -0,0 +1,48 @@
+import base64
+import hashlib
+import hmac
+import json
+import secrets
+from datetime import timezone
+
+from faker import Faker
+
+faker = Faker()
+
+
+def base64url_encode(input: bytes):
+    return base64.urlsafe_b64encode(input).decode("utf-8").replace("=", "")
+
+
+def jwt(api_key, expiry, api_sec):
+
+    segments = []
+
+    header = {"typ": "JWT", "alg": "HS256"}
+    payload = {"iss": api_key, "exp": expiry}
+
+    json_header = json.dumps(header, separators=(",", ":")).encode()
+    json_payload = json.dumps(payload, separators=(",", ":")).encode()
+
+    segments.append(base64url_encode(json_header))
+    segments.append(base64url_encode(json_payload))
+
+    signing_input = ".".join(segments).encode()
+    key = api_sec.encode()
+    signature = hmac.new(key, signing_input, hashlib.sha256).digest()
+
+    segments.append(base64url_encode(signature))
+
+    encoded_string = ".".join(segments)
+
+    return encoded_string
+
+
+def create_random_jwt(*args, **kwargs):
+
+    api_key = secrets.token_urlsafe(16)
+    api_sec = secrets.token_urlsafe(16)
+
+    expiry = int(faker.date_time(timezone.utc).timestamp())
+
+    return jwt(api_key, expiry, api_sec)
diff --git a/jsf/schema_types/string_utils/content_type/application__zip.py b/jsf/schema_types/string_utils/content_type/application__zip.py
@@ -0,0 +1,27 @@
+import io
+import random
+import zipfile
+from typing import Tuple
+
+import rstr
+
+from jsf.schema_types.string_utils.content_encoding import bytes_str_repr
+from jsf.schema_types.string_utils.content_type.text__plain import random_fixed_length_sentence
+
+
+def create_random_file_name() -> str:
+    return rstr.xeger(r"[a-zA-Z0-9]+\.txt")
+
+
+def create_random_file() -> Tuple[str, io.BytesIO]:
+    return (create_random_file_name(), io.BytesIO(random_fixed_length_sentence().encode("utf-8")))
+
+
+def create_random_zip(*args, **kwargs) -> str:
+    zip_buffer = io.BytesIO()
+
+    with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file:
+        for file_name, data in [create_random_file() for _ in range(random.randint(1, 10))]:
+            zip_file.writestr(file_name, data.getvalue())
+
+    return bytes_str_repr(zip_buffer.getvalue())
diff --git a/jsf/schema_types/string_utils/content_type/image__jpeg.py b/jsf/schema_types/string_utils/content_type/image__jpeg.py
@@ -0,0 +1,13 @@
+import random
+
+import requests
+
+from jsf.schema_types.string_utils.content_encoding import bytes_str_repr
+
+
+def random_jpg(*args, **kwargs) -> str:
+    return bytes_str_repr(
+        requests.get(
+            f"https://picsum.photos/{random.randint(1,50)*10}/{random.randint(1,50)*10}.jpg"
+        ).content
+    )
diff --git a/jsf/schema_types/string_utils/content_type/image__webp.py b/jsf/schema_types/string_utils/content_type/image__webp.py
@@ -0,0 +1,13 @@
+import random
+
+import requests
+
+from jsf.schema_types.string_utils.content_encoding import bytes_str_repr
+
+
+def random_webp(*args, **kwargs) -> str:
+    return bytes_str_repr(
+        requests.get(
+            f"https://picsum.photos/{random.randint(1,50)*10}/{random.randint(1,50)*10}.webp"
+        ).content
+    )