Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

releasing 1.2.3 #227

Merged
merged 15 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion clams/app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
datefmt="%Y-%m-%d %H:%M:%S")


# Values that are interpreted as ``False`` when a parameter value is converted
# to a boolean (see ``bool_param``); any value not listed here is treated as
# ``True``. Both string spellings and the native ``0``/``False`` are included.
falsy_values = [
'False',
'false',
'F',
'f',
'0',
0,
False
]

class ClamsApp(ABC):
"""
An abstract class to define API's for ClamsApps. A CLAMS app should inherit
Expand Down Expand Up @@ -388,7 +398,7 @@ def bool_param(value) -> bool:
"""
Helper function to convert string values to bool type.
"""
return False if value in (False, 0, 'False', 'false', '0') else True
return False if value in falsy_values else True

@staticmethod
def float_param(value) -> float:
Expand Down
9 changes: 6 additions & 3 deletions clams/appmetadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ class RuntimeParameter(_BaseModel):
type: param_value_types = pydantic.Field(
...,
description=f"Type of the parameter value the app expects. Must be one of {param_value_types_values}. When "
"type is ``map``, ``multivalued=true`` is automatically forced. \n\n"
"type is ``map``, ``multivalued=true`` is forced, and when ``boolean``, ``multivalued=false`` is "
"forced. \n\n"
"Notes for developers: \n\n"
"When the type is ``map``, the parameter value (still a single string from the users' perspective) "
"must be formatted as a ``KEY:VALUE`` pair, namely a colon-separated string. To pass multiple "
Expand All @@ -186,7 +187,7 @@ class RuntimeParameter(_BaseModel):
description="(optional) Default value for the parameter.\n\n"
"Notes for developers: \n\n"
"Setting a default value makes a parameter `optional`. \n\n"
"When ``multivalued=True``, the default value should be a list of values. \n\n"
"When ``multivalued=true``, the default value should be a list of values. \n\n"
"When ``type=map``, the default value should be a list of colon-separated strings. \n\n"
)
multivalued: bool = pydantic.Field(
Expand Down Expand Up @@ -420,8 +421,10 @@ def add_parameter(self, name: str, description: str, type: param_value_types,
# see https://docs.pydantic.dev/1.10/usage/types/#unions
# e.g. casting 0.1 using the `primitives` dict will result in 0 (int)
# while casting "0.1" using the `primitives` dict will result in 0.1 (float)
if type == 'map' and multivalued is False:
if type == 'map':
multivalued = True
if type == 'boolean':
multivalued = False
if default is not None:
if isinstance(default, list):
default = [str(d) for d in default]
Expand Down
78 changes: 64 additions & 14 deletions clams/develop/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import clams

update_tmp_suffix = '.tmp'
available_recipes = {
'app': {
'description': 'Skeleton code for a CLAMS app',
Expand All @@ -26,7 +27,7 @@ class CookieCutter(object):
def __init__(self, name: str, outdir: str, recipes: List[str]):
self.rawname = name
self.name_tokens = self.tokenize_rawname()
self.ourdir = pathlib.Path(outdir)
self.outdir = pathlib.Path(outdir)
if recipes:
self.recipes = recipes
else:
Expand All @@ -42,24 +43,30 @@ def tokenize_rawname(self):
words.pop()
return words

def bake(self):
def bake(self, update_level=0):
print(f"Baking {self.recipes}")
for recipe in self.recipes:
src_dir = pathlib.Path(__file__).parent / 'templates' / available_recipes[recipe]['sourcedir']
dst_dir = self.ourdir / self.rawname / available_recipes[recipe]['targetdir']
dst_dir = self.outdir / self.rawname / available_recipes[recipe]['targetdir']
if recipe == 'app':
self.bake_app(src_dir, dst_dir)
caps = [t.capitalize() for t in self.name_tokens]
app_vars = {
'CLAMS_VERSION': clams.__version__,
'APP_CLASS_NAME': "".join(caps),
'APP_NAME': " ".join(caps),
'APP_IDENTIFIER': '-'.join(self.name_tokens)
}
if update_level > 0:
self.reheat_app(src_dir, dst_dir, app_vars, reheat_level=update_level)
else:
if dst_dir.exists():
raise FileExistsError(f" {dst_dir} already exists. Did you mean `--update`? ")
self.bake_app(src_dir, dst_dir, app_vars)
if recipe == 'gha':
# There's nothing for devs to tweak GHA template, so first generation and updating are the same.
self.bake_gha(src_dir, dst_dir)

def bake_app(self, src_dir, dst_dir):
caps = [t.capitalize() for t in self.name_tokens]
templating_vars = {
'CLAMS_VERSION': clams.__version__,
'APP_CLASS_NAME': "".join(caps),
'APP_NAME': " ".join(caps),
'APP_IDENTIFIER': '-'.join(self.name_tokens)
}
def bake_app(self, src_dir, dst_dir, templating_vars):
for g in src_dir.glob("**/*.template"):
r = g.relative_to(src_dir).parent
f = g.with_suffix('').name
Expand All @@ -71,7 +78,40 @@ def bake_app(self, src_dir, dst_dir):
out_f.write(compiled)
print(f"App skeleton code is copied to {self.rawname}")
print(f" Checkout {self.rawname}/README.md for the next steps!")


def reheat_app(self, src_dir, dst_dir, templating_vars, reheat_level=1):
    """
    Update an existing app directory from the "essential" app templates.

    For each essential template under ``src_dir``, the template is compiled
    with ``templating_vars`` and then:

    * written to ``dst_dir`` as-is when the target file does not exist yet,
    * written next to the target with the ``update_tmp_suffix`` suffix when
      the target exists and its content differs from the compiled template,
    * skipped when the target exists and is identical to the compiled template.

    Non-essential templates are never touched when updating.

    :param src_dir: ``pathlib.Path`` of the directory holding ``*.template`` files
    :param dst_dir: ``pathlib.Path`` of the app directory to update
    :param templating_vars: mapping of template variable names to their values
    :param reheat_level: requested update level; currently not consulted, only
                         the suffixed-copy behavior described above is implemented
    """
    essentials = ['app.py', 'metadata.py', 'cli.py', 'Containerfile', 'requirements.txt']
    for template in src_dir.glob("**/*.template"):
        dirname = template.relative_to(src_dir).parent
        basename = template.with_suffix('').name
        if basename not in essentials:
            # if non-essential, just skip when updating
            continue
        # context managers guarantee the handles are released even if reading
        # or substitution raises
        with open(template, 'r') as in_f:
            compiled = Template(in_f.read()).safe_substitute(templating_vars)
        ori_fpath = dst_dir / dirname / basename
        if not ori_fpath.exists():
            # this file is new in this version of cookiecutter
            with open(ori_fpath, 'w') as out_f:
                out_f.write(compiled)
            continue
        with open(ori_fpath, 'r') as ori_f:
            ori_content = ori_f.read()
        if ori_content != compiled:
            # when the target file already exists, we need to do diff & patch
            # TODO (krim @ 5/5/24): add update level 2 and 3 code here
            out_fpath = f'{ori_fpath}{update_tmp_suffix}'
            print(f' {dst_dir / dirname / basename} already exists, generating a tmp file: {out_fpath}')
            with open(out_fpath, 'w') as out_f:
                out_f.write(compiled)
        else:
            print(f' {dst_dir / dirname / basename} already exists, but the content is unchanged from the '
                  f'template, skipping re-generating')
    print(f"App skeleton code is updated in {self.rawname}")
    print(f" Checkout {self.rawname}/README.md for the next steps!")

def bake_gha(self, src_dir, dst_dir):
self.simple_recursive_copy_minus_template_suffix(src_dir, dst_dir)
print(f"GitHub Actions workflow files are copied to {self.rawname}/.github")
Expand Down Expand Up @@ -128,12 +168,22 @@ def prep_argparser(**kwargs):
nargs='?',
help='The name of the parent directory where the app skeleton directory is placed. (default: current directory)'
)
parser.add_argument(
    '-u', '--update',
    action='count',
    # a `count` option is left as None when the flag is never given; the value
    # is later compared with `> 0`, so it must default to an integer
    default=0,
    help=f'Set update level by passing this flag multiple times. This is EXPERIMENTAL, and developers MUST NOT '
         f'rely on the update results, and should conduct manual checks afterward. LEVEL 0: does not update and '
         f'raise an error when existing directory found. LEVEL 1: generate non-existing files and generate '
         f'`{update_tmp_suffix}`-suffixed files for existing one. LEVEL 2 (WIP): generate non-existing files and '
         f'automatically generate patch files for existing files. LEVEL 3 (WIP): generate non-existing files and '
         f'apply patches to existing files. (default: 0)'
)
return parser


def main(args):
cutter = CookieCutter(name=args.name, outdir=args.parent_dir, recipes=args.recipes)
cutter.bake()
cutter.bake(args.update)

if __name__ == '__main__':
parser = prep_argparser()
Expand Down
14 changes: 14 additions & 0 deletions clams/develop/templates/app/Containerfile.template
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@ ARG CLAMS_APP_VERSION
ENV CLAMS_APP_VERSION ${CLAMS_APP_VERSION}
################################################################################

################################################################################
# This is duplicated from the base image Containerfile,
# but it makes sure the cache directories are consistent across all CLAMS apps

# https://github.com/openai/whisper/blob/ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab/whisper/__init__.py#L130
ENV XDG_CACHE_HOME='/cache'
# https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#hfhome
ENV HF_HOME="/cache/huggingface"
# https://pytorch.org/docs/stable/hub.html#where-are-my-downloaded-models-saved
ENV TORCH_HOME="/cache/torch"

RUN mkdir /cache && rm -rf /root/.cache && ln -s /cache /root/.cache
################################################################################

################################################################################
# clams-python base images are based on debian distro
# install more system packages as needed using the apt manager
Expand Down
14 changes: 13 additions & 1 deletion clams/develop/templates/app/app.py.template
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ class $APP_CLASS_NAME(ClamsApp):
# see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate
raise NotImplementedError

def get_app():
    """
    Placeholder to be implemented by the app developer: it should create and return an instance of the app class.

    This function is called without any arguments, so any external information (such as initial app configuration)
    cannot be passed in as function arguments. The easiest way to provide such information is to set global variables
    before calling this function and to read them here.
    """
    # for example:
    # return $APP_CLASS_NAME(create, from, global, params)
    raise NotImplementedError


if __name__ == "__main__":
parser = argparse.ArgumentParser()
Expand All @@ -50,7 +60,9 @@ if __name__ == "__main__":
parsed_args = parser.parse_args()

# create the app instance
app = $APP_CLASS_NAME()
# if get_app() call requires any "configurations", they should be set now as global variables
# and referenced in the get_app() function. NOTE THAT you should not change the signature of get_app()
app = get_app()

http_app = Restifier(app, port=int(parsed_args.port))
# for running the application in production mode
Expand Down
92 changes: 92 additions & 0 deletions clams/develop/templates/app/cli.py.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
The purpose of this file is to define a thin CLI interface for your app

DO NOT CHANGE the name of the file
"""

import argparse
import sys
from contextlib import redirect_stdout

import app

import clams.app
from clams import AppMetadata


def metadata_to_argparser(app_metadata: AppMetadata) -> argparse.ArgumentParser:
    """
    Build an argparse.ArgumentParser whose options mirror the parameters declared in the app metadata (metadata.py),
    followed by two optional positionals for the input and output MMIF files.
    """
    cli_parser = argparse.ArgumentParser(
        description=f"{app_metadata.name}: {app_metadata.description} (visit {app_metadata.url} for more info)",
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # one `--<name>` option per app parameter, always read as raw string(s)
    for param in app_metadata.parameters:
        if param.multivalued:
            arity = {'nargs': '+', 'action': 'extend'}
        else:
            arity = {'nargs': 1, 'action': 'store'}
        option = cli_parser.add_argument(f"--{param.name}", help=param.description, type=str, **arity)
        if param.choices is not None:
            option.choices = param.choices
        if param.default is None:
            continue
        # The default is only advertised in the help text; it is not registered with argparse, because
        # (per the original note) default values are handled by the app._refined_params() method.
        default_note = f" (default: {param.default}"
        if param.type == "boolean":
            falsy_strings = [v for v in clams.app.falsy_values if isinstance(v, str)]
            default_note += f", any value except for {falsy_strings} will be interpreted as True"
        option.help += default_note + ')'

    # when stdin is a keyboard (tty) there is no piped input, so the default is None
    piped_input = None if sys.stdin.isatty() else sys.stdin
    cli_parser.add_argument(
        'IN_MMIF_FILE',
        nargs='?',
        type=argparse.FileType('r'),
        help='input MMIF file path, or STDIN if `-` or not provided. NOTE: When running this cli.py in '
             'a containerized environment, make sure the container is run with `-i` flag to keep stdin '
             'open.',
        default=piped_input)
    cli_parser.add_argument(
        'OUT_MMIF_FILE',
        nargs='?',
        type=argparse.FileType('w'),
        help='output MMIF file path, or STDOUT if `-` or not provided. NOTE: When this is set to '
             'STDOUT, any print statements in the app code will be redirected to stderr.',
        default=sys.stdout)
    return cli_parser


if __name__ == "__main__":
    clamsapp = app.get_app()
    arg_parser = metadata_to_argparser(app_metadata=clamsapp.metadata)
    args = arg_parser.parse_args()

    # no input file and nothing piped in: show the usage and exit with an error status
    if not args.IN_MMIF_FILE:
        arg_parser.print_help()
        sys.exit(1)

    in_data = args.IN_MMIF_FILE.read()
    # The flask webapp interface passes parameters as an "unflattened" dict to handle multivalued parameters
    # (https://werkzeug.palletsprojects.com/en/latest/datastructures/#werkzeug.datastructures.MultiDict.to_dict),
    # so the parsed CLI arguments are converted into the same structure: every dict value is wrapped in a list.
    io_positionals = ['IN_MMIF_FILE', 'OUT_MMIF_FILE']
    params = {
        key: (val if isinstance(val, list) else [val])
        for key, val in vars(args).items()
        if val is not None and key not in io_positionals
    }

    if args.OUT_MMIF_FILE.name == '<stdout>':
        # the MMIF output goes to stdout, so prints made by the app code are sent to stderr instead
        with redirect_stdout(sys.stderr):
            out_mmif = clamsapp.annotate(in_data, **params)
    else:
        out_mmif = clamsapp.annotate(in_data, **params)
    args.OUT_MMIF_FILE.write(out_mmif)
31 changes: 10 additions & 21 deletions clams/mmif_utils/rewind.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,14 @@ def describe_argparser():
def prep_argparser(**kwargs):
parser = argparse.ArgumentParser(description=describe_argparser()[1],
formatter_class=argparse.RawDescriptionHelpFormatter, **kwargs)
parser.add_argument("mmif_file",
help="Path to the input MMIF file, or '-' to read from stdin.")
parser.add_argument("-o", '--output', default=None, metavar="PATH",
help="Path to the rewound MMIF output file. When not given, the rewound is printed to stdout.")
parser.add_argument("IN_MMIF_FILE",
nargs="?", type=argparse.FileType("r"),
default=None if sys.stdin.isatty() else sys.stdin,
help='input MMIF file path, or STDIN if `-` or not provided.')
parser.add_argument("OUT_MMIF_FILE",
nargs="?", type=argparse.FileType("w"),
default=sys.stdout,
help='output MMIF file path, or STDOUT if `-` or not provided.')
parser.add_argument("-p", '--pretty', action='store_true',
help="Pretty-print rewound MMIF")
parser.add_argument("-n", '--number', default="0", type=int,
Expand All @@ -92,7 +96,7 @@ def prep_argparser(**kwargs):


def main(args):
mmif_obj = mmif.Mmif(str(sys.stdin)) if args.mmif_file[0] == '-' else mmif.Mmif(open(args.mmif_file).read())
mmif_obj = mmif.Mmif(args.IN_MMIF_FILE.read())

if args.number == 0: # If user doesn't know how many views to rewind, give them choices.
choice = prompt_user(mmif_obj)
Expand All @@ -101,22 +105,7 @@ def main(args):
if not isinstance(choice, int) or choice <= 0:
raise ValueError(f"Only can rewind by a positive number of views. Got {choice}.")

if args.output:
# Check if the same file name exist in the path and avoid overwriting.
output_fp = P(args.output)
if output_fp.is_file():
parent = output_fp.parent
stem = output_fp.stem
suffix = output_fp.suffix
count = 1
while (parent / f"{stem}_{count}{suffix}").is_file():
count += 1
output_fp = parent / f"{stem}_{count}{suffix}"

out_f = open(output_fp, 'w')
else:
out_f = sys.stdout
out_f.write(rewind_mmif(mmif_obj, choice, args.mode == 'view').serialize(pretty=args.pretty))
args.OUT_MMIF_FILE.write(rewind_mmif(mmif_obj, choice, args.mode == 'view').serialize(pretty=args.pretty))


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions clams/restify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def post(self) -> Response:
try:
return self.json_to_response(self.cla.annotate(raw_data, **raw_params))
except Exception:
self.cla.logger.exception("Error in annotation")
return self.json_to_response(self.cla.record_error(raw_data, **raw_params).serialize(pretty=True), status=500)

put = post
10 changes: 9 additions & 1 deletion container/Containerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
FROM python:3.8-slim-buster
FROM python:3.8-slim-bookworm
LABEL org.opencontainers.image.description="clams-python image is a base image for CLAMS apps"

ARG clams_version
# https://github.com/openai/whisper/blob/ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab/whisper/__init__.py#L130
ENV XDG_CACHE_HOME='/cache'
# https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#hfhome
ENV HF_HOME="/cache/huggingface"
# https://pytorch.org/docs/stable/hub.html#where-are-my-downloaded-models-saved
ENV TORCH_HOME="/cache/torch"

RUN mkdir /cache && rm -rf /root/.cache && ln -s /cache /root/.cache
RUN apt-get update && apt-get install -y pkg-config
RUN pip install --no-cache-dir clams-python==$clams_version
Loading
Loading