From a1381b6c0be5d472b11594c81572d185f34b9da4 Mon Sep 17 00:00:00 2001 From: Hernan Grecco Date: Thu, 18 Jan 2024 01:47:14 -0300 Subject: [PATCH] Work on the formatter delegate 1. split into modules: plain (raw, compact, pretty), latex, html, full 2. added format_magnitude to all Formatters 3. format_ methods have an argument related to babel (it must always be there; other architectures lead to a multiplication of classes or a lot of overhead) 4. some tests were changed: - format_babel was using per (as in meter per second) for any format - ro was not a valid locale: should be ro_RO Note: there are still a few circular imports that were fixed in a caveman way in order to move forward. --- pint/compat.py | 9 +- pint/delegates/formatter/__init__.py | 4 +- pint/delegates/formatter/_helpers.py | 289 +++++++++++++++ pint/delegates/formatter/_unit_handlers.py | 175 +++++++++ pint/delegates/formatter/base_formatter.py | 265 -------------- pint/delegates/formatter/full.py | 154 ++++++++ pint/delegates/formatter/html.py | 111 ++++++ pint/delegates/formatter/latex.py | 240 ++++++++++++ pint/delegates/formatter/plain.py | 166 +++++++++ pint/facets/measurement/objects.py | 6 +- pint/formatting.py | 401 +-------------------- pint/testsuite/test_babel.py | 13 +- pint/util.py | 7 +- 13 files changed, 1178 insertions(+), 662 deletions(-) create mode 100644 pint/delegates/formatter/_helpers.py create mode 100644 pint/delegates/formatter/_unit_handlers.py delete mode 100644 pint/delegates/formatter/base_formatter.py create mode 100644 pint/delegates/formatter/full.py create mode 100644 pint/delegates/formatter/html.py create mode 100644 pint/delegates/formatter/latex.py create mode 100644 pint/delegates/formatter/plain.py diff --git a/pint/compat.py b/pint/compat.py index b01dcc7c0..6bbdf35af 100644 --- a/pint/compat.py +++ b/pint/compat.py @@ -46,7 +46,14 @@ else: from typing_extensions import Never # noqa -if sys.version_info >= (3, 12): + +if sys.version_info >= (3, 11): + from 
typing import Unpack # noqa +else: + from typing_extensions import Unpack # noqa + + +if sys.version_info >= (3, 13): from warnings import deprecated # noqa else: from typing_extensions import deprecated # noqa diff --git a/pint/delegates/formatter/__init__.py b/pint/delegates/formatter/__init__.py index 3954d69b7..84fdd8777 100644 --- a/pint/delegates/formatter/__init__.py +++ b/pint/delegates/formatter/__init__.py @@ -7,10 +7,10 @@ """ -from .base_formatter import BabelFormatter +from .full import MultipleFormatter -class Formatter(BabelFormatter): +class Formatter(MultipleFormatter): # TODO: this should derive from all relevant formaters to # reproduce the current behavior of Pint. pass diff --git a/pint/delegates/formatter/_helpers.py b/pint/delegates/formatter/_helpers.py new file mode 100644 index 000000000..d01977895 --- /dev/null +++ b/pint/delegates/formatter/_helpers.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +from typing import Iterable, Optional, Callable, Any +import warnings +from ...compat import Number +import re +from ...babel_names import _babel_lengths, _babel_units +from ...compat import babel_parse + +FORMATTER = Callable[ + [ + Any, + ], + str, +] + + +def formatter( + items: Iterable[tuple[str, Number]], + as_ratio: bool = True, + single_denominator: bool = False, + product_fmt: str = " * ", + division_fmt: str = " / ", + power_fmt: str = "{} ** {}", + parentheses_fmt: str = "({0})", + exp_call: FORMATTER = "{:n}".format, + locale: Optional[str] = None, + babel_length: str = "long", + babel_plural_form: str = "one", + sort: bool = True, +) -> str: + """Format a list of (name, exponent) pairs. + + Parameters + ---------- + items : list + a list of (name, exponent) pairs. + as_ratio : bool, optional + True to display as ratio, False as negative powers. (Default value = True) + single_denominator : bool, optional + all with terms with negative exponents are + collected together. 
(Default value = False) + product_fmt : str + the format used for multiplication. (Default value = " * ") + division_fmt : str + the format used for division. (Default value = " / ") + power_fmt : str + the format used for exponentiation. (Default value = "{} ** {}") + parentheses_fmt : str + the format used for parenthesis. (Default value = "({0})") + locale : str + the locale object as defined in babel. (Default value = None) + babel_length : str + the length of the translated unit, as defined in babel cldr. (Default value = "long") + babel_plural_form : str + the plural form, calculated as defined in babel. (Default value = "one") + exp_call : callable + (Default value = lambda x: f"{x:n}") + sort : bool, optional + True to sort the formatted units alphabetically (Default value = True) + + Returns + ------- + str + the formula as a string. + + """ + + if not items: + return "" + + if as_ratio: + fun = lambda x: exp_call(abs(x)) + else: + fun = exp_call + + pos_terms, neg_terms = [], [] + + if sort: + items = sorted(items) + for key, value in items: + if locale and babel_length and babel_plural_form and key in _babel_units: + _key = _babel_units[key] + locale = babel_parse(locale) + unit_patterns = locale._data["unit_patterns"] + compound_unit_patterns = locale._data["compound_unit_patterns"] + plural = "one" if abs(value) <= 0 else babel_plural_form + if babel_length not in _babel_lengths: + other_lengths = [ + _babel_length + for _babel_length in reversed(_babel_lengths) + if babel_length != _babel_length + ] + else: + other_lengths = [] + for _babel_length in [babel_length] + other_lengths: + pat = unit_patterns.get(_key, {}).get(_babel_length, {}).get(plural) + if pat is not None: + # Don't remove this positional! 
This is the format used in Babel + key = pat.replace("{0}", "").strip() + break + + tmp = compound_unit_patterns.get("per", {}).get(babel_length, division_fmt) + + try: + division_fmt = tmp.get("compound", division_fmt) + except AttributeError: + division_fmt = tmp + power_fmt = "{}{}" + exp_call = _pretty_fmt_exponent + if value == 1: + pos_terms.append(key) + elif value > 0: + pos_terms.append(power_fmt.format(key, fun(value))) + elif value == -1 and as_ratio: + neg_terms.append(key) + else: + neg_terms.append(power_fmt.format(key, fun(value))) + + if not as_ratio: + # Show as Product: positive * negative terms ** -1 + return _join(product_fmt, pos_terms + neg_terms) + + # Show as Ratio: positive terms / negative terms + pos_ret = _join(product_fmt, pos_terms) or "1" + + if not neg_terms: + return pos_ret + + if single_denominator: + neg_ret = _join(product_fmt, neg_terms) + if len(neg_terms) > 1: + neg_ret = parentheses_fmt.format(neg_ret) + else: + neg_ret = _join(division_fmt, neg_terms) + + return _join(division_fmt, [pos_ret, neg_ret]) + + +# Extract just the type from the specification mini-language: see +# http://docs.python.org/2/library/string.html#format-specification-mini-language +# We also add uS for uncertainties. +_BASIC_TYPES = frozenset("bcdeEfFgGnosxX%uS") + + +def _parse_spec(spec: str) -> str: + # TODO: provisional + from ...formatting import _FORMATTERS + + result = "" + for ch in reversed(spec): + if ch == "~" or ch in _BASIC_TYPES: + continue + elif ch in list(_FORMATTERS.keys()) + ["~"]: + if result: + raise ValueError("expected ':' after format specifier") + else: + result = ch + elif ch.isalpha(): + raise ValueError("Unknown conversion specified " + ch) + else: + break + return result + + +__JOIN_REG_EXP = re.compile(r"{\d*}") + + +def _join(fmt: str, iterable: Iterable[Any]) -> str: + """Join an iterable with the format specified in fmt. + + The format can be specified in two ways: + - PEP3101 format with two replacement fields (eg. 
'{} * {}') + - The concatenating string (eg. ' * ') + + Parameters + ---------- + fmt : str + + iterable : + + + Returns + ------- + str + + """ + if not iterable: + return "" + if not __JOIN_REG_EXP.search(fmt): + return fmt.join(iterable) + miter = iter(iterable) + first = next(miter) + for val in miter: + ret = fmt.format(first, val) + first = ret + return first + + +_PRETTY_EXPONENTS = "⁰¹²³⁴⁵⁶⁷⁸⁹" + + +def _pretty_fmt_exponent(num: Number) -> str: + """Format an number into a pretty printed exponent. + + Parameters + ---------- + num : int + + Returns + ------- + str + + """ + # unicode dot operator (U+22C5) looks like a superscript decimal + ret = f"{num:n}".replace("-", "⁻").replace(".", "\u22C5") + for n in range(10): + ret = ret.replace(str(n), _PRETTY_EXPONENTS[n]) + return ret + + +def extract_custom_flags(spec: str) -> str: + import re + + if not spec: + return "" + + # TODO: provisional + from ...formatting import _FORMATTERS + + # sort by length, with longer items first + known_flags = sorted(_FORMATTERS.keys(), key=len, reverse=True) + + flag_re = re.compile("(" + "|".join(known_flags + ["~"]) + ")") + custom_flags = flag_re.findall(spec) + + return "".join(custom_flags) + + +def remove_custom_flags(spec: str) -> str: + # TODO: provisional + from ...formatting import _FORMATTERS + + for flag in sorted(_FORMATTERS.keys(), key=len, reverse=True) + ["~"]: + if flag: + spec = spec.replace(flag, "") + return spec + + +def split_format( + spec: str, default: str, separate_format_defaults: bool = True +) -> tuple[str, str]: + mspec = remove_custom_flags(spec) + uspec = extract_custom_flags(spec) + + default_mspec = remove_custom_flags(default) + default_uspec = extract_custom_flags(default) + + if separate_format_defaults in (False, None): + # should we warn always or only if there was no explicit choice? + # Given that we want to eventually remove the flag again, I'd say yes? 
+ if spec and separate_format_defaults is None: + if not uspec and default_uspec: + warnings.warn( + ( + "The given format spec does not contain a unit formatter." + " Falling back to the builtin defaults, but in the future" + " the unit formatter specified in the `default_format`" + " attribute will be used instead." + ), + DeprecationWarning, + ) + if not mspec and default_mspec: + warnings.warn( + ( + "The given format spec does not contain a magnitude formatter." + " Falling back to the builtin defaults, but in the future" + " the magnitude formatter specified in the `default_format`" + " attribute will be used instead." + ), + DeprecationWarning, + ) + elif not spec: + mspec, uspec = default_mspec, default_uspec + else: + mspec = mspec or default_mspec + uspec = uspec or default_uspec + + return mspec, uspec diff --git a/pint/delegates/formatter/_unit_handlers.py b/pint/delegates/formatter/_unit_handlers.py new file mode 100644 index 000000000..b5d603b30 --- /dev/null +++ b/pint/delegates/formatter/_unit_handlers.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import functools +from typing import Iterable, TypeVar, Callable, TYPE_CHECKING, Literal, TypedDict + +from locale import getlocale, setlocale, LC_NUMERIC +from contextlib import contextmanager + +import locale + +from ...compat import Locale, babel_parse, Number + + +if TYPE_CHECKING: + from ...registry import UnitRegistry + from ...facets.plain import PlainUnit + +T = TypeVar("T") + + +def format_unit_no_magnitude( + measurement_unit: str, + use_plural: bool = True, + length: Literal["short", "long", "narrow"] = "long", + locale: Locale | str | None = locale.LC_NUMERIC, +) -> str | None: + """Format a value of a given unit. + + THIS IS TAKEN FROM BABEL format_unit. But + - No magnitude is returned in the string. + - If the unit is not found, the same is given. + - use_plural instead of value + + Values are formatted according to the locale's usual pluralization rules + and number formats. 
+ + >>> format_unit(12, 'length-meter', locale='ro_RO') + u'metri' + >>> format_unit(15.5, 'length-mile', locale='fi_FI') + u'mailia' + >>> format_unit(1200, 'pressure-millimeter-ofhg', locale='nb') + u'millimeter kvikks\\xf8lv' + >>> format_unit(270, 'ton', locale='en') + u'tons' + >>> format_unit(1234.5, 'kilogram', locale='ar_EG', numbering_system='default') + u'كيلوغرام' + + + The locale's usual pluralization rules are respected. + + >>> format_unit(1, 'length-meter', locale='ro_RO') + u'metru' + >>> format_unit(0, 'length-mile', locale='cy') + u'mi' + >>> format_unit(1, 'length-mile', locale='cy') + u'filltir' + >>> format_unit(3, 'length-mile', locale='cy') + u'milltir' + + >>> format_unit(15, 'length-horse', locale='fi') + Traceback (most recent call last): + ... + UnknownUnitError: length-horse is not a known unit in fi + + .. versionadded:: 2.2.0 + + :param value: the value to format. If this is a string, no number formatting will be attempted. + :param measurement_unit: the code of a measurement unit. + Known units can be found in the CLDR Unit Validity XML file: + https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml + :param length: "short", "long" or "narrow" + :param format: An optional format, as accepted by `format_decimal`. + :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
+ """ + locale = babel_parse(locale) + from babel.units import _find_unit_pattern, get_unit_name + + q_unit = _find_unit_pattern(measurement_unit, locale=locale) + if not q_unit: + return measurement_unit + + unit_patterns = locale._data["unit_patterns"][q_unit].get(length, {}) + + if use_plural: + plural_form = "other" + else: + plural_form = "one" + + if plural_form in unit_patterns: + return unit_patterns[plural_form].format("").replace("\xa0", "").strip() + + # Fall back to a somewhat bad representation. + # nb: This is marked as no-cover, as the current CLDR seemingly has no way for this to happen. + fallback_name = get_unit_name( + measurement_unit, length=length, locale=locale + ) # pragma: no cover + return f"{fallback_name or measurement_unit}" # pragma: no cover + + +def _unit_mapper( + units: Iterable[tuple[str, T]], + shortener: Callable[ + [ + str, + ], + str, + ], +) -> Iterable[tuple[str, T]]: + return map(lambda el: (shortener(el[0]), el[1]), units) + + +def short_form( + units: Iterable[tuple[str, T]], + registry: UnitRegistry, +) -> Iterable[tuple[str, T]]: + return _unit_mapper(units, registry.get_symbol) + + +def localized_form( + units: Iterable[tuple[str, T]], + use_plural: bool, + length: Literal["short", "long", "narrow"], + locale: Locale | str, +) -> Iterable[tuple[str, T]]: + mapper = functools.partial( + format_unit_no_magnitude, + use_plural=use_plural, + length=length, + locale=babel_parse(locale), + ) + + return _unit_mapper(units, mapper) + + +class BabelKwds(TypedDict): + use_plural: bool + length: Literal["short", "long", "narrow"] | None + locale: Locale | str | None + + +def format_compound_unit( + unit: PlainUnit, + spec: str = "", + use_plural: bool = False, + length: Literal["short", "long", "narrow"] | None = None, + locale: Locale | str | None = None, +) -> Iterable[tuple[str, Number]]: + registry = unit._REGISTRY + + out = unit._units.items() + + if "~" in spec: + out = short_form(out, registry) + + if locale is not None: + 
out = localized_form(out, use_plural, length or "long", locale) + + return out + + +@contextmanager +def override_locale(locale: str | Locale | None): + if locale is None: + yield + else: + prev_locale_string = getlocale(LC_NUMERIC) + if isinstance(locale, str): + setlocale(LC_NUMERIC, locale) + else: + setlocale(LC_NUMERIC, str(locale)) + yield + setlocale(LC_NUMERIC, prev_locale_string) diff --git a/pint/delegates/formatter/base_formatter.py b/pint/delegates/formatter/base_formatter.py deleted file mode 100644 index d15a7a6a6..000000000 --- a/pint/delegates/formatter/base_formatter.py +++ /dev/null @@ -1,265 +0,0 @@ -""" - pint.delegates.formatter.base_formatter - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Common class and function for all formatters. - :copyright: 2022 by Pint Authors, see AUTHORS for more details. - :license: BSD, see LICENSE for more details. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional, Any -import locale -from ...compat import babel_parse -import re -from ...util import UnitsContainer, iterable - -from ...compat import ndarray, np -from ...formatting import ( - _pretty_fmt_exponent, - extract_custom_flags, - format_unit, - ndarray_to_latex, - remove_custom_flags, - siunitx_format_unit, - split_format, -) - -if TYPE_CHECKING: - from ...facets.plain import PlainQuantity, PlainUnit, MagnitudeT - from ...compat import Locale - - -_EXP_PATTERN = re.compile(r"([0-9]\.?[0-9]*)e(-?)\+?0*([0-9]+)") - - -class BaseFormatter: - def format_quantity( - self, quantity: PlainQuantity[MagnitudeT], spec: str = "" - ) -> str: - # TODO Fill the proper functions - return str(quantity.magnitude) + " " + self.format_unit(quantity.units, spec) - - def format_unit(self, unit: PlainUnit, spec: str = "") -> str: - # TODO Fill the proper functions and discuss - # how to make it that _units is not accessible directly - return " ".join(k if v == 1 else f"{k} ** {v}" for k, v in unit._units.items()) - - -class BabelFormatter: - 
locale: Optional[Locale] = None - default_format: str = "" - - def set_locale(self, loc: Optional[str]) -> None: - """Change the locale used by default by `format_babel`. - - Parameters - ---------- - loc : str or None - None` (do not translate), 'sys' (detect the system locale) or a locale id string. - """ - if isinstance(loc, str): - if loc == "sys": - loc = locale.getdefaultlocale()[0] - - # We call babel parse to fail here and not in the formatting operation - babel_parse(loc) - - self.locale = loc - - def format_quantity( - self, quantity: PlainQuantity[MagnitudeT], spec: str = "" - ) -> str: - if self.locale is not None: - return self.format_quantity_babel(quantity, spec) - - registry = quantity._REGISTRY - - mspec, uspec = split_format( - spec, self.default_format, registry.separate_format_defaults - ) - - # If Compact is selected, do it at the beginning - if "#" in spec: - # TODO: don't replace '#' - mspec = mspec.replace("#", "") - uspec = uspec.replace("#", "") - obj = quantity.to_compact() - else: - obj = quantity - - del quantity - - if "L" in uspec: - allf = plain_allf = r"{}\ {}" - elif "H" in uspec: - allf = plain_allf = "{} {}" - if iterable(obj.magnitude): - # Use HTML table instead of plain text template for array-likes - allf = ( - "" - "" - "" - "" - "
Magnitude{}
Units{}
" - ) - else: - allf = plain_allf = "{} {}" - - if "Lx" in uspec: - # the LaTeX siunitx code - # TODO: add support for extracting options - opts = "" - ustr = siunitx_format_unit(obj.units._units, registry) - allf = r"\SI[%s]{{{}}}{{{}}}" % opts - else: - # Hand off to unit formatting - # TODO: only use `uspec` after completing the deprecation cycle - ustr = self.format_unit(obj.units, mspec + uspec) - - # mspec = remove_custom_flags(spec) - if "H" in uspec: - # HTML formatting - if hasattr(obj.magnitude, "_repr_html_"): - # If magnitude has an HTML repr, nest it within Pint's - mstr = obj.magnitude._repr_html_() - else: - if isinstance(obj.magnitude, ndarray): - # Use custom ndarray text formatting with monospace font - formatter = f"{{:{mspec}}}" - # Need to override for scalars, which are detected as iterable, - # and don't respond to printoptions. - if obj.magnitude.ndim == 0: - allf = plain_allf = "{} {}" - mstr = formatter.format(obj.magnitude) - else: - with np.printoptions( - formatter={"float_kind": formatter.format} - ): - mstr = ( - "
"
-                                + format(obj.magnitude).replace("\n", "
") - + "
" - ) - elif not iterable(obj.magnitude): - # Use plain text for scalars - mstr = format(obj.magnitude, mspec) - else: - # Use monospace font for other array-likes - mstr = ( - "
"
-                        + format(obj.magnitude, mspec).replace("\n", "
") - + "
" - ) - elif isinstance(obj.magnitude, ndarray): - if "L" in uspec: - # Use ndarray LaTeX special formatting - mstr = ndarray_to_latex(obj.magnitude, mspec) - else: - # Use custom ndarray text formatting--need to handle scalars differently - # since they don't respond to printoptions - formatter = f"{{:{mspec}}}" - if obj.magnitude.ndim == 0: - mstr = formatter.format(obj.magnitude) - else: - with np.printoptions(formatter={"float_kind": formatter.format}): - mstr = format(obj.magnitude).replace("\n", "") - else: - mstr = format(obj.magnitude, mspec).replace("\n", "") - - if "L" in uspec and "Lx" not in uspec: - mstr = _EXP_PATTERN.sub(r"\1\\times 10^{\2\3}", mstr) - elif "H" in uspec or "P" in uspec: - m = _EXP_PATTERN.match(mstr) - _exp_formatter = ( - _pretty_fmt_exponent if "P" in uspec else lambda s: f"{s}" - ) - if m: - exp = int(m.group(2) + m.group(3)) - mstr = _EXP_PATTERN.sub(r"\1×10" + _exp_formatter(exp), mstr) - - if allf == plain_allf and ustr.startswith("1 /"): - # Write e.g. 
"3 / s" instead of "3 1 / s" - ustr = ustr[2:] - return allf.format(mstr, ustr).strip() - - def format_quantity_babel( - self, quantity: PlainQuantity[MagnitudeT], spec: str = "", **kwspec: Any - ) -> str: - spec = spec or self.default_format - - # standard cases - if "#" in spec: - spec = spec.replace("#", "") - obj = quantity.to_compact() - else: - obj = quantity - - del quantity - - kwspec = kwspec.copy() - if "length" in kwspec: - kwspec["babel_length"] = kwspec.pop("length") - - loc = kwspec.get("locale", self.locale) - if loc is None: - raise ValueError("Provide a `locale` value to localize translation.") - - kwspec["locale"] = babel_parse(loc) - kwspec["babel_plural_form"] = kwspec["locale"].plural_form(obj.magnitude) - return "{} {}".format( - format(obj.magnitude, remove_custom_flags(spec)), - self.format_unit_babel(obj.units, spec, **kwspec), - ).replace("\n", "") - - def format_unit(self, unit: PlainUnit, spec: str = "") -> str: - registry = unit._REGISTRY - - _, uspec = split_format( - spec, self.default_format, registry.separate_format_defaults - ) - if "~" in uspec: - if not unit._units: - return "" - units = UnitsContainer( - {registry._get_symbol(key): value for key, value in unit._units.items()} - ) - uspec = uspec.replace("~", "") - else: - units = unit._units - - return format_unit(units, uspec, registry=registry) - - def format_unit_babel( - self, - unit: PlainUnit, - spec: str = "", - locale: Optional[Locale] = None, - **kwspec: Any, - ) -> str: - spec = spec or extract_custom_flags(self.default_format) - - if "~" in spec: - if unit.dimensionless: - return "" - units = UnitsContainer( - { - unit._REGISTRY._get_symbol(key): value - for key, value in unit._units.items() - } - ) - spec = spec.replace("~", "") - else: - units = unit._units - - locale = self.locale if locale is None else locale - - if locale is None: - raise ValueError("Provide a `locale` value to localize translation.") - else: - kwspec["locale"] = babel_parse(locale) - - if 
"registry" not in kwspec: - kwspec["registry"] = unit._REGISTRY - - return format_unit(units, spec, **kwspec) diff --git a/pint/delegates/formatter/full.py b/pint/delegates/formatter/full.py new file mode 100644 index 000000000..f5849225b --- /dev/null +++ b/pint/delegates/formatter/full.py @@ -0,0 +1,154 @@ +""" + pint.delegates.formatter.base_formatter + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Common class and function for all formatters. + :copyright: 2022 by Pint Authors, see AUTHORS for more details. + :license: BSD, see LICENSE for more details. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, Optional, Any +import locale +from ...compat import babel_parse, Unpack +from ...util import iterable + +from ..._typing import Magnitude +from .html import HTMLFormatter +from .latex import LatexFormatter, SIunitxFormatter +from .plain import RawFormatter, CompactFormatter, PrettyFormatter +from ._unit_handlers import BabelKwds + +if TYPE_CHECKING: + from ...facets.plain import PlainQuantity, PlainUnit, MagnitudeT + from ...compat import Locale + + +class MultipleFormatter: + _formatters: dict[str, Any] = {} + + default_format: str = "" + + locale: Optional[Locale] = None + babel_length: Literal["short", "long", "narrow"] = "long" + + def set_locale(self, loc: Optional[str]) -> None: + """Change the locale used by default by `format_babel`. + + Parameters + ---------- + loc : str or None + None (do not translate), 'sys' (detect the system locale) or a locale id string. 
+ """ + if isinstance(loc, str): + if loc == "sys": + loc = locale.getdefaultlocale()[0] + + # We call babel parse to fail here and not in the formatting operation + babel_parse(loc) + + self.locale = loc + + def __init__(self) -> None: + self._formatters = {} + self._formatters["raw"] = RawFormatter() + self._formatters["H"] = HTMLFormatter() + self._formatters["P"] = PrettyFormatter() + self._formatters["Lx"] = SIunitxFormatter() + self._formatters["L"] = LatexFormatter() + self._formatters["C"] = CompactFormatter() + + def get_formatter(self, spec: str): + if spec == "": + return self._formatters["raw"] + for k, v in self._formatters.items(): + if k in spec: + return v + return self._formatters["raw"] + + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + return self.get_formatter(mspec).format_magnitude( + magnitude, mspec, **babel_kwds + ) + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + return self.get_formatter(uspec).format_unit(unit, uspec, **babel_kwds) + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + spec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + spec = spec or self.default_format + # If Compact is selected, do it at the beginning + if "#" in spec: + spec = spec.replace("#", "") + obj = quantity.to_compact() + else: + obj = quantity + + del quantity + + use_plural = obj.magnitude > 1 + if iterable(use_plural): + use_plural = True + + return self.get_formatter(spec).format_quantity( + obj, + spec, + use_plural=babel_kwds.get("use_plural", use_plural), + length=babel_kwds.get("length", self.babel_length), + locale=babel_kwds.get("locale", self.locale), + ) + + ####################################### + # This is for backwards compatibility + ####################################### + + def format_unit_babel( + self, + unit: PlainUnit, + spec: str = "", + length: Optional[Literal["short", 
"long", "narrow"]] = "long", + locale: Optional[Locale] = None, + ) -> str: + if self.locale is None and locale is None: + raise ValueError( + "format_babel requires a locale argumente if the Formatter locale is not set." + ) + + return self.format_unit( + unit, + spec or self.default_format, + use_plural=False, + length=length or self.babel_length, + locale=locale or self.locale, + ) + + def format_quantity_babel( + self, + quantity: PlainQuantity[MagnitudeT], + spec: str = "", + length: Literal["short", "long", "narrow"] = "long", + locale: Optional[Locale] = None, + ) -> str: + if self.locale is None and locale is None: + raise ValueError( + "format_babel requires a locale argumente if the Formatter locale is not set." + ) + + use_plural = quantity.magnitude > 1 + if iterable(use_plural): + use_plural = True + return self.format_quantity( + quantity, + spec or self.default_format, + use_plural=use_plural, + length=length or self.babel_length, + locale=locale or self.locale, + ) diff --git a/pint/delegates/formatter/html.py b/pint/delegates/formatter/html.py new file mode 100644 index 000000000..eadb41f44 --- /dev/null +++ b/pint/delegates/formatter/html.py @@ -0,0 +1,111 @@ +""" + pint.delegates.formatter.base_formatter + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Common class and function for all formatters. + :copyright: 2022 by Pint Authors, see AUTHORS for more details. + :license: BSD, see LICENSE for more details. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING +import re +from ...util import iterable +from ...compat import ndarray, np, Unpack +from ._helpers import ( + split_format, + formatter, +) + +from ..._typing import Magnitude +from ._unit_handlers import BabelKwds, format_compound_unit + +if TYPE_CHECKING: + from ...facets.plain import PlainQuantity, PlainUnit, MagnitudeT + + +_EXP_PATTERN = re.compile(r"([0-9]\.?[0-9]*)e(-?)\+?0*([0-9]+)") + + +class HTMLFormatter: + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + if hasattr(magnitude, "_repr_html_"): + # If magnitude has an HTML repr, nest it within Pint's + mstr = magnitude._repr_html_() # type: ignore + assert isinstance(mstr, str) + else: + if isinstance(magnitude, ndarray): + # Use custom ndarray text formatting with monospace font + formatter = f"{{:{mspec}}}" + # Need to override for scalars, which are detected as iterable, + # and don't respond to printoptions. + if magnitude.ndim == 0: + mstr = formatter.format(magnitude) + else: + with np.printoptions(formatter={"float_kind": formatter.format}): + mstr = ( + "
" + format(magnitude).replace("\n", "
") + "
" + ) + elif not iterable(magnitude): + # Use plain text for scalars + mstr = format(magnitude, mspec) + else: + # Use monospace font for other array-likes + mstr = ( + "
" + format(magnitude, mspec).replace("\n", "
") + "
" + ) + + m = _EXP_PATTERN.match(mstr) + _exp_formatter = lambda s: f"{s}" + + if m: + exp = int(m.group(2) + m.group(3)) + mstr = _EXP_PATTERN.sub(r"\1×10" + _exp_formatter(exp), mstr) + + return mstr + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + units = format_compound_unit(unit, uspec, **babel_kwds) + + return formatter( + units, + as_ratio=True, + single_denominator=True, + product_fmt=r" ", + division_fmt=r"{}/{}", + power_fmt=r"{}{}", + parentheses_fmt=r"({})", + ) + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + qspec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + registry = quantity._REGISTRY + + mspec, uspec = split_format( + qspec, registry.default_format, registry.separate_format_defaults + ) + + if iterable(quantity.magnitude): + # Use HTML table instead of plain text template for array-likes + joint_fstring = ( + "" + "" + "" + "" + "
Magnitude{}
Units{}
" + ) + else: + joint_fstring = "{} {}" + + return joint_fstring.format( + self.format_magnitude(quantity.magnitude, mspec, **babel_kwds), + self.format_unit(quantity.units, uspec, **babel_kwds), + ) diff --git a/pint/delegates/formatter/latex.py b/pint/delegates/formatter/latex.py new file mode 100644 index 000000000..9bd7cf1d8 --- /dev/null +++ b/pint/delegates/formatter/latex.py @@ -0,0 +1,240 @@ +""" + pint.delegates.formatter.base_formatter + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Common class and function for all formatters. + :copyright: 2022 by Pint Authors, see AUTHORS for more details. + :license: BSD, see LICENSE for more details. +""" + +from __future__ import annotations +import functools + +from typing import TYPE_CHECKING, Any, Iterable, Union + +import re +from ._helpers import split_format, formatter, FORMATTER + +from ..._typing import Magnitude +from ...compat import ndarray, Unpack, Number +from ._unit_handlers import BabelKwds, override_locale, format_compound_unit + +if TYPE_CHECKING: + from ...facets.plain import PlainQuantity, PlainUnit, MagnitudeT + from ...util import ItMatrix + from ...registry import UnitRegistry + + +def vector_to_latex( + vec: Iterable[Any], fmtfun: FORMATTER | str = "{:.2n}".format +) -> str: + return matrix_to_latex([vec], fmtfun) + + +def matrix_to_latex(matrix: ItMatrix, fmtfun: FORMATTER | str = "{:.2n}".format) -> str: + ret: list[str] = [] + + for row in matrix: + ret += [" & ".join(fmtfun(f) for f in row)] + + return r"\begin{pmatrix}%s\end{pmatrix}" % "\\\\ \n".join(ret) + + +def ndarray_to_latex_parts( + ndarr, fmtfun: FORMATTER = "{:.2n}".format, dim: tuple[int, ...] 
= tuple() +): + if isinstance(fmtfun, str): + fmtfun = fmtfun.format + + if ndarr.ndim == 0: + _ndarr = ndarr.reshape(1) + return [vector_to_latex(_ndarr, fmtfun)] + if ndarr.ndim == 1: + return [vector_to_latex(ndarr, fmtfun)] + if ndarr.ndim == 2: + return [matrix_to_latex(ndarr, fmtfun)] + else: + ret = [] + if ndarr.ndim == 3: + header = ("arr[%s," % ",".join("%d" % d for d in dim)) + "%d,:,:]" + for elno, el in enumerate(ndarr): + ret += [header % elno + " = " + matrix_to_latex(el, fmtfun)] + else: + for elno, el in enumerate(ndarr): + ret += ndarray_to_latex_parts(el, fmtfun, dim + (elno,)) + + return ret + + +def ndarray_to_latex( + ndarr, fmtfun: FORMATTER | str = "{:.2n}".format, dim: tuple[int, ...] = tuple() +) -> str: + return "\n".join(ndarray_to_latex_parts(ndarr, fmtfun, dim)) + + +def latex_escape(string: str) -> str: + """ + Prepend characters that have a special meaning in LaTeX with a backslash. + """ + return functools.reduce( + lambda s, m: re.sub(m[0], m[1], s), + ( + (r"[\\]", r"\\textbackslash "), + (r"[~]", r"\\textasciitilde "), + (r"[\^]", r"\\textasciicircum "), + (r"([&%$#_{}])", r"\\\1"), + ), + str(string), + ) + + +def siunitx_format_unit( + units: Iterable[tuple[str, Number]], registry: UnitRegistry +) -> str: + """Returns LaTeX code for the unit that can be put into an siunitx command.""" + + def _tothe(power: Union[int, float]) -> str: + if isinstance(power, int) or (isinstance(power, float) and power.is_integer()): + if power == 1: + return "" + elif power == 2: + return r"\squared" + elif power == 3: + return r"\cubed" + else: + return rf"\tothe{{{int(power):d}}}" + else: + # limit float powers to 3 decimal places + return rf"\tothe{{{power:.3f}}}".rstrip("0") + + lpos = [] + lneg = [] + # loop through all units in the container + for unit, power in sorted(units): + # remove unit prefix if it exists + # siunitx supports \prefix commands + + lpick = lpos if power >= 0 else lneg + prefix = None + # TODO: fix this to be fore 
efficient and detect also aliases. + for p in registry._prefixes.values(): + p = str(p.name) + if len(p) > 0 and unit.find(p) == 0: + prefix = p + unit = unit.replace(prefix, "", 1) + + if power < 0: + lpick.append(r"\per") + if prefix is not None: + lpick.append(rf"\{prefix}") + lpick.append(rf"\{unit}") + lpick.append(rf"{_tothe(abs(power))}") + + return "".join(lpos) + "".join(lneg) + + +_EXP_PATTERN = re.compile(r"([0-9]\.?[0-9]*)e(-?)\+?0*([0-9]+)") + + +class LatexFormatter: + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + with override_locale(babel_kwds.get("locale", None)): + if isinstance(magnitude, ndarray): + mstr = ndarray_to_latex(magnitude, mspec or "n") + else: + mstr = format(magnitude, mspec or "n").replace("\n", "") + + mstr = _EXP_PATTERN.sub(r"\1\\times 10^{\2\3}", mstr) + + return mstr + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + units = format_compound_unit(unit, uspec, **babel_kwds) + + preprocessed = {rf"\mathrm{{{latex_escape(u)}}}": p for u, p in units} + formatted = formatter( + preprocessed.items(), + as_ratio=True, + single_denominator=True, + product_fmt=r" \cdot ", + division_fmt=r"\frac[{}][{}]", + power_fmt="{}^[{}]", + parentheses_fmt=r"\left({}\right)", + ) + return formatted.replace("[", "{").replace("]", "}") + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + qspec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + registry = quantity._REGISTRY + + mspec, uspec = split_format( + qspec, registry.default_format, registry.separate_format_defaults + ) + + joint_fstring = r"{}\ {}" + + return joint_fstring.format( + self.format_magnitude(quantity.magnitude, mspec, **babel_kwds), + self.format_unit(quantity.units, uspec, **babel_kwds), + ) + + +class SIunitxFormatter: + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) 
-> str: + with override_locale(babel_kwds.get("locale", None)): + if isinstance(magnitude, ndarray): + mstr = ndarray_to_latex(magnitude, mspec or "n") + else: + mstr = format(magnitude, mspec or "n").replace("\n", "") + + mstr = _EXP_PATTERN.sub(r"\1\\times 10^{\2\3}", mstr) + + return mstr + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + registry = unit._REGISTRY + if registry is None: + raise ValueError( + "Can't format as siunitx without a registry." + " This is usually triggered when formatting a instance" + ' of the internal `UnitsContainer` with a spec of `"Lx"`' + " and might indicate a bug in `pint`." + ) + + # TODO: not sure if I should call format_compound_unit here. + # siunitx_format_unit requires certain specific names? + + units = format_compound_unit(unit, uspec, **babel_kwds) + + formatted = siunitx_format_unit(units, registry) + return rf"\si[]{{{formatted}}}" + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + qspec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + registry = quantity._REGISTRY + + mspec, uspec = split_format( + qspec, registry.default_format, registry.separate_format_defaults + ) + + joint_fstring = r"{}\ {}" + + return joint_fstring.format( + self.format_magnitude(quantity.magnitude, mspec, **babel_kwds), + self.format_unit(quantity.units, uspec, **babel_kwds), + ) diff --git a/pint/delegates/formatter/plain.py b/pint/delegates/formatter/plain.py new file mode 100644 index 000000000..747c16f10 --- /dev/null +++ b/pint/delegates/formatter/plain.py @@ -0,0 +1,166 @@ +""" + pint.delegates.formatter.base_formatter + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Common class and function for all formatters. + :copyright: 2022 by Pint Authors, see AUTHORS for more details. + :license: BSD, see LICENSE for more details. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING +import re +from ...compat import ndarray, np, Unpack +from ._helpers import ( + _pretty_fmt_exponent, + split_format, + formatter, +) + +from ..._typing import Magnitude + +from ._unit_handlers import format_compound_unit, BabelKwds, override_locale + +if TYPE_CHECKING: + from ...facets.plain import PlainQuantity, PlainUnit, MagnitudeT + + +_EXP_PATTERN = re.compile(r"([0-9]\.?[0-9]*)e(-?)\+?0*([0-9]+)") + + +class RawFormatter: + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + with override_locale(babel_kwds.get("locale", None)): + return format(magnitude, mspec or "n") + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + units = format_compound_unit(unit, uspec, **babel_kwds) + + return " ".join(k if v == 1 else f"{k} ** {v}" for k, v in units) + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + qspec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + registry = quantity._REGISTRY + + mspec, uspec = split_format( + qspec, registry.default_format, registry.separate_format_defaults + ) + + joint_fstring = "{} {}" + + return joint_fstring.format( + self.format_magnitude(quantity.magnitude, mspec, **babel_kwds), + self.format_unit(quantity.units, uspec, **babel_kwds), + ) + + +class CompactFormatter: + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + with override_locale(babel_kwds.get("locale", None)): + return format(magnitude, mspec or "n") + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + units = format_compound_unit(unit, uspec, **babel_kwds) + + return formatter( + units, + as_ratio=True, + single_denominator=False, + product_fmt="*", # TODO: Should this just be ''? 
+ division_fmt="/", + power_fmt="{}**{}", + parentheses_fmt=r"({})", + ) + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + qspec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + registry = quantity._REGISTRY + + mspec, uspec = split_format( + qspec, registry.default_format, registry.separate_format_defaults + ) + + joint_fstring = "{} {}" + + return joint_fstring.format( + self.format_magnitude(quantity.magnitude, mspec, **babel_kwds), + self.format_unit(quantity.units, uspec, **babel_kwds), + ) + + +class PrettyFormatter: + def format_magnitude( + self, magnitude: Magnitude, mspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + with override_locale(babel_kwds.get("locale", None)): + if isinstance(magnitude, ndarray): + # Use custom ndarray text formatting--need to handle scalars differently + # since they don't respond to printoptions + formatter = f"{{:{mspec}}}" + if magnitude.ndim == 0: + mstr = format(magnitude, mspec or "n") + else: + formatter = f"{{:{mspec or 'n'}}}" + with np.printoptions(formatter={"float_kind": formatter.format}): + mstr = format(magnitude).replace("\n", "") + else: + mstr = format(magnitude, mspec or "n").replace("\n", "") + + m = _EXP_PATTERN.match(mstr) + + if m: + exp = int(m.group(2) + m.group(3)) + mstr = _EXP_PATTERN.sub(r"\1×10" + _pretty_fmt_exponent(exp), mstr) + + return mstr + + def format_unit( + self, unit: PlainUnit, uspec: str = "", **babel_kwds: Unpack[BabelKwds] + ) -> str: + units = format_compound_unit(unit, uspec, **babel_kwds) + + return formatter( + units, + as_ratio=True, + single_denominator=False, + product_fmt="·", + division_fmt="/", + power_fmt="{}{}", + parentheses_fmt="({})", + exp_call=_pretty_fmt_exponent, + ) + + def format_quantity( + self, + quantity: PlainQuantity[MagnitudeT], + qspec: str = "", + **babel_kwds: Unpack[BabelKwds], + ) -> str: + registry = quantity._REGISTRY + + mspec, uspec = split_format( + qspec, registry.default_format, 
registry.separate_format_defaults + ) + + joint_fstring = "{} {}" + + return joint_fstring.format( + self.format_magnitude(quantity.magnitude, mspec, **babel_kwds), + self.format_unit(quantity.units, uspec, **babel_kwds), + ) diff --git a/pint/facets/measurement/objects.py b/pint/facets/measurement/objects.py index 4dd09b584..72d0b4526 100644 --- a/pint/facets/measurement/objects.py +++ b/pint/facets/measurement/objects.py @@ -13,7 +13,6 @@ from typing import Generic from ...compat import ufloat -from ...formatting import _FORMATS, extract_custom_flags, siunitx_format_unit from ..plain import PlainQuantity, PlainUnit, MagnitudeT MISSING = object() @@ -109,6 +108,9 @@ def __str__(self): def __format__(self, spec): spec = spec or self._REGISTRY.default_format + # TODO: provisional + from ...formatting import _FORMATS, extract_custom_flags, siunitx_format_unit + # special cases if "Lx" in spec: # the LaTeX siunitx code # the uncertainties module supports formatting @@ -138,7 +140,7 @@ def __format__(self, spec): # Also, SIunitx doesn't accept parentheses, which uncs uses with # scientific notation ('e' or 'E' and sometimes 'g' or 'G'). 
mstr = mstr.replace("(", "").replace(")", " ") - ustr = siunitx_format_unit(self.units._units, self._REGISTRY) + ustr = siunitx_format_unit(self.units._units.items(), self._REGISTRY) return rf"\SI{opts}{{{mstr}}}{{{ustr}}}" # standard cases diff --git a/pint/formatting.py b/pint/formatting.py index b00b771c7..39c6156e0 100644 --- a/pint/formatting.py +++ b/pint/formatting.py @@ -10,19 +10,24 @@ from __future__ import annotations -import functools -import re -import warnings -from typing import Callable, Any, TYPE_CHECKING, TypeVar, Optional, Union -from collections.abc import Iterable -from numbers import Number +from typing import Callable, Any, TYPE_CHECKING, TypeVar -from .babel_names import _babel_lengths, _babel_units -from .compat import babel_parse, HAS_BABEL +from .compat import HAS_BABEL + + +# Backwards compatiblity stuff +from .delegates.formatter.latex import ( + latex_escape, + siunitx_format_unit, +) # noqa: F401 +from .delegates.formatter._helpers import ( + formatter, + _pretty_fmt_exponent, +) # noqa: F401 if TYPE_CHECKING: from .registry import UnitRegistry - from .util import ItMatrix, UnitsContainer + from .util import UnitsContainer if HAS_BABEL: import babel @@ -32,69 +37,6 @@ Locale = TypeVar("Locale") -__JOIN_REG_EXP = re.compile(r"{\d*}") - -FORMATTER = Callable[ - [ - Any, - ], - str, -] - - -def _join(fmt: str, iterable: Iterable[Any]) -> str: - """Join an iterable with the format specified in fmt. - - The format can be specified in two ways: - - PEP3101 format with two replacement fields (eg. '{} * {}') - - The concatenating string (eg. 
' * ') - - Parameters - ---------- - fmt : str - - iterable : - - - Returns - ------- - str - - """ - if not iterable: - return "" - if not __JOIN_REG_EXP.search(fmt): - return fmt.join(iterable) - miter = iter(iterable) - first = next(miter) - for val in miter: - ret = fmt.format(first, val) - first = ret - return first - - -_PRETTY_EXPONENTS = "⁰¹²³⁴⁵⁶⁷⁸⁹" - - -def _pretty_fmt_exponent(num: Number) -> str: - """Format an number into a pretty printed exponent. - - Parameters - ---------- - num : int - - Returns - ------- - str - - """ - # unicode dot operator (U+22C5) looks like a superscript decimal - ret = f"{num:n}".replace("-", "⁻").replace(".", "\u22C5") - for n in range(10): - ret = ret.replace(str(n), _PRETTY_EXPONENTS[n]) - return ret - - #: _FORMATS maps format specifications to the corresponding argument set to #: formatter(). _FORMATS: dict[str, dict[str, Any]] = { @@ -201,22 +143,6 @@ def format_pretty(unit: UnitsContainer, registry: UnitRegistry, **options) -> st ) -def latex_escape(string: str) -> str: - """ - Prepend characters that have a special meaning in LaTeX with a backslash. - """ - return functools.reduce( - lambda s, m: re.sub(m[0], m[1], s), - ( - (r"[\\]", r"\\textbackslash "), - (r"[~]", r"\\textasciitilde "), - (r"[\^]", r"\\textasciicircum "), - (r"([&%$#_{}])", r"\\\1"), - ), - str(string), - ) - - @register_unit_format("L") def format_latex(unit: UnitsContainer, registry: UnitRegistry, **options) -> str: preprocessed = {rf"\mathrm{{{latex_escape(u)}}}": p for u, p in unit.items()} @@ -245,7 +171,7 @@ def format_latex_siunitx( " and might indicate a bug in `pint`." 
) - formatted = siunitx_format_unit(unit, registry) + formatted = siunitx_format_unit(unit.items(), registry) return rf"\si[]{{{formatted}}}" @@ -291,151 +217,6 @@ def format_compact(unit: UnitsContainer, registry: UnitRegistry, **options) -> s ) -def formatter( - items: Iterable[tuple[str, Number]], - as_ratio: bool = True, - single_denominator: bool = False, - product_fmt: str = " * ", - division_fmt: str = " / ", - power_fmt: str = "{} ** {}", - parentheses_fmt: str = "({0})", - exp_call: FORMATTER = "{:n}".format, - locale: Optional[str] = None, - babel_length: str = "long", - babel_plural_form: str = "one", - sort: bool = True, -) -> str: - """Format a list of (name, exponent) pairs. - - Parameters - ---------- - items : list - a list of (name, exponent) pairs. - as_ratio : bool, optional - True to display as ratio, False as negative powers. (Default value = True) - single_denominator : bool, optional - all with terms with negative exponents are - collected together. (Default value = False) - product_fmt : str - the format used for multiplication. (Default value = " * ") - division_fmt : str - the format used for division. (Default value = " / ") - power_fmt : str - the format used for exponentiation. (Default value = "{} ** {}") - parentheses_fmt : str - the format used for parenthesis. (Default value = "({0})") - locale : str - the locale object as defined in babel. (Default value = None) - babel_length : str - the length of the translated unit, as defined in babel cldr. (Default value = "long") - babel_plural_form : str - the plural form, calculated as defined in babel. (Default value = "one") - exp_call : callable - (Default value = lambda x: f"{x:n}") - sort : bool, optional - True to sort the formatted units alphabetically (Default value = True) - - Returns - ------- - str - the formula as a string. 
- - """ - - if not items: - return "" - - if as_ratio: - fun = lambda x: exp_call(abs(x)) - else: - fun = exp_call - - pos_terms, neg_terms = [], [] - - if sort: - items = sorted(items) - for key, value in items: - if locale and babel_length and babel_plural_form and key in _babel_units: - _key = _babel_units[key] - locale = babel_parse(locale) - unit_patterns = locale._data["unit_patterns"] - compound_unit_patterns = locale._data["compound_unit_patterns"] - plural = "one" if abs(value) <= 0 else babel_plural_form - if babel_length not in _babel_lengths: - other_lengths = [ - _babel_length - for _babel_length in reversed(_babel_lengths) - if babel_length != _babel_length - ] - else: - other_lengths = [] - for _babel_length in [babel_length] + other_lengths: - pat = unit_patterns.get(_key, {}).get(_babel_length, {}).get(plural) - if pat is not None: - # Don't remove this positional! This is the format used in Babel - key = pat.replace("{0}", "").strip() - break - - tmp = compound_unit_patterns.get("per", {}).get(babel_length, division_fmt) - - try: - division_fmt = tmp.get("compound", division_fmt) - except AttributeError: - division_fmt = tmp - power_fmt = "{}{}" - exp_call = _pretty_fmt_exponent - if value == 1: - pos_terms.append(key) - elif value > 0: - pos_terms.append(power_fmt.format(key, fun(value))) - elif value == -1 and as_ratio: - neg_terms.append(key) - else: - neg_terms.append(power_fmt.format(key, fun(value))) - - if not as_ratio: - # Show as Product: positive * negative terms ** -1 - return _join(product_fmt, pos_terms + neg_terms) - - # Show as Ratio: positive terms / negative terms - pos_ret = _join(product_fmt, pos_terms) or "1" - - if not neg_terms: - return pos_ret - - if single_denominator: - neg_ret = _join(product_fmt, neg_terms) - if len(neg_terms) > 1: - neg_ret = parentheses_fmt.format(neg_ret) - else: - neg_ret = _join(division_fmt, neg_terms) - - return _join(division_fmt, [pos_ret, neg_ret]) - - -# Extract just the type from the 
specification mini-language: see -# http://docs.python.org/2/library/string.html#format-specification-mini-language -# We also add uS for uncertainties. -_BASIC_TYPES = frozenset("bcdeEfFgGnosxX%uS") - - -def _parse_spec(spec: str) -> str: - result = "" - for ch in reversed(spec): - if ch == "~" or ch in _BASIC_TYPES: - continue - elif ch in list(_FORMATTERS.keys()) + ["~"]: - if result: - raise ValueError("expected ':' after format specifier") - else: - result = ch - elif ch.isalpha(): - raise ValueError("Unknown conversion specified " + ch) - else: - break - return result - - def format_unit(unit, spec: str, registry=None, **options): # registry may be None to allow formatting `UnitsContainer` objects # in that case, the spec may not be "Lx" @@ -454,155 +235,3 @@ def format_unit(unit, spec: str, registry=None, **options): raise ValueError(f"Unknown conversion specified: {spec}") return fmt(unit, registry=registry, **options) - - -def siunitx_format_unit(units: UnitsContainer, registry) -> str: - """Returns LaTeX code for the unit that can be put into an siunitx command.""" - - def _tothe(power: Union[int, float]) -> str: - if isinstance(power, int) or (isinstance(power, float) and power.is_integer()): - if power == 1: - return "" - elif power == 2: - return r"\squared" - elif power == 3: - return r"\cubed" - else: - return rf"\tothe{{{int(power):d}}}" - else: - # limit float powers to 3 decimal places - return rf"\tothe{{{power:.3f}}}".rstrip("0") - - lpos = [] - lneg = [] - # loop through all units in the container - for unit, power in sorted(units.items()): - # remove unit prefix if it exists - # siunitx supports \prefix commands - - lpick = lpos if power >= 0 else lneg - prefix = None - # TODO: fix this to be fore efficient and detect also aliases. 
- for p in registry._prefixes.values(): - p = str(p.name) - if len(p) > 0 and unit.find(p) == 0: - prefix = p - unit = unit.replace(prefix, "", 1) - - if power < 0: - lpick.append(r"\per") - if prefix is not None: - lpick.append(rf"\{prefix}") - lpick.append(rf"\{unit}") - lpick.append(rf"{_tothe(abs(power))}") - - return "".join(lpos) + "".join(lneg) - - -def extract_custom_flags(spec: str) -> str: - import re - - if not spec: - return "" - - # sort by length, with longer items first - known_flags = sorted(_FORMATTERS.keys(), key=len, reverse=True) - - flag_re = re.compile("(" + "|".join(known_flags + ["~"]) + ")") - custom_flags = flag_re.findall(spec) - - return "".join(custom_flags) - - -def remove_custom_flags(spec: str) -> str: - for flag in sorted(_FORMATTERS.keys(), key=len, reverse=True) + ["~"]: - if flag: - spec = spec.replace(flag, "") - return spec - - -def split_format( - spec: str, default: str, separate_format_defaults: bool = True -) -> tuple[str, str]: - mspec = remove_custom_flags(spec) - uspec = extract_custom_flags(spec) - - default_mspec = remove_custom_flags(default) - default_uspec = extract_custom_flags(default) - - if separate_format_defaults in (False, None): - # should we warn always or only if there was no explicit choice? - # Given that we want to eventually remove the flag again, I'd say yes? - if spec and separate_format_defaults is None: - if not uspec and default_uspec: - warnings.warn( - ( - "The given format spec does not contain a unit formatter." - " Falling back to the builtin defaults, but in the future" - " the unit formatter specified in the `default_format`" - " attribute will be used instead." - ), - DeprecationWarning, - ) - if not mspec and default_mspec: - warnings.warn( - ( - "The given format spec does not contain a magnitude formatter." - " Falling back to the builtin defaults, but in the future" - " the magnitude formatter specified in the `default_format`" - " attribute will be used instead." 
- ), - DeprecationWarning, - ) - elif not spec: - mspec, uspec = default_mspec, default_uspec - else: - mspec = mspec or default_mspec - uspec = uspec or default_uspec - - return mspec, uspec - - -def vector_to_latex(vec: Iterable[Any], fmtfun: FORMATTER = ".2f".format) -> str: - return matrix_to_latex([vec], fmtfun) - - -def matrix_to_latex(matrix: ItMatrix, fmtfun: FORMATTER = ".2f".format) -> str: - ret: list[str] = [] - - for row in matrix: - ret += [" & ".join(fmtfun(f) for f in row)] - - return r"\begin{pmatrix}%s\end{pmatrix}" % "\\\\ \n".join(ret) - - -def ndarray_to_latex_parts( - ndarr, fmtfun: FORMATTER = ".2f".format, dim: tuple[int, ...] = tuple() -): - if isinstance(fmtfun, str): - fmtfun = fmtfun.format - - if ndarr.ndim == 0: - _ndarr = ndarr.reshape(1) - return [vector_to_latex(_ndarr, fmtfun)] - if ndarr.ndim == 1: - return [vector_to_latex(ndarr, fmtfun)] - if ndarr.ndim == 2: - return [matrix_to_latex(ndarr, fmtfun)] - else: - ret = [] - if ndarr.ndim == 3: - header = ("arr[%s," % ",".join("%d" % d for d in dim)) + "%d,:,:]" - for elno, el in enumerate(ndarr): - ret += [header % elno + " = " + matrix_to_latex(el, fmtfun)] - else: - for elno, el in enumerate(ndarr): - ret += ndarray_to_latex_parts(el, fmtfun, dim + (elno,)) - - return ret - - -def ndarray_to_latex( - ndarr, fmtfun: FORMATTER = ".2f".format, dim: tuple[int, ...] 
= tuple() -) -> str: - return "\n".join(ndarray_to_latex_parts(ndarr, fmtfun, dim)) diff --git a/pint/testsuite/test_babel.py b/pint/testsuite/test_babel.py index eb91709db..3bb88db9d 100644 --- a/pint/testsuite/test_babel.py +++ b/pint/testsuite/test_babel.py @@ -24,11 +24,11 @@ def test_format(func_registry): assert distance.format_babel(locale="fr_FR", length="long") == "24.0 mètres" time = 8.0 * ureg.second assert time.format_babel(locale="fr_FR", length="long") == "8.0 secondes" - assert time.format_babel(locale="ro", length="short") == "8.0 s" + assert time.format_babel(locale="ro_RO", length="short") == "8.0 s" acceleration = distance / time**2 assert ( - acceleration.format_babel(locale="fr_FR", length="long") - == "0.375 mètre par seconde²" + acceleration.format_babel(spec="P", locale="fr_FR", length="long") + == "0.375 mètre/seconde²" ) mks = ureg.get_system("mks") assert mks.format_babel(locale="fr_FR") == "métrique" @@ -44,9 +44,12 @@ def test_registry_locale(): assert distance.format_babel(length="long") == "24.0 mètres" time = 8.0 * ureg.second assert time.format_babel(length="long") == "8.0 secondes" - assert time.format_babel(locale="ro", length="short") == "8.0 s" + assert time.format_babel(locale="ro_RO", length="short") == "8.0 s" acceleration = distance / time**2 - assert acceleration.format_babel(length="long") == "0.375 mètre par seconde²" + assert ( + acceleration.format_babel(spec="C", length="long") == "0.375 mètre/seconde**2" + ) + assert acceleration.format_babel(spec="P", length="long") == "0.375 mètre/seconde²" mks = ureg.get_system("mks") assert mks.format_babel(locale="fr_FR") == "métrique" diff --git a/pint/util.py b/pint/util.py index 3e73944d4..45f409135 100644 --- a/pint/util.py +++ b/pint/util.py @@ -34,7 +34,6 @@ from .compat import NUMERIC_TYPES, Self from .errors import DefinitionSyntaxError -from .formatting import format_unit from .pint_eval import build_eval_tree from . 
import pint_eval @@ -606,9 +605,15 @@ def __repr__(self) -> str: return f"" def __format__(self, spec: str) -> str: + # TODO: provisional + from .formatting import format_unit + return format_unit(self, spec) def format_babel(self, spec: str, registry=None, **kwspec) -> str: + # TODO: provisional + from .formatting import format_unit + return format_unit(self, spec, registry=registry, **kwspec) def __copy__(self):