From 8fd0148e3c70e7613150e2e9f542f059735beea2 Mon Sep 17 00:00:00 2001 From: mikhnenko Date: Fri, 31 Jan 2025 07:22:37 +0300 Subject: [PATCH 01/16] Move include from namespace commit_hash:4cd737269b19eab384e9de53bdc2e4bec6c8a7d3 --- .../include/aws/crt/external/cJSON.h | 4 ++-- .../01-move-include-from-namespace.patch | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch diff --git a/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h b/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h index ee10f0225d1d..c337f342a0e4 100644 --- a/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h +++ b/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h @@ -44,8 +44,6 @@ #ifndef cJSON__h #define cJSON__h -namespace Aws { - #if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) #define __WINDOWS__ #endif @@ -117,6 +115,8 @@ then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJ #define cJSON_IsReference 256 #define cJSON_StringIsConst 512 +namespace Aws { + /* The cJSON structure: */ typedef struct cJSON { diff --git a/contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch b/contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch new file mode 100644 index 000000000000..807b008f3997 --- /dev/null +++ b/contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch @@ -0,0 +1,20 @@ +--- contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h (f680f339877c3c3539c85345ee15f023012893c9) ++++ contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h (working tree) +@@ -44,8 +44,6 @@ + #ifndef cJSON__h + #define cJSON__h + +-namespace Aws { +- + #if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || 
defined(_WIN32)) + #define __WINDOWS__ + #endif +@@ -117,6 +115,8 @@ then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJ + #define cJSON_IsReference 256 + #define cJSON_StringIsConst 512 + ++namespace Aws { ++ + /* The cJSON structure: */ + typedef struct cJSON + { From 972bb8975cad653370d8c4dc06031ea20faf7c51 Mon Sep 17 00:00:00 2001 From: thenewone Date: Fri, 31 Jan 2025 09:34:35 +0300 Subject: [PATCH 02/16] persist flow view in YT dynamic tables Persist flow state in dynamic sorted tables Improve persisted state data structure. Use persisted state data structure as an engine. Update prebuilt YT for tests. commit_hash:149cef6c3bc4db65ef9fa9ee4955e522c15e40bd --- yt/yt/core/misc/collection_helpers-inl.h | 2 +- yt/yt/core/misc/collection_helpers.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/yt/yt/core/misc/collection_helpers-inl.h b/yt/yt/core/misc/collection_helpers-inl.h index 1afff7460ed6..fa299a05ca45 100644 --- a/yt/yt/core/misc/collection_helpers-inl.h +++ b/yt/yt/core/misc/collection_helpers-inl.h @@ -155,7 +155,7 @@ TKeySet DropAndReturnMissingKeys(TMap&& map, const TKeySet& set) } template -void DropMissingKeys(TMap&& map, const TKeySet& set) +void DropMissingKeys(TMap&& map, TKeySet&& set) { for (auto it = map.begin(); it != map.end(); ) { if (!set.contains(it->first)) { diff --git a/yt/yt/core/misc/collection_helpers.h b/yt/yt/core/misc/collection_helpers.h index 4695096e2588..f06740e24535 100644 --- a/yt/yt/core/misc/collection_helpers.h +++ b/yt/yt/core/misc/collection_helpers.h @@ -43,7 +43,7 @@ template [[nodiscard]] TKeySet DropAndReturnMissingKeys(TMap&& map, const TKeySet& set); template -void DropMissingKeys(TMap&& map, const TKeySet& set); +void DropMissingKeys(TMap&& map, TKeySet&& set); /*! 
* This function is supposed to replace a frequent pattern From f12d0f878911d1506ff75f19bb550d498b9ec242 Mon Sep 17 00:00:00 2001 From: robot-piglet Date: Fri, 31 Jan 2025 11:37:10 +0300 Subject: [PATCH 03/16] Intermediate changes commit_hash:8c2a03558616560deb74e3412a33769b4f519a0a --- .../more-itertools/py3/.dist-info/METADATA | 10 +- contrib/python/more-itertools/py3/README.rst | 3 + .../py3/more_itertools/__init__.py | 2 +- .../more-itertools/py3/more_itertools/more.py | 108 ++++++----- .../py3/more_itertools/more.pyi | 148 ++++++++++++-- .../py3/more_itertools/recipes.py | 140 +++++++++++++- .../py3/more_itertools/recipes.pyi | 68 ++++++- .../more-itertools/py3/tests/test_more.py | 46 +++-- .../more-itertools/py3/tests/test_recipes.py | 182 +++++++++++++++++- contrib/python/more-itertools/py3/ya.make | 2 +- 10 files changed, 611 insertions(+), 98 deletions(-) diff --git a/contrib/python/more-itertools/py3/.dist-info/METADATA b/contrib/python/more-itertools/py3/.dist-info/METADATA index a06c9b0a570f..72be20349f3e 100644 --- a/contrib/python/more-itertools/py3/.dist-info/METADATA +++ b/contrib/python/more-itertools/py3/.dist-info/METADATA @@ -1,25 +1,26 @@ Metadata-Version: 2.1 Name: more-itertools -Version: 10.5.0 +Version: 10.6.0 Summary: More routines for operating on iterables, beyond itertools Keywords: itertools,iterator,iteration,filter,peek,peekable,chunk,chunked Author-email: Erik Rose -Requires-Python: >=3.8 +Requires-Python: >=3.9 Description-Content-Type: text/x-rst Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: Natural Language :: English Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 
+Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: 3 :: Only Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Software Development :: Libraries +Project-URL: Documentation, https://more-itertools.readthedocs.io/en/stable/ Project-URL: Homepage, https://github.com/more-itertools/more-itertools ============== @@ -142,6 +143,8 @@ Python iterables. | | `convolve `_, | | | `dotproduct `_, | | | `factor `_, | +| | `is_prime `_, | +| | `nth_prime `_, | | | `matmul `_, | | | `polynomial_from_roots `_, | | | `polynomial_derivative `_, | @@ -185,6 +188,7 @@ Python iterables. | | `numeric_range `_, | | | `side_effect `_, | | | `iterate `_, | +| | `loops `_, | | | `difference `_, | | | `make_decorator `_, | | | `SequenceView `_, | diff --git a/contrib/python/more-itertools/py3/README.rst b/contrib/python/more-itertools/py3/README.rst index 5be8552b2d59..059fda912627 100644 --- a/contrib/python/more-itertools/py3/README.rst +++ b/contrib/python/more-itertools/py3/README.rst @@ -118,6 +118,8 @@ Python iterables. | | `convolve `_, | | | `dotproduct `_, | | | `factor `_, | +| | `is_prime `_, | +| | `nth_prime `_, | | | `matmul `_, | | | `polynomial_from_roots `_, | | | `polynomial_derivative `_, | @@ -161,6 +163,7 @@ Python iterables. 
| | `numeric_range `_, | | | `side_effect `_, | | | `iterate `_, | +| | `loops `_, | | | `difference `_, | | | `make_decorator `_, | | | `SequenceView `_, | diff --git a/contrib/python/more-itertools/py3/more_itertools/__init__.py b/contrib/python/more-itertools/py3/more_itertools/__init__.py index 583fb5745782..4d43d11a637b 100644 --- a/contrib/python/more-itertools/py3/more_itertools/__init__.py +++ b/contrib/python/more-itertools/py3/more_itertools/__init__.py @@ -3,4 +3,4 @@ from .more import * # noqa from .recipes import * # noqa -__version__ = '10.5.0' +__version__ = '10.6.0' diff --git a/contrib/python/more-itertools/py3/more_itertools/more.py b/contrib/python/more-itertools/py3/more_itertools/more.py index 64fab2618584..2228687ea274 100644 --- a/contrib/python/more-itertools/py3/more_itertools/more.py +++ b/contrib/python/more-itertools/py3/more_itertools/more.py @@ -25,7 +25,7 @@ from math import comb, e, exp, factorial, floor, fsum, log, log1p, perm, tau from queue import Empty, Queue from random import random, randrange, shuffle, uniform -from operator import itemgetter, mul, sub, gt, lt, le +from operator import itemgetter, mul, sub, gt, lt from sys import hexversion, maxsize from time import monotonic @@ -35,7 +35,9 @@ UnequalIterablesError, consume, flatten, + nth, powerset, + sieve, take, unique_everseen, all_equal, @@ -104,6 +106,7 @@ 'minmax', 'nth_or_last', 'nth_permutation', + 'nth_prime', 'nth_product', 'nth_combination_with_replacement', 'numeric_range', @@ -215,8 +218,8 @@ def first(iterable, default=_marker): return item if default is _marker: raise ValueError( - 'first() was called on an empty iterable, and no ' - 'default value was provided.' + 'first() was called on an empty iterable, ' + 'and no default value was provided.' 
) return default @@ -237,15 +240,14 @@ def last(iterable, default=_marker): if isinstance(iterable, Sequence): return iterable[-1] # Work around https://bugs.python.org/issue38525 - elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + if hasattr(iterable, '__reversed__'): return next(reversed(iterable)) - else: - return deque(iterable, maxlen=1)[-1] + return deque(iterable, maxlen=1)[-1] except (IndexError, TypeError, StopIteration): if default is _marker: raise ValueError( - 'last() was called on an empty iterable, and no default was ' - 'provided.' + 'last() was called on an empty iterable, ' + 'and no default value was provided.' ) return default @@ -569,8 +571,8 @@ def one(iterable, too_short=None, too_long=None): pass else: msg = ( - 'Expected exactly one item in iterable, but got {!r}, {!r}, ' - 'and perhaps more.'.format(first_value, second_value) + f'Expected exactly one item in iterable, but got {first_value!r}, ' + f'{second_value!r}, and perhaps more.' ) raise too_long or ValueError(msg) @@ -631,13 +633,13 @@ def strictly_n(iterable, n, too_short=None, too_long=None): if too_short is None: too_short = lambda item_count: raise_( ValueError, - 'Too few items in iterable (got {})'.format(item_count), + f'Too few items in iterable (got {item_count})', ) if too_long is None: too_long = lambda item_count: raise_( ValueError, - 'Too many items in iterable (got at least {})'.format(item_count), + f'Too many items in iterable (got at least {item_count})', ) it = iter(iterable) @@ -1118,10 +1120,8 @@ def spy(iterable, n=1): [1, 2, 3, 4, 5] """ - it = iter(iterable) - head = take(n, it) - - return head.copy(), chain(head, it) + p, q = tee(iterable) + return take(n, q), p def interleave(*iterables): @@ -1558,8 +1558,8 @@ def split_into(iterable, sizes): [[1], [2, 3], [4], []] When a ``None`` object is encountered in *sizes*, the returned list will - contain items up to the end of *iterable* the same way that itertools.slice - does: + contain 
items up to the end of *iterable* the same way that + :func:`itertools.slice` does: >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None])) [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]] @@ -2167,13 +2167,11 @@ def __init__(self, *args): self._start, self._stop, self._step = args elif argc == 0: raise TypeError( - 'numeric_range expected at least ' - '1 argument, got {}'.format(argc) + f'numeric_range expected at least 1 argument, got {argc}' ) else: raise TypeError( - 'numeric_range expected at most ' - '3 arguments, got {}'.format(argc) + f'numeric_range expected at most 3 arguments, got {argc}' ) self._zero = type(self._step)(0) @@ -2236,7 +2234,7 @@ def __getitem__(self, key): else: raise TypeError( 'numeric range indices must be ' - 'integers or slices, not {}'.format(type(key).__name__) + f'integers or slices, not {type(key).__name__}' ) def __hash__(self): @@ -2277,13 +2275,10 @@ def __reduce__(self): def __repr__(self): if self._step == 1: - return "numeric_range({}, {})".format( - repr(self._start), repr(self._stop) - ) - else: - return "numeric_range({}, {}, {})".format( - repr(self._start), repr(self._stop), repr(self._step) - ) + return f"numeric_range({self._start!r}, {self._stop!r})" + return ( + f"numeric_range({self._start!r}, {self._stop!r}, {self._step!r})" + ) def __reversed__(self): return iter( @@ -2307,7 +2302,7 @@ def index(self, value): if r == self._zero: return int(q) - raise ValueError("{} is not in numeric range".format(value)) + raise ValueError(f"{value} is not in numeric range") def _get_by_index(self, i): if i < 0: @@ -2781,7 +2776,7 @@ def __len__(self): return len(self._target) def __repr__(self): - return '{}({})'.format(self.__class__.__name__, repr(self._target)) + return f'{self.__class__.__name__}({self._target!r})' class seekable: @@ -3443,8 +3438,8 @@ def only(iterable, default=None, too_long=None): pass else: msg = ( - 'Expected exactly one item in iterable, but got {!r}, {!r}, ' - 'and perhaps more.'.format(first_value, 
second_value) + f'Expected exactly one item in iterable, but got {first_value!r}, ' + f'{second_value!r}, and perhaps more.' ) raise too_long or ValueError(msg) @@ -3726,9 +3721,11 @@ def feed(i): reservoir = [] for _ in range(k): reservoir.append(feed(0)) - if strict and len(reservoir) < k: - raise ValueError('Sample larger than population') + if strict and len(reservoir) < k: + raise ValueError('Sample larger than population') + + with suppress(StopIteration): W = 1.0 while True: W *= exp(log(random()) / k) @@ -3821,15 +3818,16 @@ def is_sorted(iterable, key=None, reverse=False, strict=False): The function returns ``False`` after encountering the first out-of-order item, which means it may produce results that differ from the built-in - :func:`sorted` function for objects with unusual comparison dynamics. - If there are no out-of-order items, the iterable is exhausted. + :func:`sorted` function for objects with unusual comparison dynamics + (like ``math.nan``). If there are no out-of-order items, the iterable is + exhausted. 
""" - compare = le if strict else lt it = iterable if (key is None) else map(key, iterable) - it_1, it_2 = tee(it) - next(it_2 if reverse else it_1, None) - - return not any(map(compare, it_1, it_2)) + a, b = tee(it) + next(b, None) + if reverse: + b, a = a, b + return all(map(lt, a, b)) if strict else not any(map(lt, b, a)) class AbortThread(BaseException): @@ -4822,8 +4820,8 @@ def outer_product(func, xs, ys, *args, **kwargs): >>> xs = ['A', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B'] >>> ys = ['X', 'X', 'X', 'Y', 'Z', 'Z', 'Y', 'Y', 'Z', 'Z'] - >>> rows = list(zip(xs, ys)) - >>> count_rows = lambda x, y: rows.count((x, y)) + >>> pair_counts = Counter(zip(xs, ys)) + >>> count_rows = lambda x, y: pair_counts[x, y] >>> list(outer_product(count_rows, sorted(set(xs)), sorted(set(ys)))) [(2, 3, 0), (1, 0, 4)] @@ -4978,3 +4976,23 @@ def doublestarmap(func, iterable): """ for item in iterable: yield func(**item) + + +def _nth_prime_ub(n): + "Upper bound for the nth prime (counting from 1)." + # https://en.wikipedia.org/wiki/Prime-counting_function#Inequalities + return n * log(n * log(n)) if n >= 6 else 11.1 + + +def nth_prime(n): + """Return the nth prime (counting from 0). 
+ + >>> nth_prime(0) + 2 + >>> nth_prime(100) + 547 + """ + if n < 0: + raise ValueError + limit = math.ceil(_nth_prime_ub(n + 1)) + return nth(sieve(limit), n) diff --git a/contrib/python/more-itertools/py3/more_itertools/more.pyi b/contrib/python/more-itertools/py3/more_itertools/more.pyi index 66e6938e1345..b8406906154a 100644 --- a/contrib/python/more-itertools/py3/more_itertools/more.pyi +++ b/contrib/python/more-itertools/py3/more_itertools/more.pyi @@ -5,27 +5,141 @@ from __future__ import annotations import sys import types -from typing import ( - Any, - Callable, +from collections.abc import ( Container, - ContextManager, - Generic, Hashable, - Mapping, Iterable, Iterator, Mapping, - overload, Reversible, Sequence, Sized, - Type, +) +from contextlib import AbstractContextManager +from typing import ( + Any, + Callable, + Generic, TypeVar, + overload, type_check_only, ) from typing_extensions import Protocol +__all__ = [ + 'AbortThread', + 'SequenceView', + 'UnequalIterablesError', + 'adjacent', + 'all_unique', + 'always_iterable', + 'always_reversible', + 'bucket', + 'callback_iter', + 'chunked', + 'chunked_even', + 'circular_shifts', + 'collapse', + 'combination_index', + 'combination_with_replacement_index', + 'consecutive_groups', + 'constrained_batches', + 'consumer', + 'count_cycle', + 'countable', + 'dft', + 'difference', + 'distinct_combinations', + 'distinct_permutations', + 'distribute', + 'divide', + 'doublestarmap', + 'duplicates_everseen', + 'duplicates_justseen', + 'classify_unique', + 'exactly_n', + 'filter_except', + 'filter_map', + 'first', + 'gray_product', + 'groupby_transform', + 'ichunked', + 'iequals', + 'idft', + 'ilen', + 'interleave', + 'interleave_evenly', + 'interleave_longest', + 'intersperse', + 'is_sorted', + 'islice_extended', + 'iterate', + 'iter_suppress', + 'join_mappings', + 'last', + 'locate', + 'longest_common_prefix', + 'lstrip', + 'make_decorator', + 'map_except', + 'map_if', + 'map_reduce', + 'mark_ends', + 'minmax', 
+ 'nth_or_last', + 'nth_permutation', + 'nth_prime', + 'nth_product', + 'nth_combination_with_replacement', + 'numeric_range', + 'one', + 'only', + 'outer_product', + 'padded', + 'partial_product', + 'partitions', + 'peekable', + 'permutation_index', + 'powerset_of_sets', + 'product_index', + 'raise_', + 'repeat_each', + 'repeat_last', + 'replace', + 'rlocate', + 'rstrip', + 'run_length', + 'sample', + 'seekable', + 'set_partitions', + 'side_effect', + 'sliced', + 'sort_together', + 'split_after', + 'split_at', + 'split_before', + 'split_into', + 'split_when', + 'spy', + 'stagger', + 'strip', + 'strictly_n', + 'substrings', + 'substrings_indexes', + 'takewhile_inclusive', + 'time_limited', + 'unique_in_window', + 'unique_to_each', + 'unzip', + 'value_chain', + 'windowed', + 'windowed_complete', + 'with_iter', + 'zip_broadcast', + 'zip_equal', + 'zip_offset', +] + # Type and type variable definitions _T = TypeVar('_T') _T1 = TypeVar('_T1') @@ -38,7 +152,7 @@ _V = TypeVar('_V') _W = TypeVar('_W') _T_co = TypeVar('_T_co', covariant=True) _GenFn = TypeVar('_GenFn', bound=Callable[..., Iterator[Any]]) -_Raisable = BaseException | Type[BaseException] +_Raisable = BaseException | type[BaseException] # The type of isinstance's second argument (from typeshed builtins) if sys.version_info >= (3, 10): @@ -91,7 +205,7 @@ def consumer(func: _GenFn) -> _GenFn: ... def ilen(iterable: Iterable[_T]) -> int: ... def iterate(func: Callable[[_T], _T], start: _T) -> Iterator[_T]: ... def with_iter( - context_manager: ContextManager[Iterable[_T]], + context_manager: AbstractContextManager[Iterable[_T]], ) -> Iterator[_T]: ... def one( iterable: Iterable[_T], @@ -410,7 +524,7 @@ class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]): def __len__(self) -> int: ... def __reduce__( self, - ) -> tuple[Type[numeric_range[_T, _U]], tuple[_T, _T, _U]]: ... + ) -> tuple[type[numeric_range[_T, _U]], tuple[_T, _T, _U]]: ... def __repr__(self) -> str: ... 
def __reversed__(self) -> Iterator[_T]: ... def count(self, value: _T) -> int: ... @@ -567,12 +681,12 @@ def distinct_combinations( def filter_except( validator: Callable[[Any], object], iterable: Iterable[_T], - *exceptions: Type[BaseException], + *exceptions: type[BaseException], ) -> Iterator[_T]: ... def map_except( function: Callable[[Any], _U], iterable: Iterable[_T], - *exceptions: Type[BaseException], + *exceptions: type[BaseException], ) -> Iterator[_U]: ... def map_if( iterable: Iterable[Any], @@ -587,7 +701,7 @@ def _sample_counted( population: Iterator[_T], k: int, counts: Iterable[int], strict: bool ) -> list[_T]: ... def _sample_weighted( - iterator: Iterator[_T], k: int, weights, strict + iterator: Iterator[_T], k: int, weights: Iterator[float], strict: bool ) -> list[_T]: ... def sample( iterable: Iterable[_T], @@ -617,7 +731,7 @@ class callback_iter(Generic[_T], Iterator[_T]): def __enter__(self) -> callback_iter[_T]: ... def __exit__( self, - exc_type: Type[BaseException] | None, + exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: types.TracebackType | None, ) -> bool | None: ... @@ -797,7 +911,7 @@ def outer_product( ) -> Iterator[tuple[_V, ...]]: ... def iter_suppress( iterable: Iterable[_T], - *exceptions: Type[BaseException], + *exceptions: type[BaseException], ) -> Iterator[_T]: ... def filter_map( func: Callable[[_T], _V | None], @@ -813,3 +927,5 @@ def doublestarmap( ) -> Iterator[_T]: ... def dft(xarr: Sequence[complex]) -> Iterator[complex]: ... def idft(Xarr: Sequence[complex]) -> Iterator[complex]: ... +def _nth_prime_ub(n: int) -> float: ... +def nth_prime(n: int) -> int: ... 
diff --git a/contrib/python/more-itertools/py3/more_itertools/recipes.py b/contrib/python/more-itertools/py3/more_itertools/recipes.py index 67f76fa899ef..5a11fc5a3049 100644 --- a/contrib/python/more-itertools/py3/more_itertools/recipes.py +++ b/contrib/python/more-itertools/py3/more_itertools/recipes.py @@ -13,7 +13,7 @@ from collections import deque from collections.abc import Sized -from functools import partial, reduce +from functools import lru_cache, partial from itertools import ( chain, combinations, @@ -42,8 +42,10 @@ 'factor', 'flatten', 'grouper', + 'is_prime', 'iter_except', 'iter_index', + 'loops', 'matmul', 'ncycles', 'nth', @@ -872,8 +874,10 @@ def polynomial_from_roots(roots): >>> polynomial_from_roots(roots) # x^3 - 4 * x^2 - 17 * x + 60 [1, -4, -17, 60] """ - factors = zip(repeat(1), map(operator.neg, roots)) - return list(reduce(convolve, factors, [1])) + poly = [1] + for root in roots: + poly = list(convolve(poly, (1, -root))) + return poly def iter_index(iterable, value, start=0, stop=None): @@ -1005,20 +1009,56 @@ def matmul(m1, m2): return batched(starmap(_sumprod, product(m1, transpose(m2))), n) +def _factor_pollard(n): + # Return a factor of n using Pollard's rho algorithm + gcd = math.gcd + for b in range(1, n - 2): + x = y = 2 + d = 1 + while d == 1: + x = (x * x + b) % n + y = (y * y + b) % n + y = (y * y + b) % n + d = gcd(x - y, n) + if d != n: + return d + raise ValueError('prime or under 5') + + +_primes_below_211 = tuple(sieve(211)) + + def factor(n): """Yield the prime factors of n. >>> list(factor(360)) [2, 2, 2, 3, 3, 5] + + Finds small factors with trial division. Larger factors are + either verified as prime with ``is_prime`` or split into + smaller factors with Pollard's rho algorithm. 
""" - for prime in sieve(math.isqrt(n) + 1): + + # Corner case reduction + if n < 2: + return + + # Trial division reduction + for prime in _primes_below_211: while not n % prime: yield prime n //= prime - if n == 1: - return - if n > 1: - yield n + + # Pollard's rho reduction + primes = [] + todo = [n] if n > 1 else [] + for n in todo: + if n < 211**2 or is_prime(n): + primes.append(n) + else: + fact = _factor_pollard(n) + todo += (fact, n // fact) + yield from sorted(primes) def polynomial_eval(coefficients, x): @@ -1073,3 +1113,87 @@ def totient(n): for prime in set(factor(n)): n -= n // prime return n + + +# Miller–Rabin primality test: https://oeis.org/A014233 +_perfect_tests = [ + (2047, (2,)), + (9080191, (31, 73)), + (4759123141, (2, 7, 61)), + (1122004669633, (2, 13, 23, 1662803)), + (2152302898747, (2, 3, 5, 7, 11)), + (3474749660383, (2, 3, 5, 7, 11, 13)), + (18446744073709551616, (2, 325, 9375, 28178, 450775, 9780504, 1795265022)), + ( + 3317044064679887385961981, + (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41), + ), +] + + +@lru_cache +def _shift_to_odd(n): + 'Return s, d such that 2**s * d == n' + s = ((n - 1) ^ n).bit_length() - 1 + d = n >> s + assert (1 << s) * d == n and d & 1 and s >= 0 + return s, d + + +def _strong_probable_prime(n, base): + assert (n > 2) and (n & 1) and (2 <= base < n) + + s, d = _shift_to_odd(n - 1) + + x = pow(base, d, n) + if x == 1 or x == n - 1: + return True + + for _ in range(s - 1): + x = x * x % n + if x == n - 1: + return True + + return False + + +def is_prime(n): + """Return ``True`` if *n* is prime and ``False`` otherwise. + + >>> is_prime(37) + True + >>> is_prime(3 * 13) + False + >>> is_prime(18_446_744_073_709_551_557) + True + + This function uses the Miller-Rabin primality test, which can return false + positives for very large inputs. For values of *n* below 10**24 + there are no false positives. For larger values, there is less than + a 1 in 2**128 false positive rate. 
Multiple tests can further reduce the + chance of a false positive. + """ + if n < 17: + return n in {2, 3, 5, 7, 11, 13} + if not (n & 1 and n % 3 and n % 5 and n % 7 and n % 11 and n % 13): + return False + for limit, bases in _perfect_tests: + if n < limit: + break + else: + bases = [randrange(2, n - 1) for i in range(64)] + return all(_strong_probable_prime(n, base) for base in bases) + + +def loops(n): + """Returns an iterable with *n* elements for efficient looping. + Like ``range(n)`` but doesn't create integers. + + >>> i = 0 + >>> for _ in loops(5): + ... i += 1 + >>> i + 5 + + """ + return repeat(None, n) diff --git a/contrib/python/more-itertools/py3/more_itertools/recipes.pyi b/contrib/python/more-itertools/py3/more_itertools/recipes.pyi index 739acec05fb3..e404f4d3df46 100644 --- a/contrib/python/more-itertools/py3/more_itertools/recipes.pyi +++ b/contrib/python/more-itertools/py3/more_itertools/recipes.pyi @@ -2,17 +2,65 @@ from __future__ import annotations +from collections.abc import Iterable, Iterator, Sequence from typing import ( Any, Callable, - Iterable, - Iterator, - overload, - Sequence, - Type, TypeVar, + overload, ) +__all__ = [ + 'all_equal', + 'batched', + 'before_and_after', + 'consume', + 'convolve', + 'dotproduct', + 'first_true', + 'factor', + 'flatten', + 'grouper', + 'is_prime', + 'iter_except', + 'iter_index', + 'loops', + 'matmul', + 'ncycles', + 'nth', + 'nth_combination', + 'padnone', + 'pad_none', + 'pairwise', + 'partition', + 'polynomial_eval', + 'polynomial_from_roots', + 'polynomial_derivative', + 'powerset', + 'prepend', + 'quantify', + 'reshape', + 'random_combination_with_replacement', + 'random_combination', + 'random_permutation', + 'random_product', + 'repeatfunc', + 'roundrobin', + 'sieve', + 'sliding_window', + 'subslices', + 'sum_of_squares', + 'tabulate', + 'tail', + 'take', + 'totient', + 'transpose', + 'triplewise', + 'unique', + 'unique_everseen', + 'unique_justseen', +] + # Type and type variable definitions 
_T = TypeVar('_T') _T1 = TypeVar('_T1') @@ -69,13 +117,13 @@ def unique( @overload def iter_except( func: Callable[[], _T], - exception: Type[BaseException] | tuple[Type[BaseException], ...], + exception: type[BaseException] | tuple[type[BaseException], ...], first: None = ..., ) -> Iterator[_T]: ... @overload def iter_except( func: Callable[[], _T], - exception: Type[BaseException] | tuple[Type[BaseException], ...], + exception: type[BaseException] | tuple[type[BaseException], ...], first: Callable[[], _U], ) -> Iterator[_T | _U]: ... @overload @@ -129,8 +177,14 @@ def reshape( matrix: Iterable[Iterable[_T]], cols: int ) -> Iterator[tuple[_T, ...]]: ... def matmul(m1: Sequence[_T], m2: Sequence[_T]) -> Iterator[tuple[_T]]: ... +def _factor_trial(n: int) -> Iterator[int]: ... +def _factor_pollard(n: int) -> int: ... def factor(n: int) -> Iterator[int]: ... def polynomial_eval(coefficients: Sequence[_T], x: _U) -> _U: ... def sum_of_squares(it: Iterable[_T]) -> _T: ... def polynomial_derivative(coefficients: Sequence[_T]) -> list[_T]: ... def totient(n: int) -> int: ... +def _shift_to_odd(n: int) -> tuple[int, int]: ... +def _strong_probable_prime(n: int, base: int) -> bool: ... +def is_prime(n: int) -> bool: ... +def loops(n: int) -> Iterator[None]: ... 
diff --git a/contrib/python/more-itertools/py3/tests/test_more.py b/contrib/python/more-itertools/py3/tests/test_more.py index 1a70ea08e57b..bfbf583f28f8 100644 --- a/contrib/python/more-itertools/py3/tests/test_more.py +++ b/contrib/python/more-itertools/py3/tests/test_more.py @@ -3836,7 +3836,7 @@ def __hash__(self): return hash(self._collection) def __repr__(self): - return "FrozenSet([{}]".format(", ".join(repr(x) for x in iter(self))) + return f'FrozenSet([{", ".join(repr(x) for x in iter(self))}]' class SetPartitionsTests(TestCase): @@ -4321,6 +4321,33 @@ def test_invariance_under_permutations_weighted(self): # The observed largest difference in 10,000 simulations was 4.337999 self.assertTrue(difference_in_means < 4.4) + def test_error_cases(self): + + # weights and counts are mutally exclusive + with self.assertRaises(TypeError): + mi.sample( + 'abcde', 3, weights=[1, 2, 3, 4, 5], counts=[1, 2, 3, 4, 5] + ) + + # Weighted sample larger than population + with self.assertRaises(ValueError): + mi.sample('abcde', 10, weights=[1, 2, 3, 4, 5], strict=True) + + # Counted sample larger than population + with self.assertRaises(ValueError): + mi.sample('abcde', 10, counts=[1, 1, 1, 1, 1], strict=True) + + +class BarelySortable: + def __init__(self, value): + self.value = value + + def __lt__(self, other): + return self.value < other.value + + def __int__(self): + return int(self.value) + class IsSortedTests(TestCase): def test_basic(self): @@ -4330,7 +4357,6 @@ def test_basic(self): ([1, 2, 3], {}, True), ([1, 1, 2, 3], {}, True), ([1, 10, 2, 3], {}, False), - ([3, float('nan'), 1, 2], {}, True), (['1', '10', '2', '3'], {}, True), (['1', '10', '2', '3'], {'key': int}, False), ([1, 2, 3], {'reverse': True}, False), @@ -4362,17 +4388,6 @@ def test_basic(self): {'strict': True, 'key': int, 'reverse': True}, False, ), - # We'll do the same weird thing as Python here - (['nan', 0, 'nan', 0], {'key': float}, True), - ([0, 'nan', 0, 'nan'], {'key': float}, True), - (['nan', 
0, 'nan', 0], {'key': float, 'reverse': True}, True), - ([0, 'nan', 0, 'nan'], {'key': float, 'reverse': True}, True), - ([0, 'nan', 0, 'nan'], {'strict': True, 'key': float}, True), - ( - ['nan', 0, 'nan', 0], - {'strict': True, 'key': float, 'reverse': True}, - True, - ), ]: key = kwargs.get('key', None) reverse = kwargs.get('reverse', False) @@ -4382,7 +4397,10 @@ def test_basic(self): iterable=iterable, key=key, reverse=reverse, strict=strict ): mi_result = mi.is_sorted( - iter(iterable), key=key, reverse=reverse, strict=strict + map(BarelySortable, iterable), + key=key, + reverse=reverse, + strict=strict, ) sorted_iterable = sorted(iterable, key=key, reverse=reverse) diff --git a/contrib/python/more-itertools/py3/tests/test_recipes.py b/contrib/python/more-itertools/py3/tests/test_recipes.py index 684a6fcd0b1f..a810b8de1e07 100644 --- a/contrib/python/more-itertools/py3/tests/test_recipes.py +++ b/contrib/python/more-itertools/py3/tests/test_recipes.py @@ -4,7 +4,7 @@ from functools import reduce from itertools import combinations, count, groupby, permutations from operator import mul -from math import factorial +from math import comb, factorial from sys import version_info from unittest import TestCase, skipIf from unittest.mock import patch @@ -923,6 +923,12 @@ def test_basic(self): actual = mi.polynomial_from_roots(roots) self.assertEqual(actual, expected) + def test_large(self): + n = 1_500 + actual = mi.polynomial_from_roots([-1] * n) + expected = [comb(n, k) for k in range(n + 1)] + self.assertEqual(actual, expected) + class PolynomialEvalTests(TestCase): def test_basic(self): @@ -1147,8 +1153,12 @@ def test_basic(self): (6, [2, 3]), (360, [2, 2, 2, 3, 3, 5]), (128_884_753_939, [128_884_753_939]), - (999953 * 999983, [999953, 999983]), - (909_909_090_909, [3, 3, 7, 13, 13, 751, 113797]), + (999_953 * 999_983, [999_953, 999_983]), + (909_909_090_909, [3, 3, 7, 13, 13, 751, 1_137_97]), + ( + 1_647_403_876_764_101_672_307_088, + [2, 2, 2, 2, 19, 23, 109471, 
13571009, 158594251], + ), ): with self.subTest(n=n): actual = list(mi.factor(n)) @@ -1209,3 +1219,169 @@ def test_basic(self): ): with self.subTest(n=n): self.assertEqual(mi.totient(n), expected) + + +class PrimeFunctionTests(TestCase): + def test_is_prime_pseudoprimes(self): + # Carmichael number that strong pseudoprime to prime bases < 307 + # https://doi.org/10.1006/jsco.1995.1042 + p = 29674495668685510550154174642905332730771991799853043350995075531276838753171770199594238596428121188033664754218345562493168782883 # noqa:E501 + gnarly_carmichael = (313 * (p - 1) + 1) * (353 * (p - 1) + 1) + + for n in ( + # Least Carmichael number with n prime factors: + # https://oeis.org/A006931 + 561, + 41041, + 825265, + 321197185, + 5394826801, + 232250619601, + 9746347772161, + 1436697831295441, + 60977817398996785, + 7156857700403137441, + 1791562810662585767521, + 87674969936234821377601, + 6553130926752006031481761, + 1590231231043178376951698401, + # Carmichael numbers with exactly 4 prime factors: + # https://oeis.org/A074379 + 41041, + 62745, + 63973, + 75361, + 101101, + 126217, + 172081, + 188461, + 278545, + 340561, + 449065, + 552721, + 656601, + 658801, + 670033, + 748657, + 838201, + 852841, + 997633, + 1033669, + 1082809, + 1569457, + 1773289, + 2100901, + 2113921, + 2433601, + 2455921, + # Lucas-Carmichael numbers: + # https://oeis.org/A006972 + 399, + 935, + 2015, + 2915, + 4991, + 5719, + 7055, + 8855, + 12719, + 18095, + 20705, + 20999, + 22847, + 29315, + 31535, + 46079, + 51359, + 60059, + 63503, + 67199, + 73535, + 76751, + 80189, + 81719, + 88559, + 90287, + # Strong pseudoprimes to bases 2, 3 and 5: + # https://oeis.org/A056915 + 25326001, + 161304001, + 960946321, + 1157839381, + 3215031751, + 3697278427, + 5764643587, + 6770862367, + 14386156093, + 15579919981, + 18459366157, + 19887974881, + 21276028621, + 27716349961, + 29118033181, + 37131467521, + 41752650241, + 42550716781, + 43536545821, + # Strong pseudoprimes to bases 2, 3, 5, and 7: + # 
https://oeis.org/A211112 + 39365185894561, + 52657210792621, + 11377272352951, + 15070413782971, + 3343433905957, + 16603327018981, + 3461715915661, + 52384617784801, + 3477707481751, + 18996486073489, + 55712149574381, + gnarly_carmichael, + ): + with self.subTest(n=n): + self.assertFalse(mi.is_prime(n)) + + def test_primes(self): + for i, n in enumerate(mi.sieve(10**5)): + with self.subTest(n=n): + self.assertTrue(mi.is_prime(n)) + self.assertEqual(mi.nth_prime(i), n) + + self.assertFalse(mi.is_prime(-1)) + with self.assertRaises(ValueError): + mi.nth_prime(-1) + + def test_special_primes(self): + for n in ( + # Mersenne primes: + # https://oeis.org/A000668 + 3, + 7, + 31, + 127, + 8191, + 131071, + 524287, + 2147483647, + 2305843009213693951, + 618970019642690137449562111, + 162259276829213363391578010288127, + 170141183460469231731687303715884105727, + # Various big primes: + # https://bigprimes.org/ + 7990614013, + 80358337843874809987, + 814847562949580526031364519741, + 1982427225022428178169740526258124929077, + 91828213828508622559862344537590739566883686537727, + 406414746815201693481517584049440077164779143248351060891669, + ): + with self.subTest(n=n): + self.assertTrue(mi.is_prime(n)) + + +class LoopsTests(TestCase): + def test_basic(self): + self.assertTrue( + all(list(mi.loops(n)) == [None] * n for n in range(-10, 10)) + ) diff --git a/contrib/python/more-itertools/py3/ya.make b/contrib/python/more-itertools/py3/ya.make index 45df93175b64..b604dff01581 100644 --- a/contrib/python/more-itertools/py3/ya.make +++ b/contrib/python/more-itertools/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(10.5.0) +VERSION(10.6.0) LICENSE(MIT) From 757ce306e6a07bfb3a3c4d729c75cd7512af2e45 Mon Sep 17 00:00:00 2001 From: zhvv117 Date: Fri, 31 Jan 2025 12:58:25 +0300 Subject: [PATCH 04/16] Fix comment line count calculation commit_hash:b18a35e909cfd6662088765125b0d6f3dcf83c15 --- yql/essentials/sql/v1/format/sql_format.cpp | 4 ++++ 
yql/essentials/sql/v1/format/sql_format_ut.h | 20 +++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp index 3b180827a658..19619bf6e898 100644 --- a/yql/essentials/sql/v1/format/sql_format.cpp +++ b/yql/essentials/sql/v1/format/sql_format.cpp @@ -496,6 +496,10 @@ friend struct TStaticData; ++CommentLines; } + if (!text.StartsWith("--")) { + CommentLines += CountIf(text, [](auto c) { return c == '\n'; }); + } + Out(text); if (text.StartsWith("--") && !text.EndsWith("\n")) { diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h index 0960fec3b693..b22bf1cb21cc 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.h +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -1,10 +1,10 @@ Y_UNIT_TEST(Pragma) { TCases cases = { - {"pragma user = user;","PRAGMA user = user;\n"}, - {"pragma user = default;","PRAGMA user = default;\n"}, - {"pragma user.user = user;","PRAGMA user.user = user;\n"}, - {"pragma user.user(user);","PRAGMA user.user(user);\n"}, - {"pragma user.user(user, user);","PRAGMA user.user(user, user);\n"}, + {"pragma user = user;", "PRAGMA user = user;\n"}, + {"pragma user = default;", "PRAGMA user = default;\n"}, + {"pragma user.user = user;", "PRAGMA user.user = user;\n"}, + {"pragma user.user(user);", "PRAGMA user.user(user);\n"}, + {"pragma user.user(user, user);", "PRAGMA user.user(user, user);\n"}, }; TSetup setup; @@ -1512,6 +1512,16 @@ Y_UNIT_TEST(Union) { setup.Run(cases); } +Y_UNIT_TEST(Comment) { + TCases cases = { + {"/*\nmulti\nline\ncomment\n*/\npragma foo = \"true\";\npragma bar = \"1\"", + "/*\nmulti\nline\ncomment\n*/\nPRAGMA foo = 'true';\nPRAGMA bar = '1';\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + Y_UNIT_TEST(CommentAfterLastSelect) { TCases cases = { {"SELECT 1--comment\n", From 76be2240b5d036d3e83ff927abd8eb57a2bc8e2b Mon Sep 17 00:00:00 2001 From: don-dron 
Date: Fri, 31 Jan 2025 13:24:27 +0300 Subject: [PATCH 05/16] YT-23775: Customize table writer buffers commit_hash:6a2610d1a5aa32de432a072a3757f72b286d71db --- yt/yt/client/table_client/config.cpp | 5 +++++ yt/yt/client/table_client/config.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/yt/yt/client/table_client/config.cpp b/yt/yt/client/table_client/config.cpp index 78131f636084..ec590507dc88 100644 --- a/yt/yt/client/table_client/config.cpp +++ b/yt/yt/client/table_client/config.cpp @@ -474,6 +474,11 @@ void TChunkWriterOptions::Register(TRegistrar registrar) registrar.Parameter("max_heavy_columns", &TThis::MaxHeavyColumns) .Default(0); + registrar.Parameter("block_size", &TThis::BlockSize) + .Default(); + registrar.Parameter("buffer_size", &TThis::BufferSize) + .Default(); + registrar.Postprocessor([] (TThis* config) { if (config->ValidateUniqueKeys && !config->ValidateSorted) { THROW_ERROR_EXCEPTION("\"validate_unique_keys\" is allowed to be true only if \"validate_sorted\" is true"); diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h index bdb92d90be61..c21ed33d0de2 100644 --- a/yt/yt/client/table_client/config.h +++ b/yt/yt/client/table_client/config.h @@ -440,6 +440,9 @@ class TChunkWriterOptions //! Maximum number of heavy columns in approximate statistics. 
int MaxHeavyColumns; + std::optional BlockSize; + std::optional BufferSize; + void EnableValidationOptions(bool validateAnyIsValidYson = false); REGISTER_YSON_STRUCT(TChunkWriterOptions); From a44c39d80f6462d9ac6a75ef11a0543596bdeb4a Mon Sep 17 00:00:00 2001 From: babenko Date: Fri, 31 Jan 2025 15:07:45 +0300 Subject: [PATCH 06/16] GetLoopbackAddress returns std::string commit_hash:b295db904aa741f31e17a94399822a493dc646d3 --- yt/yt/core/net/local_address.cpp | 6 +++--- yt/yt/core/net/local_address.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/yt/yt/core/net/local_address.cpp b/yt/yt/core/net/local_address.cpp index 541b152cdbfb..19d307faae64 100644 --- a/yt/yt/core/net/local_address.cpp +++ b/yt/yt/core/net/local_address.cpp @@ -169,10 +169,10 @@ void UpdateLocalHostName(const TAddressResolverConfigPtr& config) //////////////////////////////////////////////////////////////////////////////// -const TString& GetLoopbackAddress() +const std::string& GetLoopbackAddress() { - static const TString ipv4result("[127.0.1.1]"); - static const TString ipv6result("[::1]"); + static const std::string ipv4result("[127.0.1.1]"); + static const std::string ipv6result("[::1]"); return IPv6Enabled_.load(std::memory_order::relaxed) ? ipv6result : ipv4result; } diff --git a/yt/yt/core/net/local_address.h b/yt/yt/core/net/local_address.h index 3665feaa4da6..1c1c6bd2ff4e 100644 --- a/yt/yt/core/net/local_address.h +++ b/yt/yt/core/net/local_address.h @@ -41,7 +41,7 @@ TString GetLocalHostName(); TString GetLocalYPCluster(); // Returns the loopback address (either IPv4 or IPv6, depending on the configuration). 
-const TString& GetLoopbackAddress(); +const std::string& GetLoopbackAddress(); //////////////////////////////////////////////////////////////////////////////// From 84db1a13e70e60d44b341eabf9d598405a1f4bec Mon Sep 17 00:00:00 2001 From: sabdenovch Date: Fri, 31 Jan 2025 15:26:49 +0300 Subject: [PATCH 07/16] YT-24099: Fix memory leaks in cancelled selects * Changelog entry Type: fix Component: dynamic-tables Eliminate a memory leak caused by cancelled selects. commit_hash:4e3db46a482fef077427748b6c426370b9a8922c --- yt/yt/client/table_client/pipe.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/yt/yt/client/table_client/pipe.cpp b/yt/yt/client/table_client/pipe.cpp index be6607b232e5..fcb77361e769 100644 --- a/yt/yt/client/table_client/pipe.cpp +++ b/yt/yt/client/table_client/pipe.cpp @@ -42,10 +42,13 @@ struct TSchemafulPipe::TData void ResetReaderReadyEvent() { ReaderReadyEvent = NewPromise(); - ReaderReadyEvent.OnCanceled(BIND([=, this, this_ = MakeStrong(this)] (const TError& error) { - Fail(TError(NYT::EErrorCode::Canceled, "Pipe reader canceled") - << error); - })); + ReaderReadyEvent.OnCanceled(BIND(&TSchemafulPipe::TData::HandleCancel, MakeWeak(this))); + } + + void HandleCancel(const TError& error) + { + Fail(TError(NYT::EErrorCode::Canceled, "Pipe reader canceled") + << error); } void Fail(const TError& error) From 777ee5007a3e4632ec0ae4d719a91b31c9eda4b7 Mon Sep 17 00:00:00 2001 From: tobo Date: Fri, 31 Jan 2025 15:57:00 +0300 Subject: [PATCH 08/16] add missing comma after the conjunctive/ linking adverb 'Thus' commit_hash:a2b2ef662dca59ed77030a8be927f291464b9406 --- yt/cpp/mapreduce/interface/operation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/cpp/mapreduce/interface/operation.h b/yt/cpp/mapreduce/interface/operation.h index 7da77213d413..aa109575a8a3 100644 --- a/yt/cpp/mapreduce/interface/operation.h +++ b/yt/cpp/mapreduce/interface/operation.h @@ -779,7 +779,7 @@ struct TUserJobSpec /// 
@note /// When @ref NYT::TOperationOptions::MountSandboxInTmpfs is enabled library will compute /// total size of all files used by this job and add this total size to MemoryLimit. - /// Thus you shouldn't include size of your files (e.g. binary file) into MemoryLimit. + /// Thus, you shouldn't include size of your files (e.g. binary file) into MemoryLimit. /// /// @note /// Final memory memory_limit passed to YT is calculated as follows: From a71348eea33c4994ac09f98889c952705040fa9c Mon Sep 17 00:00:00 2001 From: robot-piglet Date: Fri, 31 Jan 2025 18:06:47 +0300 Subject: [PATCH 09/16] Intermediate changes commit_hash:5b4ba99902e35b0cefa8ea0c82f8e08557fa4ad2 --- yt/yt/client/table_client/unittests/serialization_ut.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/yt/yt/client/table_client/unittests/serialization_ut.cpp b/yt/yt/client/table_client/unittests/serialization_ut.cpp index 2f33425d4be4..ffa87259595a 100644 --- a/yt/yt/client/table_client/unittests/serialization_ut.cpp +++ b/yt/yt/client/table_client/unittests/serialization_ut.cpp @@ -2,6 +2,7 @@ #include #include + #include #include From 1cc0a0cdbd6981938122cdca7ddc8c7a8a50f0f2 Mon Sep 17 00:00:00 2001 From: udovichenko-r Date: Fri, 31 Jan 2025 18:06:49 +0300 Subject: [PATCH 10/16] More strict yson checks in optimizers commit_hash:f35ce42e177d81a10b1f1133cb449ab70538bf8d --- .../yt/provider/phy_opt/yql_yt_phy_opt_content.cpp | 4 ++++ .../yt/provider/phy_opt/yql_yt_phy_opt_join.cpp | 3 +++ .../yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp index 0afdbe789a1c..107d17bd4785 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NYql { @@ -154,6 +155,9 @@ TMaybeNode 
TYtPhysicalOptProposalTransformer::NonOptimalTableContent( } } if (materialize) { + if (!NPrivate::EnsurePersistableYsonTypes(section.Pos(), *section.Ref().GetTypeAnn()->Cast()->GetItemType(), ctx, state)) { + return {}; + } auto path = CopyOrTrivialMap(section.Pos(), TExprBase(world), TYtDSink(ctx.RenameNode(read.DataSource().Ref(), "DataSink")), diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp index 918aec0795da..c175bcade9b9 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp @@ -223,6 +223,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::EquiJoin(TExprBase node if (NYql::HasSetting(sectionNode.Settings().Ref(), EYtSettingType::Sample)) { auto scheme = list.Ref().GetTypeAnn()->Cast()->GetItemType(); + if (!NPrivate::EnsurePersistableYsonTypes(sectionNode.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(sectionNode.Pos(), TExprBase(world ? 
world : ctx.NewWorld(sectionNode.Pos())), dataSink.Cast(), diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp index 30e21070dad0..8ec8d9f1ec09 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp @@ -445,6 +445,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::Extend(TExprBase node, return node; } auto scheme = section.Ref().GetTypeAnn()->Cast()->GetItemType(); + if (!NPrivate::EnsurePersistableYsonTypes(section.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(section.Pos(), read.Cast().World(), dataSink, *scheme, @@ -492,6 +495,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::Extend(TExprBase node, if (State_->Types->EvaluationInProgress) { return node; } + if (!NPrivate::EnsurePersistableYsonTypes(extend.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(extend.Pos(), world, dataSink, *scheme, @@ -818,6 +824,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::ResPull(TExprBase node, bool keepSorted = ctx.IsConstraintEnabled() ? 
(!NYql::HasSetting(section.Settings().Ref(), EYtSettingType::Unordered) && !hasNonTemp && section.Paths().Size() == 1) // single sorted input from operation : (!hasDynamic || !NYql::HasAnySetting(section.Settings().Ref(), EYtSettingType::Take | EYtSettingType::Skip)); // compatibility - all except dynamic with limit + if (!NPrivate::EnsurePersistableYsonTypes(read.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(read.Pos(), read.World(), TYtDSink(ctx.RenameNode(read.DataSource().Ref(), "DataSink")), From 0a45708aff069610623744298139fae0d1ae2651 Mon Sep 17 00:00:00 2001 From: robot-piglet Date: Fri, 31 Jan 2025 18:59:54 +0300 Subject: [PATCH 11/16] Intermediate changes commit_hash:c10d459b85995eaf547fb461cebdee19caea07c8 --- yql/essentials/docs/ru/toc_i.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yql/essentials/docs/ru/toc_i.yaml b/yql/essentials/docs/ru/toc_i.yaml index f1080ccf57af..aa8b7a95dad8 100644 --- a/yql/essentials/docs/ru/toc_i.yaml +++ b/yql/essentials/docs/ru/toc_i.yaml @@ -5,5 +5,5 @@ items: include: { mode: link, path: syntax/toc_i.yaml } - name: Встроенные функции include: { mode: link, path: builtins/toc_i.yaml } -- name: Репецпты +- name: Рецепты include: { mode: link, path: recipes/toc_i.yaml } From 16981ec67a23463ba2532710940a7fe4eb28f812 Mon Sep 17 00:00:00 2001 From: jolex007 Date: Fri, 31 Jan 2025 19:00:13 +0300 Subject: [PATCH 12/16] Fix using port in universal_fetcher commit_hash:fa0dfc03d76b5e40181e589078cdfff0c13ae51d --- library/cpp/http/simple/http_client.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/library/cpp/http/simple/http_client.cpp b/library/cpp/http/simple/http_client.cpp index 00145c804f94..bac6bdc39edf 100644 --- a/library/cpp/http/simple/http_client.cpp +++ b/library/cpp/http/simple/http_client.cpp @@ -327,9 +327,18 @@ void TRedirectableHttpClient::ProcessResponse(const TStringBuf relativeUrl, THtt TStringBuf schemeHostPort = 
GetSchemeHostAndPort(i->Value()); TStringBuf scheme("http://"); TStringBuf host("unknown"); - ui16 port = 80; + ui16 port = 0; GetSchemeHostAndPort(schemeHostPort, scheme, host, port); TStringBuf body = GetPathAndQuery(i->Value(), false); + if (port == 0) { + if (scheme.StartsWith("https")) { + port = 443; + } else if (scheme.StartsWith("http")) { + port = 80; + } else { + port = 80; + } + } auto opts = Opts; opts.Host(TString(scheme) + TString(host)); From ab23c69305efbe4a26d080ff7054ffce3520c258 Mon Sep 17 00:00:00 2001 From: thegeorg Date: Fri, 31 Jan 2025 19:52:56 +0300 Subject: [PATCH 13/16] Allow resolving cxxabi.h into libcxxabi commit_hash:9c3b26db50fa812b1a54fafe6e32bf4311e98264 --- build/sysincl/emscripten.yml | 2 -- build/sysincl/stl-to-libcxx.yml | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/build/sysincl/emscripten.yml b/build/sysincl/emscripten.yml index 7ae4c55c9d36..cf3eb5c5221e 100644 --- a/build/sysincl/emscripten.yml +++ b/build/sysincl/emscripten.yml @@ -347,5 +347,3 @@ - emscripten/websocket.h: contrib/restricted/emscripten/system/include/emscripten/websocket.h - stdc-predef.h: contrib/restricted/emscripten/system/lib/libc/musl/include/stdc-predef.h - - - cxxabi.h: contrib/libs/cxxsupp/libcxxabi/include/cxxabi.h diff --git a/build/sysincl/stl-to-libcxx.yml b/build/sysincl/stl-to-libcxx.yml index ef8a55bafe7b..4838f7224489 100644 --- a/build/sysincl/stl-to-libcxx.yml +++ b/build/sysincl/stl-to-libcxx.yml @@ -32,7 +32,10 @@ - includes: - cxxabi.h: + - contrib/libs/cxxsupp/libcxxabi/include/cxxabi.h - contrib/libs/cxxsupp/libcxxrt/include/cxxabi.h + - __cxxabi_config.h: + - contrib/libs/cxxsupp/libcxxabi/include/__cxxabi_config.h - omp.h: - contrib/libs/cxxsupp/openmp/omp.h - unwind.h: @@ -44,9 +47,6 @@ - math_cuda.h: - contrib/libs/cxxsupp/libcxx/include/math_cuda.h -- source_filter: "^contrib/libs/cxxsupp/libcxxabi/" - includes: - - cxxabi.h: contrib/libs/cxxsupp/libcxxabi/include/cxxabi.h # Though these are headers 
provided by libcxx, we do not want to allow them to be included. # We are using non-existent filename to generate error instead, as there is no specific syntax for this case. From af5ff2ac328e499127ab4b1be4c9cdab7a0e3531 Mon Sep 17 00:00:00 2001 From: thegeorg Date: Fri, 31 Jan 2025 19:53:17 +0300 Subject: [PATCH 14/16] Cleanup libcxxrt import commit_hash:a45aa7528cf49e9f590ceddf8b5dd43f8856e327 --- contrib/libs/cxxsupp/libcxxrt/cxxabi.h | 2 +- contrib/libs/cxxsupp/libcxxrt/exception.cc | 2 +- contrib/libs/cxxsupp/libcxxrt/msan.h | 12 ------------ contrib/libs/cxxsupp/libcxxrt/stdexcept.cc | 9 +++++---- contrib/libs/cxxsupp/libcxxrt/stdexcept.h | 8 ++++---- contrib/libs/cxxsupp/libcxxrt/ya.make | 15 +++++---------- 6 files changed, 16 insertions(+), 32 deletions(-) delete mode 100644 contrib/libs/cxxsupp/libcxxrt/msan.h diff --git a/contrib/libs/cxxsupp/libcxxrt/cxxabi.h b/contrib/libs/cxxsupp/libcxxrt/cxxabi.h index 6526f22b6516..2e50c9b7f57f 100644 --- a/contrib/libs/cxxsupp/libcxxrt/cxxabi.h +++ b/contrib/libs/cxxsupp/libcxxrt/cxxabi.h @@ -41,7 +41,7 @@ namespace std #ifdef __cplusplus #if __cplusplus < 201103L -#define _LIBCXXRT_NOEXCEPT noexcept +#define _LIBCXXRT_NOEXCEPT throw() #else #define _LIBCXXRT_NOEXCEPT noexcept #endif diff --git a/contrib/libs/cxxsupp/libcxxrt/exception.cc b/contrib/libs/cxxsupp/libcxxrt/exception.cc index 97f499f882c8..7c34bd9cbe1d 100644 --- a/contrib/libs/cxxsupp/libcxxrt/exception.cc +++ b/contrib/libs/cxxsupp/libcxxrt/exception.cc @@ -35,7 +35,7 @@ #include "dwarf_eh.h" #include "atomic.h" #include "cxxabi.h" -#include "msan.h" +#include using namespace ABI_NAMESPACE; diff --git a/contrib/libs/cxxsupp/libcxxrt/msan.h b/contrib/libs/cxxsupp/libcxxrt/msan.h deleted file mode 100644 index 5cf9572af0e4..000000000000 --- a/contrib/libs/cxxsupp/libcxxrt/msan.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include - -#if defined(__clang__) -#if __has_feature(memory_sanitizer) -extern "C" void __msan_unpoison(const volatile void* 
ptr, size_t size); -#ifndef __SANITIZE_MEMORY__ -#define __SANITIZE_MEMORY__ -#endif -#endif -#endif diff --git a/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc b/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc index f0c2cddbecb0..22c6930dca6b 100644 --- a/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc +++ b/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc @@ -32,7 +32,7 @@ namespace std { exception::exception() _LIBCXXRT_NOEXCEPT {} -exception::~exception() _LIBCXXRT_NOEXCEPT {} +exception::~exception() {} exception::exception(const exception&) _LIBCXXRT_NOEXCEPT {} exception& exception::operator=(const exception&) _LIBCXXRT_NOEXCEPT { @@ -44,7 +44,7 @@ const char* exception::what() const _LIBCXXRT_NOEXCEPT } bad_alloc::bad_alloc() _LIBCXXRT_NOEXCEPT {} -bad_alloc::~bad_alloc() _LIBCXXRT_NOEXCEPT {} +bad_alloc::~bad_alloc() {} bad_alloc::bad_alloc(const bad_alloc&) _LIBCXXRT_NOEXCEPT {} bad_alloc& bad_alloc::operator=(const bad_alloc&) _LIBCXXRT_NOEXCEPT { @@ -58,7 +58,7 @@ const char* bad_alloc::what() const _LIBCXXRT_NOEXCEPT bad_cast::bad_cast() _LIBCXXRT_NOEXCEPT {} -bad_cast::~bad_cast() _LIBCXXRT_NOEXCEPT {} +bad_cast::~bad_cast() {} bad_cast::bad_cast(const bad_cast&) _LIBCXXRT_NOEXCEPT {} bad_cast& bad_cast::operator=(const bad_cast&) _LIBCXXRT_NOEXCEPT { @@ -70,7 +70,7 @@ const char* bad_cast::what() const _LIBCXXRT_NOEXCEPT } bad_typeid::bad_typeid() _LIBCXXRT_NOEXCEPT {} -bad_typeid::~bad_typeid() _LIBCXXRT_NOEXCEPT {} +bad_typeid::~bad_typeid() {} bad_typeid::bad_typeid(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT {} bad_typeid& bad_typeid::operator=(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT { @@ -96,3 +96,4 @@ const char* bad_array_new_length::what() const _LIBCXXRT_NOEXCEPT } } // namespace std + diff --git a/contrib/libs/cxxsupp/libcxxrt/stdexcept.h b/contrib/libs/cxxsupp/libcxxrt/stdexcept.h index d9ba05224c1f..38cd36d8e566 100644 --- a/contrib/libs/cxxsupp/libcxxrt/stdexcept.h +++ b/contrib/libs/cxxsupp/libcxxrt/stdexcept.h @@ -40,7 +40,7 @@ namespace std 
exception() _LIBCXXRT_NOEXCEPT; exception(const exception&) _LIBCXXRT_NOEXCEPT; exception& operator=(const exception&) _LIBCXXRT_NOEXCEPT; - virtual ~exception() _LIBCXXRT_NOEXCEPT; + virtual ~exception(); virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; @@ -54,7 +54,7 @@ namespace std bad_alloc() _LIBCXXRT_NOEXCEPT; bad_alloc(const bad_alloc&) _LIBCXXRT_NOEXCEPT; bad_alloc& operator=(const bad_alloc&) _LIBCXXRT_NOEXCEPT; - ~bad_alloc() _LIBCXXRT_NOEXCEPT; + ~bad_alloc(); virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; @@ -66,7 +66,7 @@ namespace std bad_cast() _LIBCXXRT_NOEXCEPT; bad_cast(const bad_cast&) _LIBCXXRT_NOEXCEPT; bad_cast& operator=(const bad_cast&) _LIBCXXRT_NOEXCEPT; - virtual ~bad_cast() _LIBCXXRT_NOEXCEPT; + virtual ~bad_cast(); virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; @@ -78,7 +78,7 @@ namespace std public: bad_typeid() _LIBCXXRT_NOEXCEPT; bad_typeid(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT; - virtual ~bad_typeid() _LIBCXXRT_NOEXCEPT; + virtual ~bad_typeid(); bad_typeid& operator=(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT; virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; diff --git a/contrib/libs/cxxsupp/libcxxrt/ya.make b/contrib/libs/cxxsupp/libcxxrt/ya.make index 568f018f17d7..7915fb58bede 100644 --- a/contrib/libs/cxxsupp/libcxxrt/ya.make +++ b/contrib/libs/cxxsupp/libcxxrt/ya.make @@ -15,23 +15,18 @@ VERSION(2024-10-14) ORIGINAL_SOURCE(https://github.com/libcxxrt/libcxxrt/archive/76435c4451aeb5e04e9500b090293347a38cef8d.tar.gz) -ADDINCL( - contrib/libs/cxxsupp/libcxxrt +PEERDIR( + contrib/libs/libunwind + library/cpp/sanitizer/include ) NO_COMPILER_WARNINGS() NO_RUNTIME() -CXXFLAGS(-nostdinc++) +NO_UTIL() -IF (CXX_UNWIND == "glibcxx_dynamic" OR ARCH_PPC64LE) - LDFLAGS(-lgcc_s) -ELSE() - PEERDIR( - contrib/libs/libunwind - ) -ENDIF() +CXXFLAGS(-nostdinc++) IF (SANITIZER_TYPE == undefined OR FUZZING) NO_SANITIZE() From 6368797460629c9331821d5b4b8d0356298b9cf5 Mon Sep 17 00:00:00 2001 From: aneporada 
Date: Fri, 31 Jan 2025 20:03:52 +0300 Subject: [PATCH 15/16] Add Member/Nth over FlatMapBase optimizer (under feature flag) commit_hash:20a9ba9fcc12aae6dd641e67751e61b844e174ef --- .../core/common_opt/yql_co_simple1.cpp | 44 ++++++++++++++++++- yql/essentials/core/yql_opt_utils.h | 22 ++++++++++ .../sql/suites/join/pullup_extra_columns.cfg | 2 + .../sql/suites/join/pullup_extra_columns.sql | 13 ++++++ 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg create mode 100644 yt/yql/tests/sql/suites/join/pullup_extra_columns.sql diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index 4f0ddab7413d..b8dffb997f84 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -3697,6 +3697,38 @@ TExprNode::TPtr ReplaceFuncWithImpl(const TExprNode::TPtr& node, TExprContext& c .Build(); } +TExprNode::TPtr MemberNthOverFlatMapWithOptional(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + YQL_ENSURE(node->IsCallable({"Member", "Nth"})); + YQL_ENSURE(optCtx.Types); + static const char optName[] = "MemberNthOverFlatMap"; + if (!IsOptimizerEnabled(*optCtx.Types) || IsOptimizerDisabled(*optCtx.Types)) { + return node; + } + if (auto maybeFlatMap = TMaybeNode(node->HeadPtr())) { + auto flatMap = maybeFlatMap.Cast(); + if (flatMap.Input().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional && + flatMap.Lambda().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) + { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.Builder(node->Pos()) + .Callable(flatMap.CallableName()) + .Add(0, flatMap.Input().Ptr()) + .Lambda(1) + .Param("item") + .Callable(node->Content()) + .Apply(0, flatMap.Lambda().Ptr()) + .With(0, "item") + .Seal() + .Add(1, node->Child(1)) + .Seal() + .Seal() + .Seal() + .Build(); + } + 
} + return node; +} + } // namespace void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { @@ -4619,7 +4651,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ctx.NewCallable(node->Pos(), "AsStruct", std::move(asStructChildren)); }; - map["Member"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + map["Member"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { if (node->Head().IsCallable("AsStruct")) { YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); return ExtractMember(*node); @@ -4643,6 +4675,10 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ctx.ChangeChild(*node, 0, node->Head().HeadPtr()); } + if (auto opt = MemberNthOverFlatMapWithOptional(node, ctx, optCtx); opt != node) { + return opt; + } + return node; }; @@ -4664,7 +4700,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { map["AsStruct"] = std::bind(&OptimizeAsStruct, _1, _2); - map["Nth"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + map["Nth"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { if (node->Head().Type() == TExprNode::List) { YQL_CLOG(DEBUG, Core) << node->Content() << " over tuple literal"; const auto index = FromString(node->Tail().Content()); @@ -4701,6 +4737,10 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { .Ptr(); } + if (auto opt = MemberNthOverFlatMapWithOptional(node, ctx, optCtx); opt != node) { + return opt; + } + return node; }; diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 771090c78b4a..3af33bed49d1 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -176,4 +176,26 @@ bool CheckSupportedTypes( bool allowNestedOptionals = true ); +template +bool IsOptimizerEnabled(const TTypeAnnotationContext& types) { + struct TFlag { + TFlag(const 
TTypeAnnotationContext& types) + : Value(types.OptimizerFlags.contains(to_lower(TString(OptName)))) + {} + const bool Value; + }; + return Singleton(types)->Value; +} + +template +bool IsOptimizerDisabled(const TTypeAnnotationContext& types) { + struct TFlag { + TFlag(const TTypeAnnotationContext& types) + : Value(types.OptimizerFlags.contains(to_lower("Disable" + TString(OptName)))) + {} + const bool Value; + }; + return Singleton(types)->Value; +} + } diff --git a/yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg b/yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg new file mode 100644 index 000000000000..6098eb907d55 --- /dev/null +++ b/yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg @@ -0,0 +1,2 @@ +in Input1 kv1_sorted.txt +in Input2 kv2_sorted.txt diff --git a/yt/yql/tests/sql/suites/join/pullup_extra_columns.sql b/yt/yql/tests/sql/suites/join/pullup_extra_columns.sql new file mode 100644 index 000000000000..342a7bb967f4 --- /dev/null +++ b/yt/yql/tests/sql/suites/join/pullup_extra_columns.sql @@ -0,0 +1,13 @@ +use plato; + +pragma config.flags('OptimizerFlags', 'MemberNthOverFlatMap'); +pragma yt.MapJoinLimit="1m"; + + +$t1 = select k1, v1 from Input1; +$t2 = select k2, v2, u2 as renamed from Input2; + +select + a.*, + b.v2, +from $t1 as a left join any $t2 as b on a.k1 = b.k2; From 6bdb8392267259f3f148458ab5946e2654631bfc Mon Sep 17 00:00:00 2001 From: imunkin Date: Fri, 31 Jan 2025 20:17:43 +0300 Subject: [PATCH 16/16] YQL-18303: Introduce Get* overloads for time components (numeric) commit_hash:9dcc43f4facdab5b4fca0a4079833c8534359124 --- .../udfs/common/datetime2/datetime_udf.cpp | 283 ++++++++------- .../canondata/test.test_Get_/results.txt | 340 ++++++++++++++++++ .../datetime2/test_bigdates/cases/Get.sql | 5 + 3 files changed, 500 insertions(+), 128 deletions(-) diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index 6213e35fe2f9..d7566137c570 100644 --- 
a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -226,43 +226,13 @@ class TToUnits { } }; -template +template struct TGetTimeComponent { typedef bool TTypeAwareMarker; - template - static TFieldStorage Core(TInput val) { - if constexpr (AlwaysZero) { - return 0; - } - - if constexpr (InputFractional) { - if constexpr (Fractional) { - return (val / Scale) % Limit; - } else { - return (val / 1000000u / Scale) % Limit; - } - } else { - if constexpr (Fractional) { - return 0; - } else { - return (val / Scale) % Limit; - } - } - } - - class TImpl : public TBoxedValue { - public: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - if (!args[0]) { - return {}; - } - - return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor)); - } - }; - static const TStringRef& Name() { static auto name = TStringRef(TFuncName, std::strlen(TFuncName)); return name; @@ -278,118 +248,175 @@ struct TGetTimeComponent { return false; } - try { - auto typeInfoHelper = builder.TypeInfoHelper(); - TTupleTypeInspector tuple(*typeInfoHelper, userType); - if (tuple) { - Y_ENSURE(tuple.GetElementsCount() > 0); - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple); - if (argsTuple.GetElementsCount() != 1) { - builder.SetError("Expected one argument"); - return true; - } + if (!userType) { + builder.SetError("User type is missing"); + return true; + } + builder.UserType(userType); - auto argType = argsTuple.GetElementType(0); - TVector argBlockTypes; - argBlockTypes.push_back(argType); + const auto typeInfoHelper = builder.TypeInfoHelper(); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple, "Tuple with args and options tuples expected"); + Y_ENSURE(tuple.GetElementsCount() > 0, + "Tuple has to contain positional arguments"); - TBlockTypeInspector block(*typeInfoHelper, argType); - if 
(block) { - Y_ENSURE(!block.IsScalar()); - argType = block.GetItemType(); - } + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple, "Tuple with args expected"); + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Single argument expected"); + return true; + } - bool isOptional = false; - if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) { - argType = opt.GetItemType(); - isOptional = true; - } + auto argType = argsTuple.GetElementType(0); - TResourceTypeInspector res(*typeInfoHelper, argType); - if (!res) { - TDataTypeInspector data(*typeInfoHelper, argType); - if (!data) { - builder.SetError("Expected data type"); - return true; - } + TVector argBlockTypes; + argBlockTypes.push_back(argType); - auto typeId = data.GetTypeId(); - if (typeId == TDataType::Id || - typeId == TDataType::Id || - typeId == TDataType::Id) { + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + Y_ENSURE(!block.IsScalar()); + argType = block.GetItemType(); + } - builder.Args()->Add(argsTuple.GetElementType(0)).Done(); - const TType* retType = builder.SimpleType(); + bool isOptional = false; + if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) { + argType = opt.GetItemType(); + isOptional = true; + } - if (isOptional) { - retType = builder.Optional()->Item(retType).Build(); - } + TResourceTypeInspector resource(*typeInfoHelper, argType); + if (!resource) { + TDataTypeInspector data(*typeInfoHelper, argType); + if (!data) { + builder.SetError("Data type expected"); + return true; + } - auto outputType = retType; - if (block) { - retType = builder.Block(block.IsScalar())->Item(retType).Build(); - } + const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; + if (features & NUdf::BigDateType) { + BuildSignature(builder, typesOnly); + return true; + } + if (features & NUdf::TzDateType) { + BuildSignature(builder, typesOnly); + return true; + } - 
builder.Returns(retType); - builder.SupportsBlocks(); - builder.IsStrict(); - - builder.UserType(userType); - if (!typesOnly) { - if (typeId == TDataType::Id) { - if (block) { - builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); - } else { - builder.Implementation(new TUnaryOverOptionalImpl>()); - } - } + if (features & NUdf::DateType) { + builder.Args()->Add(argsTuple.GetElementType(0)).Done(); + const TType* retType = builder.SimpleType(); - if (typeId == TDataType::Id) { - if (block) { - builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); - } else { - builder.Implementation(new TUnaryOverOptionalImpl>()); - } - } + if (isOptional) { + retType = builder.Optional()->Item(retType).Build(); + } - if (typeId == TDataType::Id) { - if (block) { - builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); - } else { - builder.Implementation(new TUnaryOverOptionalImpl>()); - } - } + auto outputType = retType; + if (block) { + retType = builder.Block(block.IsScalar())->Item(retType).Build(); + } + + builder.Returns(retType); + builder.SupportsBlocks(); + builder.IsStrict(); + + if (!typesOnly) { + const auto typeId = data.GetTypeId(); + if (typeId == TDataType::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl>()); } + } - return true; + if (typeId == TDataType::Id) { + if (block) { + builder.Implementation(new 
TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl>()); + } } - } else { - Y_ENSURE(!block); - if (res.GetTag() != TStringRef::Of(TMResourceName)) { - builder.SetError("Unexpected resource tag"); - return true; + + if (typeId == TDataType::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl>()); + } } } + return true; } - // default implementation - builder.Args()->Add>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done(); - builder.Returns(); - builder.IsStrict(); - if (!typesOnly) { - builder.Implementation(new TImpl()); - } - } catch (const std::exception& e) { - builder.SetError(TStringBuf(e.what())); + ::TStringBuilder sb; + sb << "Invalid argument type: got "; + TTypePrinter(*typeInfoHelper, argType).Out(sb.Out); + sb << ", but Resource<" << TMResourceName <<"> or Resource<" + << TM64ResourceName << "> expected"; + builder.SetError(sb); + return true; } + Y_ENSURE(!block); + + if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) { + BuildSignature(builder, typesOnly); + return true; + } + + if (resource.GetTag() == TStringRef::Of(TMResourceName)) { + BuildSignature(builder, typesOnly); + return true; + } + + builder.SetError("Unexpected Resource tag"); return true; } +private: + template + static TFieldStorage Core(TInput val) { + if constexpr (AlwaysZero) { + return 0; + } + + if constexpr (InputFractional) { + if constexpr (Fractional) { + return (val / Scale) % Limit; + } else { + return (val / 1000000u / Scale) % Limit; + } + } else { + if constexpr (Fractional) { + return 0; + } else { + return (val / Scale) % Limit; + } + } + } + + template + 
class TImpl : public TBoxedValue { + public: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + EMPTY_RESULT_ON_EMPTY_ARG(0); + return TUnboxedValuePod((TResult(Func(args[0])) / Divisor)); + } + }; + + template + static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { + builder.Returns(); + builder.Args()->Add>>(); + builder.IsStrict(); + if (!typesOnly) { + builder.Implementation(new TImpl()); + } + } }; namespace { @@ -2623,11 +2650,11 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV TGetDateComponent, TGetDateComponent, TGetDateComponentName, GetDayOfWeekName>, - TGetTimeComponent, - TGetTimeComponent, - TGetTimeComponent, - TGetTimeComponent, - TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, TGetDateComponent, TGetDateComponentName, GetTimezoneName>, diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt index a8562845b586..600a5335a1aa 100644 --- a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt @@ -52,6 +52,26 @@ ] ] ]; + [ + "rhour"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rmonth"; [ @@ -72,6 +92,26 @@ ] ] ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rtz"; [ @@ -92,6 +132,16 @@ ] ] ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; [ "rweekofyear"; [ @@ -170,6 +220,26 @@ ] ] ]; + [ + "rhour"; + [ + "OptionalType"; + [ + 
"DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rmonth"; [ @@ -190,6 +260,26 @@ ] ] ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rtz"; [ @@ -210,6 +300,16 @@ ] ] ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; [ "rweekofyear"; [ @@ -261,6 +361,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -270,9 +376,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -296,6 +411,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -305,9 +426,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -333,18 +463,33 @@ [ "1" ]; + [ + "23" + ]; + [ + "59" + ]; [ "1" ]; [ "January" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "1" ]; @@ -368,18 +513,33 @@ [ "1" ]; + [ + "23" + ]; + [ + "59" + ]; [ "1" ]; [ "January" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "1" ]; @@ -405,6 +565,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -414,9 +580,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -440,6 +615,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -449,9 +630,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -477,18 +667,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; @@ -512,18 +717,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; @@ -549,6 +769,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -558,9 +784,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -584,6 +819,12 @@ [ "1" ]; + [ + "0" 
+ ]; + [ + "0" + ]; [ "1" ]; @@ -593,9 +834,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -621,6 +871,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -630,9 +886,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -656,6 +921,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -665,9 +936,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -693,6 +973,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -702,9 +988,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -728,6 +1023,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -737,9 +1038,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -765,18 +1075,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; @@ -800,18 +1125,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql index 6b777c9ab9c6..934e934b2d47 100644 --- a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql @@ -10,6 +10,11 @@ $check = ($arg) -> { rdayofmonth: DateTime::GetDayOfMonth($arg), rdayofweek: DateTime::GetDayOfWeek($arg), rdayofweekname: DateTime::GetDayOfWeekName($arg), + rhour: DateTime::GetHour($arg), + rminute: DateTime::GetMinute($arg), + rsecond: DateTime::GetSecond($arg), + rmsec: DateTime::GetMillisecondOfSecond($arg), + rusec: DateTime::GetMicrosecondOfSecond($arg), rtz: DateTime::GetTimezoneId($arg), rtzname: DateTime::GetTimezoneName($arg), |>