diff --git a/build/sysincl/emscripten.yml b/build/sysincl/emscripten.yml index 7ae4c55c9d36..cf3eb5c5221e 100644 --- a/build/sysincl/emscripten.yml +++ b/build/sysincl/emscripten.yml @@ -347,5 +347,3 @@ - emscripten/websocket.h: contrib/restricted/emscripten/system/include/emscripten/websocket.h - stdc-predef.h: contrib/restricted/emscripten/system/lib/libc/musl/include/stdc-predef.h - - - cxxabi.h: contrib/libs/cxxsupp/libcxxabi/include/cxxabi.h diff --git a/build/sysincl/stl-to-libcxx.yml b/build/sysincl/stl-to-libcxx.yml index ef8a55bafe7b..4838f7224489 100644 --- a/build/sysincl/stl-to-libcxx.yml +++ b/build/sysincl/stl-to-libcxx.yml @@ -32,7 +32,10 @@ - includes: - cxxabi.h: + - contrib/libs/cxxsupp/libcxxabi/include/cxxabi.h - contrib/libs/cxxsupp/libcxxrt/include/cxxabi.h + - __cxxabi_config.h: + - contrib/libs/cxxsupp/libcxxabi/include/__cxxabi_config.h - omp.h: - contrib/libs/cxxsupp/openmp/omp.h - unwind.h: @@ -44,9 +47,6 @@ - math_cuda.h: - contrib/libs/cxxsupp/libcxx/include/math_cuda.h -- source_filter: "^contrib/libs/cxxsupp/libcxxabi/" - includes: - - cxxabi.h: contrib/libs/cxxsupp/libcxxabi/include/cxxabi.h # Though these are headers provided by libcxx, we do not want to allow them to be included. # We are using non-existent filename to generate error instead, as there is no specific syntax for this case. 
diff --git a/contrib/libs/cxxsupp/libcxxrt/cxxabi.h b/contrib/libs/cxxsupp/libcxxrt/cxxabi.h index 6526f22b6516..2e50c9b7f57f 100644 --- a/contrib/libs/cxxsupp/libcxxrt/cxxabi.h +++ b/contrib/libs/cxxsupp/libcxxrt/cxxabi.h @@ -41,7 +41,7 @@ namespace std #ifdef __cplusplus #if __cplusplus < 201103L -#define _LIBCXXRT_NOEXCEPT noexcept +#define _LIBCXXRT_NOEXCEPT throw() #else #define _LIBCXXRT_NOEXCEPT noexcept #endif diff --git a/contrib/libs/cxxsupp/libcxxrt/exception.cc b/contrib/libs/cxxsupp/libcxxrt/exception.cc index 97f499f882c8..7c34bd9cbe1d 100644 --- a/contrib/libs/cxxsupp/libcxxrt/exception.cc +++ b/contrib/libs/cxxsupp/libcxxrt/exception.cc @@ -35,7 +35,7 @@ #include "dwarf_eh.h" #include "atomic.h" #include "cxxabi.h" -#include "msan.h" +#include using namespace ABI_NAMESPACE; diff --git a/contrib/libs/cxxsupp/libcxxrt/msan.h b/contrib/libs/cxxsupp/libcxxrt/msan.h deleted file mode 100644 index 5cf9572af0e4..000000000000 --- a/contrib/libs/cxxsupp/libcxxrt/msan.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include - -#if defined(__clang__) -#if __has_feature(memory_sanitizer) -extern "C" void __msan_unpoison(const volatile void* ptr, size_t size); -#ifndef __SANITIZE_MEMORY__ -#define __SANITIZE_MEMORY__ -#endif -#endif -#endif diff --git a/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc b/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc index f0c2cddbecb0..22c6930dca6b 100644 --- a/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc +++ b/contrib/libs/cxxsupp/libcxxrt/stdexcept.cc @@ -32,7 +32,7 @@ namespace std { exception::exception() _LIBCXXRT_NOEXCEPT {} -exception::~exception() _LIBCXXRT_NOEXCEPT {} +exception::~exception() {} exception::exception(const exception&) _LIBCXXRT_NOEXCEPT {} exception& exception::operator=(const exception&) _LIBCXXRT_NOEXCEPT { @@ -44,7 +44,7 @@ const char* exception::what() const _LIBCXXRT_NOEXCEPT } bad_alloc::bad_alloc() _LIBCXXRT_NOEXCEPT {} -bad_alloc::~bad_alloc() _LIBCXXRT_NOEXCEPT {} +bad_alloc::~bad_alloc() {} 
bad_alloc::bad_alloc(const bad_alloc&) _LIBCXXRT_NOEXCEPT {} bad_alloc& bad_alloc::operator=(const bad_alloc&) _LIBCXXRT_NOEXCEPT { @@ -58,7 +58,7 @@ const char* bad_alloc::what() const _LIBCXXRT_NOEXCEPT bad_cast::bad_cast() _LIBCXXRT_NOEXCEPT {} -bad_cast::~bad_cast() _LIBCXXRT_NOEXCEPT {} +bad_cast::~bad_cast() {} bad_cast::bad_cast(const bad_cast&) _LIBCXXRT_NOEXCEPT {} bad_cast& bad_cast::operator=(const bad_cast&) _LIBCXXRT_NOEXCEPT { @@ -70,7 +70,7 @@ const char* bad_cast::what() const _LIBCXXRT_NOEXCEPT } bad_typeid::bad_typeid() _LIBCXXRT_NOEXCEPT {} -bad_typeid::~bad_typeid() _LIBCXXRT_NOEXCEPT {} +bad_typeid::~bad_typeid() {} bad_typeid::bad_typeid(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT {} bad_typeid& bad_typeid::operator=(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT { @@ -96,3 +96,4 @@ const char* bad_array_new_length::what() const _LIBCXXRT_NOEXCEPT } } // namespace std + diff --git a/contrib/libs/cxxsupp/libcxxrt/stdexcept.h b/contrib/libs/cxxsupp/libcxxrt/stdexcept.h index d9ba05224c1f..38cd36d8e566 100644 --- a/contrib/libs/cxxsupp/libcxxrt/stdexcept.h +++ b/contrib/libs/cxxsupp/libcxxrt/stdexcept.h @@ -40,7 +40,7 @@ namespace std exception() _LIBCXXRT_NOEXCEPT; exception(const exception&) _LIBCXXRT_NOEXCEPT; exception& operator=(const exception&) _LIBCXXRT_NOEXCEPT; - virtual ~exception() _LIBCXXRT_NOEXCEPT; + virtual ~exception(); virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; @@ -54,7 +54,7 @@ namespace std bad_alloc() _LIBCXXRT_NOEXCEPT; bad_alloc(const bad_alloc&) _LIBCXXRT_NOEXCEPT; bad_alloc& operator=(const bad_alloc&) _LIBCXXRT_NOEXCEPT; - ~bad_alloc() _LIBCXXRT_NOEXCEPT; + ~bad_alloc(); virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; @@ -66,7 +66,7 @@ namespace std bad_cast() _LIBCXXRT_NOEXCEPT; bad_cast(const bad_cast&) _LIBCXXRT_NOEXCEPT; bad_cast& operator=(const bad_cast&) _LIBCXXRT_NOEXCEPT; - virtual ~bad_cast() _LIBCXXRT_NOEXCEPT; + virtual ~bad_cast(); virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; @@ 
-78,7 +78,7 @@ namespace std public: bad_typeid() _LIBCXXRT_NOEXCEPT; bad_typeid(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT; - virtual ~bad_typeid() _LIBCXXRT_NOEXCEPT; + virtual ~bad_typeid(); bad_typeid& operator=(const bad_typeid &__rhs) _LIBCXXRT_NOEXCEPT; virtual const char* what() const _LIBCXXRT_NOEXCEPT; }; diff --git a/contrib/libs/cxxsupp/libcxxrt/ya.make b/contrib/libs/cxxsupp/libcxxrt/ya.make index 568f018f17d7..7915fb58bede 100644 --- a/contrib/libs/cxxsupp/libcxxrt/ya.make +++ b/contrib/libs/cxxsupp/libcxxrt/ya.make @@ -15,23 +15,18 @@ VERSION(2024-10-14) ORIGINAL_SOURCE(https://github.com/libcxxrt/libcxxrt/archive/76435c4451aeb5e04e9500b090293347a38cef8d.tar.gz) -ADDINCL( - contrib/libs/cxxsupp/libcxxrt +PEERDIR( + contrib/libs/libunwind + library/cpp/sanitizer/include ) NO_COMPILER_WARNINGS() NO_RUNTIME() -CXXFLAGS(-nostdinc++) +NO_UTIL() -IF (CXX_UNWIND == "glibcxx_dynamic" OR ARCH_PPC64LE) - LDFLAGS(-lgcc_s) -ELSE() - PEERDIR( - contrib/libs/libunwind - ) -ENDIF() +CXXFLAGS(-nostdinc++) IF (SANITIZER_TYPE == undefined OR FUZZING) NO_SANITIZE() diff --git a/contrib/python/more-itertools/py3/.dist-info/METADATA b/contrib/python/more-itertools/py3/.dist-info/METADATA index a06c9b0a570f..72be20349f3e 100644 --- a/contrib/python/more-itertools/py3/.dist-info/METADATA +++ b/contrib/python/more-itertools/py3/.dist-info/METADATA @@ -1,25 +1,26 @@ Metadata-Version: 2.1 Name: more-itertools -Version: 10.5.0 +Version: 10.6.0 Summary: More routines for operating on iterables, beyond itertools Keywords: itertools,iterator,iteration,filter,peek,peekable,chunk,chunked Author-email: Erik Rose -Requires-Python: >=3.8 +Requires-Python: >=3.9 Description-Content-Type: text/x-rst Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: Natural Language :: English Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.8 
Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: 3 :: Only Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Software Development :: Libraries +Project-URL: Documentation, https://more-itertools.readthedocs.io/en/stable/ Project-URL: Homepage, https://github.com/more-itertools/more-itertools ============== @@ -142,6 +143,8 @@ Python iterables. | | `convolve `_, | | | `dotproduct `_, | | | `factor `_, | +| | `is_prime `_, | +| | `nth_prime `_, | | | `matmul `_, | | | `polynomial_from_roots `_, | | | `polynomial_derivative `_, | @@ -185,6 +188,7 @@ Python iterables. | | `numeric_range `_, | | | `side_effect `_, | | | `iterate `_, | +| | `loops `_, | | | `difference `_, | | | `make_decorator `_, | | | `SequenceView `_, | diff --git a/contrib/python/more-itertools/py3/README.rst b/contrib/python/more-itertools/py3/README.rst index 5be8552b2d59..059fda912627 100644 --- a/contrib/python/more-itertools/py3/README.rst +++ b/contrib/python/more-itertools/py3/README.rst @@ -118,6 +118,8 @@ Python iterables. | | `convolve `_, | | | `dotproduct `_, | | | `factor `_, | +| | `is_prime `_, | +| | `nth_prime `_, | | | `matmul `_, | | | `polynomial_from_roots `_, | | | `polynomial_derivative `_, | @@ -161,6 +163,7 @@ Python iterables. 
| | `numeric_range `_, | | | `side_effect `_, | | | `iterate `_, | +| | `loops `_, | | | `difference `_, | | | `make_decorator `_, | | | `SequenceView `_, | diff --git a/contrib/python/more-itertools/py3/more_itertools/__init__.py b/contrib/python/more-itertools/py3/more_itertools/__init__.py index 583fb5745782..4d43d11a637b 100644 --- a/contrib/python/more-itertools/py3/more_itertools/__init__.py +++ b/contrib/python/more-itertools/py3/more_itertools/__init__.py @@ -3,4 +3,4 @@ from .more import * # noqa from .recipes import * # noqa -__version__ = '10.5.0' +__version__ = '10.6.0' diff --git a/contrib/python/more-itertools/py3/more_itertools/more.py b/contrib/python/more-itertools/py3/more_itertools/more.py index 64fab2618584..2228687ea274 100644 --- a/contrib/python/more-itertools/py3/more_itertools/more.py +++ b/contrib/python/more-itertools/py3/more_itertools/more.py @@ -25,7 +25,7 @@ from math import comb, e, exp, factorial, floor, fsum, log, log1p, perm, tau from queue import Empty, Queue from random import random, randrange, shuffle, uniform -from operator import itemgetter, mul, sub, gt, lt, le +from operator import itemgetter, mul, sub, gt, lt from sys import hexversion, maxsize from time import monotonic @@ -35,7 +35,9 @@ UnequalIterablesError, consume, flatten, + nth, powerset, + sieve, take, unique_everseen, all_equal, @@ -104,6 +106,7 @@ 'minmax', 'nth_or_last', 'nth_permutation', + 'nth_prime', 'nth_product', 'nth_combination_with_replacement', 'numeric_range', @@ -215,8 +218,8 @@ def first(iterable, default=_marker): return item if default is _marker: raise ValueError( - 'first() was called on an empty iterable, and no ' - 'default value was provided.' + 'first() was called on an empty iterable, ' + 'and no default value was provided.' 
) return default @@ -237,15 +240,14 @@ def last(iterable, default=_marker): if isinstance(iterable, Sequence): return iterable[-1] # Work around https://bugs.python.org/issue38525 - elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + if hasattr(iterable, '__reversed__'): return next(reversed(iterable)) - else: - return deque(iterable, maxlen=1)[-1] + return deque(iterable, maxlen=1)[-1] except (IndexError, TypeError, StopIteration): if default is _marker: raise ValueError( - 'last() was called on an empty iterable, and no default was ' - 'provided.' + 'last() was called on an empty iterable, ' + 'and no default value was provided.' ) return default @@ -569,8 +571,8 @@ def one(iterable, too_short=None, too_long=None): pass else: msg = ( - 'Expected exactly one item in iterable, but got {!r}, {!r}, ' - 'and perhaps more.'.format(first_value, second_value) + f'Expected exactly one item in iterable, but got {first_value!r}, ' + f'{second_value!r}, and perhaps more.' ) raise too_long or ValueError(msg) @@ -631,13 +633,13 @@ def strictly_n(iterable, n, too_short=None, too_long=None): if too_short is None: too_short = lambda item_count: raise_( ValueError, - 'Too few items in iterable (got {})'.format(item_count), + f'Too few items in iterable (got {item_count})', ) if too_long is None: too_long = lambda item_count: raise_( ValueError, - 'Too many items in iterable (got at least {})'.format(item_count), + f'Too many items in iterable (got at least {item_count})', ) it = iter(iterable) @@ -1118,10 +1120,8 @@ def spy(iterable, n=1): [1, 2, 3, 4, 5] """ - it = iter(iterable) - head = take(n, it) - - return head.copy(), chain(head, it) + p, q = tee(iterable) + return take(n, q), p def interleave(*iterables): @@ -1558,8 +1558,8 @@ def split_into(iterable, sizes): [[1], [2, 3], [4], []] When a ``None`` object is encountered in *sizes*, the returned list will - contain items up to the end of *iterable* the same way that itertools.slice - does: + contain 
items up to the end of *iterable* the same way that + :func:`itertools.slice` does: >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None])) [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]] @@ -2167,13 +2167,11 @@ def __init__(self, *args): self._start, self._stop, self._step = args elif argc == 0: raise TypeError( - 'numeric_range expected at least ' - '1 argument, got {}'.format(argc) + f'numeric_range expected at least 1 argument, got {argc}' ) else: raise TypeError( - 'numeric_range expected at most ' - '3 arguments, got {}'.format(argc) + f'numeric_range expected at most 3 arguments, got {argc}' ) self._zero = type(self._step)(0) @@ -2236,7 +2234,7 @@ def __getitem__(self, key): else: raise TypeError( 'numeric range indices must be ' - 'integers or slices, not {}'.format(type(key).__name__) + f'integers or slices, not {type(key).__name__}' ) def __hash__(self): @@ -2277,13 +2275,10 @@ def __reduce__(self): def __repr__(self): if self._step == 1: - return "numeric_range({}, {})".format( - repr(self._start), repr(self._stop) - ) - else: - return "numeric_range({}, {}, {})".format( - repr(self._start), repr(self._stop), repr(self._step) - ) + return f"numeric_range({self._start!r}, {self._stop!r})" + return ( + f"numeric_range({self._start!r}, {self._stop!r}, {self._step!r})" + ) def __reversed__(self): return iter( @@ -2307,7 +2302,7 @@ def index(self, value): if r == self._zero: return int(q) - raise ValueError("{} is not in numeric range".format(value)) + raise ValueError(f"{value} is not in numeric range") def _get_by_index(self, i): if i < 0: @@ -2781,7 +2776,7 @@ def __len__(self): return len(self._target) def __repr__(self): - return '{}({})'.format(self.__class__.__name__, repr(self._target)) + return f'{self.__class__.__name__}({self._target!r})' class seekable: @@ -3443,8 +3438,8 @@ def only(iterable, default=None, too_long=None): pass else: msg = ( - 'Expected exactly one item in iterable, but got {!r}, {!r}, ' - 'and perhaps more.'.format(first_value, 
second_value) + f'Expected exactly one item in iterable, but got {first_value!r}, ' + f'{second_value!r}, and perhaps more.' ) raise too_long or ValueError(msg) @@ -3726,9 +3721,11 @@ def feed(i): reservoir = [] for _ in range(k): reservoir.append(feed(0)) - if strict and len(reservoir) < k: - raise ValueError('Sample larger than population') + if strict and len(reservoir) < k: + raise ValueError('Sample larger than population') + + with suppress(StopIteration): W = 1.0 while True: W *= exp(log(random()) / k) @@ -3821,15 +3818,16 @@ def is_sorted(iterable, key=None, reverse=False, strict=False): The function returns ``False`` after encountering the first out-of-order item, which means it may produce results that differ from the built-in - :func:`sorted` function for objects with unusual comparison dynamics. - If there are no out-of-order items, the iterable is exhausted. + :func:`sorted` function for objects with unusual comparison dynamics + (like ``math.nan``). If there are no out-of-order items, the iterable is + exhausted. 
""" - compare = le if strict else lt it = iterable if (key is None) else map(key, iterable) - it_1, it_2 = tee(it) - next(it_2 if reverse else it_1, None) - - return not any(map(compare, it_1, it_2)) + a, b = tee(it) + next(b, None) + if reverse: + b, a = a, b + return all(map(lt, a, b)) if strict else not any(map(lt, b, a)) class AbortThread(BaseException): @@ -4822,8 +4820,8 @@ def outer_product(func, xs, ys, *args, **kwargs): >>> xs = ['A', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B'] >>> ys = ['X', 'X', 'X', 'Y', 'Z', 'Z', 'Y', 'Y', 'Z', 'Z'] - >>> rows = list(zip(xs, ys)) - >>> count_rows = lambda x, y: rows.count((x, y)) + >>> pair_counts = Counter(zip(xs, ys)) + >>> count_rows = lambda x, y: pair_counts[x, y] >>> list(outer_product(count_rows, sorted(set(xs)), sorted(set(ys)))) [(2, 3, 0), (1, 0, 4)] @@ -4978,3 +4976,23 @@ def doublestarmap(func, iterable): """ for item in iterable: yield func(**item) + + +def _nth_prime_ub(n): + "Upper bound for the nth prime (counting from 1)." + # https://en.wikipedia.org/wiki/Prime-counting_function#Inequalities + return n * log(n * log(n)) if n >= 6 else 11.1 + + +def nth_prime(n): + """Return the nth prime (counting from 0). 
+ + >>> nth_prime(0) + 2 + >>> nth_prime(100) + 547 + """ + if n < 0: + raise ValueError + limit = math.ceil(_nth_prime_ub(n + 1)) + return nth(sieve(limit), n) diff --git a/contrib/python/more-itertools/py3/more_itertools/more.pyi b/contrib/python/more-itertools/py3/more_itertools/more.pyi index 66e6938e1345..b8406906154a 100644 --- a/contrib/python/more-itertools/py3/more_itertools/more.pyi +++ b/contrib/python/more-itertools/py3/more_itertools/more.pyi @@ -5,27 +5,141 @@ from __future__ import annotations import sys import types -from typing import ( - Any, - Callable, +from collections.abc import ( Container, - ContextManager, - Generic, Hashable, - Mapping, Iterable, Iterator, Mapping, - overload, Reversible, Sequence, Sized, - Type, +) +from contextlib import AbstractContextManager +from typing import ( + Any, + Callable, + Generic, TypeVar, + overload, type_check_only, ) from typing_extensions import Protocol +__all__ = [ + 'AbortThread', + 'SequenceView', + 'UnequalIterablesError', + 'adjacent', + 'all_unique', + 'always_iterable', + 'always_reversible', + 'bucket', + 'callback_iter', + 'chunked', + 'chunked_even', + 'circular_shifts', + 'collapse', + 'combination_index', + 'combination_with_replacement_index', + 'consecutive_groups', + 'constrained_batches', + 'consumer', + 'count_cycle', + 'countable', + 'dft', + 'difference', + 'distinct_combinations', + 'distinct_permutations', + 'distribute', + 'divide', + 'doublestarmap', + 'duplicates_everseen', + 'duplicates_justseen', + 'classify_unique', + 'exactly_n', + 'filter_except', + 'filter_map', + 'first', + 'gray_product', + 'groupby_transform', + 'ichunked', + 'iequals', + 'idft', + 'ilen', + 'interleave', + 'interleave_evenly', + 'interleave_longest', + 'intersperse', + 'is_sorted', + 'islice_extended', + 'iterate', + 'iter_suppress', + 'join_mappings', + 'last', + 'locate', + 'longest_common_prefix', + 'lstrip', + 'make_decorator', + 'map_except', + 'map_if', + 'map_reduce', + 'mark_ends', + 'minmax', 
+ 'nth_or_last', + 'nth_permutation', + 'nth_prime', + 'nth_product', + 'nth_combination_with_replacement', + 'numeric_range', + 'one', + 'only', + 'outer_product', + 'padded', + 'partial_product', + 'partitions', + 'peekable', + 'permutation_index', + 'powerset_of_sets', + 'product_index', + 'raise_', + 'repeat_each', + 'repeat_last', + 'replace', + 'rlocate', + 'rstrip', + 'run_length', + 'sample', + 'seekable', + 'set_partitions', + 'side_effect', + 'sliced', + 'sort_together', + 'split_after', + 'split_at', + 'split_before', + 'split_into', + 'split_when', + 'spy', + 'stagger', + 'strip', + 'strictly_n', + 'substrings', + 'substrings_indexes', + 'takewhile_inclusive', + 'time_limited', + 'unique_in_window', + 'unique_to_each', + 'unzip', + 'value_chain', + 'windowed', + 'windowed_complete', + 'with_iter', + 'zip_broadcast', + 'zip_equal', + 'zip_offset', +] + # Type and type variable definitions _T = TypeVar('_T') _T1 = TypeVar('_T1') @@ -38,7 +152,7 @@ _V = TypeVar('_V') _W = TypeVar('_W') _T_co = TypeVar('_T_co', covariant=True) _GenFn = TypeVar('_GenFn', bound=Callable[..., Iterator[Any]]) -_Raisable = BaseException | Type[BaseException] +_Raisable = BaseException | type[BaseException] # The type of isinstance's second argument (from typeshed builtins) if sys.version_info >= (3, 10): @@ -91,7 +205,7 @@ def consumer(func: _GenFn) -> _GenFn: ... def ilen(iterable: Iterable[_T]) -> int: ... def iterate(func: Callable[[_T], _T], start: _T) -> Iterator[_T]: ... def with_iter( - context_manager: ContextManager[Iterable[_T]], + context_manager: AbstractContextManager[Iterable[_T]], ) -> Iterator[_T]: ... def one( iterable: Iterable[_T], @@ -410,7 +524,7 @@ class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]): def __len__(self) -> int: ... def __reduce__( self, - ) -> tuple[Type[numeric_range[_T, _U]], tuple[_T, _T, _U]]: ... + ) -> tuple[type[numeric_range[_T, _U]], tuple[_T, _T, _U]]: ... def __repr__(self) -> str: ... 
def __reversed__(self) -> Iterator[_T]: ... def count(self, value: _T) -> int: ... @@ -567,12 +681,12 @@ def distinct_combinations( def filter_except( validator: Callable[[Any], object], iterable: Iterable[_T], - *exceptions: Type[BaseException], + *exceptions: type[BaseException], ) -> Iterator[_T]: ... def map_except( function: Callable[[Any], _U], iterable: Iterable[_T], - *exceptions: Type[BaseException], + *exceptions: type[BaseException], ) -> Iterator[_U]: ... def map_if( iterable: Iterable[Any], @@ -587,7 +701,7 @@ def _sample_counted( population: Iterator[_T], k: int, counts: Iterable[int], strict: bool ) -> list[_T]: ... def _sample_weighted( - iterator: Iterator[_T], k: int, weights, strict + iterator: Iterator[_T], k: int, weights: Iterator[float], strict: bool ) -> list[_T]: ... def sample( iterable: Iterable[_T], @@ -617,7 +731,7 @@ class callback_iter(Generic[_T], Iterator[_T]): def __enter__(self) -> callback_iter[_T]: ... def __exit__( self, - exc_type: Type[BaseException] | None, + exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: types.TracebackType | None, ) -> bool | None: ... @@ -797,7 +911,7 @@ def outer_product( ) -> Iterator[tuple[_V, ...]]: ... def iter_suppress( iterable: Iterable[_T], - *exceptions: Type[BaseException], + *exceptions: type[BaseException], ) -> Iterator[_T]: ... def filter_map( func: Callable[[_T], _V | None], @@ -813,3 +927,5 @@ def doublestarmap( ) -> Iterator[_T]: ... def dft(xarr: Sequence[complex]) -> Iterator[complex]: ... def idft(Xarr: Sequence[complex]) -> Iterator[complex]: ... +def _nth_prime_ub(n: int) -> float: ... +def nth_prime(n: int) -> int: ... 
diff --git a/contrib/python/more-itertools/py3/more_itertools/recipes.py b/contrib/python/more-itertools/py3/more_itertools/recipes.py index 67f76fa899ef..5a11fc5a3049 100644 --- a/contrib/python/more-itertools/py3/more_itertools/recipes.py +++ b/contrib/python/more-itertools/py3/more_itertools/recipes.py @@ -13,7 +13,7 @@ from collections import deque from collections.abc import Sized -from functools import partial, reduce +from functools import lru_cache, partial from itertools import ( chain, combinations, @@ -42,8 +42,10 @@ 'factor', 'flatten', 'grouper', + 'is_prime', 'iter_except', 'iter_index', + 'loops', 'matmul', 'ncycles', 'nth', @@ -872,8 +874,10 @@ def polynomial_from_roots(roots): >>> polynomial_from_roots(roots) # x^3 - 4 * x^2 - 17 * x + 60 [1, -4, -17, 60] """ - factors = zip(repeat(1), map(operator.neg, roots)) - return list(reduce(convolve, factors, [1])) + poly = [1] + for root in roots: + poly = list(convolve(poly, (1, -root))) + return poly def iter_index(iterable, value, start=0, stop=None): @@ -1005,20 +1009,56 @@ def matmul(m1, m2): return batched(starmap(_sumprod, product(m1, transpose(m2))), n) +def _factor_pollard(n): + # Return a factor of n using Pollard's rho algorithm + gcd = math.gcd + for b in range(1, n - 2): + x = y = 2 + d = 1 + while d == 1: + x = (x * x + b) % n + y = (y * y + b) % n + y = (y * y + b) % n + d = gcd(x - y, n) + if d != n: + return d + raise ValueError('prime or under 5') + + +_primes_below_211 = tuple(sieve(211)) + + def factor(n): """Yield the prime factors of n. >>> list(factor(360)) [2, 2, 2, 3, 3, 5] + + Finds small factors with trial division. Larger factors are + either verified as prime with ``is_prime`` or split into + smaller factors with Pollard's rho algorithm. 
""" - for prime in sieve(math.isqrt(n) + 1): + + # Corner case reduction + if n < 2: + return + + # Trial division reduction + for prime in _primes_below_211: while not n % prime: yield prime n //= prime - if n == 1: - return - if n > 1: - yield n + + # Pollard's rho reduction + primes = [] + todo = [n] if n > 1 else [] + for n in todo: + if n < 211**2 or is_prime(n): + primes.append(n) + else: + fact = _factor_pollard(n) + todo += (fact, n // fact) + yield from sorted(primes) def polynomial_eval(coefficients, x): @@ -1073,3 +1113,87 @@ def totient(n): for prime in set(factor(n)): n -= n // prime return n + + +# Miller–Rabin primality test: https://oeis.org/A014233 +_perfect_tests = [ + (2047, (2,)), + (9080191, (31, 73)), + (4759123141, (2, 7, 61)), + (1122004669633, (2, 13, 23, 1662803)), + (2152302898747, (2, 3, 5, 7, 11)), + (3474749660383, (2, 3, 5, 7, 11, 13)), + (18446744073709551616, (2, 325, 9375, 28178, 450775, 9780504, 1795265022)), + ( + 3317044064679887385961981, + (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41), + ), +] + + +@lru_cache +def _shift_to_odd(n): + 'Return s, d such that 2**s * d == n' + s = ((n - 1) ^ n).bit_length() - 1 + d = n >> s + assert (1 << s) * d == n and d & 1 and s >= 0 + return s, d + + +def _strong_probable_prime(n, base): + assert (n > 2) and (n & 1) and (2 <= base < n) + + s, d = _shift_to_odd(n - 1) + + x = pow(base, d, n) + if x == 1 or x == n - 1: + return True + + for _ in range(s - 1): + x = x * x % n + if x == n - 1: + return True + + return False + + +def is_prime(n): + """Return ``True`` if *n* is prime and ``False`` otherwise. + + >>> is_prime(37) + True + >>> is_prime(3 * 13) + False + >>> is_prime(18_446_744_073_709_551_557) + True + + This function uses the Miller-Rabin primality test, which can return false + positives for very large inputs. For values of *n* below 10**24 + there are no false positives. For larger values, there is less than + a 1 in 2**128 false positive rate. 
Multiple tests can further reduce the + chance of a false positive. + """ + if n < 17: + return n in {2, 3, 5, 7, 11, 13} + if not (n & 1 and n % 3 and n % 5 and n % 7 and n % 11 and n % 13): + return False + for limit, bases in _perfect_tests: + if n < limit: + break + else: + bases = [randrange(2, n - 1) for i in range(64)] + return all(_strong_probable_prime(n, base) for base in bases) + + +def loops(n): + """Returns an iterable with *n* elements for efficient looping. + Like ``range(n)`` but doesn't create integers. + + >>> i = 0 + >>> for _ in loops(5): + ... i += 1 + >>> i + 5 + + """ + return repeat(None, n) diff --git a/contrib/python/more-itertools/py3/more_itertools/recipes.pyi b/contrib/python/more-itertools/py3/more_itertools/recipes.pyi index 739acec05fb3..e404f4d3df46 100644 --- a/contrib/python/more-itertools/py3/more_itertools/recipes.pyi +++ b/contrib/python/more-itertools/py3/more_itertools/recipes.pyi @@ -2,17 +2,65 @@ from __future__ import annotations +from collections.abc import Iterable, Iterator, Sequence from typing import ( Any, Callable, - Iterable, - Iterator, - overload, - Sequence, - Type, TypeVar, + overload, ) +__all__ = [ + 'all_equal', + 'batched', + 'before_and_after', + 'consume', + 'convolve', + 'dotproduct', + 'first_true', + 'factor', + 'flatten', + 'grouper', + 'is_prime', + 'iter_except', + 'iter_index', + 'loops', + 'matmul', + 'ncycles', + 'nth', + 'nth_combination', + 'padnone', + 'pad_none', + 'pairwise', + 'partition', + 'polynomial_eval', + 'polynomial_from_roots', + 'polynomial_derivative', + 'powerset', + 'prepend', + 'quantify', + 'reshape', + 'random_combination_with_replacement', + 'random_combination', + 'random_permutation', + 'random_product', + 'repeatfunc', + 'roundrobin', + 'sieve', + 'sliding_window', + 'subslices', + 'sum_of_squares', + 'tabulate', + 'tail', + 'take', + 'totient', + 'transpose', + 'triplewise', + 'unique', + 'unique_everseen', + 'unique_justseen', +] + # Type and type variable definitions 
_T = TypeVar('_T') _T1 = TypeVar('_T1') @@ -69,13 +117,13 @@ def unique( @overload def iter_except( func: Callable[[], _T], - exception: Type[BaseException] | tuple[Type[BaseException], ...], + exception: type[BaseException] | tuple[type[BaseException], ...], first: None = ..., ) -> Iterator[_T]: ... @overload def iter_except( func: Callable[[], _T], - exception: Type[BaseException] | tuple[Type[BaseException], ...], + exception: type[BaseException] | tuple[type[BaseException], ...], first: Callable[[], _U], ) -> Iterator[_T | _U]: ... @overload @@ -129,8 +177,14 @@ def reshape( matrix: Iterable[Iterable[_T]], cols: int ) -> Iterator[tuple[_T, ...]]: ... def matmul(m1: Sequence[_T], m2: Sequence[_T]) -> Iterator[tuple[_T]]: ... +def _factor_trial(n: int) -> Iterator[int]: ... +def _factor_pollard(n: int) -> int: ... def factor(n: int) -> Iterator[int]: ... def polynomial_eval(coefficients: Sequence[_T], x: _U) -> _U: ... def sum_of_squares(it: Iterable[_T]) -> _T: ... def polynomial_derivative(coefficients: Sequence[_T]) -> list[_T]: ... def totient(n: int) -> int: ... +def _shift_to_odd(n: int) -> tuple[int, int]: ... +def _strong_probable_prime(n: int, base: int) -> bool: ... +def is_prime(n: int) -> bool: ... +def loops(n: int) -> Iterator[None]: ... 
diff --git a/contrib/python/more-itertools/py3/tests/test_more.py b/contrib/python/more-itertools/py3/tests/test_more.py index 1a70ea08e57b..bfbf583f28f8 100644 --- a/contrib/python/more-itertools/py3/tests/test_more.py +++ b/contrib/python/more-itertools/py3/tests/test_more.py @@ -3836,7 +3836,7 @@ def __hash__(self): return hash(self._collection) def __repr__(self): - return "FrozenSet([{}]".format(", ".join(repr(x) for x in iter(self))) + return f'FrozenSet([{", ".join(repr(x) for x in iter(self))}]' class SetPartitionsTests(TestCase): @@ -4321,6 +4321,33 @@ def test_invariance_under_permutations_weighted(self): # The observed largest difference in 10,000 simulations was 4.337999 self.assertTrue(difference_in_means < 4.4) + def test_error_cases(self): + + # weights and counts are mutually exclusive + with self.assertRaises(TypeError): + mi.sample( + 'abcde', 3, weights=[1, 2, 3, 4, 5], counts=[1, 2, 3, 4, 5] + ) + + # Weighted sample larger than population + with self.assertRaises(ValueError): + mi.sample('abcde', 10, weights=[1, 2, 3, 4, 5], strict=True) + + # Counted sample larger than population + with self.assertRaises(ValueError): + mi.sample('abcde', 10, counts=[1, 1, 1, 1, 1], strict=True) + + +class BarelySortable: + def __init__(self, value): + self.value = value + + def __lt__(self, other): + return self.value < other.value + + def __int__(self): + return int(self.value) + class IsSortedTests(TestCase): def test_basic(self): @@ -4330,7 +4357,6 @@ def test_basic(self): ([1, 2, 3], {}, True), ([1, 1, 2, 3], {}, True), ([1, 10, 2, 3], {}, False), - ([3, float('nan'), 1, 2], {}, True), (['1', '10', '2', '3'], {}, True), (['1', '10', '2', '3'], {'key': int}, False), ([1, 2, 3], {'reverse': True}, False), @@ -4362,17 +4388,6 @@ def test_basic(self): {'strict': True, 'key': int, 'reverse': True}, False, ), - # We'll do the same weird thing as Python here - (['nan', 0, 'nan', 0], {'key': float}, True), - ([0, 'nan', 0, 'nan'], {'key': float}, True), - (['nan', 
0, 'nan', 0], {'key': float, 'reverse': True}, True), - ([0, 'nan', 0, 'nan'], {'key': float, 'reverse': True}, True), - ([0, 'nan', 0, 'nan'], {'strict': True, 'key': float}, True), - ( - ['nan', 0, 'nan', 0], - {'strict': True, 'key': float, 'reverse': True}, - True, - ), ]: key = kwargs.get('key', None) reverse = kwargs.get('reverse', False) @@ -4382,7 +4397,10 @@ def test_basic(self): iterable=iterable, key=key, reverse=reverse, strict=strict ): mi_result = mi.is_sorted( - iter(iterable), key=key, reverse=reverse, strict=strict + map(BarelySortable, iterable), + key=key, + reverse=reverse, + strict=strict, ) sorted_iterable = sorted(iterable, key=key, reverse=reverse) diff --git a/contrib/python/more-itertools/py3/tests/test_recipes.py b/contrib/python/more-itertools/py3/tests/test_recipes.py index 684a6fcd0b1f..a810b8de1e07 100644 --- a/contrib/python/more-itertools/py3/tests/test_recipes.py +++ b/contrib/python/more-itertools/py3/tests/test_recipes.py @@ -4,7 +4,7 @@ from functools import reduce from itertools import combinations, count, groupby, permutations from operator import mul -from math import factorial +from math import comb, factorial from sys import version_info from unittest import TestCase, skipIf from unittest.mock import patch @@ -923,6 +923,12 @@ def test_basic(self): actual = mi.polynomial_from_roots(roots) self.assertEqual(actual, expected) + def test_large(self): + n = 1_500 + actual = mi.polynomial_from_roots([-1] * n) + expected = [comb(n, k) for k in range(n + 1)] + self.assertEqual(actual, expected) + class PolynomialEvalTests(TestCase): def test_basic(self): @@ -1147,8 +1153,12 @@ def test_basic(self): (6, [2, 3]), (360, [2, 2, 2, 3, 3, 5]), (128_884_753_939, [128_884_753_939]), - (999953 * 999983, [999953, 999983]), - (909_909_090_909, [3, 3, 7, 13, 13, 751, 113797]), + (999_953 * 999_983, [999_953, 999_983]), + (909_909_090_909, [3, 3, 7, 13, 13, 751, 1_137_97]), + ( + 1_647_403_876_764_101_672_307_088, + [2, 2, 2, 2, 19, 23, 109471, 
13571009, 158594251], + ), ): with self.subTest(n=n): actual = list(mi.factor(n)) @@ -1209,3 +1219,169 @@ def test_basic(self): ): with self.subTest(n=n): self.assertEqual(mi.totient(n), expected) + + +class PrimeFunctionTests(TestCase): + def test_is_prime_pseudoprimes(self): + # Carmichael number that strong pseudoprime to prime bases < 307 + # https://doi.org/10.1006/jsco.1995.1042 + p = 29674495668685510550154174642905332730771991799853043350995075531276838753171770199594238596428121188033664754218345562493168782883 # noqa:E501 + gnarly_carmichael = (313 * (p - 1) + 1) * (353 * (p - 1) + 1) + + for n in ( + # Least Carmichael number with n prime factors: + # https://oeis.org/A006931 + 561, + 41041, + 825265, + 321197185, + 5394826801, + 232250619601, + 9746347772161, + 1436697831295441, + 60977817398996785, + 7156857700403137441, + 1791562810662585767521, + 87674969936234821377601, + 6553130926752006031481761, + 1590231231043178376951698401, + # Carmichael numbers with exactly 4 prime factors: + # https://oeis.org/A074379 + 41041, + 62745, + 63973, + 75361, + 101101, + 126217, + 172081, + 188461, + 278545, + 340561, + 449065, + 552721, + 656601, + 658801, + 670033, + 748657, + 838201, + 852841, + 997633, + 1033669, + 1082809, + 1569457, + 1773289, + 2100901, + 2113921, + 2433601, + 2455921, + # Lucas-Carmichael numbers: + # https://oeis.org/A006972 + 399, + 935, + 2015, + 2915, + 4991, + 5719, + 7055, + 8855, + 12719, + 18095, + 20705, + 20999, + 22847, + 29315, + 31535, + 46079, + 51359, + 60059, + 63503, + 67199, + 73535, + 76751, + 80189, + 81719, + 88559, + 90287, + # Strong pseudoprimes to bases 2, 3 and 5: + # https://oeis.org/A056915 + 25326001, + 161304001, + 960946321, + 1157839381, + 3215031751, + 3697278427, + 5764643587, + 6770862367, + 14386156093, + 15579919981, + 18459366157, + 19887974881, + 21276028621, + 27716349961, + 29118033181, + 37131467521, + 41752650241, + 42550716781, + 43536545821, + # Strong pseudoprimes to bases 2, 3, 5, and 7: + # 
https://oeis.org/A211112 + 39365185894561, + 52657210792621, + 11377272352951, + 15070413782971, + 3343433905957, + 16603327018981, + 3461715915661, + 52384617784801, + 3477707481751, + 18996486073489, + 55712149574381, + gnarly_carmichael, + ): + with self.subTest(n=n): + self.assertFalse(mi.is_prime(n)) + + def test_primes(self): + for i, n in enumerate(mi.sieve(10**5)): + with self.subTest(n=n): + self.assertTrue(mi.is_prime(n)) + self.assertEqual(mi.nth_prime(i), n) + + self.assertFalse(mi.is_prime(-1)) + with self.assertRaises(ValueError): + mi.nth_prime(-1) + + def test_special_primes(self): + for n in ( + # Mersenee primes: + # https://oeis.org/A211112 + 3, + 7, + 31, + 127, + 8191, + 131071, + 524287, + 2147483647, + 2305843009213693951, + 618970019642690137449562111, + 162259276829213363391578010288127, + 170141183460469231731687303715884105727, + # Various big primes: + # https://bigprimes.org/ + 7990614013, + 80358337843874809987, + 814847562949580526031364519741, + 1982427225022428178169740526258124929077, + 91828213828508622559862344537590739566883686537727, + 406414746815201693481517584049440077164779143248351060891669, + ): + with self.subTest(n=n): + self.assertTrue(mi.is_prime(n)) + + +class LoopsTests(TestCase): + def test_basic(self): + self.assertTrue( + all(list(mi.loops(n)) == [None] * n for n in range(-10, 10)) + ) diff --git a/contrib/python/more-itertools/py3/ya.make b/contrib/python/more-itertools/py3/ya.make index 45df93175b64..b604dff01581 100644 --- a/contrib/python/more-itertools/py3/ya.make +++ b/contrib/python/more-itertools/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(10.5.0) +VERSION(10.6.0) LICENSE(MIT) diff --git a/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h b/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h index ee10f0225d1d..c337f342a0e4 100644 --- a/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h +++ 
b/contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h @@ -44,8 +44,6 @@ #ifndef cJSON__h #define cJSON__h -namespace Aws { - #if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) #define __WINDOWS__ #endif @@ -117,6 +115,8 @@ then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJ #define cJSON_IsReference 256 #define cJSON_StringIsConst 512 +namespace Aws { + /* The cJSON structure: */ typedef struct cJSON { diff --git a/contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch b/contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch new file mode 100644 index 000000000000..807b008f3997 --- /dev/null +++ b/contrib/restricted/aws/aws-crt-cpp/patches/01-move-include-from-namespace.patch @@ -0,0 +1,20 @@ +--- contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h (f680f339877c3c3539c85345ee15f023012893c9) ++++ contrib/restricted/aws/aws-crt-cpp/include/aws/crt/external/cJSON.h (working tree) +@@ -44,8 +44,6 @@ + #ifndef cJSON__h + #define cJSON__h + +-namespace Aws { +- + #if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) + #define __WINDOWS__ + #endif +@@ -117,6 +115,8 @@ then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJ + #define cJSON_IsReference 256 + #define cJSON_StringIsConst 512 + ++namespace Aws { ++ + /* The cJSON structure: */ + typedef struct cJSON + { diff --git a/library/cpp/http/simple/http_client.cpp b/library/cpp/http/simple/http_client.cpp index 00145c804f94..bac6bdc39edf 100644 --- a/library/cpp/http/simple/http_client.cpp +++ b/library/cpp/http/simple/http_client.cpp @@ -327,9 +327,18 @@ void TRedirectableHttpClient::ProcessResponse(const TStringBuf relativeUrl, THtt TStringBuf schemeHostPort = GetSchemeHostAndPort(i->Value()); TStringBuf scheme("http://"); TStringBuf host("unknown"); - ui16 port = 80; + ui16 port = 
0; GetSchemeHostAndPort(schemeHostPort, scheme, host, port); TStringBuf body = GetPathAndQuery(i->Value(), false); + if (port == 0) { + if (scheme.StartsWith("https")) { + port = 443; + } else if (scheme.StartsWith("http")) { + port = 80; + } else { + port = 80; + } + } auto opts = Opts; opts.Host(TString(scheme) + TString(host)); diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index 4f0ddab7413d..b8dffb997f84 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -3697,6 +3697,38 @@ TExprNode::TPtr ReplaceFuncWithImpl(const TExprNode::TPtr& node, TExprContext& c .Build(); } +TExprNode::TPtr MemberNthOverFlatMapWithOptional(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + YQL_ENSURE(node->IsCallable({"Member", "Nth"})); + YQL_ENSURE(optCtx.Types); + static const char optName[] = "MemberNthOverFlatMap"; + if (!IsOptimizerEnabled(*optCtx.Types) || IsOptimizerDisabled(*optCtx.Types)) { + return node; + } + if (auto maybeFlatMap = TMaybeNode(node->HeadPtr())) { + auto flatMap = maybeFlatMap.Cast(); + if (flatMap.Input().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional && + flatMap.Lambda().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) + { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.Builder(node->Pos()) + .Callable(flatMap.CallableName()) + .Add(0, flatMap.Input().Ptr()) + .Lambda(1) + .Param("item") + .Callable(node->Content()) + .Apply(0, flatMap.Lambda().Ptr()) + .With(0, "item") + .Seal() + .Add(1, node->Child(1)) + .Seal() + .Seal() + .Seal() + .Build(); + } + } + return node; +} + } // namespace void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { @@ -4619,7 +4651,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ctx.NewCallable(node->Pos(), "AsStruct", std::move(asStructChildren)); }; - 
map["Member"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + map["Member"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { if (node->Head().IsCallable("AsStruct")) { YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); return ExtractMember(*node); @@ -4643,6 +4675,10 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ctx.ChangeChild(*node, 0, node->Head().HeadPtr()); } + if (auto opt = MemberNthOverFlatMapWithOptional(node, ctx, optCtx); opt != node) { + return opt; + } + return node; }; @@ -4664,7 +4700,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { map["AsStruct"] = std::bind(&OptimizeAsStruct, _1, _2); - map["Nth"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + map["Nth"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { if (node->Head().Type() == TExprNode::List) { YQL_CLOG(DEBUG, Core) << node->Content() << " over tuple literal"; const auto index = FromString(node->Tail().Content()); @@ -4701,6 +4737,10 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { .Ptr(); } + if (auto opt = MemberNthOverFlatMapWithOptional(node, ctx, optCtx); opt != node) { + return opt; + } + return node; }; diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 771090c78b4a..3af33bed49d1 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -176,4 +176,26 @@ bool CheckSupportedTypes( bool allowNestedOptionals = true ); +template +bool IsOptimizerEnabled(const TTypeAnnotationContext& types) { + struct TFlag { + TFlag(const TTypeAnnotationContext& types) + : Value(types.OptimizerFlags.contains(to_lower(TString(OptName)))) + {} + const bool Value; + }; + return Singleton(types)->Value; +} + +template +bool IsOptimizerDisabled(const TTypeAnnotationContext& types) { + struct TFlag { + 
TFlag(const TTypeAnnotationContext& types) + : Value(types.OptimizerFlags.contains(to_lower("Disable" + TString(OptName)))) + {} + const bool Value; + }; + return Singleton(types)->Value; +} + } diff --git a/yql/essentials/docs/ru/toc_i.yaml b/yql/essentials/docs/ru/toc_i.yaml index f1080ccf57af..aa8b7a95dad8 100644 --- a/yql/essentials/docs/ru/toc_i.yaml +++ b/yql/essentials/docs/ru/toc_i.yaml @@ -5,5 +5,5 @@ items: include: { mode: link, path: syntax/toc_i.yaml } - name: Встроенные функции include: { mode: link, path: builtins/toc_i.yaml } -- name: Репецпты +- name: Рецепты include: { mode: link, path: recipes/toc_i.yaml } diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp index 3b180827a658..19619bf6e898 100644 --- a/yql/essentials/sql/v1/format/sql_format.cpp +++ b/yql/essentials/sql/v1/format/sql_format.cpp @@ -496,6 +496,10 @@ friend struct TStaticData; ++CommentLines; } + if (!text.StartsWith("--")) { + CommentLines += CountIf(text, [](auto c) { return c == '\n'; }); + } + Out(text); if (text.StartsWith("--") && !text.EndsWith("\n")) { diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h index 0960fec3b693..b22bf1cb21cc 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.h +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -1,10 +1,10 @@ Y_UNIT_TEST(Pragma) { TCases cases = { - {"pragma user = user;","PRAGMA user = user;\n"}, - {"pragma user = default;","PRAGMA user = default;\n"}, - {"pragma user.user = user;","PRAGMA user.user = user;\n"}, - {"pragma user.user(user);","PRAGMA user.user(user);\n"}, - {"pragma user.user(user, user);","PRAGMA user.user(user, user);\n"}, + {"pragma user = user;", "PRAGMA user = user;\n"}, + {"pragma user = default;", "PRAGMA user = default;\n"}, + {"pragma user.user = user;", "PRAGMA user.user = user;\n"}, + {"pragma user.user(user);", "PRAGMA user.user(user);\n"}, + {"pragma user.user(user, user);", "PRAGMA 
user.user(user, user);\n"}, }; TSetup setup; @@ -1512,6 +1512,16 @@ Y_UNIT_TEST(Union) { setup.Run(cases); } +Y_UNIT_TEST(Comment) { + TCases cases = { + {"/*\nmulti\nline\ncomment\n*/\npragma foo = \"true\";\npragma bar = \"1\"", + "/*\nmulti\nline\ncomment\n*/\nPRAGMA foo = 'true';\nPRAGMA bar = '1';\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + Y_UNIT_TEST(CommentAfterLastSelect) { TCases cases = { {"SELECT 1--comment\n", diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index 6213e35fe2f9..d7566137c570 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -226,43 +226,13 @@ class TToUnits { } }; -template +template struct TGetTimeComponent { typedef bool TTypeAwareMarker; - template - static TFieldStorage Core(TInput val) { - if constexpr (AlwaysZero) { - return 0; - } - - if constexpr (InputFractional) { - if constexpr (Fractional) { - return (val / Scale) % Limit; - } else { - return (val / 1000000u / Scale) % Limit; - } - } else { - if constexpr (Fractional) { - return 0; - } else { - return (val / Scale) % Limit; - } - } - } - - class TImpl : public TBoxedValue { - public: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - if (!args[0]) { - return {}; - } - - return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor)); - } - }; - static const TStringRef& Name() { static auto name = TStringRef(TFuncName, std::strlen(TFuncName)); return name; @@ -278,118 +248,175 @@ struct TGetTimeComponent { return false; } - try { - auto typeInfoHelper = builder.TypeInfoHelper(); - TTupleTypeInspector tuple(*typeInfoHelper, userType); - if (tuple) { - Y_ENSURE(tuple.GetElementsCount() > 0); - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple); - if (argsTuple.GetElementsCount() != 1) { - 
builder.SetError("Expected one argument"); - return true; - } + if (!userType) { + builder.SetError("User type is missing"); + return true; + } + builder.UserType(userType); - auto argType = argsTuple.GetElementType(0); - TVector argBlockTypes; - argBlockTypes.push_back(argType); + const auto typeInfoHelper = builder.TypeInfoHelper(); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple, "Tuple with args and options tuples expected"); + Y_ENSURE(tuple.GetElementsCount() > 0, + "Tuple has to contain positional arguments"); - TBlockTypeInspector block(*typeInfoHelper, argType); - if (block) { - Y_ENSURE(!block.IsScalar()); - argType = block.GetItemType(); - } + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple, "Tuple with args expected"); + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Single argument expected"); + return true; + } - bool isOptional = false; - if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) { - argType = opt.GetItemType(); - isOptional = true; - } + auto argType = argsTuple.GetElementType(0); - TResourceTypeInspector res(*typeInfoHelper, argType); - if (!res) { - TDataTypeInspector data(*typeInfoHelper, argType); - if (!data) { - builder.SetError("Expected data type"); - return true; - } + TVector argBlockTypes; + argBlockTypes.push_back(argType); - auto typeId = data.GetTypeId(); - if (typeId == TDataType::Id || - typeId == TDataType::Id || - typeId == TDataType::Id) { + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + Y_ENSURE(!block.IsScalar()); + argType = block.GetItemType(); + } - builder.Args()->Add(argsTuple.GetElementType(0)).Done(); - const TType* retType = builder.SimpleType(); + bool isOptional = false; + if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) { + argType = opt.GetItemType(); + isOptional = true; + } - if (isOptional) { - retType = builder.Optional()->Item(retType).Build(); - } + 
TResourceTypeInspector resource(*typeInfoHelper, argType); + if (!resource) { + TDataTypeInspector data(*typeInfoHelper, argType); + if (!data) { + builder.SetError("Data type expected"); + return true; + } - auto outputType = retType; - if (block) { - retType = builder.Block(block.IsScalar())->Item(retType).Build(); - } + const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; + if (features & NUdf::BigDateType) { + BuildSignature(builder, typesOnly); + return true; + } + if (features & NUdf::TzDateType) { + BuildSignature(builder, typesOnly); + return true; + } - builder.Returns(retType); - builder.SupportsBlocks(); - builder.IsStrict(); - - builder.UserType(userType); - if (!typesOnly) { - if (typeId == TDataType::Id) { - if (block) { - builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); - } else { - builder.Implementation(new TUnaryOverOptionalImpl>()); - } - } + if (features & NUdf::DateType) { + builder.Args()->Add(argsTuple.GetElementType(0)).Done(); + const TType* retType = builder.SimpleType(); - if (typeId == TDataType::Id) { - if (block) { - builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); - } else { - builder.Implementation(new TUnaryOverOptionalImpl>()); - } - } + if (isOptional) { + retType = builder.Optional()->Item(retType).Build(); + } - if (typeId == TDataType::Id) { - if (block) { - builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); - } else { - builder.Implementation(new TUnaryOverOptionalImpl>()); - } - } + auto outputType = retType; + if (block) { + retType = 
builder.Block(block.IsScalar())->Item(retType).Build(); + } + + builder.Returns(retType); + builder.SupportsBlocks(); + builder.IsStrict(); + + if (!typesOnly) { + const auto typeId = data.GetTypeId(); + if (typeId == TDataType::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl>()); } + } - return true; + if (typeId == TDataType::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl>()); + } } - } else { - Y_ENSURE(!block); - if (res.GetTag() != TStringRef::Of(TMResourceName)) { - builder.SetError("Unexpected resource tag"); - return true; + + if (typeId == TDataType::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl>()); + } } } + return true; } - // default implementation - builder.Args()->Add>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done(); - builder.Returns(); - builder.IsStrict(); - if (!typesOnly) { - builder.Implementation(new TImpl()); - } - } catch (const std::exception& e) { - builder.SetError(TStringBuf(e.what())); + ::TStringBuilder sb; + sb << "Invalid argument type: got "; + TTypePrinter(*typeInfoHelper, argType).Out(sb.Out); + sb << ", but Resource<" << TMResourceName <<"> or Resource<" + << TM64ResourceName << "> expected"; + builder.SetError(sb); + return true; } + Y_ENSURE(!block); + + if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) { + 
BuildSignature(builder, typesOnly); + return true; + } + + if (resource.GetTag() == TStringRef::Of(TMResourceName)) { + BuildSignature(builder, typesOnly); + return true; + } + + builder.SetError("Unexpected Resource tag"); return true; } +private: + template + static TFieldStorage Core(TInput val) { + if constexpr (AlwaysZero) { + return 0; + } + + if constexpr (InputFractional) { + if constexpr (Fractional) { + return (val / Scale) % Limit; + } else { + return (val / 1000000u / Scale) % Limit; + } + } else { + if constexpr (Fractional) { + return 0; + } else { + return (val / Scale) % Limit; + } + } + } + + template + class TImpl : public TBoxedValue { + public: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + EMPTY_RESULT_ON_EMPTY_ARG(0); + return TUnboxedValuePod((TResult(Func(args[0])) / Divisor)); + } + }; + + template + static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { + builder.Returns(); + builder.Args()->Add>>(); + builder.IsStrict(); + if (!typesOnly) { + builder.Implementation(new TImpl()); + } + } }; namespace { @@ -2623,11 +2650,11 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV TGetDateComponent, TGetDateComponent, TGetDateComponentName, GetDayOfWeekName>, - TGetTimeComponent, - TGetTimeComponent, - TGetTimeComponent, - TGetTimeComponent, - TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, + TGetTimeComponent, TGetDateComponent, TGetDateComponentName, GetTimezoneName>, diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt index a8562845b586..600a5335a1aa 100644 --- a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt +++ 
b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_Get_/results.txt @@ -52,6 +52,26 @@ ] ] ]; + [ + "rhour"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rmonth"; [ @@ -72,6 +92,26 @@ ] ] ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rtz"; [ @@ -92,6 +132,16 @@ ] ] ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; [ "rweekofyear"; [ @@ -170,6 +220,26 @@ ] ] ]; + [ + "rhour"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rmonth"; [ @@ -190,6 +260,26 @@ ] ] ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; [ "rtz"; [ @@ -210,6 +300,16 @@ ] ] ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; [ "rweekofyear"; [ @@ -261,6 +361,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -270,9 +376,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -296,6 +411,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -305,9 +426,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -333,18 +463,33 @@ [ "1" ]; + [ + "23" + ]; + [ + "59" + ]; [ "1" ]; [ "January" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "1" ]; @@ -368,18 +513,33 @@ [ "1" ]; + [ + "23" + ]; + [ + "59" + ]; [ "1" ]; [ "January" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "1" ]; @@ -405,6 +565,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -414,9 +580,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -440,6 +615,12 @@ [ "1" ]; + [ + 
"0" + ]; + [ + "0" + ]; [ "1" ]; @@ -449,9 +630,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -477,18 +667,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; @@ -512,18 +717,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; @@ -549,6 +769,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -558,9 +784,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -584,6 +819,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -593,9 +834,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -621,6 +871,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -630,9 +886,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -656,6 +921,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -665,9 +936,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -693,6 +973,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -702,9 +988,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -728,6 +1023,12 @@ [ "1" ]; + [ + "0" + ]; + [ + "0" + ]; [ "1" ]; @@ -737,9 +1038,18 @@ [ "0" ]; + [ + "0" + ]; + [ + "0" + ]; [ "GMT" ]; + [ + "0" + ]; [ "1" ]; @@ -765,18 +1075,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; @@ -800,18 +1125,33 @@ [ "365" ]; + [ + "23" + ]; + [ + "59" + ]; [ "12" ]; [ "December" ]; + [ + "999" + ]; + [ + "59" + ]; [ "0" ]; [ "GMT" ]; + [ + "999999" + ]; [ "53" ]; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql index 6b777c9ab9c6..934e934b2d47 100644 
--- a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/Get.sql @@ -10,6 +10,11 @@ $check = ($arg) -> { rdayofmonth: DateTime::GetDayOfMonth($arg), rdayofweek: DateTime::GetDayOfWeek($arg), rdayofweekname: DateTime::GetDayOfWeekName($arg), + rhour: DateTime::GetHour($arg), + rminute: DateTime::GetMinute($arg), + rsecond: DateTime::GetSecond($arg), + rmsec: DateTime::GetMillisecondOfSecond($arg), + rusec: DateTime::GetMicrosecondOfSecond($arg), rtz: DateTime::GetTimezoneId($arg), rtzname: DateTime::GetTimezoneName($arg), |> diff --git a/yt/cpp/mapreduce/interface/operation.h b/yt/cpp/mapreduce/interface/operation.h index 7da77213d413..aa109575a8a3 100644 --- a/yt/cpp/mapreduce/interface/operation.h +++ b/yt/cpp/mapreduce/interface/operation.h @@ -779,7 +779,7 @@ struct TUserJobSpec /// @note /// When @ref NYT::TOperationOptions::MountSandboxInTmpfs is enabled library will compute /// total size of all files used by this job and add this total size to MemoryLimit. - /// Thus you shouldn't include size of your files (e.g. binary file) into MemoryLimit. + /// Thus, you shouldn't include size of your files (e.g. binary file) into MemoryLimit. 
/// /// @note /// Final memory memory_limit passed to YT is calculated as follows: diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp index 0afdbe789a1c..107d17bd4785 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_content.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NYql { @@ -154,6 +155,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::NonOptimalTableContent( } } if (materialize) { + if (!NPrivate::EnsurePersistableYsonTypes(section.Pos(), *section.Ref().GetTypeAnn()->Cast()->GetItemType(), ctx, state)) { + return {}; + } auto path = CopyOrTrivialMap(section.Pos(), TExprBase(world), TYtDSink(ctx.RenameNode(read.DataSource().Ref(), "DataSink")), diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp index 918aec0795da..c175bcade9b9 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_join.cpp @@ -223,6 +223,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::EquiJoin(TExprBase node if (NYql::HasSetting(sectionNode.Settings().Ref(), EYtSettingType::Sample)) { auto scheme = list.Ref().GetTypeAnn()->Cast()->GetItemType(); + if (!NPrivate::EnsurePersistableYsonTypes(sectionNode.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(sectionNode.Pos(), TExprBase(world ? 
world : ctx.NewWorld(sectionNode.Pos())), dataSink.Cast(), diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp index 30e21070dad0..8ec8d9f1ec09 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_misc.cpp @@ -445,6 +445,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::Extend(TExprBase node, return node; } auto scheme = section.Ref().GetTypeAnn()->Cast()->GetItemType(); + if (!NPrivate::EnsurePersistableYsonTypes(section.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(section.Pos(), read.Cast().World(), dataSink, *scheme, @@ -492,6 +495,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::Extend(TExprBase node, if (State_->Types->EvaluationInProgress) { return node; } + if (!NPrivate::EnsurePersistableYsonTypes(extend.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(extend.Pos(), world, dataSink, *scheme, @@ -818,6 +824,9 @@ TMaybeNode TYtPhysicalOptProposalTransformer::ResPull(TExprBase node, bool keepSorted = ctx.IsConstraintEnabled() ? 
(!NYql::HasSetting(section.Settings().Ref(), EYtSettingType::Unordered) && !hasNonTemp && section.Paths().Size() == 1) // single sorted input from operation : (!hasDynamic || !NYql::HasAnySetting(section.Settings().Ref(), EYtSettingType::Take | EYtSettingType::Skip)); // compatibility - all except dynamic with limit + if (!NPrivate::EnsurePersistableYsonTypes(read.Pos(), *scheme, ctx, State_)) { + return {}; + } auto path = CopyOrTrivialMap(read.Pos(), read.World(), TYtDSink(ctx.RenameNode(read.DataSource().Ref(), "DataSink")), diff --git a/yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg b/yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg new file mode 100644 index 000000000000..6098eb907d55 --- /dev/null +++ b/yt/yql/tests/sql/suites/join/pullup_extra_columns.cfg @@ -0,0 +1,2 @@ +in Input1 kv1_sorted.txt +in Input2 kv2_sorted.txt diff --git a/yt/yql/tests/sql/suites/join/pullup_extra_columns.sql b/yt/yql/tests/sql/suites/join/pullup_extra_columns.sql new file mode 100644 index 000000000000..342a7bb967f4 --- /dev/null +++ b/yt/yql/tests/sql/suites/join/pullup_extra_columns.sql @@ -0,0 +1,13 @@ +use plato; + +pragma config.flags('OptimizerFlags', 'MemberNthOverFlatMap'); +pragma yt.MapJoinLimit="1m"; + + +$t1 = select k1, v1 from Input1; +$t2 = select k2, v2, u2 as renamed from Input2; + +select + a.*, + b.v2, +from $t1 as a left join any $t2 as b on a.k1 = b.k2; diff --git a/yt/yt/client/table_client/config.cpp b/yt/yt/client/table_client/config.cpp index 78131f636084..ec590507dc88 100644 --- a/yt/yt/client/table_client/config.cpp +++ b/yt/yt/client/table_client/config.cpp @@ -474,6 +474,11 @@ void TChunkWriterOptions::Register(TRegistrar registrar) registrar.Parameter("max_heavy_columns", &TThis::MaxHeavyColumns) .Default(0); + registrar.Parameter("block_size", &TThis::BlockSize) + .Default(); + registrar.Parameter("buffer_size", &TThis::BufferSize) + .Default(); + registrar.Postprocessor([] (TThis* config) { if (config->ValidateUniqueKeys && 
!config->ValidateSorted) { THROW_ERROR_EXCEPTION("\"validate_unique_keys\" is allowed to be true only if \"validate_sorted\" is true"); diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h index bdb92d90be61..c21ed33d0de2 100644 --- a/yt/yt/client/table_client/config.h +++ b/yt/yt/client/table_client/config.h @@ -440,6 +440,9 @@ class TChunkWriterOptions //! Maximum number of heavy columns in approximate statistics. int MaxHeavyColumns; + std::optional BlockSize; + std::optional BufferSize; + void EnableValidationOptions(bool validateAnyIsValidYson = false); REGISTER_YSON_STRUCT(TChunkWriterOptions); diff --git a/yt/yt/client/table_client/pipe.cpp b/yt/yt/client/table_client/pipe.cpp index be6607b232e5..fcb77361e769 100644 --- a/yt/yt/client/table_client/pipe.cpp +++ b/yt/yt/client/table_client/pipe.cpp @@ -42,10 +42,13 @@ struct TSchemafulPipe::TData void ResetReaderReadyEvent() { ReaderReadyEvent = NewPromise(); - ReaderReadyEvent.OnCanceled(BIND([=, this, this_ = MakeStrong(this)] (const TError& error) { - Fail(TError(NYT::EErrorCode::Canceled, "Pipe reader canceled") - << error); - })); + ReaderReadyEvent.OnCanceled(BIND(&TSchemafulPipe::TData::HandleCancel, MakeWeak(this))); + } + + void HandleCancel(const TError& error) + { + Fail(TError(NYT::EErrorCode::Canceled, "Pipe reader canceled") + << error); } void Fail(const TError& error) diff --git a/yt/yt/client/table_client/unittests/serialization_ut.cpp b/yt/yt/client/table_client/unittests/serialization_ut.cpp index 2f33425d4be4..ffa87259595a 100644 --- a/yt/yt/client/table_client/unittests/serialization_ut.cpp +++ b/yt/yt/client/table_client/unittests/serialization_ut.cpp @@ -2,6 +2,7 @@ #include #include + #include #include diff --git a/yt/yt/core/misc/collection_helpers-inl.h b/yt/yt/core/misc/collection_helpers-inl.h index 1afff7460ed6..fa299a05ca45 100644 --- a/yt/yt/core/misc/collection_helpers-inl.h +++ b/yt/yt/core/misc/collection_helpers-inl.h @@ -155,7 +155,7 @@ 
TKeySet DropAndReturnMissingKeys(TMap&& map, const TKeySet& set) } template -void DropMissingKeys(TMap&& map, const TKeySet& set) +void DropMissingKeys(TMap&& map, TKeySet&& set) { for (auto it = map.begin(); it != map.end(); ) { if (!set.contains(it->first)) { diff --git a/yt/yt/core/misc/collection_helpers.h b/yt/yt/core/misc/collection_helpers.h index 4695096e2588..f06740e24535 100644 --- a/yt/yt/core/misc/collection_helpers.h +++ b/yt/yt/core/misc/collection_helpers.h @@ -43,7 +43,7 @@ template [[nodiscard]] TKeySet DropAndReturnMissingKeys(TMap&& map, const TKeySet& set); template -void DropMissingKeys(TMap&& map, const TKeySet& set); +void DropMissingKeys(TMap&& map, TKeySet&& set); /*! * This function is supposed to replace a frequent pattern diff --git a/yt/yt/core/net/local_address.cpp b/yt/yt/core/net/local_address.cpp index 541b152cdbfb..19d307faae64 100644 --- a/yt/yt/core/net/local_address.cpp +++ b/yt/yt/core/net/local_address.cpp @@ -169,10 +169,10 @@ void UpdateLocalHostName(const TAddressResolverConfigPtr& config) //////////////////////////////////////////////////////////////////////////////// -const TString& GetLoopbackAddress() +const std::string& GetLoopbackAddress() { - static const TString ipv4result("[127.0.1.1]"); - static const TString ipv6result("[::1]"); + static const std::string ipv4result("[127.0.1.1]"); + static const std::string ipv6result("[::1]"); return IPv6Enabled_.load(std::memory_order::relaxed) ? ipv6result : ipv4result; } diff --git a/yt/yt/core/net/local_address.h b/yt/yt/core/net/local_address.h index 3665feaa4da6..1c1c6bd2ff4e 100644 --- a/yt/yt/core/net/local_address.h +++ b/yt/yt/core/net/local_address.h @@ -41,7 +41,7 @@ TString GetLocalHostName(); TString GetLocalYPCluster(); // Returns the loopback address (either IPv4 or IPv6, depending on the configuration). -const TString& GetLoopbackAddress(); +const std::string& GetLoopbackAddress(); ////////////////////////////////////////////////////////////////////////////////