From d86586dd6b616482a4c7259e389135526db8c5f1 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Tue, 23 Jan 2024 22:54:51 +0100 Subject: [PATCH 01/13] Allow `clippy::{nursery, pedantic}` for generated bindings Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/src/bindgen/module.rs | 2 ++ pyo3_bindgen_engine/tests/bindgen.rs | 30 ++++++++++++++++------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/pyo3_bindgen_engine/src/bindgen/module.rs b/pyo3_bindgen_engine/src/bindgen/module.rs index ccd0655..2465d1f 100644 --- a/pyo3_bindgen_engine/src/bindgen/module.rs +++ b/pyo3_bindgen_engine/src/bindgen/module.rs @@ -240,6 +240,8 @@ pub fn bind_module( #[doc = #doc] #[allow( clippy::all, + clippy::nursery, + clippy::pedantic, non_camel_case_types, non_snake_case, non_upper_case_globals, diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index befbc2c..8779fa4 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ b/pyo3_bindgen_engine/tests/bindgen.rs @@ -43,7 +43,15 @@ test_bindgen! { rs:r#" /// - #[allow(clippy::all, non_camel_case_types, non_snake_case, non_upper_case_globals, unused)] + #[allow( + clippy::all, + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused + )] mod t_mod_test_bindgen_attribute { ///Getter for the `t_const_float` attribute pub fn t_const_float<'py>(py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { @@ -77,10 +85,12 @@ test_bindgen! { /// #[allow( clippy::all, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused )] mod t_mod_test_bindgen_function { ///t_docs @@ -128,10 +138,12 @@ test_bindgen! 
{ /// #[allow( clippy::all, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused )] mod t_mod_test_bindgen_class { ///t_docs From 32b99f682d71b8b5462b6ca060a9bb5cd4e344cd Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Wed, 24 Jan 2024 10:24:39 +0100 Subject: [PATCH 02/13] Add support for tuples with ellipsis Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/src/types.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyo3_bindgen_engine/src/types.rs b/pyo3_bindgen_engine/src/types.rs index b36d862..efbd4aa 100644 --- a/pyo3_bindgen_engine/src/types.rs +++ b/pyo3_bindgen_engine/src/types.rs @@ -629,6 +629,9 @@ impl Type { quote::quote! { &'py ::pyo3::types::PyTuple } + } else if t_sequence.len() == 2 && t_sequence.last().unwrap() == &Self::PyEllipsis { + Self::PyList(Box::new(t_sequence[0].clone())) + .into_rs_owned(module_name, all_types) } else { let inner = t_sequence .into_iter() @@ -828,6 +831,9 @@ impl Type { quote::quote! 
{ &'py ::pyo3::types::PyTuple } + } else if t_sequence.len() == 2 && t_sequence.last().unwrap() == &Self::PyEllipsis { + Self::PyList(Box::new(t_sequence[0].clone())) + .into_rs_borrowed(module_name, all_types) } else { let inner = t_sequence .into_iter() From 2acac4820513b79e9295d8cabd6d4c53c1a24d2b Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Wed, 24 Jan 2024 14:57:17 +0100 Subject: [PATCH 03/13] Fix bindings for `__init__()` and `__call__()` Signed-off-by: Andrej Orsula --- pyo3_bindgen_cli/src/main.rs | 4 +- pyo3_bindgen_engine/src/bindgen/class.rs | 115 ++++++++++++-------- pyo3_bindgen_engine/src/bindgen/function.rs | 51 +++++++-- pyo3_bindgen_engine/src/bindgen/module.rs | 22 +++- pyo3_bindgen_engine/tests/bindgen.rs | 54 ++++----- 5 files changed, 151 insertions(+), 95 deletions(-) diff --git a/pyo3_bindgen_cli/src/main.rs b/pyo3_bindgen_cli/src/main.rs index 86e0306..5567db1 100644 --- a/pyo3_bindgen_cli/src/main.rs +++ b/pyo3_bindgen_cli/src/main.rs @@ -7,9 +7,9 @@ fn main() { let args = Args::parse(); // Generate the bindings for the module specified by the `--module-name` argument - let bindings = pyo3_bindgen::generate_bindings(&args.module_name).unwrap_or_else(|_| { + let bindings = pyo3_bindgen::generate_bindings(&args.module_name).unwrap_or_else(|err| { panic!( - "Failed to generate bindings for module: {}", + "Failed to generate bindings for module: {}\n{err}", args.module_name ) }); diff --git a/pyo3_bindgen_engine/src/bindgen/class.rs b/pyo3_bindgen_engine/src/bindgen/class.rs index 0b5ea46..ba2ef48 100644 --- a/pyo3_bindgen_engine/src/bindgen/class.rs +++ b/pyo3_bindgen_engine/src/bindgen/class.rs @@ -14,16 +14,7 @@ pub fn bind_class( let root_module_name = root_module.name()?; let class_full_name = class.name()?; let class_name = class_full_name.split('.').last().unwrap(); - let class_module_name = format!( - "{}{}{}", - class.getattr("__module__")?, - if class_full_name.contains('.') { - "." 
- } else { - "" - }, - class_full_name.trim_end_matches(&format!(".{class_name}")) - ); + let class_module_name = class.getattr("__module__")?.to_string(); // Create the Rust class identifier (raw string if it is a keyword) let class_ident = if syn::parse_str::(class_name).is_ok() { @@ -32,18 +23,65 @@ pub fn bind_class( quote::format_ident!("r#{class_name}") }; - let mut fn_names = Vec::new(); + // let mut fn_names = Vec::new(); - // Iterate over all attributes of the module while updating the token stream let mut impl_token_stream = proc_macro2::TokenStream::new(); + + // Implement new() + if class.hasattr("__init__")? { + for i in 0.. { + let new_fn_name = if i == 0 { + "new".to_string() + } else { + format!("new{i}") + }; + if !class.hasattr(new_fn_name.as_str())? { + impl_token_stream.extend(bind_function( + py, + &class_module_name, + &new_fn_name, + class.getattr("__init__")?, + all_types, + Some(class), + )); + break; + } + } + } + // Implement call() method + if class.hasattr("__call__")? { + for i in 0.. { + let call_fn_name = if i == 0 { + "call".to_string() + } else { + format!("call{i}") + }; + if !class.hasattr(call_fn_name.as_str())? 
{ + impl_token_stream.extend(bind_function( + py, + &class_module_name, + &call_fn_name, + class.getattr("__call__")?, + all_types, + Some(class), + )); + break; + } + } + } + + // Iterate over all attributes of the module while updating the token stream class .dir() .iter() - .map(|name| { + .filter_map(|name| { let name = name.str().unwrap().to_str().unwrap(); - let attr = class.getattr(name).unwrap(); - let attr_type = attr.get_type(); - (name, attr, attr_type) + if let Ok(attr) = class.getattr(name) { + let attr_type = attr.get_type(); + Some((name, attr, attr_type)) + } else { + None + } }) .filter(|&(_, _, attr_type)| { // Skip builtin functions @@ -52,8 +90,8 @@ pub fn bind_class( .unwrap_or(false) }) .filter(|&(name, _, _)| { - // Skip private attributes (except for __init__ and __call__) - !name.starts_with('_') || name == "__init__" || name == "__call__" + // Skip private attributes + !name.starts_with('_') }) .filter(|(_, attr, attr_type)| { // Skip typing attributes @@ -136,20 +174,22 @@ pub fn bind_class( debug_assert!(![is_class, is_function].iter().all(|&v| v)); if is_class && !is_reexport { - impl_token_stream.extend(bind_class( - py, - root_module, - attr.downcast().unwrap(), - all_types, - )); + // TODO: Properly handle nested classes + // impl_token_stream.extend(bind_class( + // py, + // root_module, + // attr.downcast().unwrap(), + // all_types, + // )); } else if is_function { - fn_names.push(name.to_string()); + // fn_names.push(name.to_string()); impl_token_stream.extend(bind_function( py, &class_module_name, name, attr, all_types, + Some(class), )); } else if !name.starts_with('_') { impl_token_stream.extend(bind_attribute( @@ -164,39 +204,20 @@ pub fn bind_class( } }); - // Add new and call aliases (currently a reimplemented versions of the function) - // TODO: Call the Rust `self.__init__()` and `self.__call__()` functions directly instead of reimplementing it - if fn_names.contains(&"__init__".to_string()) && 
!fn_names.contains(&"new".to_string()) { - impl_token_stream.extend(bind_function( - py, - &class_module_name, - "new", - class.getattr("__init__")?, - all_types, - )); - } - if fn_names.contains(&"__call__".to_string()) && !fn_names.contains(&"call".to_string()) { - impl_token_stream.extend(bind_function( - py, - &class_module_name, - "call", - class.getattr("__call__")?, - all_types, - )); - } - let mut doc = class.getattr("__doc__")?.to_string(); if doc == "None" { doc = String::new(); }; + let object_name = format!("{class_module_name}.{class_name}"); + Ok(quote::quote! { #[doc = #doc] #[repr(transparent)] pub struct #class_ident(::pyo3::PyAny); // Note: Using these macros is probably not the best idea, but it makes possible wrapping around ::pyo3::PyAny instead of ::pyo3::PyObject, which improves usability ::pyo3::pyobject_native_type_named!(#class_ident); - ::pyo3::pyobject_native_type_info!(#class_ident, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), ::std::option::Option::Some(#class_module_name)); + ::pyo3::pyobject_native_type_info!(#class_ident, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), ::std::option::Option::Some(#object_name)); ::pyo3::pyobject_native_type_extract!(#class_ident); #[automatically_derived] impl #class_ident { diff --git a/pyo3_bindgen_engine/src/bindgen/function.rs b/pyo3_bindgen_engine/src/bindgen/function.rs index 54ccfda..7bd310a 100644 --- a/pyo3_bindgen_engine/src/bindgen/function.rs +++ b/pyo3_bindgen_engine/src/bindgen/function.rs @@ -11,6 +11,7 @@ pub fn bind_function( name: &str, function: &pyo3::PyAny, all_types: &std::collections::HashSet, + method_of_class: Option<&pyo3::types::PyType>, ) -> Result { let inspect = py.import("inspect")?; @@ -136,6 +137,8 @@ pub fn bind_function( let has_self_param = parameters .iter() .any(|(param_name, _, _, _)| param_name == "self"); + let is_class_method = + method_of_class.is_some() && (!has_self_param || function_name == 
"__init__"); let param_idents = parameters .iter() @@ -167,13 +170,25 @@ pub fn bind_function( doc = String::new(); }; - let (maybe_ref_self, callable_object) = if has_self_param { - (quote::quote! { &'py self, }, quote::quote! { self }) + let (has_self_param, is_class_method) = if function_name == "__call__" { + (true, false) } else { - ( + (has_self_param, is_class_method) + }; + + let (maybe_ref_self, callable_object) = match (has_self_param, is_class_method) { + (true, false) => (quote::quote! { &'py self, }, quote::quote! { self }), + (_, true) => { + let class_name = method_of_class.unwrap().name().unwrap(); + ( + quote::quote! {}, + quote::quote! { py.import(::pyo3::intern!(py, #module_name))?.getattr(::pyo3::intern!(py, #class_name))?}, + ) + } + _ => ( quote::quote! {}, quote::quote! { py.import(::pyo3::intern!(py, #module_name))? }, - ) + ), }; let has_positional_args = !positional_args_idents.is_empty(); @@ -211,21 +226,41 @@ pub fn bind_function( #(__internal_kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents)?;)* }; - let call_method = match (has_positional_args, has_kwargs) { - (_, true) => { + let is_init_fn = function_name == "__init__"; + + let call_method = match (is_init_fn, has_positional_args, has_kwargs) { + (true, _, true) => { + quote::quote! { + #set_args + #set_kwargs + #callable_object.call(__internal_args, Some(__internal_kwargs))? + } + } + (true, true, false) => { + quote::quote! { + #set_args + #callable_object.call1(__internal_args)? + } + } + (true, false, false) => { + quote::quote! { + #callable_object.call0()? + } + } + (false, _, true) => { quote::quote! { #set_args #set_kwargs #callable_object.call_method(::pyo3::intern!(py, #function_name), __internal_args, Some(__internal_kwargs))? } } - (true, false) => { + (false, true, false) => { quote::quote! { #set_args #callable_object.call_method1(::pyo3::intern!(py, #function_name), __internal_args)? 
} } - (false, false) => { + (false, false, false) => { quote::quote! { #callable_object.call_method0(::pyo3::intern!(py, #function_name))? } diff --git a/pyo3_bindgen_engine/src/bindgen/module.rs b/pyo3_bindgen_engine/src/bindgen/module.rs index 2465d1f..653ce3b 100644 --- a/pyo3_bindgen_engine/src/bindgen/module.rs +++ b/pyo3_bindgen_engine/src/bindgen/module.rs @@ -47,7 +47,7 @@ pub fn bind_module( }) .filter(|&(name, _, _)| { // Skip private attributes - !name.starts_with('_') || name == "__init__" || name == "__call__" + !name.starts_with('_') }) .filter(|(_, attr, attr_type)| { // Skip typing attributes @@ -153,7 +153,7 @@ pub fn bind_module( let content = if is_class { bind_class(py, root_module, attr.downcast().unwrap(), all_types).unwrap() } else if is_function { - bind_function(py, full_module_name, name, attr, all_types).unwrap() + bind_function(py, full_module_name, name, attr, all_types, None).unwrap() } else { unreachable!() }; @@ -201,7 +201,10 @@ pub fn bind_module( attr, )); } - } else if is_reexport { + } else if is_reexport + && (is_function + || (is_class && all_types.contains(&format!("{full_module_name}.{name}")))) + { mod_token_stream.extend(bind_reexport( root_module_name, full_module_name, @@ -216,7 +219,14 @@ pub fn bind_module( all_types, )); } else if is_function { - mod_token_stream.extend(bind_function(py, full_module_name, name, attr, all_types)); + mod_token_stream.extend(bind_function( + py, + full_module_name, + name, + attr, + all_types, + None, + )); } else { mod_token_stream.extend(bind_attribute( py, @@ -383,7 +393,7 @@ pub fn collect_types_of_module( }) .filter(|&(name, _, _)| { // Skip private attributes - !name.starts_with('_') || name == "__init__" || name == "__call__" + !name.starts_with('_') }) .filter(|(_, attr, attr_type)| { // Skip typing attributes @@ -502,7 +512,7 @@ pub fn collect_types_of_module( all_types, ); } - } else if is_class { + } else if is_class && !attr.to_string().contains("") { let full_class_name = 
format!("{}.{}", full_module_name, attr.getattr("__name__").unwrap()); all_types.insert(full_class_name.clone()); diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index 8779fa4..5771de3 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ b/pyo3_bindgen_engine/tests/bindgen.rs @@ -138,12 +138,12 @@ test_bindgen! { /// #[allow( clippy::all, - clippy::nursery, - clippy::pedantic, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused )] mod t_mod_test_bindgen_class { ///t_docs @@ -153,14 +153,13 @@ test_bindgen! { ::pyo3::pyobject_native_type_info!( t_class, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), - ::std::option::Option::Some("t_mod_test_bindgen_classt_class") + ::std::option::Option::Some("t_mod_test_bindgen_class.t_class") ); ::pyo3::pyobject_native_type_extract!(t_class); #[automatically_derived] impl t_class { ///t_docs_init - pub fn __init__<'py>( - &'py self, + pub fn new<'py>( py: ::pyo3::marker::Python<'py>, t_arg1: &str, t_arg2: ::std::option::Option, @@ -169,11 +168,20 @@ test_bindgen! { let __internal_kwargs = ::pyo3::types::PyDict::new(py); __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg2"), t_arg2)?; - self.call_method( - ::pyo3::intern!(py, "__init__"), - __internal_args, - Some(__internal_kwargs), - )? + py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? + .getattr(::pyo3::intern!(py, "t_class"))? + .call(__internal_args, Some(__internal_kwargs))? + .extract() + } + ///Call self as a function. 
+ pub fn call<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + args: &'py ::pyo3::types::PyTuple, + kwargs: &'py ::pyo3::types::PyDict, + ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { + let __internal_args = args; + self.call_method1(::pyo3::intern!(py, "__call__"), __internal_args)? .extract() } ///t_docs_method @@ -209,24 +217,6 @@ test_bindgen! { self.setattr(::pyo3::intern!(py, "t_prop"), value)?; Ok(()) } - ///t_docs_init - pub fn new<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - t_arg1: &str, - t_arg2: ::std::option::Option, - ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - let __internal_args = (); - let __internal_kwargs = ::pyo3::types::PyDict::new(py); - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg2"), t_arg2)?; - self.call_method( - ::pyo3::intern!(py, "__init__"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() - } } } "# From 0b22727e88fb7096810a3c5b07218161e757b51a Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Fri, 26 Jan 2024 22:07:01 +0100 Subject: [PATCH 04/13] Fix return type for `__init__()` bindings Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/src/bindgen/function.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyo3_bindgen_engine/src/bindgen/function.rs b/pyo3_bindgen_engine/src/bindgen/function.rs index 7bd310a..3c8218c 100644 --- a/pyo3_bindgen_engine/src/bindgen/function.rs +++ b/pyo3_bindgen_engine/src/bindgen/function.rs @@ -162,8 +162,6 @@ pub fn bind_function( .into_rs_borrowed(module_name, all_types) }) .collect_vec(); - let return_annotation = - Type::try_from(return_annotation.unwrap_or(pynone))?.into_rs_owned(module_name, all_types); let mut doc = function.getattr("__doc__")?.to_string(); if doc == "None" { @@ -228,6 +226,14 @@ pub fn bind_function( let is_init_fn = function_name == "__init__"; + let return_annotation = if is_init_fn && method_of_class.is_some() { + 
quote::quote! { + &'py Self + } + } else { + Type::try_from(return_annotation.unwrap_or(pynone))?.into_rs_owned(module_name, all_types) + }; + let call_method = match (is_init_fn, has_positional_args, has_kwargs) { (true, _, true) => { quote::quote! { From 37d7d68bf1bb1c2867abfc7b2df5a64cbafc0551 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Fri, 26 Jan 2024 22:08:45 +0100 Subject: [PATCH 05/13] Improve semantics of positional `*args` parameters Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/src/bindgen/function.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pyo3_bindgen_engine/src/bindgen/function.rs b/pyo3_bindgen_engine/src/bindgen/function.rs index 3c8218c..5350681 100644 --- a/pyo3_bindgen_engine/src/bindgen/function.rs +++ b/pyo3_bindgen_engine/src/bindgen/function.rs @@ -156,10 +156,14 @@ pub fn bind_function( let param_types = parameters .iter() .skip(usize::from(has_self_param)) - .map(|(_, param_annotation, _, _)| { - Type::try_from(param_annotation.unwrap_or_else(|| pynone)) - .unwrap() - .into_rs_borrowed(module_name, all_types) + .map(|&(_, param_annotation, _, param_kind)| { + if param_kind == "VAR_POSITIONAL" { + quote::quote! { impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>} + } else { + Type::try_from(param_annotation.unwrap_or_else(|| pynone)) + .unwrap() + .into_rs_borrowed(module_name, all_types) + } }) .collect_vec(); From 4fb32d7c87e3ec1e81e2bef2fdc65a21045ea038 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Fri, 26 Jan 2024 22:16:26 +0100 Subject: [PATCH 06/13] Update test case Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/tests/bindgen.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index 5771de3..620f3ad 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ b/pyo3_bindgen_engine/tests/bindgen.rs @@ -163,7 +163,7 @@ test_bindgen! 
{ py: ::pyo3::marker::Python<'py>, t_arg1: &str, t_arg2: ::std::option::Option, - ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { + ) -> ::pyo3::PyResult<&'py Self> { let __internal_args = (); let __internal_kwargs = ::pyo3::types::PyDict::new(py); __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; @@ -177,7 +177,7 @@ test_bindgen! { pub fn call<'py>( &'py self, py: ::pyo3::marker::Python<'py>, - args: &'py ::pyo3::types::PyTuple, + args: impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>, kwargs: &'py ::pyo3::types::PyDict, ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { let __internal_args = args; From e4e063e5c6093e1027f5ffa5faf8de0e0164a65f Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Fri, 26 Jan 2024 22:17:00 +0100 Subject: [PATCH 07/13] Apply clippy suggestions Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/src/bindgen/function.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyo3_bindgen_engine/src/bindgen/function.rs b/pyo3_bindgen_engine/src/bindgen/function.rs index 5350681..b2af45a 100644 --- a/pyo3_bindgen_engine/src/bindgen/function.rs +++ b/pyo3_bindgen_engine/src/bindgen/function.rs @@ -160,7 +160,7 @@ pub fn bind_function( if param_kind == "VAR_POSITIONAL" { quote::quote! 
{ impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>} } else { - Type::try_from(param_annotation.unwrap_or_else(|| pynone)) + Type::try_from(param_annotation.unwrap_or(pynone)) .unwrap() .into_rs_borrowed(module_name, all_types) } From 198e71b470098d3ea7c45edb18a231801096c0cb Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Sun, 25 Feb 2024 01:47:59 +0100 Subject: [PATCH 08/13] Refactoring: Add parsing [skip ci] Signed-off-by: Andrej Orsula --- Cargo.lock | 61 +- Cargo.toml | 3 + pyo3_bindgen_engine/Cargo.toml | 3 + pyo3_bindgen_engine/src/bindgen.rs | 150 ----- pyo3_bindgen_engine/src/bindgen/attribute.rs | 131 ----- pyo3_bindgen_engine/src/bindgen/class.rs | 275 --------- pyo3_bindgen_engine/src/bindgen/function.rs | 290 ---------- pyo3_bindgen_engine/src/bindgen/module.rs | 525 ------------------ pyo3_bindgen_engine/src/build_utils.rs | 54 -- pyo3_bindgen_engine/src/codegen.rs | 101 ++++ pyo3_bindgen_engine/src/config.rs | 45 ++ pyo3_bindgen_engine/src/lib.rs | 17 +- pyo3_bindgen_engine/src/syntax/class.rs | 141 +++++ .../src/syntax/common/attribute_variant.rs | 74 +++ .../src/syntax/common/ident.rs | 98 ++++ pyo3_bindgen_engine/src/syntax/common/mod.rs | 7 + pyo3_bindgen_engine/src/syntax/common/path.rs | 211 +++++++ pyo3_bindgen_engine/src/syntax/function.rs | 165 ++++++ pyo3_bindgen_engine/src/syntax/import.rs | 51 ++ pyo3_bindgen_engine/src/syntax/mod.rs | 15 + pyo3_bindgen_engine/src/syntax/module.rs | 261 +++++++++ pyo3_bindgen_engine/src/syntax/property.rs | 168 ++++++ pyo3_bindgen_engine/src/syntax/type_var.rs | 19 + pyo3_bindgen_engine/src/traits.rs | 9 + .../src/{types.rs => types/mod.rs} | 12 +- pyo3_bindgen_engine/src/utils/error.rs | 18 + pyo3_bindgen_engine/src/utils/io.rs | 43 ++ pyo3_bindgen_engine/src/utils/mod.rs | 6 + pyo3_bindgen_engine/src/utils/result.rs | 5 + 29 files changed, 1521 insertions(+), 1437 deletions(-) delete mode 100644 pyo3_bindgen_engine/src/bindgen.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/attribute.rs 
delete mode 100644 pyo3_bindgen_engine/src/bindgen/class.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/function.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/module.rs delete mode 100644 pyo3_bindgen_engine/src/build_utils.rs create mode 100644 pyo3_bindgen_engine/src/codegen.rs create mode 100644 pyo3_bindgen_engine/src/config.rs create mode 100644 pyo3_bindgen_engine/src/syntax/class.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/ident.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/mod.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/path.rs create mode 100644 pyo3_bindgen_engine/src/syntax/function.rs create mode 100644 pyo3_bindgen_engine/src/syntax/import.rs create mode 100644 pyo3_bindgen_engine/src/syntax/mod.rs create mode 100644 pyo3_bindgen_engine/src/syntax/module.rs create mode 100644 pyo3_bindgen_engine/src/syntax/property.rs create mode 100644 pyo3_bindgen_engine/src/syntax/type_var.rs create mode 100644 pyo3_bindgen_engine/src/traits.rs rename pyo3_bindgen_engine/src/{types.rs => types/mod.rs} (99%) create mode 100644 pyo3_bindgen_engine/src/utils/error.rs create mode 100644 pyo3_bindgen_engine/src/utils/io.rs create mode 100644 pyo3_bindgen_engine/src/utils/mod.rs create mode 100644 pyo3_bindgen_engine/src/utils/result.rs diff --git a/Cargo.lock b/Cargo.lock index 68a85bc..3166b63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.11" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540" dependencies = [ "anstyle", "anstyle-parse", @@ -67,9 +67,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version 
= "2.0.13" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00ad3f3a942eee60335ab4342358c161ee296829e0d16ff42fc1d6cb07815467" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" dependencies = [ "anstyle", "bstr", @@ -100,9 +100,9 @@ checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "bstr" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", "regex-automata", @@ -602,7 +602,10 @@ dependencies = [ "pyo3", "pyo3-build-config", "quote", + "rustc-hash", "syn", + "thiserror", + "typed-builder", ] [[package]] @@ -683,6 +686,12 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.38.30" @@ -783,6 +792,26 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "thiserror" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name 
= "tinytemplate" version = "1.2.1" @@ -793,6 +822,26 @@ dependencies = [ "serde_json", ] +[[package]] +name = "typed-builder" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "444d8748011b93cb168770e8092458cb0f8854f931ff82fdf6ddfbd72a9c933e" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "563b3b88238ec95680aef36bdece66896eaa7ce3c0f1b4f39d38fb2435261352" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/Cargo.toml b/Cargo.toml index dcce94a..3af5ef2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,4 +38,7 @@ proc-macro2 = { version = "1" } pyo3 = { version = "0.20", default-features = false } pyo3-build-config = { version = "0.20", features = ["resolve-config"] } quote = { version = "1" } +rustc-hash = { version = "1" } syn = { version = "2" } +thiserror = { version = "1" } +typed-builder = { version = "0.18" } diff --git a/pyo3_bindgen_engine/Cargo.toml b/pyo3_bindgen_engine/Cargo.toml index 99275c3..c408237 100644 --- a/pyo3_bindgen_engine/Cargo.toml +++ b/pyo3_bindgen_engine/Cargo.toml @@ -16,7 +16,10 @@ itertools = { workspace = true } proc-macro2 = { workspace = true } pyo3 = { workspace = true } quote = { workspace = true } +rustc-hash = { workspace = true } syn = { workspace = true } +thiserror = { workspace = true } +typed-builder = { workspace = true } [dev-dependencies] criterion = { workspace = true } diff --git a/pyo3_bindgen_engine/src/bindgen.rs b/pyo3_bindgen_engine/src/bindgen.rs deleted file mode 100644 index 5c73d0f..0000000 --- a/pyo3_bindgen_engine/src/bindgen.rs +++ /dev/null @@ -1,150 +0,0 @@ -//! Module for handling the binding generation process. 
- -pub mod attribute; -pub mod class; -pub mod function; -pub mod module; - -pub use attribute::bind_attribute; -pub use class::bind_class; -pub use function::bind_function; -pub use module::{bind_module, bind_reexport}; - -// TODO: Refactor everything into a large configurable struct that keeps track of all the -// important information needed to properly generate the bindings -// - Use builder pattern for the configuration of the struct -// - Keep track of all the types/classes that have been generated -// - Keep track of all imports to understand where each type is coming from -// - Keep track of all the external types that are used as parameters/return types and consider generating bindings for them as well - -// TODO: Ensure there are no duplicate entries in the generated code - -/// Generate Rust bindings to a Python module specified by its name. Generating bindings to -/// submodules such as `os.path` is also supported as long as the module can be directly imported -/// from the Python interpreter via `import os.path`. -/// -/// # Arguments -/// -/// * `module_name` - Name of the Python module to generate bindings for. -/// -/// # Returns -/// -/// `Result` containing the generated bindings as a `proc_macro2::TokenStream` on success, or a -/// `pyo3::PyErr` on failure. -/// -/// # Example -/// -/// ``` -/// // use pyo3_bindgen::generate_bindings; -/// use pyo3_bindgen_engine::generate_bindings; -/// -/// fn main() -> Result<(), pyo3::PyErr> { -/// let bindings: proc_macro2::TokenStream = generate_bindings("os")?; -/// Ok(()) -/// } -/// ``` -pub fn generate_bindings(module_name: &str) -> Result { - #[cfg(not(PyPy))] - pyo3::prepare_freethreaded_python(); - - pyo3::Python::with_gil(|py| { - let module = py.import(module_name)?; - generate_bindings_for_module(py, module) - }) -} - -/// Generate Rust bindings to an instance of `pyo3::types::PyModule` Python module. -/// -/// # Arguments -/// -/// * `py` - Python interpreter instance. 
-/// * `module` - Python module to generate bindings for. -/// -/// # Returns -/// -/// `Result` containing the generated bindings as a `proc_macro2::TokenStream` on success, or a -/// `pyo3::PyErr` on failure. -/// -/// # Example -/// -/// ``` -/// // use pyo3_bindgen::generate_bindings_for_module; -/// use pyo3_bindgen_engine::generate_bindings_for_module; -/// -/// fn main() -> Result<(), pyo3::PyErr> { -/// pyo3::prepare_freethreaded_python(); -/// let bindings: proc_macro2::TokenStream = pyo3::Python::with_gil(|py| { -/// let module = py.import("os")?; -/// generate_bindings_for_module(py, module) -/// })?; -/// Ok(()) -/// } -/// ``` -pub fn generate_bindings_for_module( - py: pyo3::Python, - module: &pyo3::types::PyModule, -) -> Result { - let all_types = module::collect_types_of_module( - py, - module, - module, - &mut std::collections::HashSet::new(), - &mut std::collections::HashSet::default(), - )?; - - bind_module( - py, - module, - module, - &mut std::collections::HashSet::new(), - &all_types, - ) -} - -/// Generate Rust bindings to a Python module specified by its `source_code`. The module will be -/// named `new_module_name` in the generated bindings. However, the generated bindings might not -/// be immediately functional if the module represented by its `source_code` is not a known Python -/// module in the current Python interpreter. -/// -/// # Arguments -/// -/// * `source_code` - Source code of the Python module to generate bindings for. -/// * `new_module_name` - Name of the Python module to generate bindings for. -/// -/// # Returns -/// -/// `Result` containing the generated bindings as a `proc_macro2::TokenStream` on success, or a -/// `pyo3::PyErr` on failure. 
-/// -/// # Example -/// -/// ``` -/// // use pyo3_bindgen::generate_bindings_from_str; -/// use pyo3_bindgen_engine::generate_bindings_from_str; -/// -/// fn main() -> Result<(), pyo3::PyErr> { -/// const PYTHON_SOURCE_CODE: &str = r#" -/// def string_length(string: str) -> int: -/// return len(string) -/// "#; -/// let bindings = generate_bindings_from_str(PYTHON_SOURCE_CODE, "utils")?; -/// Ok(()) -/// } -/// ``` -pub fn generate_bindings_from_str( - source_code: &str, - new_module_name: &str, -) -> Result { - #[cfg(not(PyPy))] - pyo3::prepare_freethreaded_python(); - - pyo3::Python::with_gil(|py| { - let module = pyo3::types::PyModule::from_code( - py, - source_code, - &format!("{new_module_name}/__init__.py"), - new_module_name, - )?; - generate_bindings_for_module(py, module) - }) -} diff --git a/pyo3_bindgen_engine/src/bindgen/attribute.rs b/pyo3_bindgen_engine/src/bindgen/attribute.rs deleted file mode 100644 index 57ca6bf..0000000 --- a/pyo3_bindgen_engine/src/bindgen/attribute.rs +++ /dev/null @@ -1,131 +0,0 @@ -use crate::types::Type; - -/// Generate Rust bindings to a Python attribute. The attribute can be a standalone -/// attribute or a property of a class. 
-pub fn bind_attribute( - py: pyo3::Python, - module_name: &str, - is_class: bool, - name: &str, - attr: &pyo3::PyAny, - attr_type: &pyo3::PyAny, - all_types: &std::collections::HashSet, -) -> Result { - let mut token_stream = proc_macro2::TokenStream::new(); - - let mut has_setter = true; - let mut getter_type = attr_type; - let mut setter_type = attr_type; - let getter_doc = py.None(); - let mut getter_doc = getter_doc.as_ref(py); - let setter_doc = py.None(); - let mut setter_doc = setter_doc.as_ref(py); - - // Check if the attribute has a getter and setter (is a property) - if let Ok(getter) = attr.getattr("fget") { - let inspect = py.import("inspect")?; - let signature = inspect.call_method1("signature", (getter,))?; - let empty_return_annotation = signature.getattr("empty")?; - let return_annotation = signature.getattr("return_annotation")?; - if !return_annotation.is(empty_return_annotation) { - getter_type = return_annotation; - } - if let Ok(doc) = getter.getattr("__doc__") { - getter_doc = doc; - } - has_setter = false; - } - if let Ok(setter) = attr.getattr("fset") { - if !setter.is_none() { - let inspect = py.import("inspect")?; - let signature = inspect.call_method1("signature", (setter,))?; - let empty_return_annotation = signature.getattr("empty")?; - let value_annotation = signature - .getattr("parameters")? - .call_method0("values")? - .iter()? - .last() - .unwrap()? 
- .getattr("annotation")?; - if !value_annotation.is(empty_return_annotation) { - setter_type = value_annotation; - } - if let Ok(doc) = setter.getattr("__doc__") { - setter_doc = doc; - } - has_setter = true; - } - } - - let mut getter_doc = getter_doc.to_string(); - if getter_doc == "None" || getter_doc.is_empty() { - getter_doc = format!("Getter for the `{name}` attribute"); - }; - - let mut setter_doc = setter_doc.to_string(); - if setter_doc == "None" || setter_doc.is_empty() { - setter_doc = format!("Setter for the `{name}` attribute"); - }; - - let getter_ident = if syn::parse_str::(name).is_ok() { - quote::format_ident!("{}", name) - } else { - quote::format_ident!("r#{}", name) - }; - let setter_ident = quote::format_ident!("set_{}", name); - - let getter_type = Type::try_from(getter_type)?.into_rs_owned(module_name, all_types); - let setter_type = Type::try_from(setter_type)?.into_rs_borrowed(module_name, all_types); - - if is_class { - token_stream.extend(quote::quote! { - #[doc = #getter_doc] - pub fn #getter_ident<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - ) -> ::pyo3::PyResult<#getter_type> { - self.getattr(::pyo3::intern!(py, #name))? - .extract() - } - }); - if has_setter { - token_stream.extend(quote::quote! { - #[doc = #setter_doc] - pub fn #setter_ident<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - value: #setter_type, - ) -> ::pyo3::PyResult<()> { - self.setattr(::pyo3::intern!(py, #name), value)?; - Ok(()) - } - }); - } - } else { - token_stream.extend(quote::quote! { - #[doc = #getter_doc] - pub fn #getter_ident<'py>( - py: ::pyo3::marker::Python<'py>, - ) -> ::pyo3::PyResult<#getter_type> { - py.import(::pyo3::intern!(py, #module_name))? - .getattr(::pyo3::intern!(py, #name))? - .extract() - } - }); - if has_setter { - token_stream.extend(quote::quote! 
{ - #[doc = #setter_doc] - pub fn #setter_ident<'py>( - py: ::pyo3::marker::Python<'py>, - value: #setter_type, - ) -> ::pyo3::PyResult<()> { - py.import(::pyo3::intern!(py, #module_name))? - .setattr(::pyo3::intern!(py, #name), value)?; - Ok(()) - } - }); - } - } - - Ok(token_stream) -} diff --git a/pyo3_bindgen_engine/src/bindgen/class.rs b/pyo3_bindgen_engine/src/bindgen/class.rs deleted file mode 100644 index ba2ef48..0000000 --- a/pyo3_bindgen_engine/src/bindgen/class.rs +++ /dev/null @@ -1,275 +0,0 @@ -use crate::bindgen::{bind_attribute, bind_function}; - -/// Generate Rust bindings to a Python class with all its methods and attributes (properties). -/// This function will call itself recursively to generate bindings to all nested classes. -pub fn bind_class( - py: pyo3::Python, - root_module: &pyo3::types::PyModule, - class: &pyo3::types::PyType, - all_types: &std::collections::HashSet, -) -> Result { - let inspect = py.import("inspect")?; - - // Extract the names of the modules - let root_module_name = root_module.name()?; - let class_full_name = class.name()?; - let class_name = class_full_name.split('.').last().unwrap(); - let class_module_name = class.getattr("__module__")?.to_string(); - - // Create the Rust class identifier (raw string if it is a keyword) - let class_ident = if syn::parse_str::(class_name).is_ok() { - quote::format_ident!("{class_name}") - } else { - quote::format_ident!("r#{class_name}") - }; - - // let mut fn_names = Vec::new(); - - let mut impl_token_stream = proc_macro2::TokenStream::new(); - - // Implement new() - if class.hasattr("__init__")? { - for i in 0.. { - let new_fn_name = if i == 0 { - "new".to_string() - } else { - format!("new{i}") - }; - if !class.hasattr(new_fn_name.as_str())? { - impl_token_stream.extend(bind_function( - py, - &class_module_name, - &new_fn_name, - class.getattr("__init__")?, - all_types, - Some(class), - )); - break; - } - } - } - // Implement call() method - if class.hasattr("__call__")? 
{ - for i in 0.. { - let call_fn_name = if i == 0 { - "call".to_string() - } else { - format!("call{i}") - }; - if !class.hasattr(call_fn_name.as_str())? { - impl_token_stream.extend(bind_function( - py, - &class_module_name, - &call_fn_name, - class.getattr("__call__")?, - all_types, - Some(class), - )); - break; - } - } - } - - // Iterate over all attributes of the module while updating the token stream - class - .dir() - .iter() - .filter_map(|name| { - let name = name.str().unwrap().to_str().unwrap(); - if let Ok(attr) = class.getattr(name) { - let attr_type = attr.get_type(); - Some((name, attr, attr_type)) - } else { - None - } - }) - .filter(|&(_, _, attr_type)| { - // Skip builtin functions - !attr_type - .is_subclass_of::() - .unwrap_or(false) - }) - .filter(|&(name, _, _)| { - // Skip private attributes - !name.starts_with('_') - }) - .filter(|(_, attr, attr_type)| { - // Skip typing attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("typing")) - && !attr_type.to_string().contains("typing") - }) - .filter(|(_, attr, _)| { - // Skip __future__ attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("__future__")) - }) - .filter(|&(_, attr, _)| { - // Skip classes and functions that are not part of the package - // However, this should keep instances of classes and builtins even if they are builtins or from other packages - if let Ok(module) = attr.getattr("__module__") { - if module.to_string().starts_with(root_module_name) { - true - } else { - !(inspect - .call_method1("isclass", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap()) - } - } else { - true - } - }) - .filter(|&(_, attr, attr_type)| { - // Skip external modules - if attr_type - .is_subclass_of::() - .unwrap_or(false) - { - let is_submodule = attr - .getattr("__package__") - .is_ok_and(|package| 
package.to_string().starts_with(root_module_name)); - is_submodule - } else { - true - } - }) - .for_each(|(name, attr, attr_type)| { - let is_internal = attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .starts_with(root_module_name); - let is_reexport = is_internal - && attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .ne(&class_module_name); - - let is_class = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_function = inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("ismethod", (attr,)) - .unwrap() - .is_true() - .unwrap(); - - // Make sure that only one of the three is true - debug_assert!(![is_class, is_function].iter().all(|&v| v)); - - if is_class && !is_reexport { - // TODO: Properly handle nested classes - // impl_token_stream.extend(bind_class( - // py, - // root_module, - // attr.downcast().unwrap(), - // all_types, - // )); - } else if is_function { - // fn_names.push(name.to_string()); - impl_token_stream.extend(bind_function( - py, - &class_module_name, - name, - attr, - all_types, - Some(class), - )); - } else if !name.starts_with('_') { - impl_token_stream.extend(bind_attribute( - py, - &class_module_name, - true, - name, - attr, - attr_type, - all_types, - )); - } - }); - - let mut doc = class.getattr("__doc__")?.to_string(); - if doc == "None" { - doc = String::new(); - }; - - let object_name = format!("{class_module_name}.{class_name}"); - - Ok(quote::quote! 
{ - #[doc = #doc] - #[repr(transparent)] - pub struct #class_ident(::pyo3::PyAny); - // Note: Using these macros is probably not the best idea, but it makes possible wrapping around ::pyo3::PyAny instead of ::pyo3::PyObject, which improves usability - ::pyo3::pyobject_native_type_named!(#class_ident); - ::pyo3::pyobject_native_type_info!(#class_ident, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), ::std::option::Option::Some(#object_name)); - ::pyo3::pyobject_native_type_extract!(#class_ident); - #[automatically_derived] - impl #class_ident { - #impl_token_stream - } - }) - - // Ok(quote::quote! { - // #[doc = #doc] - // #[repr(transparent)] - // #[derive(Clone, Debug)] - // pub struct #class_ident(pub ::pyo3::PyObject); - // #[automatically_derived] - // impl ::std::ops::Deref for #class_ident { - // type Target = ::pyo3::PyObject; - // fn deref(&self) -> &Self::Target { - // &self.0 - // } - // } - // #[automatically_derived] - // impl ::std::ops::DerefMut for #class_ident { - // fn deref_mut(&mut self) -> &mut Self::Target { - // &mut self.0 - // } - // } - // #[automatically_derived] - // impl<'py> ::pyo3::FromPyObject<'py> for #class_ident { - // fn extract(value: &'py ::pyo3::PyAny) -> ::pyo3::PyResult { - // Ok(Self(value.into())) - // } - // } - // #[automatically_derived] - // impl ::pyo3::ToPyObject for #class_ident { - // fn to_object<'py>(&'py self, py: ::pyo3::Python<'py>) -> ::pyo3::PyObject { - // self.as_ref(py).to_object(py) - // } - // } - // #[automatically_derived] - // impl From<::pyo3::PyObject> for #class_ident { - // fn from(value: ::pyo3::PyObject) -> Self { - // Self(value) - // } - // } - // #[automatically_derived] - // impl<'py> From<&'py ::pyo3::PyAny> for #class_ident { - // fn from(value: &'py ::pyo3::PyAny) -> Self { - // Self(value.into()) - // } - // } - // #[automatically_derived] - // impl #class_ident { - // #impl_token_stream - // } - // }) -} diff --git 
a/pyo3_bindgen_engine/src/bindgen/function.rs b/pyo3_bindgen_engine/src/bindgen/function.rs deleted file mode 100644 index b2af45a..0000000 --- a/pyo3_bindgen_engine/src/bindgen/function.rs +++ /dev/null @@ -1,290 +0,0 @@ -use itertools::Itertools; -use pyo3::PyTypeInfo; - -use crate::types::Type; - -/// Generate Rust bindings to a Python function. The function can be a standalone function or a -/// method of a class. -pub fn bind_function( - py: pyo3::Python, - module_name: &str, - name: &str, - function: &pyo3::PyAny, - all_types: &std::collections::HashSet, - method_of_class: Option<&pyo3::types::PyType>, -) -> Result { - let inspect = py.import("inspect")?; - - let signature = inspect.call_method1("signature", (function,))?; - - let empty_return_annotation = signature.getattr("empty")?; - - let parameters = signature.getattr("parameters")?; - let return_annotation = signature.getattr("return_annotation")?; - - let return_annotation = if return_annotation.is(empty_return_annotation) { - None - } else { - Some(return_annotation) - }; - - let mut positional_args_idents = Vec::new(); - let mut keyword_args_idents = Vec::new(); - let mut keyword_args_names = Vec::new(); - let mut var_positional_ident = None; - let mut var_keyword_ident = None; - - let parameters = parameters - .call_method0("values")? - .iter()? 
- .map(|parameter| { - let parameter = parameter.unwrap(); - - let empty_param_annotation = parameter.getattr("empty").unwrap(); - - let param_name = parameter.getattr("name").unwrap().to_string(); - - let param_default = parameter.getattr("default").unwrap(); - let param_annotation = parameter.getattr("annotation").unwrap(); - let param_kind = parameter.getattr("kind").unwrap(); - - let param_annotation = if param_annotation.is(empty_param_annotation) { - None - } else { - Some(param_annotation) - }; - let param_default = if param_default.is(empty_param_annotation) { - None - } else { - Some(param_default) - }; - // TODO: Turn into enum or process in-place - let param_kind = match param_kind.extract::().unwrap() { - 0 => "POSITIONAL_ONLY", - 1 => "POSITIONAL_OR_KEYWORD", - 2 => "VAR_POSITIONAL", // args - 3 => "KEYWORD_ONLY", - 4 => "VAR_KEYWORD", // kwargs - _ => unreachable!(), - }; - - if param_name != "self" { - match param_kind { - "POSITIONAL_ONLY" => { - positional_args_idents.push( - if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }, - ); - } - "KEYWORD_ONLY" | "POSITIONAL_OR_KEYWORD" => { - keyword_args_idents.push( - if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }, - ); - keyword_args_names.push(param_name.clone()); - } - "VAR_POSITIONAL" => { - var_positional_ident = - Some(if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }); - positional_args_idents.push( - if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }, - ); - } - "VAR_KEYWORD" => { - var_keyword_ident = - Some(if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }); - } 
- _ => unreachable!(), - } - } - - let param_annotation = match param_kind { - "VAR_POSITIONAL" => Some(pyo3::types::PyTuple::type_object(py).downcast().unwrap()), - "VAR_KEYWORD" => Some(pyo3::types::PyDict::type_object(py).downcast().unwrap()), - _ => param_annotation, - }; - - (param_name, param_annotation, param_default, param_kind) - }) - .collect_vec(); - - let function_ident = if syn::parse_str::(name).is_ok() { - quote::format_ident!("{}", name) - } else { - quote::format_ident!("r#{}", name) - }; - let function_name = function.getattr("__name__")?.to_string(); - - // Check if `self` is the first parameter - let has_self_param = parameters - .iter() - .any(|(param_name, _, _, _)| param_name == "self"); - let is_class_method = - method_of_class.is_some() && (!has_self_param || function_name == "__init__"); - - let param_idents = parameters - .iter() - .skip(usize::from(has_self_param)) - .map(|(param_name, _, _, _)| { - if syn::parse_str::(param_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - } - }) - .collect_vec(); - let pynone = py.None(); - let pynone = pynone.as_ref(py); - let param_types = parameters - .iter() - .skip(usize::from(has_self_param)) - .map(|&(_, param_annotation, _, param_kind)| { - if param_kind == "VAR_POSITIONAL" { - quote::quote! { impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>} - } else { - Type::try_from(param_annotation.unwrap_or(pynone)) - .unwrap() - .into_rs_borrowed(module_name, all_types) - } - }) - .collect_vec(); - - let mut doc = function.getattr("__doc__")?.to_string(); - if doc == "None" { - doc = String::new(); - }; - - let (has_self_param, is_class_method) = if function_name == "__call__" { - (true, false) - } else { - (has_self_param, is_class_method) - }; - - let (maybe_ref_self, callable_object) = match (has_self_param, is_class_method) { - (true, false) => (quote::quote! { &'py self, }, quote::quote! 
{ self }), - (_, true) => { - let class_name = method_of_class.unwrap().name().unwrap(); - ( - quote::quote! {}, - quote::quote! { py.import(::pyo3::intern!(py, #module_name))?.getattr(::pyo3::intern!(py, #class_name))?}, - ) - } - _ => ( - quote::quote! {}, - quote::quote! { py.import(::pyo3::intern!(py, #module_name))? }, - ), - }; - - let has_positional_args = !positional_args_idents.is_empty(); - let set_args = match ( - positional_args_idents.len() > 1, - var_positional_ident.is_some(), - ) { - (true, _) => { - quote::quote! { - let __internal_args = ::pyo3::types::PyTuple::new( - py, - [#(::pyo3::IntoPy::<::pyo3::PyObject>::into_py(#positional_args_idents.to_owned(), py).as_ref(py),)*] - ); - } - } - (false, true) => { - let var_positional_ident = var_positional_ident.unwrap(); - quote::quote! { - let __internal_args = #var_positional_ident; - } - } - (false, false) => { - quote::quote! { let __internal_args = (); } - } - }; - - let has_kwargs = !keyword_args_idents.is_empty(); - let kwargs_initial = if let Some(var_keyword_ident) = var_keyword_ident { - quote::quote! { #var_keyword_ident } - } else { - quote::quote! { ::pyo3::types::PyDict::new(py) } - }; - let set_kwargs = quote::quote! { - let __internal_kwargs = #kwargs_initial; - #(__internal_kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents)?;)* - }; - - let is_init_fn = function_name == "__init__"; - - let return_annotation = if is_init_fn && method_of_class.is_some() { - quote::quote! { - &'py Self - } - } else { - Type::try_from(return_annotation.unwrap_or(pynone))?.into_rs_owned(module_name, all_types) - }; - - let call_method = match (is_init_fn, has_positional_args, has_kwargs) { - (true, _, true) => { - quote::quote! { - #set_args - #set_kwargs - #callable_object.call(__internal_args, Some(__internal_kwargs))? - } - } - (true, true, false) => { - quote::quote! { - #set_args - #callable_object.call1(__internal_args)? - } - } - (true, false, false) => { - quote::quote! 
{ - #callable_object.call0()? - } - } - (false, _, true) => { - quote::quote! { - #set_args - #set_kwargs - #callable_object.call_method(::pyo3::intern!(py, #function_name), __internal_args, Some(__internal_kwargs))? - } - } - (false, true, false) => { - quote::quote! { - #set_args - #callable_object.call_method1(::pyo3::intern!(py, #function_name), __internal_args)? - } - } - (false, false, false) => { - quote::quote! { - #callable_object.call_method0(::pyo3::intern!(py, #function_name))? - } - } - }; - - Ok(quote::quote! { - #[doc = #doc] - pub fn #function_ident<'py>( - #maybe_ref_self - py: ::pyo3::marker::Python<'py>, - #(#param_idents: #param_types),* - ) -> ::pyo3::PyResult<#return_annotation> { - #call_method.extract() - } - }) -} diff --git a/pyo3_bindgen_engine/src/bindgen/module.rs b/pyo3_bindgen_engine/src/bindgen/module.rs deleted file mode 100644 index 653ce3b..0000000 --- a/pyo3_bindgen_engine/src/bindgen/module.rs +++ /dev/null @@ -1,525 +0,0 @@ -use itertools::Itertools; - -use crate::bindgen::{bind_attribute, bind_class, bind_function}; - -/// Generate a Rust module from a Python module. This function is called recursively to generate -/// bindings for all submodules. The generated module will contain all classes, functions, and -/// attributes of the Python module. During the first call, the `root_module` argument should be -/// the same as the `module` argument and the `processed_modules` argument should be an empty -/// `HashSet`. 
-pub fn bind_module( - py: pyo3::Python, - root_module: &pyo3::types::PyModule, - module: &pyo3::types::PyModule, - processed_modules: &mut std::collections::HashSet, - all_types: &std::collections::HashSet, -) -> Result { - let inspect = py.import("inspect")?; - - // Extract the names of the modules - let root_module_name = root_module.name()?; - let full_module_name = module.name()?; - let module_name: &str = full_module_name.split('.').last().unwrap(); - - // Create the Rust module identifier (raw string if it is a keyword) - let module_ident = if syn::parse_str::(module_name).is_ok() { - quote::format_ident!("{module_name}") - } else { - quote::format_ident!("r#{module_name}") - }; - - // Iterate over all attributes of the module while updating the token stream - let mut mod_token_stream = proc_macro2::TokenStream::new(); - module - .dir() - .iter() - .map(|name| { - let name = name.str().unwrap().to_str().unwrap(); - let attr = module.getattr(name).unwrap(); - let attr_type = attr.get_type(); - (name, attr, attr_type) - }) - .filter(|&(_, _, attr_type)| { - // Skip builtin functions - !attr_type - .is_subclass_of::() - .unwrap_or(false) - }) - .filter(|&(name, _, _)| { - // Skip private attributes - !name.starts_with('_') - }) - .filter(|(_, attr, attr_type)| { - // Skip typing attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("typing")) - && !attr_type.to_string().contains("typing") - }) - .filter(|(_, attr, _)| { - // Skip __future__ attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("__future__")) - }) - .filter(|&(_, attr, _)| { - // Skip classes and functions that are not part of the package - // However, this should keep instances of classes and builtins even if they are builtins or from other packages - if let Ok(module) = attr.getattr("__module__") { - if module.to_string().starts_with(root_module_name) { - true - } else { - !(inspect - .call_method1("isclass", 
(attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap()) - } - } else { - true - } - }) - .filter(|&(_, attr, attr_type)| { - // Skip external modules - if attr_type - .is_subclass_of::() - .unwrap_or(false) - { - let is_part_of_package = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(root_module_name)); - is_part_of_package - } else { - true - } - }) - .for_each(|(name, attr, attr_type)| { - let is_internal = attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .starts_with(root_module_name); - let is_reexport = is_internal - && attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .ne(full_module_name); - - let is_module = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_class = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_function = inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("ismethod", (attr,)) - .unwrap() - .is_true() - .unwrap(); - - // Process hidden modules (shadowed by re-exported attributes of the same name) - if (is_class || is_function) - && is_reexport - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .last() - .unwrap() - == name - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .take(full_module_name.split('.').count()) - .join(".") - == full_module_name - { - let content = if is_class { - bind_class(py, root_module, attr.downcast().unwrap(), all_types).unwrap() - } else if is_function { - bind_function(py, full_module_name, name, attr, all_types, None).unwrap() - } else { - unreachable!() - }; - - let shadowed_module_name = attr.getattr("__module__").unwrap().to_string(); - let shadowed_module_name = shadowed_module_name.split('.').last().unwrap(); - let shadowed_module_ident = - if 
syn::parse_str::(shadowed_module_name).is_ok() { - quote::format_ident!("{}", shadowed_module_name) - } else { - quote::format_ident!("r#{}", shadowed_module_name) - }; - - mod_token_stream.extend(quote::quote! { - pub mod #shadowed_module_ident { - #content - } - }); - } - - if is_module { - let is_submodule_of_current_module = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(full_module_name)); - - if is_submodule_of_current_module { - if processed_modules.insert(format!( - "{}.{}", - attr.getattr("__package__").unwrap(), - name - )) { - mod_token_stream.extend(bind_module( - py, - root_module, - attr.downcast().unwrap(), - processed_modules, - all_types, - )); - } - } else { - mod_token_stream.extend(bind_reexport( - root_module_name, - full_module_name, - name, - attr, - )); - } - } else if is_reexport - && (is_function - || (is_class && all_types.contains(&format!("{full_module_name}.{name}")))) - { - mod_token_stream.extend(bind_reexport( - root_module_name, - full_module_name, - name, - attr, - )); - } else if is_class { - mod_token_stream.extend(bind_class( - py, - root_module, - attr.downcast().unwrap(), - all_types, - )); - } else if is_function { - mod_token_stream.extend(bind_function( - py, - full_module_name, - name, - attr, - all_types, - None, - )); - } else { - mod_token_stream.extend(bind_attribute( - py, - full_module_name, - false, - name, - attr, - attr_type, - all_types, - )); - } - }); - - let mut doc = module.getattr("__doc__")?.to_string(); - if doc == "None" { - doc = String::new(); - }; - - Ok(if module_name == root_module_name { - quote::quote! { - #[doc = #doc] - #[allow( - clippy::all, - clippy::nursery, - clippy::pedantic, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused - )] - mod #module_ident { - #mod_token_stream - } - } - } else { - quote::quote! 
{ - #[doc = #doc] - pub mod #module_ident { - #mod_token_stream - } - } - }) -} - -/// Generate a re-export of an attribute from a submodule. This is commonly used in Python to -/// re-export attributes from submodules in the parent module. For example, `from os import path` -/// makes the `os.path` submodule available in the current module as just `path`. -pub fn bind_reexport( - root_module_name: &str, - module_name: &str, - name: &str, - attr: &pyo3::PyAny, -) -> Result { - let full_attr_name = attr.getattr("__name__")?.to_string(); - let attr_name = if full_attr_name.contains('.') { - full_attr_name.split('.').last().unwrap() - } else { - full_attr_name.as_str() - }; - let is_module; - let attr_origin_module = if let Ok(module) = attr.getattr("__module__") { - is_module = false; - module.to_string() - } else { - is_module = true; - full_attr_name - .clone() - .split('.') - .take((full_attr_name.split('.').count() - 1).max(1)) - .join(".") - }; - - let n_common_ancestors = module_name - .split('.') - .zip(attr_origin_module.split('.')) - .take_while(|(a, b)| a == b) - .count(); - let current_module_depth = module_name.split('.').count(); - let reexport_path = if (current_module_depth - n_common_ancestors) > 0 { - std::iter::repeat("super".to_string()).take( - current_module_depth - n_common_ancestors - + usize::from(is_module && !full_attr_name.contains('.')), - ) - } else { - std::iter::repeat("self".to_string()).take(1) - }; - let reexport_path: String = reexport_path - .chain( - attr_origin_module - .split('.') - .skip(n_common_ancestors) - .map(|s| { - if syn::parse_str::(s).is_ok() { - s.to_owned() - } else { - format!("r#{s}") - } - }), - ) - .chain(std::iter::once(attr_name).map(|s| { - if syn::parse_str::(s).is_ok() { - s.to_owned() - } else { - format!("r#{s}") - } - })) - .join("::"); - - // The path contains both ident and "::", combine into something that can be quoted - let reexport_path = syn::parse_str::(&reexport_path).unwrap(); - - let 
visibility = if attr_name == root_module_name { - quote::quote! {} - } else { - quote::quote! { - pub - } - }; - - if attr_name == name { - Ok(quote::quote! { - #visibility use #reexport_path; - }) - } else { - let name = if syn::parse_str::(name).is_ok() { - quote::format_ident!("{}", name) - } else { - quote::format_ident!("r#{}", name) - }; - Ok(quote::quote! { - #visibility use #reexport_path as #name; - }) - } -} - -pub fn collect_types_of_module( - py: pyo3::Python, - root_module: &pyo3::types::PyModule, - module: &pyo3::types::PyModule, - processed_modules: &mut std::collections::HashSet, - all_types: &mut std::collections::HashSet, -) -> Result, pyo3::PyErr> { - let inspect = py.import("inspect")?; - - // Extract the names of the modules - let root_module_name = root_module.name()?; - let full_module_name = module.name()?; - - // Iterate over all attributes of the module while updating the token stream - module - .dir() - .iter() - .map(|name| { - let name = name.str().unwrap().to_str().unwrap(); - let attr = module.getattr(name).unwrap(); - let attr_type = attr.get_type(); - (name, attr, attr_type) - }) - .filter(|&(_, _, attr_type)| { - // Skip builtin functions - !attr_type - .is_subclass_of::() - .unwrap_or(false) - }) - .filter(|&(name, _, _)| { - // Skip private attributes - !name.starts_with('_') - }) - .filter(|(_, attr, attr_type)| { - // Skip typing attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("typing")) - && !attr_type.to_string().contains("typing") - }) - .filter(|(_, attr, _)| { - // Skip __future__ attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("__future__")) - }) - .filter(|&(_, attr, _)| { - // Skip classes and functions that are not part of the package - // However, this should keep instances of classes and builtins even if they are builtins or from other packages - if let Ok(module) = attr.getattr("__module__") { - if 
module.to_string().starts_with(root_module_name) { - true - } else { - !(inspect - .call_method1("isclass", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap()) - } - } else { - true - } - }) - .filter(|&(_, attr, attr_type)| { - // Skip external modules - if attr_type - .is_subclass_of::() - .unwrap_or(false) - { - let is_part_of_package = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(root_module_name)); - is_part_of_package - } else { - true - } - }) - .for_each(|(name, attr, attr_type)| { - let is_internal = attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .starts_with(root_module_name); - let is_reexport = is_internal - && attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .ne(full_module_name); - - let is_module = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_class = attr_type - .is_subclass_of::() - .unwrap_or(false); - - // Process hidden modules (shadowed by re-exported attributes of the same name) - if is_class - && is_reexport - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .last() - .unwrap() - == name - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .take(full_module_name.split('.').count()) - .join(".") - == full_module_name - { - let full_class_name = - format!("{}.{}", full_module_name, attr.getattr("__name__").unwrap()); - all_types.insert(full_class_name.clone()); - let full_class_name = format!("{full_module_name}.{name}"); - all_types.insert(full_class_name.clone()); - } - - if is_module { - let is_submodule_of_current_module = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(full_module_name)); - - if is_submodule_of_current_module - && processed_modules.insert(format!( - "{}.{}", - attr.getattr("__package__").unwrap(), - 
name - )) - { - let _ = collect_types_of_module( - py, - root_module, - attr.downcast().unwrap(), - processed_modules, - all_types, - ); - } - } else if is_class && !attr.to_string().contains("") { - let full_class_name = - format!("{}.{}", full_module_name, attr.getattr("__name__").unwrap()); - all_types.insert(full_class_name.clone()); - let full_class_name = format!("{full_module_name}.{name}"); - all_types.insert(full_class_name.clone()); - } - }); - - Ok(all_types.clone()) -} diff --git a/pyo3_bindgen_engine/src/build_utils.rs b/pyo3_bindgen_engine/src/build_utils.rs deleted file mode 100644 index fd9c1b6..0000000 --- a/pyo3_bindgen_engine/src/build_utils.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Module with utilities for generating bindings in build scripts. - -/// Convenience function for generating bindings in build scripts. This function is equivalent to -/// calling `generate_bindings` and writing the result to a file. -/// -/// # Arguments -/// -/// * `module_name` - Name of the Python module to generate bindings for. -/// * `output_path` - Path to write the generated bindings to. -/// -/// # Returns -/// -/// `Result` containing `std::io::Error` on failure. -/// -/// # Example -/// -/// 1. Generate bindings using `build.rs` script. -/// -/// ```ignore -/// // build.rs -/// -/// // use pyo3_bindgen::build_bindings; -/// use pyo3_bindgen_engine::build_bindings; -/// -/// fn main() { -/// build_bindings( -/// "os", -/// std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("bindings.rs"), -/// ) -/// .unwrap(); -/// } -/// ``` -/// -/// 2. Include the generated bindings in `src/lib.rs`. 
-/// -/// ```ignore -/// // src/lib.rs -/// -/// include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -/// pub use os::*; -/// ``` -// TODO: Add `println!("cargo:rerun-if-changed={}.py");` for all files of the target Python module -pub fn build_bindings( - module_name: &str, - output_path: impl AsRef, -) -> std::io::Result<()> { - let bindings = crate::generate_bindings(module_name).map_err(|err| { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("Failed to generate bindings for Python module '{module_name}': {err}"), - ) - })?; - std::fs::write(output_path, bindings.to_string()) -} diff --git a/pyo3_bindgen_engine/src/codegen.rs b/pyo3_bindgen_engine/src/codegen.rs new file mode 100644 index 0000000..5c01d18 --- /dev/null +++ b/pyo3_bindgen_engine/src/codegen.rs @@ -0,0 +1,101 @@ +use crate::traits::{Canonicalize, Generate}; +use crate::{syntax::Module, Config, Result}; + +#[derive(Debug, Default, Clone)] +pub struct Codegen { + pub cfg: Config, + pub modules: Vec, +} + +impl Codegen { + pub fn new(cfg: Config) -> Result { + Ok(Self { + cfg, + ..Default::default() + }) + } + + pub fn module(mut self, module: &pyo3::types::PyModule) -> Result { + crate::io_utils::with_suppressed_python_output( + module.py(), + self.cfg.suppress_python_stdout, + self.cfg.suppress_python_stderr, + || { + self.modules.push(Module::parse(&self.cfg, module)?); + Ok(()) + }, + )?; + Ok(self) + } + + pub fn modules(mut self, modules: &[&pyo3::types::PyModule]) -> Result { + self.modules.reserve(modules.len()); + for module in modules { + self = self.module(module)?; + } + Ok(self) + } + + pub fn generate(mut self) -> Result { + let mut tokens = proc_macro2::TokenStream::new(); + + pyo3::Python::with_gil(|py| { + crate::io_utils::with_suppressed_python_output( + py, + self.cfg.suppress_python_stdout, + self.cfg.suppress_python_stderr, + || { + // Parse external modules (if enabled) + if self.cfg.generate_dependencies { + self.parse_dependencies()?; + } + + // Canonicalize the 
module tree + self.canonicalize(); + + // Generate the bindings for all modules + for module in &self.modules { + tokens.extend(module.generate(&self.cfg)?); + } + Ok(()) + }, + ) + })?; + + Ok(tokens) + } + + pub fn build(self, output_path: impl AsRef) -> Result<()> { + std::fs::write(output_path, self.generate()?.to_string())?; + Ok(()) + } + + fn parse_dependencies(&mut self) -> Result<()> { + // TODO: Parse modules of dependencies + todo!() + } +} + +impl Canonicalize for Codegen { + fn canonicalize(&mut self) { + todo!(); + for module in &mut self.modules { + module.canonicalize(); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_codegen() { + pyo3::prepare_freethreaded_python(); + pyo3::Python::with_gil(|py| { + let cfg = Config::default(); + let module = py.import("gymnasium").unwrap(); + let _codegen = Codegen::new(cfg).unwrap().module(module).unwrap(); + }); + } +} diff --git a/pyo3_bindgen_engine/src/config.rs b/pyo3_bindgen_engine/src/config.rs new file mode 100644 index 0000000..1eb6705 --- /dev/null +++ b/pyo3_bindgen_engine/src/config.rs @@ -0,0 +1,45 @@ +use crate::syntax::{Ident, Path}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, typed_builder::TypedBuilder)] +pub struct Config { + #[builder(default = true)] + pub generate_dependencies: bool, + #[builder(default = true)] + pub generate_preludes: bool, + #[builder(default = true)] + pub suppress_python_stdout: bool, + // TODO: Default to false + #[builder(default = true)] + pub suppress_python_stderr: bool, +} + +impl Default for Config { + fn default() -> Self { + Self::builder().build() + } +} + +impl Config { + pub fn is_attr_allowed( + &self, + _attr: &pyo3::types::PyAny, + attr_name: &Ident, + attr_module: &Path, + attr_type: &pyo3::types::PyType, + ) -> bool { + if + // Skip private attributes + attr_name.as_py().starts_with('_') || + // Skip builtin functions + attr_type.is_subclass_of::().unwrap_or(false) || + // Skip `__future__` attributes + 
attr_module.iter().any(|segment| segment.as_py() == "__future__") || + // Skip `typing` attributes + attr_module.iter().any(|segment| segment.as_py() == "typing") + { + false + } else { + true + } + } +} diff --git a/pyo3_bindgen_engine/src/lib.rs b/pyo3_bindgen_engine/src/lib.rs index d4400ff..a9eb072 100644 --- a/pyo3_bindgen_engine/src/lib.rs +++ b/pyo3_bindgen_engine/src/lib.rs @@ -1,8 +1,15 @@ //! Engine for automatic generation of Rust FFI bindings to Python modules. -pub mod bindgen; -pub mod build_utils; -pub mod types; +mod codegen; +mod config; +mod syntax; +mod traits; +mod types; +mod utils; -pub use bindgen::{generate_bindings, generate_bindings_for_module, generate_bindings_from_str}; -pub use build_utils::build_bindings; +pub use codegen::Codegen; +pub use config::Config; +pub use utils::{build::build_bindings, error::PyBindgenError, result::PyBindgenResult}; + +use utils::io as io_utils; +use utils::result::Result; diff --git a/pyo3_bindgen_engine/src/syntax/class.rs b/pyo3_bindgen_engine/src/syntax/class.rs new file mode 100644 index 0000000..146299f --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/class.rs @@ -0,0 +1,141 @@ +use super::{ + AttributeVariant, Function, FunctionType, Ident, MethodType, Path, Property, PropertyOwner, +}; +use crate::{ + traits::{Canonicalize, Generate}, + Config, Result, +}; +use itertools::Itertools; + +#[derive(Debug, Clone)] +pub struct Class { + pub name: Path, + pub subclasses: Vec, + pub methods: Vec, + pub properties: Vec, + pub docstring: Option, +} + +impl Class { + pub fn parse(cfg: &Config, class: &pyo3::types::PyType, name: Path) -> Result { + let py = class.py(); + + // Initialize lists for all members of the class + let mut subclasses = Vec::new(); + let mut methods = Vec::new(); + let mut properties = Vec::new(); + + // Extract the list of all attribute names in the module + class + .dir() + .iter() + // Convert each attribute name to an identifier + .map(|attr_name| 
Ident::from_py(&attr_name.to_string())) + // Expand each attribute to a tuple of (attr, attr_name, attr_module, attr_type) + .map(|attr_name| { + let attr = class.getattr(attr_name.as_py()).unwrap_or_else(|_| { + unreachable!( + "Python object must always have attributes listed in its `__dir__`: {}", + attr_name + ) + }); + let attr_module = Path::from_py( + &attr + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + let attr_type = attr.get_type(); + + (attr, attr_name, attr_module, attr_type) + }) + // Filter attributes based on various configurable conditions + .filter(|(attr, attr_name, attr_module, attr_type)| { + cfg.is_attr_allowed(attr, attr_name, attr_module, attr_type) + || ["__init__", "__call__"].contains(&attr_name.as_py()) + }) + // Iterate over the remaining attributes and parse them + .try_for_each(|(attr, attr_name, attr_module, attr_type)| { + match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, false) + .unwrap() + { + AttributeVariant::Import => { + eprintln!("WARN: Imports in classes are not supported: {attr_name}"); + } + AttributeVariant::Module => { + eprintln!("WARN: Submodules in classes are not supported: {attr_name}"); + } + AttributeVariant::Class => { + let subclass = + Self::parse(cfg, attr.downcast().unwrap(), name.join(&attr_name)) + .unwrap(); + subclasses.push(subclass); + } + AttributeVariant::Function | AttributeVariant::Method => { + let method = Function::parse( + cfg, + attr, + name.join(&attr_name), + FunctionType::Method { + class_path: name.clone(), + typ: match attr_name.as_py() { + "__init__" => MethodType::Constructor, + "__call__" => MethodType::Call, + _ => MethodType::Regular, + }, + }, + ) + .unwrap(); + methods.push(method); + } + AttributeVariant::Closure => { + eprintln!("WARN: Closures are not supported in classes: {attr_name}"); + } + AttributeVariant::TypeVar => { + eprintln!("WARN: TypesVars are not supported in classes: 
{attr_name}"); + } + AttributeVariant::Property => { + let property = Property::parse( + cfg, + attr, + name.join(&attr_name), + PropertyOwner::Class(name.clone()), + ) + .unwrap(); + properties.push(property); + } + } + Result::Ok(()) + })?; + + // Extract the docstring of the class + let docstring = { + let docstring = class.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + subclasses, + methods, + properties, + docstring, + }) + } +} + +impl Generate for Class { + fn generate(&self, _cfg: &Config) -> Result { + todo!() + } +} + +impl Canonicalize for Class { + fn canonicalize(&mut self) { + todo!() + } +} diff --git a/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs new file mode 100644 index 0000000..336abe1 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs @@ -0,0 +1,74 @@ +use crate::{ + syntax::{Ident, Path}, + Result, +}; + +pub enum AttributeVariant { + Import, + Module, + Class, + Function, + Method, + Closure, + TypeVar, + Property, +} + +impl AttributeVariant { + pub fn determine( + py: pyo3::prelude::Python, + attr: &pyo3::prelude::PyAny, + attr_type: &pyo3::types::PyType, + attr_module: &Path, + owner_name: &Path, + consider_imported: bool, + ) -> Result { + let inspect = py.import("inspect")?; + + // Get the name and module of the attribute type + let attr_type_name = Ident::from_py(attr_type.name().unwrap_or_default()); + let attr_type_module = Path::from_py( + &attr_type + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + + // Determine the type of the attribute + let is_submodule = attr_type + .is_subclass_of::() + .unwrap_or(false); + let is_class = attr_type + .is_subclass_of::() + .unwrap_or(false); + let is_function = inspect + .call_method1(pyo3::intern!(py, 
"isfunction"), (attr,))? + .is_true()?; + let is_method = inspect + .call_method1(pyo3::intern!(py, "ismethod"), (attr,))? + .is_true()?; + let is_closure = + attr_type_module.to_py().as_str() == "functools" && attr_type_name.as_py() == "partial"; + let is_type = ["typing", "types"].contains(&attr_type_module.to_py().as_str()); + let is_external = attr_module != owner_name; + let is_imported = is_external && (is_submodule || is_class || is_function || is_method); + + Ok(if consider_imported && is_imported { + AttributeVariant::Import + } else if is_submodule { + AttributeVariant::Module + } else if is_class { + AttributeVariant::Class + } else if is_function { + AttributeVariant::Function + } else if is_method { + AttributeVariant::Method + } else if is_closure { + AttributeVariant::Closure + } else if is_type { + AttributeVariant::TypeVar + } else { + AttributeVariant::Property + }) + } +} diff --git a/pyo3_bindgen_engine/src/syntax/common/ident.rs b/pyo3_bindgen_engine/src/syntax/common/ident.rs new file mode 100644 index 0000000..c011143 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/ident.rs @@ -0,0 +1,98 @@ +#[repr(transparent)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Ident(String); + +impl Ident { + pub fn from_rs(value: &str) -> Self { + Self(value.to_owned()) + } + + pub fn from_py(value: &str) -> Self { + Self(Self::py_to_rs(value)) + } + + pub fn into_rs(self) -> String { + self.0 + } + + pub fn as_rs(&self) -> &str { + &self.0 + } + + pub fn as_py(&self) -> &str { + Self::rs_as_py(&self.0) + } + + fn rs_as_py(value: &str) -> &str { + value.strip_prefix("r#").unwrap_or(value) + } + + fn py_to_rs(value: &str) -> String { + if syn::parse_str::(value).is_ok() { + value.to_owned() + } else { + format!("r#{value}") + } + } +} + +impl TryFrom for syn::Ident { + type Error = syn::Error; + fn try_from(value: Ident) -> Result { + syn::parse_str::(&value.into_rs()) + } +} + +impl TryFrom<&Ident> for syn::Ident { + type Error = 
syn::Error; + fn try_from(value: &Ident) -> Result { + syn::parse_str::(value.as_rs()) + } +} + +impl std::ops::Deref for Ident { + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::fmt::Display for Ident { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.as_py()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_rs() { + let ident = Ident::from_rs("ident"); + assert_eq!(ident.as_rs(), "ident"); + assert_eq!(ident.as_py(), "ident"); + assert_eq!(ident.into_rs(), "ident"); + } + + #[test] + fn test_from_py() { + let ident = Ident::from_py("ident"); + assert_eq!(ident.as_rs(), "ident"); + assert_eq!(ident.as_py(), "ident"); + } + + #[test] + fn test_from_py_keyword() { + let ident = Ident::from_py("struct"); + assert_eq!(ident.as_rs(), "r#struct"); + assert_eq!(ident.as_py(), "struct"); + } + + #[test] + fn test_into_syn() { + let ident = Ident::from_rs("ident"); + let _syn_ident: syn::Ident = (&ident).try_into().unwrap(); + let _syn_ident: syn::Ident = ident.try_into().unwrap(); + } +} diff --git a/pyo3_bindgen_engine/src/syntax/common/mod.rs b/pyo3_bindgen_engine/src/syntax/common/mod.rs new file mode 100644 index 0000000..c225c84 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/mod.rs @@ -0,0 +1,7 @@ +mod attribute_variant; +mod ident; +mod path; + +pub use attribute_variant::AttributeVariant; +pub use ident::Ident; +pub use path::Path; diff --git a/pyo3_bindgen_engine/src/syntax/common/path.rs b/pyo3_bindgen_engine/src/syntax/common/path.rs new file mode 100644 index 0000000..7eefaf3 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/path.rs @@ -0,0 +1,211 @@ +use super::Ident; +use itertools::Itertools; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Path { + pub leading_colon: bool, + segments: Vec, +} + +impl Path { + pub fn from_rs(value: &str) -> Self { + assert!(!value.is_empty(), "Empty Rust path is not allowed"); + 
debug_assert!(!value.contains('.'), "Invalid Rust path: {value}"); + Self { + leading_colon: value.starts_with("::"), + segments: value + .split("::") + .filter(|s| !s.is_empty()) + .map(Ident::from_rs) + .collect(), + } + } + + pub fn from_py(value: &str) -> Self { + debug_assert!(!value.contains("::"), "Invalid Python path: {value}"); + Self { + leading_colon: false, + segments: std::iter::repeat(Ident::from_rs("super")) + .take(value.chars().take_while(|&c| c == '.').count()) + .chain( + value + .split('.') + .filter(|s| !s.is_empty()) + .map(Ident::from_py), + ) + .collect_vec(), + } + } + + pub fn into_rs(self) -> String { + std::iter::repeat(String::new()) + .take(usize::from(self.leading_colon)) + .chain(self.segments.into_iter().map(Ident::into_rs)) + .collect_vec() + .join("::") + } + + pub fn to_rs(&self) -> String { + std::iter::repeat("") + .take(usize::from(self.leading_colon)) + .chain(self.segments.iter().map(Ident::as_rs)) + .collect_vec() + .join("::") + } + + pub fn to_py(&self) -> String { + self.segments + .iter() + .map(Ident::as_py) + .map(|s| if s == "super" { "" } else { s }) + .collect_vec() + .join(".") + } + + pub fn join(&self, other: &Ident) -> Self { + Self { + leading_colon: self.leading_colon, + segments: self + .segments + .iter() + .cloned() + .chain(std::iter::once(other.clone())) + .collect(), + } + } + + pub fn concat(&self, other: &Path) -> Self { + assert!( + !other.leading_colon, + "Leading colon is not allowed in the second path when concatenating" + ); + Self { + leading_colon: self.leading_colon, + segments: self + .segments + .iter() + .chain(&other.segments) + .cloned() + .collect(), + } + } + + pub fn name(&self) -> &Ident { + self.segments.last().unwrap() + } + + pub fn root(&self) -> Option { + if !self.segments.is_empty() { + Some(Self { + leading_colon: self.leading_colon, + segments: vec![self.segments[0].clone()], + }) + } else { + None + } + } + + pub fn parent(&self) -> Option { + if self.segments.len() > 1 { + 
Some(Self { + leading_colon: self.leading_colon, + segments: self.segments[..self.segments.len() - 1].to_vec(), + }) + } else { + None + } + } +} + +impl TryFrom for syn::Path { + type Error = syn::Error; + fn try_from(value: Path) -> Result { + syn::parse_str::(&value.into_rs()) + } +} + +impl TryFrom<&Path> for syn::Path { + type Error = syn::Error; + fn try_from(value: &Path) -> Result { + syn::parse_str::(&value.to_rs()) + } +} + +impl std::ops::Deref for Path { + type Target = [Ident]; + fn deref(&self) -> &Self::Target { + &self.segments + } +} + +impl std::fmt::Display for Path { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_py()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_rs() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.to_rs(), "long::path::to"); + assert_eq!(path.to_py(), "long.path.to"); + assert_eq!(path.into_rs(), "long::path::to"); + } + + #[test] + fn test_from_rs_leading_colon() { + let path = Path::from_rs("::long::path::to"); + assert_eq!(path.to_rs(), "::long::path::to"); + assert_eq!(path.to_py(), "long.path.to"); + } + + #[test] + fn test_from_py() { + let path = Path::from_py("long.path.to"); + assert_eq!(path.to_py(), "long.path.to"); + assert_eq!(path.to_rs(), "long::path::to"); + } + + #[test] + fn test_from_py_relative() { + let path = Path::from_py("..long.path.to"); + assert_eq!(path.to_py(), "..long.path.to"); + assert_eq!(path.to_rs(), "super::super::long::path::to"); + } + + #[test] + fn test_from_py_keyword() { + let path = Path::from_py("mod.struct"); + assert_eq!(path.to_py(), "mod.struct"); + assert_eq!(path.to_rs(), "r#mod::r#struct"); + } + + #[test] + fn test_name() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.name().as_rs(), "to"); + } + + #[test] + fn test_root() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.root().unwrap().to_rs(), "long"); + } + + #[test] + fn test_parent() { 
+ let path = Path::from_rs("long::path::to"); + assert_eq!(path.parent().unwrap().to_rs(), "long::path"); + } + + #[test] + fn test_into_syn() { + let path = Path::from_rs("long::path::to"); + let _syn_path: syn::Path = (&path).try_into().unwrap(); + let _syn_path: syn::Path = path.try_into().unwrap(); + } +} diff --git a/pyo3_bindgen_engine/src/syntax/function.rs b/pyo3_bindgen_engine/src/syntax/function.rs new file mode 100644 index 0000000..e2c9eb4 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/function.rs @@ -0,0 +1,165 @@ +use super::{Ident, Path}; +use crate::{ + traits::{Canonicalize, Generate}, + types::Type, + Config, Result, +}; + +use pyo3::ToPyObject; + +#[derive(Debug, Clone)] +pub struct Function { + pub name: Path, + pub typ: FunctionType, + pub parameters: Vec, + pub return_annotation: Type, + pub docstring: Option, +} + +impl Function { + pub fn parse( + _cfg: &Config, + function: &pyo3::types::PyAny, + name: Path, + typ: FunctionType, + ) -> Result { + let py = function.py(); + + // Extract the signature of the function + let function_signature = py + .import(pyo3::intern!(py, "inspect")) + .unwrap() + .call_method1(pyo3::intern!(py, "signature"), (function,)) + .unwrap(); + + // Extract the parameters of the function + let parameters = function_signature + .getattr(pyo3::intern!(py, "parameters")) + .unwrap() + .call_method0(pyo3::intern!(py, "values")) + .unwrap() + .iter() + .unwrap() + .map(|param| { + let param = param?; + + let name = Ident::from_py(¶m.getattr(pyo3::intern!(py, "name"))?.to_string()); + let kind = + ParameterKind::from(param.getattr(pyo3::intern!(py, "kind"))?.extract::()?); + let annotation = { + let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; + if annotation.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { + None + } else { + Some(annotation) + } + } + .try_into()?; + let default = { + let default = param.getattr(pyo3::intern!(py, "default"))?; + if default.is(param.getattr(pyo3::intern!(py, 
"empty")).unwrap()) { + None + } else { + Some(default.to_object(py)) + } + }; + + Result::Ok(Parameter { + name, + kind, + annotation, + default, + }) + }) + .collect::>>()?; + + // Extract the return annotation of the function + let return_annotation = { + let return_annotation = + function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; + if return_annotation.is(function_signature + .getattr(pyo3::intern!(py, "empty")) + .unwrap()) + { + None + } else { + Some(return_annotation) + } + } + .try_into()?; + + // Extract the docstring of the function + let docstring = { + let docstring = function.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + typ, + parameters, + return_annotation, + docstring, + }) + } +} + +impl Generate for Function { + fn generate(&self, _cfg: &Config) -> Result { + todo!() + } +} + +impl Canonicalize for Function { + fn canonicalize(&mut self) { + todo!() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum FunctionType { + Function, + Method { class_path: Path, typ: MethodType }, + Closure(Path), +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum MethodType { + Constructor, + Call, + Regular, +} + +#[derive(Debug, Clone)] +pub struct Parameter { + pub name: Ident, + pub kind: ParameterKind, + pub annotation: Type, + pub default: Option>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ParameterKind { + PositionalOnly, + PositionalOrKeyword, + VarPositional, + KeywordOnly, + VarKeyword, +} + +impl From for ParameterKind { + fn from(kind: u8) -> Self { + match kind { + 0 => Self::PositionalOnly, + 1 => Self::PositionalOrKeyword, + 2 => Self::VarPositional, + 3 => Self::KeywordOnly, + 4 => Self::VarKeyword, + _ => unreachable!(), + } + } +} diff --git a/pyo3_bindgen_engine/src/syntax/import.rs b/pyo3_bindgen_engine/src/syntax/import.rs new file mode 100644 index 
0000000..884323a --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/import.rs @@ -0,0 +1,51 @@ +use super::Path; +use crate::{traits::Generate, Config, Result}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Import { + pub origin: Path, + pub target: Path, + import_type: ImportType, +} + +impl Import { + pub fn new(origin: Path, target: Path) -> Result { + let import_type = ImportType::from_paths(&origin, &target); + Ok(Self { + origin, + target, + import_type, + }) + } +} + +impl Generate for Import { + fn generate(&self, _cfg: &Config) -> Result { + todo!() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum ImportType { + ExternalImport, + PackageReexport, + SubmoduleReexport, +} + +impl ImportType { + fn from_paths(origin: &Path, target: &Path) -> Self { + let is_package_reexport = target + .root() + .is_some_and(|root_module| origin.starts_with(&root_module)); + let is_submodule_reexport = is_package_reexport + && target + .parent() + .is_some_and(|parent_module| origin.starts_with(&parent_module)); + match (is_package_reexport, is_submodule_reexport) { + (false, false) => Self::ExternalImport, + (true, false) => Self::PackageReexport, + (true, true) => Self::SubmoduleReexport, + _ => unreachable!(), + } + } +} diff --git a/pyo3_bindgen_engine/src/syntax/mod.rs b/pyo3_bindgen_engine/src/syntax/mod.rs new file mode 100644 index 0000000..08cc875 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/mod.rs @@ -0,0 +1,15 @@ +mod class; +mod common; +mod function; +mod import; +mod module; +mod property; +mod type_var; + +pub use class::Class; +pub use common::{AttributeVariant, Ident, Path}; +pub use function::{Function, FunctionType, MethodType}; +pub use import::Import; +pub use module::Module; +pub use property::{Property, PropertyOwner}; +pub use type_var::TypeVar; diff --git a/pyo3_bindgen_engine/src/syntax/module.rs b/pyo3_bindgen_engine/src/syntax/module.rs new file mode 100644 index 0000000..3fd727d --- /dev/null +++ 
b/pyo3_bindgen_engine/src/syntax/module.rs @@ -0,0 +1,261 @@ +use super::{ + AttributeVariant, Class, Function, FunctionType, Ident, Import, Path, Property, PropertyOwner, + TypeVar, +}; +use crate::{ + traits::{Canonicalize, Generate}, + Config, Result, +}; +use itertools::Itertools; +use rustc_hash::FxHashSet as HashSet; + +#[derive(Debug, Clone)] +pub struct Module { + pub name: Path, + pub prelude: Vec, + pub imports: Vec, + pub submodules: Vec, + pub classes: Vec, + pub functions: Vec, + pub type_vars: Vec, + pub properties: Vec, + pub docstring: Option, +} + +impl Module { + pub fn parse(cfg: &Config, module: &pyo3::types::PyModule) -> Result { + let py = module.py(); + + // Extract the name of the module + let name = Path::from_py(module.name().unwrap()); + + // Extract the index of the module as prelude (if enabled) + let prelude = if cfg.generate_preludes { + Self::extract_prelude(module) + } else { + Vec::new() + }; + + // Extract the list of all submodules in the module + let mut submodules_to_process = Self::extract_submodules(module).unwrap(); + + // Initialize lists for all other members of the module + let mut imports = Vec::new(); + let mut classes = Vec::new(); + let mut functions = Vec::new(); + let mut type_vars = Vec::new(); + let mut properties = Vec::new(); + + // Extract the list of all attribute names in the module + module + .dir() + .iter() + // Convert each attribute name to an identifier + .map(|attr_name| Ident::from_py(&attr_name.to_string())) + // Expand each attribute to a tuple of (attr, attr_name, attr_module, attr_type) + .map(|attr_name| { + let attr = module.getattr(attr_name.as_py()).unwrap_or_else(|_| { + unreachable!( + "Python object must always have attributes listed in its `__dir__`: {}", + attr_name + ) + }); + let attr_module = Path::from_py( + &attr + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + let attr_type = attr.get_type(); + + (attr, attr_name, 
attr_module, attr_type) + }) + // Filter attributes based on various configurable conditions + .filter(|(attr, attr_name, attr_module, attr_type)| { + cfg.is_attr_allowed(attr, attr_name, attr_module, attr_type) + }) + // Iterate over the remaining attributes and parse them + .try_for_each(|(attr, attr_name, attr_module, attr_type)| { + match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, true) + .unwrap() + { + AttributeVariant::Import => { + let import = Import::new( + attr_module.join(&Ident::from_py( + &attr + .getattr(pyo3::intern!(py, "__name__")) + .map(std::string::ToString::to_string) + .unwrap_or(attr_name.as_py().to_owned()), + )), + name.join(&attr_name), + ) + .unwrap(); + imports.push(import); + } + AttributeVariant::Module => { + // Note: This should technically not be necessary as `Self::extract_submodules` is supposed to extract all submodules + submodules_to_process.insert(attr_name); + } + AttributeVariant::Class => { + let class = + Class::parse(cfg, attr.downcast().unwrap(), name.join(&attr_name)) + .unwrap(); + classes.push(class); + } + AttributeVariant::Function | AttributeVariant::Method => { + let function = Function::parse( + cfg, + attr, + name.join(&attr_name), + FunctionType::Function, + ) + .unwrap(); + functions.push(function); + } + AttributeVariant::Closure => { + let function = Function::parse( + cfg, + attr, + name.join(&attr_name), + FunctionType::Closure(name.join(&attr_name)), + ) + .unwrap(); + functions.push(function); + } + AttributeVariant::TypeVar => { + let type_var = TypeVar::new(name.join(&attr_name)).unwrap(); + type_vars.push(type_var); + } + AttributeVariant::Property => { + let property = Property::parse( + cfg, + attr, + name.join(&attr_name), + PropertyOwner::Module(name.clone()), + ) + .unwrap(); + properties.push(property); + } + } + Result::Ok(()) + })?; + + // Process submodules + let submodules = submodules_to_process + .into_iter() + .filter_map(|submodule_name| { + 
py.import(name.join(&submodule_name).to_py().as_str()).ok() + }) + .map(|submodule| Self::parse(cfg, submodule)) + .collect::>() + .unwrap(); + + // Extract the docstring of the module + let docstring = { + let docstring = module.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + prelude, + imports, + submodules, + classes, + functions, + type_vars, + properties, + docstring, + }) + } + + fn extract_prelude(module: &pyo3::prelude::PyModule) -> Vec { + // Extract the index (__all__) of the module if it exists + let index_attr_names = if let Ok(index) = module.index() { + index + .iter() + .map(|x| Ident::from_py(&x.to_string())) + .collect_vec() + } else { + Vec::new() + }; + + // Compare the index with public attrs of the module + // Return an empty vector if they are identical (no need to generate a prelude) + { + let public_attr_names_set: HashSet<_> = module + .dir() + .iter() + .map(|attr_name| Ident::from_py(&attr_name.to_string())) + .filter(|attr_name| !attr_name.as_py().starts_with('_')) + .collect(); + let index_attr_names_set = index_attr_names.iter().cloned().collect::>(); + + if index_attr_names_set == public_attr_names_set { + return Vec::new(); + } + } + + index_attr_names + } + + fn extract_submodules(module: &pyo3::prelude::PyModule) -> Result> { + let py = module.py(); + let pkgutil = py.import(pyo3::intern!(py, "pkgutil")).unwrap(); + + // Determine if the module is a package that contains submodules + let module_name = Path::from_py(module.name().unwrap()); + let is_pkg = module + .getattr(pyo3::intern!(py, "__package__")) + .map(|package| Path::from_py(&package.to_string())) + .is_ok_and(|package_name| package_name == module_name); + + // If the module is not a package, return an empty set + if !is_pkg { + return Ok(HashSet::default()); + } + + // Extract the paths of the module + let module_paths = module + 
.getattr(pyo3::intern!(py, "__path__")) + .unwrap() + .extract::<&pyo3::types::PyList>() + .unwrap() + .iter() + .map(|x| std::path::PathBuf::from(x.to_string())) + .collect_vec(); + + // Extract the names of all submodules via `pkgutil.iter_modules` + pkgutil + .call_method1(pyo3::intern!(py, "iter_modules"), (module_paths,)) + .unwrap() + .iter() + .unwrap() + .map(|submodule| { + Ok(Ident::from_py( + &submodule + .unwrap() + .getattr(pyo3::intern!(py, "name")) + .unwrap() + .to_string(), + )) + }) + .collect() + } +} + +impl Generate for Module { + fn generate(&self, _cfg: &Config) -> Result { + todo!() + } +} + +impl Canonicalize for Module { + fn canonicalize(&mut self) { + todo!() + } +} diff --git a/pyo3_bindgen_engine/src/syntax/property.rs b/pyo3_bindgen_engine/src/syntax/property.rs new file mode 100644 index 0000000..cbf320c --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/property.rs @@ -0,0 +1,168 @@ +use super::Path; +use crate::{ + traits::{Canonicalize, Generate}, + types::Type, + Config, Result, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Property { + pub name: Path, + pub owner: PropertyOwner, + pub is_mutable: bool, + pub annotation: Type, + pub setter_annotation: Option, + pub docstring: Option, +} + +impl Property { + pub fn parse( + _cfg: &Config, + property: &pyo3::types::PyAny, + name: Path, + owner: PropertyOwner, + ) -> Result { + let py = property.py(); + + // Extract the type of the property + let typ = property.get_type(); + + // Extract the docstring of the property + let mut docstring = { + let docstring = property.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + // Determine the mutability and type of the property + let (is_mutable, annotation, setter_annotation); + match owner { + PropertyOwner::Module(_) => { + is_mutable = true; + annotation = Type::try_from(typ)?; + setter_annotation = None; + } + 
PropertyOwner::Class(_) => { + let signature = py + .import(pyo3::intern!(py, "inspect")) + .unwrap() + .getattr(pyo3::intern!(py, "signature")) + .unwrap(); + + if let Ok(getter) = property.getattr(pyo3::intern!(py, "fget")) { + // Extract the signature of the function + let function_signature = signature.call1((getter,)).unwrap(); + + // Extract the annotation from the return of the function + annotation = { + let return_annotation = + function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; + if return_annotation.is(function_signature + .getattr(pyo3::intern!(py, "empty")) + .unwrap()) + { + None + } else { + Some(return_annotation) + } + } + .try_into()?; + + // Update the docstring if it is empty + if docstring.is_none() { + docstring = { + let docstring = + getter.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + } + } else { + annotation = Type::try_from(typ)?; + } + + match property.getattr(pyo3::intern!(py, "fset")) { + Ok(setter) if !setter.is_none() => { + // Extract the signature of the function + let function_signature = signature.call1((setter,)).unwrap(); + + // Extract the annotation from the parameter of the function + setter_annotation = Some( + { + let param = function_signature + .getattr(pyo3::intern!(py, "parameters")) + .unwrap() + .call_method0(pyo3::intern!(py, "values")) + .unwrap() + .iter() + .unwrap() + .nth(1) + .unwrap() + .unwrap(); + let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; + if annotation.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) + { + None + } else { + Some(annotation) + } + } + .try_into()?, + ); + is_mutable = true; + + // Update the docstring if it is still empty + if docstring.is_none() { + docstring = { + let docstring = + setter.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + } 
+ } + _ => { + setter_annotation = None; + is_mutable = false; + } + } + } + } + + Ok(Self { + name, + owner, + is_mutable, + annotation, + setter_annotation, + docstring, + }) + } +} + +impl Generate for Property { + fn generate(&self, _cfg: &Config) -> Result { + todo!() + } +} + +impl Canonicalize for Property { + fn canonicalize(&mut self) { + todo!() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PropertyOwner { + Module(Path), + Class(Path), +} diff --git a/pyo3_bindgen_engine/src/syntax/type_var.rs b/pyo3_bindgen_engine/src/syntax/type_var.rs new file mode 100644 index 0000000..c4848ff --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/type_var.rs @@ -0,0 +1,19 @@ +use super::Path; +use crate::{traits::Generate, Config, Result}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TypeVar { + pub name: Path, +} + +impl TypeVar { + pub fn new(name: Path) -> Result { + Ok(Self { name }) + } +} + +impl Generate for TypeVar { + fn generate(&self, _cfg: &Config) -> Result { + todo!() + } +} diff --git a/pyo3_bindgen_engine/src/traits.rs b/pyo3_bindgen_engine/src/traits.rs new file mode 100644 index 0000000..f73ffb6 --- /dev/null +++ b/pyo3_bindgen_engine/src/traits.rs @@ -0,0 +1,9 @@ +use crate::{Config, Result}; + +pub trait Generate { + fn generate(&self, cfg: &Config) -> Result; +} + +pub trait Canonicalize: Sized { + fn canonicalize(&mut self); +} diff --git a/pyo3_bindgen_engine/src/types.rs b/pyo3_bindgen_engine/src/types/mod.rs similarity index 99% rename from pyo3_bindgen_engine/src/types.rs rename to pyo3_bindgen_engine/src/types/mod.rs index efbd4aa..3fb888c 100644 --- a/pyo3_bindgen_engine/src/types.rs +++ b/pyo3_bindgen_engine/src/types/mod.rs @@ -9,7 +9,7 @@ use std::str::FromStr; /// /// Note that this is not a complete mapping at the moment. The public API is /// subject to large changes. 
-#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Type { PyAny, Unhandled(String), @@ -76,6 +76,16 @@ pub enum Type { PyType, } +impl TryFrom> for Type { + type Error = pyo3::PyErr; + fn try_from(value: Option<&pyo3::types::PyAny>) -> Result { + Ok(match value { + Some(t) => Self::try_from(t)?, + None => Self::PyNone, + }) + } +} + impl TryFrom<&pyo3::types::PyAny> for Type { type Error = pyo3::PyErr; fn try_from(value: &pyo3::types::PyAny) -> Result { diff --git a/pyo3_bindgen_engine/src/utils/error.rs b/pyo3_bindgen_engine/src/utils/error.rs new file mode 100644 index 0000000..d82cc2d --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/error.rs @@ -0,0 +1,18 @@ +/// Error type for `pyo3_bindgen` operations. +#[derive(Debug, thiserror::Error)] +pub enum PyBindgenError { + #[error(transparent)] + PyError(#[from] pyo3::PyErr), + #[error("Failed to convert `pyo3::PyAny` to a more specific Python type: {0}")] + PyDowncastError(String), + #[error(transparent)] + IoError(#[from] std::io::Error), + #[error(transparent)] + SynError(#[from] syn::Error), +} + +impl<'py> From> for PyBindgenError { + fn from(value: pyo3::PyDowncastError) -> Self { + PyBindgenError::PyDowncastError(value.to_string()) + } +} diff --git a/pyo3_bindgen_engine/src/utils/io.rs b/pyo3_bindgen_engine/src/utils/io.rs new file mode 100644 index 0000000..06f898f --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/io.rs @@ -0,0 +1,43 @@ +use crate::Result; + +pub fn with_suppressed_python_output( + py: pyo3::Python, + suppress_stdout: bool, + suppress_stderr: bool, + f: impl FnOnce() -> Result, +) -> Result { + // If both stdout and stderr are suppressed, there's no need to do anything + if !suppress_stdout && !suppress_stderr { + return f(); + } + + let sys = py.import(pyo3::intern!(py, "sys"))?; + let stdout_ident = pyo3::intern!(py, "stdout"); + let stderr_ident = pyo3::intern!(py, "stderr"); + + // Record the original stdout and stderr + let original_stdout = 
sys.getattr(stdout_ident)?; + let original_stderr = sys.getattr(stderr_ident)?; + + // Suppress the output + let supressed_output = py.eval(r"lambda: type('SupressedOutput', (), {'write': lambda self, x: None, 'flush': lambda self: None})", None, None)?; + if suppress_stdout { + sys.setattr(stdout_ident, supressed_output)?; + } + if suppress_stderr { + sys.setattr(stderr_ident, supressed_output)?; + } + + // Run the function + let ret = f()?; + + // Restore the original stdout and stderr + if suppress_stdout { + sys.setattr(stdout_ident, original_stdout)?; + } + if suppress_stderr { + sys.setattr(stderr_ident, original_stderr)?; + } + + Ok(ret) +} diff --git a/pyo3_bindgen_engine/src/utils/mod.rs b/pyo3_bindgen_engine/src/utils/mod.rs new file mode 100644 index 0000000..f708c24 --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/mod.rs @@ -0,0 +1,6 @@ +//! Various utilities. + +pub mod build; +pub mod error; +pub(crate) mod io; +pub mod result; diff --git a/pyo3_bindgen_engine/src/utils/result.rs b/pyo3_bindgen_engine/src/utils/result.rs new file mode 100644 index 0000000..35673bc --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/result.rs @@ -0,0 +1,5 @@ +/// Result wrapper for `PyBindgenError`. +pub type PyBindgenResult = std::result::Result; + +/// Crate-local alias for `PyBindgenResult`. 
+pub(crate) type Result = PyBindgenResult; From 4c7812078313d8cba8377779124d272f9a60b967 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Sun, 25 Feb 2024 17:25:56 +0100 Subject: [PATCH 09/13] Refactoring: Add generators for imports & prelude [skip ci] Signed-off-by: Andrej Orsula --- pyo3_bindgen_engine/src/codegen.rs | 83 ++++---- pyo3_bindgen_engine/src/config.rs | 1 - pyo3_bindgen_engine/src/lib.rs | 8 +- pyo3_bindgen_engine/src/syntax/class.rs | 26 +-- .../src/syntax/common/ident.rs | 2 + pyo3_bindgen_engine/src/syntax/common/path.rs | 75 ++++++- pyo3_bindgen_engine/src/syntax/function.rs | 183 +++++++++-------- pyo3_bindgen_engine/src/syntax/import.rs | 55 ++++- pyo3_bindgen_engine/src/syntax/module.rs | 193 ++++++++++++++---- pyo3_bindgen_engine/src/syntax/property.rs | 14 +- pyo3_bindgen_engine/src/syntax/type_var.rs | 6 +- pyo3_bindgen_engine/src/traits.rs | 9 - 12 files changed, 428 insertions(+), 227 deletions(-) delete mode 100644 pyo3_bindgen_engine/src/traits.rs diff --git a/pyo3_bindgen_engine/src/codegen.rs b/pyo3_bindgen_engine/src/codegen.rs index 5c01d18..c306a02 100644 --- a/pyo3_bindgen_engine/src/codegen.rs +++ b/pyo3_bindgen_engine/src/codegen.rs @@ -1,4 +1,3 @@ -use crate::traits::{Canonicalize, Generate}; use crate::{syntax::Module, Config, Result}; #[derive(Debug, Default, Clone)] @@ -28,6 +27,15 @@ impl Codegen { Ok(self) } + pub fn module_from_str(self, module_name: &str) -> Result { + #[cfg(not(PyPy))] + pyo3::prepare_freethreaded_python(); + pyo3::Python::with_gil(|py| { + let module = py.import(module_name)?; + self.module(module) + }) + } + pub fn modules(mut self, modules: &[&pyo3::types::PyModule]) -> Result { self.modules.reserve(modules.len()); for module in modules { @@ -36,33 +44,28 @@ impl Codegen { Ok(self) } - pub fn generate(mut self) -> Result { - let mut tokens = proc_macro2::TokenStream::new(); - - pyo3::Python::with_gil(|py| { - crate::io_utils::with_suppressed_python_output( - py, - self.cfg.suppress_python_stdout, - 
self.cfg.suppress_python_stderr, - || { - // Parse external modules (if enabled) - if self.cfg.generate_dependencies { - self.parse_dependencies()?; - } + pub fn modules_from_str(mut self, module_names: &[&str]) -> Result { + self.modules.reserve(module_names.len()); + for module_name in module_names { + self = self.module_from_str(module_name)?; + } + Ok(self) + } - // Canonicalize the module tree - self.canonicalize(); + pub fn generate(mut self) -> Result { + // Parse external modules (if enabled) + if self.cfg.generate_dependencies { + self.parse_dependencies()?; + } - // Generate the bindings for all modules - for module in &self.modules { - tokens.extend(module.generate(&self.cfg)?); - } - Ok(()) - }, - ) - })?; + // Canonicalize the module tree + self.canonicalize(); - Ok(tokens) + // Generate the bindings for all modules + self.modules + .iter() + .map(|module| module.generate(&self.cfg, true)) + .collect::>() } pub fn build(self, output_path: impl AsRef) -> Result<()> { @@ -71,31 +74,15 @@ impl Codegen { } fn parse_dependencies(&mut self) -> Result<()> { - // TODO: Parse modules of dependencies - todo!() + // // TODO: Parse modules of dependencies + // todo!() + Ok(()) } -} -impl Canonicalize for Codegen { fn canonicalize(&mut self) { - todo!(); - for module in &mut self.modules { - module.canonicalize(); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_codegen() { - pyo3::prepare_freethreaded_python(); - pyo3::Python::with_gil(|py| { - let cfg = Config::default(); - let module = py.import("gymnasium").unwrap(); - let _codegen = Codegen::new(cfg).unwrap().module(module).unwrap(); - }); + // todo!(); + // for module in &mut self.modules { + // module.canonicalize(); + // } } } diff --git a/pyo3_bindgen_engine/src/config.rs b/pyo3_bindgen_engine/src/config.rs index 1eb6705..b7aabfb 100644 --- a/pyo3_bindgen_engine/src/config.rs +++ b/pyo3_bindgen_engine/src/config.rs @@ -22,7 +22,6 @@ impl Default for Config { impl Config { pub 
fn is_attr_allowed( &self, - _attr: &pyo3::types::PyAny, attr_name: &Ident, attr_module: &Path, attr_type: &pyo3::types::PyType, diff --git a/pyo3_bindgen_engine/src/lib.rs b/pyo3_bindgen_engine/src/lib.rs index a9eb072..823619e 100644 --- a/pyo3_bindgen_engine/src/lib.rs +++ b/pyo3_bindgen_engine/src/lib.rs @@ -3,13 +3,17 @@ mod codegen; mod config; mod syntax; -mod traits; mod types; mod utils; +// Re-export the public API pub use codegen::Codegen; pub use config::Config; -pub use utils::{build::build_bindings, error::PyBindgenError, result::PyBindgenResult}; +pub use utils::{error::PyBindgenError, result::PyBindgenResult}; +// Re-export pyo3 for convenience +pub use pyo3; + +// Internal re-exports for convenience use utils::io as io_utils; use utils::result::Result; diff --git a/pyo3_bindgen_engine/src/syntax/class.rs b/pyo3_bindgen_engine/src/syntax/class.rs index 146299f..551140a 100644 --- a/pyo3_bindgen_engine/src/syntax/class.rs +++ b/pyo3_bindgen_engine/src/syntax/class.rs @@ -1,11 +1,7 @@ use super::{ AttributeVariant, Function, FunctionType, Ident, MethodType, Path, Property, PropertyOwner, }; -use crate::{ - traits::{Canonicalize, Generate}, - Config, Result, -}; -use itertools::Itertools; +use crate::{Config, Result}; #[derive(Debug, Clone)] pub struct Class { @@ -50,12 +46,13 @@ impl Class { (attr, attr_name, attr_module, attr_type) }) // Filter attributes based on various configurable conditions - .filter(|(attr, attr_name, attr_module, attr_type)| { - cfg.is_attr_allowed(attr, attr_name, attr_module, attr_type) + .filter(|(_attr, attr_name, attr_module, attr_type)| { + cfg.is_attr_allowed(attr_name, attr_module, attr_type) || ["__init__", "__call__"].contains(&attr_name.as_py()) }) // Iterate over the remaining attributes and parse them .try_for_each(|(attr, attr_name, attr_module, attr_type)| { + let attr_name_full = name.join(&attr_name.clone().into()); match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, false) .unwrap() { 
@@ -67,15 +64,14 @@ impl Class { } AttributeVariant::Class => { let subclass = - Self::parse(cfg, attr.downcast().unwrap(), name.join(&attr_name)) - .unwrap(); + Self::parse(cfg, attr.downcast().unwrap(), attr_name_full).unwrap(); subclasses.push(subclass); } AttributeVariant::Function | AttributeVariant::Method => { let method = Function::parse( cfg, attr, - name.join(&attr_name), + attr_name_full, FunctionType::Method { class_path: name.clone(), typ: match attr_name.as_py() { @@ -98,7 +94,7 @@ impl Class { let property = Property::parse( cfg, attr, - name.join(&attr_name), + attr_name_full, PropertyOwner::Class(name.clone()), ) .unwrap(); @@ -128,14 +124,14 @@ impl Class { } } -impl Generate for Class { - fn generate(&self, _cfg: &Config) -> Result { +impl Class { + pub fn generate(&self, _cfg: &Config) -> Result { todo!() } } -impl Canonicalize for Class { - fn canonicalize(&mut self) { +impl Class { + pub fn canonicalize(&mut self) { todo!() } } diff --git a/pyo3_bindgen_engine/src/syntax/common/ident.rs b/pyo3_bindgen_engine/src/syntax/common/ident.rs index c011143..d977f3a 100644 --- a/pyo3_bindgen_engine/src/syntax/common/ident.rs +++ b/pyo3_bindgen_engine/src/syntax/common/ident.rs @@ -4,10 +4,12 @@ pub struct Ident(String); impl Ident { pub fn from_rs(value: &str) -> Self { + debug_assert!(!value.is_empty()); Self(value.to_owned()) } pub fn from_py(value: &str) -> Self { + debug_assert!(!value.is_empty()); Self(Self::py_to_rs(value)) } diff --git a/pyo3_bindgen_engine/src/syntax/common/path.rs b/pyo3_bindgen_engine/src/syntax/common/path.rs index 7eefaf3..126acbf 100644 --- a/pyo3_bindgen_engine/src/syntax/common/path.rs +++ b/pyo3_bindgen_engine/src/syntax/common/path.rs @@ -1,7 +1,7 @@ use super::Ident; use itertools::Itertools; -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] pub struct Path { pub leading_colon: bool, segments: Vec, @@ -9,7 +9,9 @@ pub struct Path { impl Path { pub fn from_rs(value: 
&str) -> Self { - assert!(!value.is_empty(), "Empty Rust path is not allowed"); + if value.is_empty() { + return Self::default(); + } debug_assert!(!value.contains('.'), "Invalid Rust path: {value}"); Self { leading_colon: value.starts_with("::"), @@ -22,6 +24,9 @@ impl Path { } pub fn from_py(value: &str) -> Self { + if value.is_empty() { + return Self::default(); + } debug_assert!(!value.contains("::"), "Invalid Python path: {value}"); Self { leading_colon: false, @@ -62,14 +67,18 @@ impl Path { .join(".") } - pub fn join(&self, other: &Ident) -> Self { + pub fn join(&self, other: &Path) -> Self { + assert!( + !other.leading_colon, + "Leading colon is not allowed in the second path when joining" + ); Self { leading_colon: self.leading_colon, segments: self .segments .iter() .cloned() - .chain(std::iter::once(other.clone())) + .chain(other.iter().cloned()) .collect(), } } @@ -115,6 +124,64 @@ impl Path { None } } + + /// Define a fully qualified path from self to target. + /// Use self if they start at the same point. + /// Use super to go up the hierarchy. 
+ /// If they do not share any common prefix, use super until the nothing is reached + pub fn relative_to(&self, target: &Path) -> Self { + if self == target { + return Path { + leading_colon: false, + segments: vec![Ident::from_rs("self")], + }; + } + + // Find the length of the common prefix + let common_prefix_length = self + .segments + .iter() + .zip(target.segments.iter()) + .take_while(|(a, b)| a == b) + .count(); + + // Determine the relative path + let relative_segments = match common_prefix_length { + n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) + .take(self.segments.len() - n) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + n if n == self.segments.len() => std::iter::once(Ident::from_rs("self")) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + _ => { + unreachable!() + } + }; + + Path { + leading_colon: false, + segments: relative_segments, + } + } +} + +impl From for Path { + fn from(ident: Ident) -> Self { + Self { + leading_colon: false, + segments: vec![ident], + } + } +} + +impl From<&[Ident]> for Path { + fn from(segments: &[Ident]) -> Self { + Self { + leading_colon: false, + segments: segments.to_owned(), + } + } } impl TryFrom for syn::Path { diff --git a/pyo3_bindgen_engine/src/syntax/function.rs b/pyo3_bindgen_engine/src/syntax/function.rs index e2c9eb4..83f2ea5 100644 --- a/pyo3_bindgen_engine/src/syntax/function.rs +++ b/pyo3_bindgen_engine/src/syntax/function.rs @@ -1,10 +1,5 @@ use super::{Ident, Path}; -use crate::{ - traits::{Canonicalize, Generate}, - types::Type, - Config, Result, -}; - +use crate::{types::Type, Config, Result}; use pyo3::ToPyObject; #[derive(Debug, Clone)] @@ -25,69 +20,6 @@ impl Function { ) -> Result { let py = function.py(); - // Extract the signature of the function - let function_signature = py - .import(pyo3::intern!(py, "inspect")) - .unwrap() - .call_method1(pyo3::intern!(py, "signature"), (function,)) - .unwrap(); - - // Extract the 
parameters of the function - let parameters = function_signature - .getattr(pyo3::intern!(py, "parameters")) - .unwrap() - .call_method0(pyo3::intern!(py, "values")) - .unwrap() - .iter() - .unwrap() - .map(|param| { - let param = param?; - - let name = Ident::from_py(¶m.getattr(pyo3::intern!(py, "name"))?.to_string()); - let kind = - ParameterKind::from(param.getattr(pyo3::intern!(py, "kind"))?.extract::()?); - let annotation = { - let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; - if annotation.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { - None - } else { - Some(annotation) - } - } - .try_into()?; - let default = { - let default = param.getattr(pyo3::intern!(py, "default"))?; - if default.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { - None - } else { - Some(default.to_object(py)) - } - }; - - Result::Ok(Parameter { - name, - kind, - annotation, - default, - }) - }) - .collect::>>()?; - - // Extract the return annotation of the function - let return_annotation = { - let return_annotation = - function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; - if return_annotation.is(function_signature - .getattr(pyo3::intern!(py, "empty")) - .unwrap()) - { - None - } else { - Some(return_annotation) - } - } - .try_into()?; - // Extract the docstring of the function let docstring = { let docstring = function.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); @@ -98,24 +30,113 @@ impl Function { } }; - Ok(Self { - name, - typ, - parameters, - return_annotation, - docstring, - }) + // Extract the signature of the function + if let Ok(function_signature) = py + .import(pyo3::intern!(py, "inspect")) + .unwrap() + .call_method1(pyo3::intern!(py, "signature"), (function,)) + { + // Extract the parameters of the function + let parameters = function_signature + .getattr(pyo3::intern!(py, "parameters")) + .unwrap() + .call_method0(pyo3::intern!(py, "values")) + .unwrap() + .iter() + .unwrap() + .map(|param| { + let param = 
param?; + + let name = + Ident::from_py(¶m.getattr(pyo3::intern!(py, "name"))?.to_string()); + let kind = ParameterKind::from( + param.getattr(pyo3::intern!(py, "kind"))?.extract::()?, + ); + let annotation = { + let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; + if annotation.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { + None + } else { + Some(annotation) + } + } + .try_into()?; + let default = { + let default = param.getattr(pyo3::intern!(py, "default"))?; + if default.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { + None + } else { + Some(default.to_object(py)) + } + }; + + Result::Ok(Parameter { + name, + kind, + annotation, + default, + }) + }) + .collect::>>()?; + + // Extract the return annotation of the function + let return_annotation = { + let return_annotation = + function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; + if return_annotation.is(function_signature + .getattr(pyo3::intern!(py, "empty")) + .unwrap()) + { + None + } else { + Some(return_annotation) + } + } + .try_into()?; + + Ok(Self { + name, + typ, + parameters, + return_annotation, + docstring, + }) + } else { + Ok(Self { + name, + typ, + parameters: vec![ + Parameter { + name: Ident::from_rs("args"), + kind: ParameterKind::VarPositional, + annotation: Type::PyTuple(vec![Type::Unknown]), + default: None, + }, + Parameter { + name: Ident::from_rs("kwargs"), + kind: ParameterKind::VarKeyword, + annotation: Type::PyDict { + t_key: Box::new(Type::PyString), + t_value: Box::new(Type::Unknown), + }, + default: None, + }, + ], + return_annotation: Type::Unknown, + docstring, + }) + } } } -impl Generate for Function { - fn generate(&self, _cfg: &Config) -> Result { +impl Function { + pub fn generate(&self, _cfg: &Config) -> Result { todo!() } } -impl Canonicalize for Function { - fn canonicalize(&mut self) { +impl Function { + pub fn canonicalize(&mut self) { todo!() } } @@ -124,7 +145,7 @@ impl Canonicalize for Function { pub enum FunctionType 
{ Function, Method { class_path: Path, typ: MethodType }, - Closure(Path), + Closure, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/pyo3_bindgen_engine/src/syntax/import.rs b/pyo3_bindgen_engine/src/syntax/import.rs index 884323a..c8cb270 100644 --- a/pyo3_bindgen_engine/src/syntax/import.rs +++ b/pyo3_bindgen_engine/src/syntax/import.rs @@ -1,11 +1,11 @@ use super::Path; -use crate::{traits::Generate, Config, Result}; +use crate::{Config, Result}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Import { pub origin: Path, pub target: Path, - import_type: ImportType, + pub import_type: ImportType, } impl Import { @@ -19,17 +19,52 @@ impl Import { } } -impl Generate for Import { - fn generate(&self, _cfg: &Config) -> Result { - todo!() +impl Import { + pub fn generate(&self, cfg: &Config) -> Result { + // Skip external imports if their generation is disabled + if !cfg.generate_dependencies && self.import_type == ImportType::ExternalImport { + return Ok(proc_macro2::TokenStream::new()); + } + + // Skip identity imports + if self.origin == self.target { + return Ok(proc_macro2::TokenStream::new()); + } + + // Determine the visibility of the import based on its type + let visibility = match self.import_type { + ImportType::ExternalImport => proc_macro2::TokenStream::new(), + ImportType::Reexport => quote::quote! { pub(crate) }, + ImportType::ScopedReexport => quote::quote! { pub }, + }; + + // Generate the path to the target module + let relative_path: syn::Path = self + .target + .parent() + .unwrap_or_default() + .relative_to(&self.origin) + .try_into()?; + + // Use alias for the target module if it has a different name than the last segment of its path + let maybe_alias = if self.origin.name() != self.target.name() { + let alias: syn::Ident = self.target.name().try_into()?; + quote::quote! { as #alias } + } else { + proc_macro2::TokenStream::new() + }; + + Ok(quote::quote! 
{ + #visibility use #relative_path #maybe_alias; + }) } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum ImportType { +pub enum ImportType { ExternalImport, - PackageReexport, - SubmoduleReexport, + Reexport, + ScopedReexport, } impl ImportType { @@ -43,8 +78,8 @@ impl ImportType { .is_some_and(|parent_module| origin.starts_with(&parent_module)); match (is_package_reexport, is_submodule_reexport) { (false, false) => Self::ExternalImport, - (true, false) => Self::PackageReexport, - (true, true) => Self::SubmoduleReexport, + (true, false) => Self::Reexport, + (true, true) => Self::ScopedReexport, _ => unreachable!(), } } diff --git a/pyo3_bindgen_engine/src/syntax/module.rs b/pyo3_bindgen_engine/src/syntax/module.rs index 3fd727d..458f77e 100644 --- a/pyo3_bindgen_engine/src/syntax/module.rs +++ b/pyo3_bindgen_engine/src/syntax/module.rs @@ -2,10 +2,7 @@ use super::{ AttributeVariant, Class, Function, FunctionType, Ident, Import, Path, Property, PropertyOwner, TypeVar, }; -use crate::{ - traits::{Canonicalize, Generate}, - Config, Result, -}; +use crate::{Config, Result}; use itertools::Itertools; use rustc_hash::FxHashSet as HashSet; @@ -31,7 +28,7 @@ impl Module { // Extract the index of the module as prelude (if enabled) let prelude = if cfg.generate_preludes { - Self::extract_prelude(module) + Self::extract_prelude(cfg, module) } else { Vec::new() }; @@ -71,26 +68,41 @@ impl Module { (attr, attr_name, attr_module, attr_type) }) // Filter attributes based on various configurable conditions - .filter(|(attr, attr_name, attr_module, attr_type)| { - cfg.is_attr_allowed(attr, attr_name, attr_module, attr_type) + .filter(|(_attr, attr_name, attr_module, attr_type)| { + cfg.is_attr_allowed(attr_name, attr_module, attr_type) }) // Iterate over the remaining attributes and parse them .try_for_each(|(attr, attr_name, attr_module, attr_type)| { + let attr_name_full = name.join(&attr_name.clone().into()); match AttributeVariant::determine(py, attr, attr_type, 
&attr_module, &name, true) .unwrap() { AttributeVariant::Import => { - let import = Import::new( - attr_module.join(&Ident::from_py( - &attr - .getattr(pyo3::intern!(py, "__name__")) - .map(std::string::ToString::to_string) - .unwrap_or(attr_name.as_py().to_owned()), - )), - name.join(&attr_name), - ) - .unwrap(); - imports.push(import); + let origin = attr_module.join(&Path::from_py( + &attr + .getattr(pyo3::intern!(py, "__name__")) + .map(std::string::ToString::to_string) + .unwrap_or(attr_name.as_py().to_owned()), + )); + // Make sure the origin attribute is allowed (each segment of the path) + if (0..origin.len()).all(|i| { + let _attr_name; + let _attr_module; + let _attr_type; + if i < origin.len() - 1 { + _attr_name = &origin[i]; + _attr_module = origin[..i].into(); + _attr_type = py.get_type::(); + } else { + _attr_name = &attr_name; + _attr_module = attr_module.clone(); + _attr_type = attr_type; + }; + cfg.is_attr_allowed(_attr_name, &_attr_module, _attr_type) + }) { + let import = Import::new(origin, attr_name_full).unwrap(); + imports.push(import); + } } AttributeVariant::Module => { // Note: This should technically not be necessary as `Self::extract_submodules` is supposed to extract all submodules @@ -98,39 +110,44 @@ impl Module { } AttributeVariant::Class => { let class = - Class::parse(cfg, attr.downcast().unwrap(), name.join(&attr_name)) - .unwrap(); + Class::parse(cfg, attr.downcast().unwrap(), attr_name_full).unwrap(); classes.push(class); } - AttributeVariant::Function | AttributeVariant::Method => { - let function = Function::parse( - cfg, - attr, - name.join(&attr_name), - FunctionType::Function, - ) - .unwrap(); + AttributeVariant::Function => { + let function = + Function::parse(cfg, attr, attr_name_full, FunctionType::Function) + .unwrap(); functions.push(function); } + AttributeVariant::Method => { + eprintln!("WARN: Methods in modules are not supported: {attr_name}"); + // let function = Function::parse( + // cfg, + // attr, + // 
attr_name_full, + // FunctionType::Method { + // class_path: todo!(), + // typ: super::MethodType::Regular, + // }, + // ) + // .unwrap(); + // functions.push(function); + } AttributeVariant::Closure => { - let function = Function::parse( - cfg, - attr, - name.join(&attr_name), - FunctionType::Closure(name.join(&attr_name)), - ) - .unwrap(); + let function = + Function::parse(cfg, attr, attr_name_full, FunctionType::Closure) + .unwrap(); functions.push(function); } AttributeVariant::TypeVar => { - let type_var = TypeVar::new(name.join(&attr_name)).unwrap(); + let type_var = TypeVar::new(attr_name_full).unwrap(); type_vars.push(type_var); } AttributeVariant::Property => { let property = Property::parse( cfg, attr, - name.join(&attr_name), + attr_name_full, PropertyOwner::Module(name.clone()), ) .unwrap(); @@ -144,7 +161,8 @@ impl Module { let submodules = submodules_to_process .into_iter() .filter_map(|submodule_name| { - py.import(name.join(&submodule_name).to_py().as_str()).ok() + py.import(name.join(&submodule_name.into()).to_py().as_str()) + .ok() }) .map(|submodule| Self::parse(cfg, submodule)) .collect::>() @@ -173,9 +191,9 @@ impl Module { }) } - fn extract_prelude(module: &pyo3::prelude::PyModule) -> Vec { + fn extract_prelude(cfg: &Config, module: &pyo3::prelude::PyModule) -> Vec { // Extract the index (__all__) of the module if it exists - let index_attr_names = if let Ok(index) = module.index() { + let mut index_attr_names = if let Ok(index) = module.index() { index .iter() .map(|x| Ident::from_py(&x.to_string())) @@ -200,6 +218,17 @@ impl Module { } } + // Retain only allowed attributes + index_attr_names.retain(|attr_name| { + let attr_module = Path::from_py(module.name().unwrap()); + if let Ok(attr) = module.getattr(attr_name.as_py()) { + let attr_type = attr.get_type(); + cfg.is_attr_allowed(attr_name, &attr_module, attr_type) + } else { + false + } + }); + index_attr_names } @@ -248,14 +277,88 @@ impl Module { } } -impl Generate for Module { - fn 
generate(&self, _cfg: &Config) -> Result { - todo!() +impl Module { + pub fn generate(&self, cfg: &Config, is_top_level: bool) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Extra configuration for top-level modules + if is_top_level { + output.extend(quote::quote! { + #[allow( + clippy::all, + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused + )] + }); + } + + // Documentation + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().to_owned(); + docstring.insert(0, ' '); + + output.extend(quote::quote! { + #[doc = #docstring] + }); + } + + // Generate the module content + let mut module_content = proc_macro2::TokenStream::new(); + // Imports + module_content.extend( + self.imports + .iter() + .map(|import| import.generate(cfg)) + .collect::>()?, + ); + // Prelude + module_content.extend(self.generate_prelude()); + + // Finalize the module with its content + let module_ident: syn::Ident = self.name.name().try_into()?; + output.extend(quote::quote! { + pub mod #module_ident { + #module_content + } + }); + + Ok(output) + } + + fn generate_prelude(&self) -> proc_macro2::TokenStream { + // Skip if the prelude is empty + if self.prelude.is_empty() { + return proc_macro2::TokenStream::new(); + } + + // Generate the prelude content (re-export all prelude items) + let prelude_content = self + .prelude + .iter() + .map(|ident| { + let ident: syn::Ident = ident.try_into().unwrap(); + quote::quote! { + pub use super::#ident; + } + }) + .collect::(); + + // Finalize the prelude with its content + quote::quote! 
{ + pub mod prelude { + #prelude_content + } + } } } -impl Canonicalize for Module { - fn canonicalize(&mut self) { +impl Module { + pub fn canonicalize(&mut self) { todo!() } } diff --git a/pyo3_bindgen_engine/src/syntax/property.rs b/pyo3_bindgen_engine/src/syntax/property.rs index cbf320c..13d3196 100644 --- a/pyo3_bindgen_engine/src/syntax/property.rs +++ b/pyo3_bindgen_engine/src/syntax/property.rs @@ -1,9 +1,5 @@ use super::Path; -use crate::{ - traits::{Canonicalize, Generate}, - types::Type, - Config, Result, -}; +use crate::{types::Type, Config, Result}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Property { @@ -149,14 +145,14 @@ impl Property { } } -impl Generate for Property { - fn generate(&self, _cfg: &Config) -> Result { +impl Property { + pub fn generate(&self, _cfg: &Config) -> Result { todo!() } } -impl Canonicalize for Property { - fn canonicalize(&mut self) { +impl Property { + pub fn canonicalize(&mut self) { todo!() } } diff --git a/pyo3_bindgen_engine/src/syntax/type_var.rs b/pyo3_bindgen_engine/src/syntax/type_var.rs index c4848ff..00a90a0 100644 --- a/pyo3_bindgen_engine/src/syntax/type_var.rs +++ b/pyo3_bindgen_engine/src/syntax/type_var.rs @@ -1,5 +1,5 @@ use super::Path; -use crate::{traits::Generate, Config, Result}; +use crate::{Config, Result}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TypeVar { @@ -12,8 +12,8 @@ impl TypeVar { } } -impl Generate for TypeVar { - fn generate(&self, _cfg: &Config) -> Result { +impl TypeVar { + pub fn generate(&self, _cfg: &Config) -> Result { todo!() } } diff --git a/pyo3_bindgen_engine/src/traits.rs b/pyo3_bindgen_engine/src/traits.rs deleted file mode 100644 index f73ffb6..0000000 --- a/pyo3_bindgen_engine/src/traits.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::{Config, Result}; - -pub trait Generate { - fn generate(&self, cfg: &Config) -> Result; -} - -pub trait Canonicalize: Sized { - fn canonicalize(&mut self); -} From 75670641792b3db93f30cac0f6abf07adfc2ec91 Mon Sep 17 
00:00:00 2001 From: Andrej Orsula Date: Sun, 3 Mar 2024 23:59:12 +0100 Subject: [PATCH 10/13] Refactoring: Add remaining generators Signed-off-by: Andrej Orsula --- .github/workflows/dependabot.yml | 29 + .github/workflows/rust.yml | 4 +- Cargo.lock | 266 ++++---- Cargo.toml | 10 +- README.md | 49 +- pyo3_bindgen/src/lib.rs | 80 +-- pyo3_bindgen_cli/src/main.rs | 50 +- pyo3_bindgen_cli/tests/cli.rs | 4 +- pyo3_bindgen_engine/Cargo.toml | 5 +- pyo3_bindgen_engine/benches/bindgen.rs | 155 +++-- pyo3_bindgen_engine/src/codegen.rs | 278 +++++++- pyo3_bindgen_engine/src/config.rs | 58 +- pyo3_bindgen_engine/src/lib.rs | 14 +- pyo3_bindgen_engine/src/syntax/class.rs | 194 +++++- .../src/syntax/common/attribute_variant.rs | 4 +- .../src/syntax/common/ident.rs | 12 + pyo3_bindgen_engine/src/syntax/common/path.rs | 21 +- pyo3_bindgen_engine/src/syntax/function.rs | 613 ++++++++++++++++-- pyo3_bindgen_engine/src/syntax/import.rs | 36 +- pyo3_bindgen_engine/src/syntax/module.rs | 524 ++++++++++----- pyo3_bindgen_engine/src/syntax/property.rs | 319 ++++++--- pyo3_bindgen_engine/src/syntax/type_var.rs | 9 +- .../src/{types => typing}/mod.rs | 41 +- pyo3_bindgen_engine/src/utils/error.rs | 14 +- pyo3_bindgen_engine/src/utils/mod.rs | 3 +- pyo3_bindgen_engine/tests/bindgen.rs | 165 +++-- pyo3_bindgen_macros/src/lib.rs | 23 +- pyo3_bindgen_macros/src/parser.rs | 10 +- 28 files changed, 2152 insertions(+), 838 deletions(-) create mode 100644 .github/workflows/dependabot.yml rename pyo3_bindgen_engine/src/{types => typing}/mod.rs (97%) diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml new file mode 100644 index 0000000..822ffc7 --- /dev/null +++ b/.github/workflows/dependabot.yml @@ -0,0 +1,29 @@ +name: Dependabot auto-merge +on: + pull_request: + +permissions: + contents: write + pull-requests: write + +jobs: + dependabot: + runs-on: ubuntu-latest + if: github.actor == 'dependabot[bot]' + steps: + - name: Fetch metadata + id: metadata + uses: 
dependabot/fetch-metadata@v1 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + - name: Approve PR + env: + PR_URL: ${{github.event.pull_request.html_url}} + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh pr review --approve "$PR_URL" + - name: Enable auto-merge + if: steps.metadata.outputs.update-type == 'version-update:semver-patch' + env: + PR_URL: ${{github.event.pull_request.html_url}} + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh pr merge --auto --merge "$PR_URL" diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 6d6a5a4..024b442 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -41,7 +41,7 @@ jobs: fail-fast: false matrix: toolchain: - - "1.70" # Minimal supported Rust version (MSRV) + - "1.74" # Minimal supported Rust version (MSRV) - stable - beta steps: @@ -106,7 +106,7 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} files: lcov.info - fail_ci_if_error: true + fail_ci_if_error: false deny: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 3166b63..76a9513 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.12" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" dependencies = [ "anstyle", "anstyle-parse", @@ -33,9 +33,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" @@ -92,12 +92,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bitflags" -version = "2.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" - [[package]] name = "bstr" version = "1.9.1" @@ -111,9 +105,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" [[package]] name = "cast" @@ -129,9 +123,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -140,15 +134,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", @@ -156,9 +150,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.18" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" dependencies = [ "clap_builder", "clap_derive", @@ -166,9 +160,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.18" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" dependencies = [ "anstream", "anstyle", @@ -178,9 +172,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.7" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" dependencies = [ "heck", "proc-macro2", @@ -190,9 +184,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" @@ -261,6 +255,12 @@ version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "difflib" version = "0.4.0" @@ -275,19 +275,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "errno" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" -dependencies = [ - "libc", - "windows-sys", -] +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "float-cmp" @@ -300,9 +290,13 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +dependencies = [ + "cfg-if", + "crunchy", +] [[package]] name = "heck" @@ -312,9 +306,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.4" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "indoc" @@ -324,12 +318,12 @@ checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" [[package]] name = "is-terminal" -version = "0.4.10" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", - "rustix", + "libc", "windows-sys", ] @@ -344,9 +338,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = 
"ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -359,24 +353,18 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] [[package]] name = "libc" -version = "0.2.152" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" - -[[package]] -name = "linux-raw-sys" -version = "0.4.13" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "lock_api" @@ -390,9 +378,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" @@ -417,9 +405,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", ] @@ -487,6 +475,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + [[package]] name = "predicates" version = "3.1.0" @@ -538,23 +532,24 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0" +checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" dependencies = [ "cfg-if", "libc", "memoffset", "parking_lot", + "portable-atomic", "pyo3-build-config", "pyo3-ffi", ] [[package]] name = "pyo3-build-config" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be" +checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" dependencies = [ "once_cell", "target-lexicon", @@ -562,9 +557,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1" +checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" dependencies = [ "libc", "pyo3-build-config", @@ -572,7 +567,7 @@ dependencies = [ [[package]] name = "pyo3_bindgen" -version = "0.2.0" +version = "0.3.0" dependencies = [ "pyo3_bindgen_engine", "pyo3_bindgen_macros", @@ -580,7 +575,7 @@ dependencies = [ [[package]] name = "pyo3_bindgen_cli" -version = "0.2.0" +version = "0.3.0" dependencies = [ "assert_cmd", "clap", @@ -592,11 +587,11 @@ dependencies = [ [[package]] name = "pyo3_bindgen_engine" -version = "0.2.0" +version = "0.3.0" dependencies = [ "criterion", "indoc", - "itertools 0.12.0", + "itertools 0.12.1", "prettyplease", "proc-macro2", "pyo3", @@ -610,7 +605,7 @@ dependencies = [ [[package]] name = 
"pyo3_bindgen_macros" -version = "0.2.0" +version = "0.3.0" dependencies = [ "proc-macro2", "pyo3", @@ -630,9 +625,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ "either", "rayon-core", @@ -654,7 +649,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ - "bitflags 1.3.2", + "bitflags", ] [[package]] @@ -671,9 +666,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -692,24 +687,11 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" -[[package]] -name = "rustix" -version = "0.38.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" -dependencies = [ - "bitflags 2.4.2", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -728,18 +710,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] 
name = "serde" -version = "1.0.195" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.195" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", @@ -748,9 +730,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -765,15 +747,15 @@ checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "syn" -version = "2.0.48" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -782,9 +764,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.13" +version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" 
+checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" [[package]] name = "termtree" @@ -865,9 +847,9 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -875,9 +857,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -885,9 +867,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", @@ -900,9 +882,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -910,9 +892,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = 
[ "proc-macro2", "quote", @@ -923,15 +905,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "web-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", @@ -974,7 +956,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -994,17 +976,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -1015,9 +997,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -1027,9 +1009,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -1039,9 +1021,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -1051,9 +1033,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -1063,9 +1045,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] 
name = "windows_x86_64_gnullvm" @@ -1075,9 +1057,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -1087,6 +1069,6 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" diff --git a/Cargo.toml b/Cargo.toml index 3af5ef2..493eaec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,13 +19,13 @@ keywords = ["bindgen", "ffi", "pyo3", "python"] license = "MIT OR Apache-2.0" readme = "README.md" repository = "https://github.com/AndrejOrsula/pyo3_bindgen" -rust-version = "1.70" -version = "0.2.0" +rust-version = "1.74" +version = "0.3.0" [workspace.dependencies] -pyo3_bindgen = { path = "pyo3_bindgen", version = "0.2.0" } -pyo3_bindgen_engine = { path = "pyo3_bindgen_engine", version = "0.2.0" } -pyo3_bindgen_macros = { path = "pyo3_bindgen_macros", version = "0.2.0" } +pyo3_bindgen = { path = "pyo3_bindgen", version = "0.3.0" } +pyo3_bindgen_engine = { path = "pyo3_bindgen_engine", version = "0.3.0" } +pyo3_bindgen_macros = { path = "pyo3_bindgen_macros", version = "0.3.0" } assert_cmd = { version = "2" } clap = { version = "4.4", features = ["derive"] } diff --git a/README.md b/README.md index f536dc0..8be2fde 100644 --- a/README.md +++ b/README.md @@ -92,23 +92,24 @@ Add `pyo3` as a dependency and `pyo3_bindgen` as a build dependency to your [`Ca pyo3 = { version = 
"0.20", features = ["auto-initialize"] } [build-dependencies] -pyo3_bindgen = { version = "0.1" } +pyo3_bindgen = { version = "0.3" } ``` ### Option 1: Build script -Create a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) script in the root of your crate that generates bindings to the `target_module` Python module. +Create a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) script in the root of your crate that generates bindings to the `py_module` Python module. ```rs // build.rs - -fn main() { - // Generate Rust bindings to the Python module - pyo3_bindgen::build_bindings( - "target_module", - std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("bindings.rs"), - ) - .unwrap(); +use pyo3_bindgen::{Codegen, Config}; + +fn main() -> Result<(), Box> { + // Generate Rust bindings to Python modules + Codegen::new(Config::default())? + .module_name("py_module")? + .module_name("other_module")? + .build(std::path::Path::new(&std::env::var("OUT_DIR")?).join("bindings.rs"))?; + Ok(()) } ``` @@ -116,7 +117,7 @@ Afterwards, include the generated bindings anywhere in your crate. ```rs include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -pub use target_module::*; +pub use py_module::*; ``` ### Option 2: CLI tool @@ -131,7 +132,7 @@ Afterwards, run the `pyo3_bindgen` executable while passing the name of the targ ```bash # Pass `--help` to show the usage and available options -pyo3_bindgen -m target_module -o bindings.rs +pyo3_bindgen -m py_module other_module -o bindings.rs ``` ### Option 3 \[Experimental\]: Procedural macros @@ -142,28 +143,30 @@ Enable the `macros` feature of `pyo3_bindgen`. ```toml [build-dependencies] -pyo3_bindgen = { version = "0.1", features = ["macros"] } +pyo3_bindgen = { version = "0.3", features = ["macros"] } ``` Then, you can call the `import_python!` macro anywhere in your crate. 
```rs -pyo3_bindgen::import_python!("target_module"); -pub use target_module::*; +pyo3_bindgen::import_python!("py_module"); +pub use py_module::*; ``` ## Status This project is in early development, and as such, the API of the generated bindings is not yet stable. -- Not all Python types are mapped to their Rust equivalents yet. For this reason, some additional typecasting might be currently required when using the generated bindings (e.g. `let typed_value: target_module::Class = any_value.extract()?;`). -- The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. Although there is currently plenty of room for optimization in the current naive implementation, even the largest modules are processed in less than a second on a *modern* laptop. -- The generation of bindings should never panic as long as the target Python module can be successfully imported. If it does, it is a bug resulting from an unexpected edge-case Python module structure or an unforeseen combination of enabled PyO3 features. -- However, the generated bindings might not directly compile in some specific cases. Currently, there are two known issue; bindings will contain duplicate function definitions if present in the original code, and function parameters might use the same name as a class defined in the same scope (allowed in Python but not in Rust). If you encounter any other issues, consider manually rewriting the problematic parts of the bindings. -- Although implemented, the procedural macros might not work in all cases - especially when some PyO3 features are enabled. In most cases, PyO3 fails to import the target Python module when used from within a `proc_macro` crate. Therefore, it is recommended to use build scripts instead for now. 
-- The code will be refactored and cleaned up in the upcoming releases. The current implementation is a result of a very quick prototype that was built to test the feasibility of the idea. For example, configurability of the generated bindings is planned (e.g. allowlist/ignorelist of attributes). Furthermore, automatic generation of dependent Python modules will be considered in order to provide a more complete typing experience. - -Please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) any issues that you might encounter. Contributions are more than welcome! If you are looking for a place to start, consider searching for `TODO` comments in the codebase. +- Not all Python types are mapped to their Rust equivalents yet. For this reason, some additional typecasting might be currently required when using the generated bindings (e.g. `let typed_value: py_module::MyClass = get_value()?.extract()?;`). +- The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. Here are some preliminary results for version `0.3.0` (measured: parsing IO & codegen | not measured: compilation of the generated bindings, which takes much longer): + - `sys`: 1.49 ms (1.1k total LoC) + - `os`: 10.72 ms (7.7k total LoC) + - `numpy`: 1.01 s (563k total LoC) + - `torch`: 3.54 s (1.23M total LoC) +- The generation of bindings should never panic as long as the target Python module can be successfully imported. If it does, please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. +- The generated bindings should always be compilable and usable in Rust. If you encounter any issues, consider manually fixing the problematic parts of the bindings and please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. 
+- However, the generated bindings are based on the introspection of the target Python module. Therefore, the correctness of the generated bindings is directly dependent on the quality of the type annotations and docstrings in the target Python module. Ideally, the generated bindings should be considered unsafe and serve as a starting point for safe and idiomatic Rust APIs. +- Although implemented, the procedural macro does not work in many cases because PyO3 fails to import the target Python module when used from within a `proc_macro` crate. Therefore, it is recommended to use build scripts instead for now. ## License diff --git a/pyo3_bindgen/src/lib.rs b/pyo3_bindgen/src/lib.rs index 67ae92a..c337265 100644 --- a/pyo3_bindgen/src/lib.rs +++ b/pyo3_bindgen/src/lib.rs @@ -1,78 +1,8 @@ -//! Public API library for automatic generation of Rust FFI bindings to Python modules. -//! -//! ## Instructions -//! -//! Add `pyo3` as a dependency and `pyo3_bindgen` as a build dependency to your [`Cargo.toml`](https://doc.rust-lang.org/cargo/reference/manifest.html) manifest (`auto-initialize` feature of `pyo3` is optional and shown here for your convenience). -//! -//! ```toml -//! [dependencies] -//! pyo3 = { version = "0.20", features = ["auto-initialize"] } -//! -//! [build-dependencies] -//! pyo3_bindgen = { version = "0.1" } -//! ``` -//! -//! ### Option 1: Build script -//! -//! Create a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) script in the root of your crate that generates bindings to the `target_module` Python module. -//! -//! ```rs -//! // build.rs -//! -//! fn main() { -//! // Generate Rust bindings to the Python module -//! pyo3_bindgen::build_bindings( -//! "target_module", -//! std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("bindings.rs"), -//! ) -//! .unwrap(); -//! } -//! ``` -//! -//! Afterwards, include the generated bindings anywhere in your crate. -//! -//! ```rs -//! 
include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -//! pub use target_module::*; -//! ``` -//! -//! ### Option 2: CLI tool -//! -//! Install the `pyo3_bindgen` executable with `cargo`. -//! -//! ```bash -//! cargo install --locked pyo3_bindgen_cli -//! ``` -//! -//! Afterwards, run the `pyo3_bindgen` executable while passing the name of the target Python module. -//! -//! ```bash -//! # Pass `--help` to show the usage and available options -//! pyo3_bindgen -m target_module -o bindings.rs -//! ``` -//! -//! ### Option 3 \[Experimental\]: Procedural macros -//! -//! > **Note:** This feature is experimental and will probably fail in many cases. It is recommended to use build scripts instead. -//! -//! Enable the `macros` feature of `pyo3_bindgen`. -//! -//! ```toml -//! [build-dependencies] -//! pyo3_bindgen = { version = "0.1", features = ["macros"] } -//! ``` -//! -//! Then, you can call the `import_python!` macro anywhere in your crate. -//! -//! ```rs -//! pyo3_bindgen::import_python!("target_module"); -//! pub use target_module::*; -//! ``` +#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/../README.md"))] -pub use pyo3_bindgen_engine::{ - self as engine, build_bindings, generate_bindings, generate_bindings_for_module, - generate_bindings_from_str, -}; +// Public API re-exports from engine +pub use pyo3_bindgen_engine::{pyo3, Codegen, Config, PyBindgenError, PyBindgenResult}; +// Public API re-exports from macros #[cfg(feature = "macros")] -pub use pyo3_bindgen_macros::{self as macros, import_python}; +pub use pyo3_bindgen_macros::import_python; diff --git a/pyo3_bindgen_cli/src/main.rs b/pyo3_bindgen_cli/src/main.rs index 5567db1..ee7e8d1 100644 --- a/pyo3_bindgen_cli/src/main.rs +++ b/pyo3_bindgen_cli/src/main.rs @@ -1,19 +1,25 @@ //! CLI tool for automatic generation of Rust FFI bindings to Python modules. 
use clap::Parser; +use std::io::Write; fn main() { // Parse the CLI arguments let args = Args::parse(); // Generate the bindings for the module specified by the `--module-name` argument - let bindings = pyo3_bindgen::generate_bindings(&args.module_name).unwrap_or_else(|err| { - panic!( - "Failed to generate bindings for module: {}\n{err}", - args.module_name - ) - }); - + let bindings = args + .module_names + .iter() + .fold(pyo3_bindgen::Codegen::default(), |codegen, module_name| { + codegen.module_name(module_name).unwrap_or_else(|err| { + panic!("Failed to parse the content of '{module_name}' Python module:\n{err}") + }) + }) + .generate() + .unwrap_or_else(|err| panic!("Failed to generate bindings for Python modules:\n{err}")); + + // Format the bindings with prettyplease let bindings = prettyplease::unparse(&syn::parse2(bindings).unwrap()); if let Some(output) = args.output { @@ -27,7 +33,7 @@ fn main() { .unwrap_or_else(|_| panic!("Failed to write to file: {}", output.display())); } else { // Otherwise, print the bindings to STDOUT - println!("{bindings}"); + std::io::stdout().write_all(bindings.as_bytes()).unwrap(); } } @@ -35,9 +41,9 @@ fn main() { #[derive(Parser)] #[command(author, version, about)] struct Args { - #[arg(short, long)] + #[arg(short='m', long="module-name", required=true, num_args=1..)] /// Name of the Python module for which to generate the bindings - pub module_name: String, + pub module_names: Vec, #[arg(short, long)] /// Name of the output file to which to write the bindings [default: STDOUT] pub output: Option, @@ -50,37 +56,49 @@ mod tests { #[test] fn test_parser_all() { // Arrange - let input = ["", "-m", "pip", "--output", "bindings.rs"]; + let input = ["", "-m", "os", "--output", "bindings.rs"]; // Act let args = Args::parse_from(input); // Assert - assert_eq!(args.module_name, "pip"); + assert_eq!(args.module_names, ["os"]); assert_eq!(args.output, Some("bindings.rs".into())); } #[test] fn test_parser_short() { // Arrange - let 
input = ["", "-m", "numpy"]; + let input = ["", "-m", "sys"]; // Act let args = Args::parse_from(input); // Assert - assert_eq!(args.module_name, "numpy"); + assert_eq!(args.module_names, ["sys"]); } #[test] fn test_parser_long() { // Arrange - let input = ["", "--module-name", "setuptools"]; + let input = ["", "--module-name", "io"]; + + // Act + let args = Args::parse_from(input); + + // Assert + assert_eq!(args.module_names, ["io"]); + } + + #[test] + fn test_parser_multiple() { + // Arrange + let input = ["", "-m", "os", "sys", "--module-name", "io"]; // Act let args = Args::parse_from(input); // Assert - assert_eq!(args.module_name, "setuptools"); + assert_eq!(args.module_names, ["os", "sys", "io"]); } } diff --git a/pyo3_bindgen_cli/tests/cli.rs b/pyo3_bindgen_cli/tests/cli.rs index 221034f..e0c64a8 100644 --- a/pyo3_bindgen_cli/tests/cli.rs +++ b/pyo3_bindgen_cli/tests/cli.rs @@ -17,7 +17,7 @@ mod test_cli { assert.success().stdout( predicate::str::contains(format!("Usage: {BIN_NAME}")) .and(predicate::str::contains("Options:")) - .and(predicate::str::contains("--module-name ")) + .and(predicate::str::contains("--module-name ")) .and(predicate::str::contains("--output ")), ); } @@ -33,7 +33,7 @@ mod test_cli { // Assert assert.failure().stderr( predicate::str::contains("error: the following required arguments") - .and(predicate::str::contains("--module-name ")) + .and(predicate::str::contains("--module-name ")) .and(predicate::str::contains(format!("Usage: {BIN_NAME}"))), ); } diff --git a/pyo3_bindgen_engine/Cargo.toml b/pyo3_bindgen_engine/Cargo.toml index c408237..90983d3 100644 --- a/pyo3_bindgen_engine/Cargo.toml +++ b/pyo3_bindgen_engine/Cargo.toml @@ -2,7 +2,7 @@ name = "pyo3_bindgen_engine" authors.workspace = true categories.workspace = true -description = "Engine behind automatic generation of Rust bindings to Python modules" +description = "Engine for automatic generation of Rust bindings to Python modules" edition.workspace = true 
keywords.workspace = true license.workspace = true @@ -34,6 +34,9 @@ name = "pyo3_bindgen_engine" path = "src/lib.rs" crate-type = ["rlib"] +[features] +default = [] + [[bench]] name = "bindgen" harness = false diff --git a/pyo3_bindgen_engine/benches/bindgen.rs b/pyo3_bindgen_engine/benches/bindgen.rs index 1383b99..59ad1e2 100644 --- a/pyo3_bindgen_engine/benches/bindgen.rs +++ b/pyo3_bindgen_engine/benches/bindgen.rs @@ -1,68 +1,61 @@ -macro_rules! bench_bindgen_from_str { - { - |$criterion:ident| $(,)? - $bench_name:ident $(,)? - $(py)?$(python)? $(:)? $code_py:literal $(,)? - } => { - { - const CODE_PY: &str = indoc::indoc! { $code_py }; - $criterion.bench_function(stringify!($bench_name), |b| { - b.iter(|| { - pyo3_bindgen_engine::generate_bindings_from_str( - criterion::black_box(CODE_PY), - criterion::black_box(concat!("bench_mod_", stringify!($bench_name))), - ) - .unwrap() - }); - }); - } - }; -} +criterion::criterion_group!(benches, criterion_benchmark); +criterion::criterion_main!(benches); -macro_rules! try_bench_bindgen_for_module { - { - |$py:ident, $criterion:ident| $(,)? - $(module)? $(:)? $module_name:literal $(,)? - } => { - if let Ok(module) = $py.import($module_name) { - $criterion.bench_function(concat!("bench_bindgen_module_", $module_name), |b| { - b.iter(|| { - pyo3_bindgen_engine::generate_bindings_for_module( - criterion::black_box($py), - criterion::black_box(module), - ) - .unwrap() - }); - }); - } - }; +fn criterion_benchmark(crit: &mut criterion::Criterion) { + bench_from_str(crit); + bench_mod(crit); } -fn criterion_benchmark(crit: &mut criterion::Criterion) { - let mut group_from_str = crit.benchmark_group("generate_bindings_from_str"); +fn bench_from_str(crit: &mut criterion::Criterion) { + let mut group_from_str = crit.benchmark_group("bindgen_str"); group_from_str .warm_up_time(std::time::Duration::from_millis(250)) .sample_size(100); - bench_bindgen_from_str! { + + macro_rules! bench_impl { + { + |$criterion:ident| $(,)? 
+ $bench_name:ident $(,)? + $(py)?$(python)?$(:)? $code_py:literal $(,)? + } => { + { + const CODE_PY: &str = indoc::indoc! { $code_py }; + $criterion.bench_function(stringify!($bench_name), |b| { + b.iter(|| { + pyo3_bindgen_engine::Codegen::default() + .module_from_str( + criterion::black_box(CODE_PY), + criterion::black_box(concat!("bench_mod_", stringify!($bench_name))) + ) + .unwrap() + .generate() + .unwrap() + }); + }); + } + }; + } + + bench_impl! { |group_from_str| - bench_bindgen_attribute - py: r#" + attribute + r#" t_const_float: float = 0.42 "# } - bench_bindgen_from_str! { + bench_impl! { |group_from_str| - bench_bindgen_function - py: r#" + function + r#" def t_fn(t_arg1: str) -> int: """t_docs""" ... "# } - bench_bindgen_from_str! { + bench_impl! { |group_from_str| - bench_bindgen_class - py: r#" + class + r#" from typing import Dict, Optional class t_class: """t_docs""" @@ -80,28 +73,54 @@ fn criterion_benchmark(crit: &mut criterion::Criterion) { ... "# } + group_from_str.finish(); +} - let mut group_for_module = crit.benchmark_group("generate_bindings_for_module"); - group_for_module +fn bench_mod(crit: &mut criterion::Criterion) { + let mut group_module = crit.benchmark_group("bindgen_mod"); + group_module .warm_up_time(std::time::Duration::from_secs(2)) .sample_size(10); - pyo3::Python::with_gil(|py| { - try_bench_bindgen_for_module! { - |py, group_for_module| - module: "os" - } - try_bench_bindgen_for_module! { - |py, group_for_module| - module: "sys" - } - try_bench_bindgen_for_module! { - |py, group_for_module| - module: "numpy" - } - }); - group_for_module.finish(); -} -criterion::criterion_group!(benches, criterion_benchmark); -criterion::criterion_main!(benches); + macro_rules! bench_impl { + ( + |$criterion:ident| $(,)? + $(module:)? $module_name:literal $(,)? 
+ ) => { + $criterion.bench_function($module_name, |b| { + b.iter(|| { + pyo3_bindgen_engine::Codegen::default() + .module_name( + criterion::black_box($module_name) + ) + .unwrap() + .generate() + .unwrap() + }); + }); + }; + { + |$criterion:ident| $(,)? + $(modules:)? [ $($module:literal),+ $(,)? ] $(,)? + } => { + $( + bench_impl!(|$criterion| $module); + )+ + }; + } + + bench_impl! { + |group_module| + modules: [ + "io", + "math", + "os", + "re", + "sys", + "time", + ] + } + + group_module.finish(); +} diff --git a/pyo3_bindgen_engine/src/codegen.rs b/pyo3_bindgen_engine/src/codegen.rs index c306a02..a71f448 100644 --- a/pyo3_bindgen_engine/src/codegen.rs +++ b/pyo3_bindgen_engine/src/codegen.rs @@ -1,12 +1,34 @@ -use crate::{syntax::Module, Config, Result}; +use crate::{ + syntax::{Ident, Import, Module, Path}, + Config, Result, +}; +use itertools::Itertools; +use rustc_hash::FxHashSet as HashSet; +/// Engine for automatic generation of Rust FFI bindings to Python modules. +/// +/// # Example +/// +/// ```no_run +/// // use pyo3_bindgen::{Codegen, Config}; +/// use pyo3_bindgen_engine::{Codegen, Config}; +/// +/// fn main() -> Result<(), Box> { +/// Codegen::new(Config::default())? +/// .module_name("os")? +/// .module_name("sys")? +/// .generate()?; +/// Ok(()) +/// } +/// ``` #[derive(Debug, Default, Clone)] pub struct Codegen { - pub cfg: Config, - pub modules: Vec, + cfg: Config, + modules: Vec, } impl Codegen { + /// Create a new `Codegen` engine with the given configuration. pub fn new(cfg: Config) -> Result { Ok(Self { cfg, @@ -14,6 +36,7 @@ impl Codegen { }) } + /// Add a Python module to the list of modules for which to generate bindings. 
pub fn module(mut self, module: &pyo3::types::PyModule) -> Result { crate::io_utils::with_suppressed_python_output( module.py(), @@ -27,7 +50,8 @@ impl Codegen { Ok(self) } - pub fn module_from_str(self, module_name: &str) -> Result { + /// Add a Python module by its name to the list of modules for which to generate bindings. + pub fn module_name(self, module_name: &str) -> Result { #[cfg(not(PyPy))] pyo3::prepare_freethreaded_python(); pyo3::Python::with_gil(|py| { @@ -36,6 +60,22 @@ impl Codegen { }) } + /// Add a Python module from its source code and name to the list of modules for which to generate bindings. + pub fn module_from_str(self, source_code: &str, new_module_name: &str) -> Result { + #[cfg(not(PyPy))] + pyo3::prepare_freethreaded_python(); + pyo3::Python::with_gil(|py| { + let module = pyo3::types::PyModule::from_code( + py, + source_code, + &format!("{new_module_name}/__init__.py"), + new_module_name, + )?; + self.module(module) + }) + } + + /// Add multiple Python modules to the list of modules for which to generate bindings. pub fn modules(mut self, modules: &[&pyo3::types::PyModule]) -> Result { self.modules.reserve(modules.len()); for module in modules { @@ -44,15 +84,22 @@ impl Codegen { Ok(self) } - pub fn modules_from_str(mut self, module_names: &[&str]) -> Result { + /// Add multiple Python modules by their names to the list of modules for which to generate bindings. + pub fn module_names(mut self, module_names: &[&str]) -> Result { self.modules.reserve(module_names.len()); for module_name in module_names { - self = self.module_from_str(module_name)?; + self = self.module_name(module_name)?; } Ok(self) } + /// Generate the Rust FFI bindings for all modules added to the engine. 
pub fn generate(mut self) -> Result { + assert!( + !self.modules.is_empty(), + "There are no modules for which to generate bindings" + ); + // Parse external modules (if enabled) if self.cfg.generate_dependencies { self.parse_dependencies()?; @@ -64,25 +111,224 @@ impl Codegen { // Generate the bindings for all modules self.modules .iter() - .map(|module| module.generate(&self.cfg, true)) + .map(|module| module.generate(&self.cfg, true, &self.modules)) .collect::>() } + /// Generate the Rust FFI bindings for all modules added to the engine and write them to the given file. + /// This is a convenience method that combines `generate` and `std::fs::write`. pub fn build(self, output_path: impl AsRef) -> Result<()> { - std::fs::write(output_path, self.generate()?.to_string())?; - Ok(()) + Ok(std::fs::write(output_path, self.generate()?.to_string())?) } + // pub fn all_types_in_module(&self, module_path: &Path) -> Vec { + // // self.modules + // // .iter() + // // .find(|module| module.name == *module_path) + // // .map(|module| module.retrieve_types()) + // // .unwrap_or_default() + // todo!() + // } + fn parse_dependencies(&mut self) -> Result<()> { - // // TODO: Parse modules of dependencies - // todo!() - Ok(()) + fn get_imports_recursive(input: &[Module]) -> Vec { + let mut imports = Vec::new(); + input.iter().for_each(|module| { + imports.extend( + module + .imports + .iter() + .filter(|import| import.is_external()) + .cloned(), + ); + imports.extend(get_imports_recursive(&module.submodules)); + }); + imports + } + + // Get a unique list of all external imports (these could be modules, classes, functions, etc.) 
+ let external_imports = get_imports_recursive(&self.modules) + .into_iter() + .filter(|import| import.is_external()) + .map(|import| import.origin.clone()) + .unique() + .collect_vec(); + + // Parse the external imports and add them to the module tree + pyo3::Python::with_gil(|py| { + external_imports + .iter() + // Get the last valid module within the path of the import + .map(|import| { + let mut last_module = py + .import( + import + .root() + .unwrap_or_else(|| unreachable!()) + .to_py() + .as_str(), + ) + .unwrap(); + for path in import[1..].iter() { + if let Ok(attr) = last_module.getattr(path.as_py()) { + if let Ok(module) = attr.extract::<&pyo3::types::PyModule>() { + last_module = module; + } else { + break; + } + } else { + break; + } + } + last_module + }) + // Parse the module and add it to the module tree + .unique_by(|module| module.name().unwrap().to_string()) + // Filter attributes based on various configurable conditions + .filter(|module| { + self.cfg.is_attr_allowed( + &Ident::from_py(module.name().unwrap()), + &Path::from_py( + &module + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ), + py.get_type::(), + ) + }) + .try_for_each(|module| { + Module::parse(&self.cfg, module).map(|module| { + self.modules.push(module); + }) + })?; + Ok(()) + }) } fn canonicalize(&mut self) { - // todo!(); - // for module in &mut self.modules { - // module.canonicalize(); - // } + // Canonicalize the module tree, such that no submodules remain at the top-level + // Example: If `mod.submod.subsubmod` is currently top-level, it will be embedded as submodule into `mod.submod` + // and `mod.submod` will be embedded in top-level `mod` + pyo3::Python::with_gil(|py| { + self.modules.iter_mut().for_each(|module| { + if module.name.len() > 1 { + *module = + (0..module.name.len() - 1) + .rev() + .fold(module.clone(), |package, i| { + let name = Path::from(&module.name[0..=i]); + let mut parent_package = + 
Module::empty(py, name).unwrap_or_else(|_| unreachable!()); + parent_package.submodules.push(package); + parent_package + }); + } + }); + }); + + // Merge duplicate modules in the tree + self.merge_duplicate_modules(); + } + + fn merge_duplicate_modules(&mut self) { + fn get_duplicate_modules(modules: &mut [Module]) -> Vec> { + modules.sort_by(|a, b| a.name.cmp(&b.name)); + let mut i = 0; + let mut duplicates = Vec::new(); + while i < modules.len() { + let name = modules[i].name.clone(); + let span = modules + .iter() + .skip(i) + .take_while(|module| module.name == name) + .count(); + if span > 1 { + duplicates.push(i..i + span); + } + i += span; + } + duplicates + } + + fn merge_duplicate_submodules_recursive(input: &[Module]) -> Module { + Module { + name: input[0].name.clone(), + prelude: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.prelude.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + imports: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.imports.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + submodules: { + let mut submodules = + input.iter().fold(Vec::default(), |mut submodule, module| { + submodule.extend(module.submodules.iter().cloned()); + submodule + }); + get_duplicate_modules(&mut submodules) + .into_iter() + .rev() + .for_each(|range| { + submodules[range.start] = + merge_duplicate_submodules_recursive(&submodules[range.clone()]); + submodules.drain(range.start + 1..range.end); + }); + submodules + }, + classes: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.classes.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + functions: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.functions.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + type_vars: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { 
+ prelude.extend(module.type_vars.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + properties: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.properties.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + docstring: input[0].docstring.clone(), + } + } + + get_duplicate_modules(&mut self.modules) + .into_iter() + .rev() + .for_each(|range| { + self.modules[range.start] = + merge_duplicate_submodules_recursive(&self.modules[range.clone()]); + self.modules.drain(range.start + 1..range.end); + }); } } diff --git a/pyo3_bindgen_engine/src/config.rs b/pyo3_bindgen_engine/src/config.rs index b7aabfb..4e12da6 100644 --- a/pyo3_bindgen_engine/src/config.rs +++ b/pyo3_bindgen_engine/src/config.rs @@ -1,16 +1,55 @@ use crate::syntax::{Ident, Path}; +/// Array of forbidden attribute names that are reserved for internal use by derived traits +pub const FORBIDDEN_FUNCTION_NAMES: [&str; 4] = ["get_type", "obj", "repr", "str"]; +/// Default array of blocklisted attribute names +const DEFAULT_BLOCKLIST_ATTRIBUTE_NAMES: [&str; 4] = ["builtins", "testing", "tests", "test"]; + +/// Configuration for `Codegen` engine. #[derive(Debug, Clone, PartialEq, Eq, Hash, typed_builder::TypedBuilder)] pub struct Config { + /// Flag that determines whether to recursively generate code for all submodules of the target modules. + #[builder(default = true)] + pub(crate) traverse_submodules: bool, + + /// Flag that determines whether to generate code for prelude modules (Python `__all__` attribute). + #[builder(default = true)] + pub(crate) generate_preludes: bool, + /// Flag that determines whether to generate code for imports. + #[builder(default = true)] + pub(crate) generate_imports: bool, + /// Flag that determines whether to generate code for classes. + #[builder(default = true)] + pub(crate) generate_classes: bool, + /// Flag that determines whether to generate code for type variables. 
#[builder(default = true)] - pub generate_dependencies: bool, + pub(crate) generate_type_vars: bool, + /// Flag that determines whether to generate code for functions. #[builder(default = true)] - pub generate_preludes: bool, + pub(crate) generate_functions: bool, + /// Flag that determines whether to generate code for properties. + #[builder(default = true)] + pub(crate) generate_properties: bool, + /// Flag that determines whether to generate documentation for the generated code. + /// The documentation is based on Python docstrings. + #[builder(default = true)] + pub(crate) generate_docs: bool, + + /// List of blocklisted attribute names that are skipped during the code generation. + #[builder(default = DEFAULT_BLOCKLIST_ATTRIBUTE_NAMES.iter().map(|&s| s.to_string()).collect())] + pub(crate) blocklist_names: Vec, + + /// Flag that determines whether to generate code for all dependencies of the target modules. + /// The list of dependent modules is derived from the imports of the target modules. + #[builder(default = false)] + pub(crate) generate_dependencies: bool, + + /// Flag that suppresses the generation of Python STDOUT while parsing the Python code. #[builder(default = true)] - pub suppress_python_stdout: bool, - // TODO: Default to false + pub(crate) suppress_python_stdout: bool, + /// Flag that suppresses the generation of Python STDERR while parsing the Python code. 
#[builder(default = true)] - pub suppress_python_stderr: bool, + pub(crate) suppress_python_stderr: bool, } impl Default for Config { @@ -20,15 +59,22 @@ impl Default for Config { } impl Config { - pub fn is_attr_allowed( + pub(crate) fn is_attr_allowed( &self, attr_name: &Ident, attr_module: &Path, attr_type: &pyo3::types::PyType, ) -> bool { if + // Skip always forbidden attribute names + FORBIDDEN_FUNCTION_NAMES.contains(&attr_name.as_py()) || // Skip private attributes attr_name.as_py().starts_with('_') || + attr_module.iter().any(|segment| segment.as_py().starts_with('_')) || + // Skip blocklisted attributes + self.blocklist_names.iter().any(|blocklist_match| { + attr_name.as_py() == blocklist_match + }) || // Skip builtin functions attr_type.is_subclass_of::().unwrap_or(false) || // Skip `__future__` attributes diff --git a/pyo3_bindgen_engine/src/lib.rs b/pyo3_bindgen_engine/src/lib.rs index 823619e..4ccbcb4 100644 --- a/pyo3_bindgen_engine/src/lib.rs +++ b/pyo3_bindgen_engine/src/lib.rs @@ -3,17 +3,17 @@ mod codegen; mod config; mod syntax; -mod types; +mod typing; mod utils; -// Re-export the public API +// Internal re-exports for convenience +use utils::io as io_utils; +use utils::result::Result; + +// Public API re-exports pub use codegen::Codegen; pub use config::Config; pub use utils::{error::PyBindgenError, result::PyBindgenResult}; -// Re-export pyo3 for convenience +// Public re-export of PyO3 for convenience pub use pyo3; - -// Internal re-exports for convenience -use utils::io as io_utils; -use utils::result::Result; diff --git a/pyo3_bindgen_engine/src/syntax/class.rs b/pyo3_bindgen_engine/src/syntax/class.rs index 551140a..7844f8c 100644 --- a/pyo3_bindgen_engine/src/syntax/class.rs +++ b/pyo3_bindgen_engine/src/syntax/class.rs @@ -1,12 +1,14 @@ +use itertools::Itertools; + use super::{ AttributeVariant, Function, FunctionType, Ident, MethodType, Path, Property, PropertyOwner, }; use crate::{Config, Result}; -#[derive(Debug, Clone)] 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Class { pub name: Path, - pub subclasses: Vec, + // pub subclasses: Vec, pub methods: Vec, pub properties: Vec, pub docstring: Option, @@ -17,7 +19,7 @@ impl Class { let py = class.py(); // Initialize lists for all members of the class - let mut subclasses = Vec::new(); + // let mut subclasses = Vec::new(); let mut methods = Vec::new(); let mut properties = Vec::new(); @@ -27,23 +29,29 @@ impl Class { .iter() // Convert each attribute name to an identifier .map(|attr_name| Ident::from_py(&attr_name.to_string())) + .unique() + // TODO: Try to first access the attribute via __dict__ because Python's descriptor protocol might change the attributes obtained via getattr() + // - For example, classmethod and staticmethod are converted to method/function + // - However, this might also change some of the parsing and it would need to be fixed // Expand each attribute to a tuple of (attr, attr_name, attr_module, attr_type) - .map(|attr_name| { - let attr = class.getattr(attr_name.as_py()).unwrap_or_else(|_| { - unreachable!( - "Python object must always have attributes listed in its `__dir__`: {}", - attr_name - ) - }); - let attr_module = Path::from_py( - &attr + .filter_map(|attr_name| { + if let Ok(attr) = class.getattr(attr_name.as_py()) { + + let attr_module = Path::from_py( + &attr .getattr(pyo3::intern!(py, "__module__")) .map(std::string::ToString::to_string) .unwrap_or_default(), - ); - let attr_type = attr.get_type(); + ); + let attr_type = attr.get_type(); - (attr, attr_name, attr_module, attr_type) + Some((attr, attr_name, attr_module, attr_type)) + } else { + eprintln!( + "WARN: Cannot get attribute '{attr_name}' of '{name}' even though it is listed in its `__dir__`. 
Bindings will not be generated.", + ); + None + } }) // Filter attributes based on various configurable conditions .filter(|(_attr, attr_name, attr_module, attr_type)| { @@ -54,18 +62,23 @@ impl Class { .try_for_each(|(attr, attr_name, attr_module, attr_type)| { let attr_name_full = name.join(&attr_name.clone().into()); match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, false) - .unwrap() + ? { AttributeVariant::Import => { - eprintln!("WARN: Imports in classes are not supported: {attr_name}"); + eprintln!("WARN: Imports in classes are not supported: '{name}.{attr_name}'. Bindings will not be generated."); } AttributeVariant::Module => { - eprintln!("WARN: Submodules in classes are not supported: {attr_name}"); + eprintln!( + "WARN: Submodules in classes are not supported: '{name}.{attr_name}'. Bindings will not be generated.", + ); } AttributeVariant::Class => { - let subclass = - Self::parse(cfg, attr.downcast().unwrap(), attr_name_full).unwrap(); - subclasses.push(subclass); + // let subclass = + // Self::parse(cfg, attr.downcast()?, attr_name_full)?; + // subclasses.push(subclass); + eprintln!( + "WARN: Subclasses in classes are not supported: '{name}.{attr_name}'. Bindings will not be generated.", + ); } AttributeVariant::Function | AttributeVariant::Method => { let method = Function::parse( @@ -76,28 +89,28 @@ impl Class { class_path: name.clone(), typ: match attr_name.as_py() { "__init__" => MethodType::Constructor, - "__call__" => MethodType::Call, - _ => MethodType::Regular, + "__call__" => MethodType::Callable, + _ => MethodType::Unknown, }, }, ) - .unwrap(); + ?; methods.push(method); } AttributeVariant::Closure => { - eprintln!("WARN: Closures are not supported in classes: {attr_name}"); + eprintln!("WARN: Closures are not supported in classes: '{attr_name}'. 
Bindings will not be generated."); } AttributeVariant::TypeVar => { - eprintln!("WARN: TypesVars are not supported in classes: {attr_name}"); + eprintln!("WARN: TypesVars are not supported in classes: '{attr_name}'. Bindings will not be generated."); } AttributeVariant::Property => { let property = Property::parse( cfg, attr, attr_name_full, - PropertyOwner::Class(name.clone()), + PropertyOwner::Class, ) - .unwrap(); + ?; properties.push(property); } } @@ -116,7 +129,7 @@ impl Class { Ok(Self { name, - subclasses, + // subclasses, methods, properties, docstring, @@ -125,13 +138,122 @@ impl Class { } impl Class { - pub fn generate(&self, _cfg: &Config) -> Result { - todo!() - } -} + pub fn generate(&self, cfg: &Config) -> Result { + let mut output = proc_macro2::TokenStream::new(); -impl Class { - pub fn canonicalize(&mut self) { - todo!() + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! { + #[doc = #docstring] + }); + } + } + + // Generate the struct + let struct_ident: syn::Ident = { + let name = self.name.name(); + if let Ok(ident) = name.try_into() { + ident + } else { + // Sanitize the struct name + let new_name = Ident::from_py(&format!( + "s_{}", + name.as_py().replace(|c: char| !c.is_alphanumeric(), "_") + )); + if let Ok(sanitized_ident) = new_name.clone().try_into() { + eprintln!( + "WARN: Struct '{}' is an invalid Rust ident for a struct name. Renamed to '{}'.", + self.name, self.name.parent().unwrap_or_default().join(&new_name.into()) + ); + sanitized_ident + } else { + eprintln!( + "WARN: Struct '{}' is an invalid Rust ident for a struct name. Renaming failed. 
Bindings will not be generated.", + self.name + ); + return Ok(proc_macro2::TokenStream::new()); + } + } + }; + output.extend(quote::quote! { + #[repr(transparent)] + pub struct #struct_ident(::pyo3::PyAny); + }); + + // Employ pyo3 macros for native types + // Note: Using these macros is probably not the best idea, but it makes possible wrapping around ::pyo3::PyAny instead of ::pyo3::PyObject, which improves usability + let object_name = self.name.to_py(); + output.extend(quote::quote! { + ::pyo3::pyobject_native_type_named!(#struct_ident); + ::pyo3::pyobject_native_type_info!(#struct_ident, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), ::std::option::Option::Some(#object_name)); + ::pyo3::pyobject_native_type_extract!(#struct_ident); + }); + + // Get the names of all methods to avoid name clashes + let mut scoped_function_idents = self + .methods + .iter() + .map(|method| method.name.name()) + .collect::>(); + + // Generate the struct implementation block + let mut struct_impl = proc_macro2::TokenStream::new(); + // Methods + struct_impl.extend( + self.methods + .iter() + .map(|method| method.generate(cfg, &scoped_function_idents)) + .collect::>()?, + ); + // Properties + { + let mut scoped_function_idents_extra = Vec::with_capacity(2); + if self.methods.iter().any(|method| { + matches!( + method.typ, + FunctionType::Method { + typ: MethodType::Constructor, + .. + } + ) + }) { + scoped_function_idents_extra.push(Ident::from_py("new")); + } + if self.methods.iter().any(|method| { + matches!( + method.typ, + FunctionType::Method { + typ: MethodType::Callable, + .. 
+ } + ) + }) { + scoped_function_idents_extra.push(Ident::from_py("call")); + } + scoped_function_idents.extend(scoped_function_idents_extra.iter()); + struct_impl.extend( + self.properties + .iter() + .map(|property| property.generate(cfg, &scoped_function_idents)) + .collect::>()?, + ); + } + + // Finalize the implementation block of the struct + output.extend(quote::quote! { + #[automatically_derived] + impl #struct_ident { + #struct_impl + } + }); + + Ok(output) } } diff --git a/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs index 336abe1..f7f3bdb 100644 --- a/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs +++ b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs @@ -21,7 +21,7 @@ impl AttributeVariant { attr_type: &pyo3::types::PyType, attr_module: &Path, owner_name: &Path, - consider_imported: bool, + consider_import: bool, ) -> Result { let inspect = py.import("inspect")?; @@ -53,7 +53,7 @@ impl AttributeVariant { let is_external = attr_module != owner_name; let is_imported = is_external && (is_submodule || is_class || is_function || is_method); - Ok(if consider_imported && is_imported { + Ok(if consider_import && is_imported { AttributeVariant::Import } else if is_submodule { AttributeVariant::Module diff --git a/pyo3_bindgen_engine/src/syntax/common/ident.rs b/pyo3_bindgen_engine/src/syntax/common/ident.rs index d977f3a..b6fda6a 100644 --- a/pyo3_bindgen_engine/src/syntax/common/ident.rs +++ b/pyo3_bindgen_engine/src/syntax/common/ident.rs @@ -52,6 +52,18 @@ impl TryFrom<&Ident> for syn::Ident { } } +impl std::cmp::PartialOrd for Ident { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::cmp::Ord for Ident { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.as_py().cmp(other.as_py()) + } +} + impl std::ops::Deref for Ident { type Target = str; fn deref(&self) -> &Self::Target { diff --git 
a/pyo3_bindgen_engine/src/syntax/common/path.rs b/pyo3_bindgen_engine/src/syntax/common/path.rs index 126acbf..e8b7417 100644 --- a/pyo3_bindgen_engine/src/syntax/common/path.rs +++ b/pyo3_bindgen_engine/src/syntax/common/path.rs @@ -133,7 +133,7 @@ impl Path { if self == target { return Path { leading_colon: false, - segments: vec![Ident::from_rs("self")], + segments: vec![Ident::from_rs("super"), target.name().clone()], }; } @@ -146,7 +146,7 @@ impl Path { .count(); // Determine the relative path - let relative_segments = match common_prefix_length { + let mut relative_segments = match common_prefix_length { n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) .take(self.segments.len() - n) .chain(target.segments.iter().skip(n).cloned()) @@ -159,6 +159,11 @@ impl Path { } }; + // If the relative segment ends with "super", fully specify the path by adding another "super" and the name of the target + if relative_segments.last().map(Ident::as_rs) == Some("super") { + relative_segments.extend([Ident::from_rs("super"), target.name().clone()]); + } + Path { leading_colon: false, segments: relative_segments, @@ -184,6 +189,18 @@ impl From<&[Ident]> for Path { } } +impl std::cmp::PartialOrd for Path { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::cmp::Ord for Path { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.to_py().cmp(&other.to_py()) + } +} + impl TryFrom for syn::Path { type Error = syn::Error; fn try_from(value: Path) -> Result { diff --git a/pyo3_bindgen_engine/src/syntax/function.rs b/pyo3_bindgen_engine/src/syntax/function.rs index 83f2ea5..92f9e41 100644 --- a/pyo3_bindgen_engine/src/syntax/function.rs +++ b/pyo3_bindgen_engine/src/syntax/function.rs @@ -1,8 +1,10 @@ use super::{Ident, Path}; -use crate::{types::Type, Config, Result}; -use pyo3::ToPyObject; +use crate::{typing::Type, Config, Result}; +use itertools::Itertools; +use pyo3::{types::IntoPyDict, ToPyObject}; +use 
rustc_hash::FxHashSet as HashSet; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Function { pub name: Path, pub typ: FunctionType, @@ -16,7 +18,7 @@ impl Function { _cfg: &Config, function: &pyo3::types::PyAny, name: Path, - typ: FunctionType, + mut typ: FunctionType, ) -> Result { let py = function.py(); @@ -32,18 +34,14 @@ impl Function { // Extract the signature of the function if let Ok(function_signature) = py - .import(pyo3::intern!(py, "inspect")) - .unwrap() + .import(pyo3::intern!(py, "inspect"))? .call_method1(pyo3::intern!(py, "signature"), (function,)) { // Extract the parameters of the function - let parameters = function_signature - .getattr(pyo3::intern!(py, "parameters")) - .unwrap() - .call_method0(pyo3::intern!(py, "values")) - .unwrap() - .iter() - .unwrap() + let mut parameters = function_signature + .getattr(pyo3::intern!(py, "parameters"))? + .call_method0(pyo3::intern!(py, "values"))? + .iter()? .map(|param| { let param = param?; @@ -52,18 +50,25 @@ impl Function { let kind = ParameterKind::from( param.getattr(pyo3::intern!(py, "kind"))?.extract::()?, ); - let annotation = { - let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; - if annotation.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { - None - } else { - Some(annotation) + let annotation = match kind { + ParameterKind::VarPositional => Type::PyTuple(vec![Type::Unknown]), + ParameterKind::VarKeyword => Type::PyDict { + t_key: Box::new(Type::Unknown), + t_value: Box::new(Type::Unknown), + }, + _ => { + let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; + if annotation.is(param.getattr(pyo3::intern!(py, "empty"))?) { + Type::Unknown + } else { + annotation.try_into()? + } } - } - .try_into()?; + }; + let default = { let default = param.getattr(pyo3::intern!(py, "default"))?; - if default.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) { + if default.is(param.getattr(pyo3::intern!(py, "empty"))?) 
{ None } else { Some(default.to_object(py)) @@ -79,20 +84,186 @@ impl Function { }) .collect::>>()?; + // Retain only used parameters (discard unused `_` parameters) + parameters.retain(|param| param.name.as_rs() != "r#_"); + // Extract the return annotation of the function let return_annotation = { let return_annotation = function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; - if return_annotation.is(function_signature - .getattr(pyo3::intern!(py, "empty")) - .unwrap()) - { - None + if return_annotation.is(function_signature.getattr(pyo3::intern!(py, "empty"))?) { + Type::Unknown } else { - Some(return_annotation) + return_annotation.try_into()? + } + }; + + // If marked as an unknown method, try to infer the method type + match &typ { + FunctionType::Method { + class_path, + typ: method_typ, + } if *method_typ == MethodType::Unknown => { + // Get the class object from its class path + let class = py + .import( + class_path + .root() + .unwrap_or_else(|| unreachable!()) + .to_py() + .as_str(), + ) + .and_then(|root_module| { + class_path.iter().skip(1).try_fold( + root_module.extract::<&pyo3::types::PyAny>()?, + |module, name| module.getattr(name.as_py()), + ) + }); + + // Try to get the static object of the method (from __dict__), which still contains information about what kind of method it is + if let Ok(static_fn_obj) = class.and_then(|class| { + class + .getattr(pyo3::intern!(py, "__dict__"))? + .get_item(name.name().as_py()) + }) { + let locals = [("obj", static_fn_obj)].into_py_dict(py); + let method_type = if py + .eval("isinstance(obj, classmethod)", None, Some(locals))? + .is_true()? + { + MethodType::ClassMethod + } else if py + .eval("isinstance(obj, staticmethod)", None, Some(locals))? + .is_true()? 
+ { + MethodType::StaticMethod + } else { + MethodType::InstanceMethod + }; + typ = FunctionType::Method { + class_path: class_path.clone(), + typ: method_type, + }; + } else { + // Cannot determine the method type, default to static method (will be changed to instance method if the first parameter is named 'self') + typ = FunctionType::Method { + class_path: class_path.clone(), + typ: MethodType::StaticMethod, + }; + } } + _ => {} + }; + + // As a final step in determining the method type, check parameters for all non-instance/callable methods + // Note: This is not 100% reliable, because Python does not enforce the first parameter to be named "self" + // TODO: See if there is a better way to infer the method type from parameters alone + match &typ { + FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Constructor | MethodType::Callable, + .. + } => {} + FunctionType::Method { class_path, typ: _ } => { + if parameters.first().map(|p| p.name.as_rs()) == Some("r#self") { + typ = FunctionType::Method { + class_path: class_path.clone(), + typ: MethodType::InstanceMethod, + }; + } + } + FunctionType::Function | FunctionType::Closure => { + if parameters.first().map(|p| p.name.as_rs()) == Some("r#self") { + if [ + ParameterKind::PositionalOnly, + ParameterKind::PositionalOrKeyword, + ] + .contains(¶meters[0].kind) + { + eprintln!( + "WARN: Function '{name}' has the first parameter named 'self', but is not marked as a method. The parameter is renamed to '__unknown_self__'." + ); + parameters[0].name = Ident::from_rs("__unknown_self__"); + parameters[0].annotation = Type::Unknown; + } else { + eprintln!( + "WARN: Function '{name}' has the first parameter named 'self', but is not marked as a method. All parameters are replaced with '*args' and '**kwargs'." 
+ ); + parameters = vec![ + Parameter { + name: Ident::from_rs("args"), + kind: ParameterKind::VarPositional, + annotation: Type::PyTuple(vec![Type::Unknown]), + default: None, + }, + Parameter { + name: Ident::from_rs("kwargs"), + kind: ParameterKind::VarKeyword, + annotation: Type::PyDict { + t_key: Box::new(Type::Unknown), + t_value: Box::new(Type::Unknown), + }, + default: None, + }, + ]; + } + } + } + }; + + // Hack: Reassign InstanceMethod with no parameter to StaticMethod + // This should not be necessary as every InstanceMethod should have at least one parameter (self), but it does for certain complex Python modules + if let FunctionType::Method { + typ: MethodType::InstanceMethod, + .. + } = &typ + { + if parameters.is_empty() { + eprintln!( + "WARN: Method '{name}' is marked as an instance method, but has no parameters. Changed to static method.", + ); + typ = FunctionType::Method { + class_path: name.clone(), + typ: MethodType::StaticMethod, + }; + } + }; + + // Skip the first parameter if it's an instance method (or `__init__`/`__call__`) + if let FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Constructor | MethodType::Callable, + .. + } = typ + { + parameters.remove(0); + }; + + // If any of the parameters is still called 'self', do not handle the parameters + if parameters + .iter() + .any(|param| param.name.as_rs() == "r#self") + { + eprintln!( + "WARN: Method '{name}' has a non-first parameter named 'self'. 
All parameters are replaced with '*args' and '**kwargs'.", + + ); + parameters = vec![ + Parameter { + name: Ident::from_rs("args"), + kind: ParameterKind::VarPositional, + annotation: Type::PyTuple(vec![Type::Unknown]), + default: None, + }, + Parameter { + name: Ident::from_rs("kwargs"), + kind: ParameterKind::VarKeyword, + annotation: Type::PyDict { + t_key: Box::new(Type::Unknown), + t_value: Box::new(Type::Unknown), + }, + default: None, + }, + ]; } - .try_into()?; Ok(Self { name, @@ -116,7 +287,7 @@ impl Function { name: Ident::from_rs("kwargs"), kind: ParameterKind::VarKeyword, annotation: Type::PyDict { - t_key: Box::new(Type::PyString), + t_key: Box::new(Type::Unknown), t_value: Box::new(Type::Unknown), }, default: None, @@ -130,14 +301,355 @@ impl Function { } impl Function { - pub fn generate(&self, _cfg: &Config) -> Result { - todo!() - } -} + pub fn generate( + &self, + cfg: &Config, + scoped_function_idents: &[&Ident], + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); -impl Function { - pub fn canonicalize(&mut self) { - todo!() + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! 
{ + #[doc = #docstring] + }); + } + } + + // Function signature + let function_ident: syn::Ident = { + let name = self.name.name(); + if let Ok(ident) = name.try_into() { + if crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&name.as_py()) { + return Ok(proc_macro2::TokenStream::new()); + } else { + ident + } + } else { + // Sanitize the function name + let new_name = Ident::from_py(&format!( + "f_{}", + name.as_py().replace(|c: char| !c.is_alphanumeric(), "_") + )); + if let Ok(sanitized_ident) = new_name.clone().try_into() { + eprintln!( + "WARN: Function '{}' is an invalid Rust ident for a function name. Renamed to '{}'.", + self.name, self.name.parent().unwrap_or_default().join(&new_name.into()) + ); + sanitized_ident + } else { + eprintln!( + "WARN: Function '{}' is an invalid Rust ident for a function name. Renaming failed. Bindings will not be generated.", + self.name + ); + return Ok(proc_macro2::TokenStream::new()); + } + } + }; + let param_idents: Vec = self + .parameters + .iter() + .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) + .collect::>>()?; + let param_types: Vec = self + .parameters + .iter() + .map(|param| { + Result::Ok( + param + .annotation + .clone() + .into_rs_borrowed("", &HashSet::default()), + ) + }) + .collect::>>()?; + let return_type = self + .return_annotation + .clone() + .into_rs_owned("", &HashSet::default()); + output.extend(match &self.typ { + FunctionType::Method { + typ: MethodType::InstanceMethod, + .. + } => { + quote::quote! { + pub fn #function_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<#return_type> + } + } + FunctionType::Method { + typ: MethodType::Callable, + .. 
+ } => { + let call_fn_ident: syn::Ident = { + let mut i = 0; + loop { + let ident = Ident::from_py(&format!( + "call{}", + (i > 0).then(|| i.to_string()).unwrap_or_default() + )); + if !scoped_function_idents.contains(&&ident) { + break ident; + } + i += 1; + } + } + .try_into()?; + quote::quote! { + pub fn #call_fn_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<#return_type> + } + } + FunctionType::Method { + typ: MethodType::Constructor, + .. + } => { + let new_fn_ident: syn::Ident = { + let mut i = 0; + loop { + let ident = Ident::from_py(&format!( + "new{}", + (i > 0).then(|| i.to_string()).unwrap_or_default() + )); + if !scoped_function_idents.contains(&&ident) { + break ident; + } + i += 1; + } + } + .try_into()?; + quote::quote! { + pub fn #new_fn_ident<'py>( + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<&'py Self> + } + } + _ => { + quote::quote! { + pub fn #function_ident<'py>( + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<#return_type> + } + } + }); + + // Function body (function dispatcher) + let function_dispatcher = match &self.typ { + FunctionType::Function | FunctionType::Closure => { + let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); + let module_path = if self.name.len() > 1 { + &self.name[1..] + } else { + &[] + } + .iter() + .map(|ident| ident.as_py().to_owned()) + .collect_vec(); + quote::quote! { + py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #module_path))?)* + } + } + FunctionType::Method { + class_path, + typ: MethodType::ClassMethod | MethodType::StaticMethod | MethodType::Constructor, + } => { + let package = class_path.root().unwrap_or_else(|| unreachable!()).to_py(); + let class_path = if class_path.len() > 1 { + &class_path[1..] 
+ } else { + &[] + } + .iter() + .map(|ident| ident.as_py().to_owned()) + .collect_vec(); + quote::quote! { + py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #class_path))?)* + } + } + FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Callable, + .. + } => { + quote::quote! { + self.0 + } + } + _ => { + eprintln!( + "WARN: Method '{}' has an unknown type. Bindings will not be generated.", + self.name + ); + return Ok(proc_macro2::TokenStream::new()); + } + }; + + // Function body: positional args + let positional_args_idents: Vec = self + .parameters + .iter() + .filter(|param| { + [ + ParameterKind::PositionalOnly, + ParameterKind::PositionalOrKeyword, + ] + .contains(¶m.kind) + }) + .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) + .collect::>()?; + let var_positional_args_ident: Option = self + .parameters + .iter() + .find(|param| param.kind == ParameterKind::VarPositional) + .and_then(|param| Ident::from_py(&format!("p_{}", param.name)).try_into().ok()); + let has_positional_args = + !positional_args_idents.is_empty() || var_positional_args_ident.is_some(); + let positional_args = if let Some(var_positional_args_ident) = var_positional_args_ident { + if positional_args_idents.is_empty() { + quote::quote! { + #var_positional_args_ident + } + } else { + let n_args_fixed = positional_args_idents.len(); + // TODO: The reference here might be incorrect (&#positional_args_idents could cause double reference) - check + quote::quote! { + { + let mut __internal__args = Vec::with_capacity(#n_args_fixed + #var_positional_args_ident.len()); + __internal__args.extend([#(::pyo3::ToPyObject::to_object(&#positional_args_idents, py),)*]); + __internal__args.extend(#var_positional_args_ident.iter().map(|__internal__arg| ::pyo3::ToPyObject::to_object(__internal__arg, py))); + ::pyo3::types::PyTuple::new( + py, + __internal__args, + ) + } + } + } + } else if positional_args_idents.is_empty() { + quote::quote! 
{ + () + } + } else { + // TODO: The reference here might be incorrect (&#positional_args_idents could cause double reference) - check + quote::quote! { + ::pyo3::types::PyTuple::new( + py, + [#(::pyo3::ToPyObject::to_object(&#positional_args_idents, py),)*], + ) + } + }; + // Function body: keyword args + let keyword_args: Vec<&Parameter> = self + .parameters + .iter() + .filter(|param| [ParameterKind::KeywordOnly].contains(¶m.kind)) + .collect_vec(); + let keyword_args_names: Vec<&str> = keyword_args + .iter() + .map(|param| param.name.as_py()) + .collect(); + let keyword_args_idents: Vec = keyword_args + .iter() + .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) + .collect::>()?; + let var_keyword_args_ident: Option = self + .parameters + .iter() + .find(|param| param.kind == ParameterKind::VarKeyword) + .and_then(|param| Ident::from_py(&format!("p_{}", param.name)).try_into().ok()); + let has_keyword_args = !keyword_args_idents.is_empty() || var_keyword_args_ident.is_some(); + let keyword_args = if let Some(var_keyword_args_ident) = var_keyword_args_ident { + if keyword_args_idents.is_empty() { + quote::quote! { + #var_keyword_args_ident + } + } else { + quote::quote! { + { + let __internal__kwargs = #var_keyword_args_ident; + #( + __internal__kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents); + )* + __internal__kwargs + } + } + } + } else if keyword_args_idents.is_empty() { + quote::quote! { + ::pyo3::types::PyDict::new(py) + } + } else { + quote::quote! { + { + let __internal__kwargs = ::pyo3::types::PyDict::new(py); + #( + __internal__kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents); + )* + __internal__kwargs + } + } + }; + // Function body: call + let call = match &self.typ { + FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Constructor | MethodType::Callable, + .. + } => { + if has_keyword_args { + quote::quote! 
{ + call(#positional_args, Some(#keyword_args)) + } + } else if has_positional_args { + quote::quote! { + call1(#positional_args) + } + } else { + quote::quote! { + call0() + } + } + } + _ => { + let method_name = self.name.name().as_py(); + if has_keyword_args { + quote::quote! { + call_method(::pyo3::intern!(py, #method_name), #positional_args, Some(#keyword_args)) + } + } else if has_positional_args { + quote::quote! { + call_method1(::pyo3::intern!(py, #method_name), #positional_args) + } + } else { + quote::quote! { + call_method0(::pyo3::intern!(py, #method_name)) + } + } + } + }; + + // Function body + output.extend(quote::quote! { + { + ::pyo3::FromPyObject::extract( + #function_dispatcher.#call? + ) + } + }); + + Ok(output) } } @@ -150,9 +662,12 @@ pub enum FunctionType { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum MethodType { + InstanceMethod, + ClassMethod, + StaticMethod, Constructor, - Call, - Regular, + Callable, + Unknown, } #[derive(Debug, Clone)] @@ -163,6 +678,26 @@ pub struct Parameter { pub default: Option>, } +impl PartialEq for Parameter { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + && self.kind == other.kind + && self.annotation == other.annotation + && self.default.is_some() == other.default.is_some() + } +} + +impl Eq for Parameter {} + +impl std::hash::Hash for Parameter { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.kind.hash(state); + self.annotation.hash(state); + self.default.is_some().hash(state); + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ParameterKind { PositionalOnly, diff --git a/pyo3_bindgen_engine/src/syntax/import.rs b/pyo3_bindgen_engine/src/syntax/import.rs index c8cb270..5376c06 100644 --- a/pyo3_bindgen_engine/src/syntax/import.rs +++ b/pyo3_bindgen_engine/src/syntax/import.rs @@ -17,9 +17,11 @@ impl Import { import_type, }) } -} -impl Import { + pub fn is_external(&self) -> bool { + self.import_type == ImportType::ExternalImport + } + pub fn 
generate(&self, cfg: &Config) -> Result { // Skip external imports if their generation is disabled if !cfg.generate_dependencies && self.import_type == ImportType::ExternalImport { @@ -34,29 +36,31 @@ impl Import { // Determine the visibility of the import based on its type let visibility = match self.import_type { ImportType::ExternalImport => proc_macro2::TokenStream::new(), - ImportType::Reexport => quote::quote! { pub(crate) }, - ImportType::ScopedReexport => quote::quote! { pub }, + ImportType::Reexport | ImportType::ScopedReexport => quote::quote! { pub }, }; // Generate the path to the target module - let relative_path: syn::Path = self + let relative_path: std::result::Result = self .target .parent() .unwrap_or_default() .relative_to(&self.origin) - .try_into()?; + .try_into(); + if let Ok(relative_path) = relative_path { + // Use alias for the target module if it has a different name than the last segment of its path + let maybe_alias = if self.origin.name() != self.target.name() { + let alias: syn::Ident = self.target.name().try_into()?; + quote::quote! { as #alias } + } else { + proc_macro2::TokenStream::new() + }; - // Use alias for the target module if it has a different name than the last segment of its path - let maybe_alias = if self.origin.name() != self.target.name() { - let alias: syn::Ident = self.target.name().try_into()?; - quote::quote! { as #alias } + Ok(quote::quote! { + #visibility use #relative_path #maybe_alias; + }) } else { - proc_macro2::TokenStream::new() - }; - - Ok(quote::quote! 
{ - #visibility use #relative_path #maybe_alias; - }) + Ok(proc_macro2::TokenStream::new()) + } } } diff --git a/pyo3_bindgen_engine/src/syntax/module.rs b/pyo3_bindgen_engine/src/syntax/module.rs index 458f77e..ac06f8b 100644 --- a/pyo3_bindgen_engine/src/syntax/module.rs +++ b/pyo3_bindgen_engine/src/syntax/module.rs @@ -6,41 +6,67 @@ use crate::{Config, Result}; use itertools::Itertools; use rustc_hash::FxHashSet as HashSet; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Module { pub name: Path, pub prelude: Vec, pub imports: Vec, pub submodules: Vec, pub classes: Vec, - pub functions: Vec, pub type_vars: Vec, + pub functions: Vec, pub properties: Vec, pub docstring: Option, } impl Module { + pub fn empty(py: pyo3::Python, name: Path) -> Result { + let module = py.import(name.to_py().as_str())?; + + // Extract the docstring of the module + let docstring = { + let docstring = module.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + prelude: Vec::default(), + imports: Vec::default(), + submodules: Vec::default(), + classes: Vec::default(), + type_vars: Vec::default(), + functions: Vec::default(), + properties: Vec::default(), + docstring, + }) + } + pub fn parse(cfg: &Config, module: &pyo3::types::PyModule) -> Result { let py = module.py(); // Extract the name of the module - let name = Path::from_py(module.name().unwrap()); + let name = Path::from_py(module.name()?); // Extract the index of the module as prelude (if enabled) let prelude = if cfg.generate_preludes { - Self::extract_prelude(cfg, module) + Self::extract_prelude(cfg, module, &name) } else { Vec::new() }; // Extract the list of all submodules in the module - let mut submodules_to_process = Self::extract_submodules(module).unwrap(); + let mut submodules_to_process = Self::extract_submodules(cfg, module)?; // Initialize lists for all other members 
of the module let mut imports = Vec::new(); let mut classes = Vec::new(); - let mut functions = Vec::new(); let mut type_vars = Vec::new(); + let mut functions = Vec::new(); let mut properties = Vec::new(); // Extract the list of all attribute names in the module @@ -49,23 +75,30 @@ impl Module { .iter() // Convert each attribute name to an identifier .map(|attr_name| Ident::from_py(&attr_name.to_string())) + // Remove duplicates + .unique() + // TODO: Try to first access the attribute via __dict__ because Python's descriptor protocol might change the attributes obtained via getattr() + // - For example, classmethod and staticmethod are converted to method/function + // - However, this might also change some of the parsing and it would need to be fixed // Expand each attribute to a tuple of (attr, attr_name, attr_module, attr_type) - .map(|attr_name| { - let attr = module.getattr(attr_name.as_py()).unwrap_or_else(|_| { - unreachable!( - "Python object must always have attributes listed in its `__dir__`: {}", - attr_name - ) - }); - let attr_module = Path::from_py( - &attr + .filter_map(|attr_name| { + if let Ok(attr) = module.getattr(attr_name.as_py()) { + + let attr_module = Path::from_py( + &attr .getattr(pyo3::intern!(py, "__module__")) .map(std::string::ToString::to_string) .unwrap_or_default(), - ); - let attr_type = attr.get_type(); - - (attr, attr_name, attr_module, attr_type) + ); + let attr_type = attr.get_type(); + + Some((attr, attr_name, attr_module, attr_type)) + } else { + eprintln!( + "WARN: Cannot get attribute '{attr_name}' of '{name}' even though it is listed in its `__dir__`. 
Bindings will not be generated.", + ); + None + } }) // Filter attributes based on various configurable conditions .filter(|(_attr, attr_name, attr_module, attr_type)| { @@ -75,7 +108,7 @@ impl Module { .try_for_each(|(attr, attr_name, attr_module, attr_type)| { let attr_name_full = name.join(&attr_name.clone().into()); match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, true) - .unwrap() + ? { AttributeVariant::Import => { let origin = attr_module.join(&Path::from_py( @@ -84,73 +117,69 @@ impl Module { .map(std::string::ToString::to_string) .unwrap_or(attr_name.as_py().to_owned()), )); + + // Make sure the import does not create a conflict with a submodule + let does_import_conflict_with_submodule = submodules_to_process.contains(&attr_name); + if does_import_conflict_with_submodule { + eprintln!( + "WARN: Import `{origin}` -> '{attr_name_full}' would conflict with a submodule of equal name. Bindings will not be generated.", + ); + } // Make sure the origin attribute is allowed (each segment of the path) - if (0..origin.len()).all(|i| { - let _attr_name; - let _attr_module; - let _attr_type; - if i < origin.len() - 1 { - _attr_name = &origin[i]; - _attr_module = origin[..i].into(); - _attr_type = py.get_type::(); + let is_origin_attr_allowed = !does_import_conflict_with_submodule && (0..origin.len()).all(|i| { + let attr_name = &origin[i]; + let attr_module = origin[..i].into(); + let attr_type = if i == origin.len() - 1 { + attr_type } else { - _attr_name = &attr_name; - _attr_module = attr_module.clone(); - _attr_type = attr_type; + py.get_type::() }; - cfg.is_attr_allowed(_attr_name, &_attr_module, _attr_type) - }) { - let import = Import::new(origin, attr_name_full).unwrap(); + cfg.is_attr_allowed(attr_name, &attr_module, attr_type) + }); + + if is_origin_attr_allowed { + let import = Import::new(origin, attr_name_full)?; imports.push(import); } } AttributeVariant::Module => { // Note: This should technically not be necessary as 
`Self::extract_submodules` is supposed to extract all submodules - submodules_to_process.insert(attr_name); + submodules_to_process.insert(attr_name.clone()); } AttributeVariant::Class => { let class = - Class::parse(cfg, attr.downcast().unwrap(), attr_name_full).unwrap(); + Class::parse(cfg, attr.downcast().unwrap_or_else(|_| unreachable!( + "The attribute is known to be a class at this point" + )), attr_name_full)?; classes.push(class); } + AttributeVariant::TypeVar => { + let type_var = TypeVar::new(attr_name_full); + type_vars.push(type_var); + } AttributeVariant::Function => { let function = Function::parse(cfg, attr, attr_name_full, FunctionType::Function) - .unwrap(); + ?; functions.push(function); } AttributeVariant::Method => { - eprintln!("WARN: Methods in modules are not supported: {attr_name}"); - // let function = Function::parse( - // cfg, - // attr, - // attr_name_full, - // FunctionType::Method { - // class_path: todo!(), - // typ: super::MethodType::Regular, - // }, - // ) - // .unwrap(); - // functions.push(function); + eprintln!("WARN: Methods in modules are not supported: '{name}.{attr_name}'. 
Bindings will not be generated."); } AttributeVariant::Closure => { let function = Function::parse(cfg, attr, attr_name_full, FunctionType::Closure) - .unwrap(); + ?; functions.push(function); } - AttributeVariant::TypeVar => { - let type_var = TypeVar::new(attr_name_full).unwrap(); - type_vars.push(type_var); - } AttributeVariant::Property => { let property = Property::parse( cfg, attr, attr_name_full, - PropertyOwner::Module(name.clone()), + PropertyOwner::Module, ) - .unwrap(); + ?; properties.push(property); } } @@ -158,15 +187,18 @@ impl Module { })?; // Process submodules - let submodules = submodules_to_process - .into_iter() - .filter_map(|submodule_name| { - py.import(name.join(&submodule_name.into()).to_py().as_str()) - .ok() - }) - .map(|submodule| Self::parse(cfg, submodule)) - .collect::>() - .unwrap(); + let submodules = if cfg.traverse_submodules { + submodules_to_process + .into_iter() + .filter_map(|submodule_name| { + py.import(name.join(&submodule_name.into()).to_py().as_str()) + .ok() + }) + .map(|submodule| Self::parse(cfg, submodule)) + .collect::>()? 
+ } else { + Vec::default() + }; // Extract the docstring of the module let docstring = { @@ -184,60 +216,141 @@ impl Module { imports, submodules, classes, - functions, type_vars, + functions, properties, docstring, }) } - fn extract_prelude(cfg: &Config, module: &pyo3::prelude::PyModule) -> Vec { - // Extract the index (__all__) of the module if it exists - let mut index_attr_names = if let Ok(index) = module.index() { - index - .iter() - .map(|x| Ident::from_py(&x.to_string())) - .collect_vec() - } else { - Vec::new() - }; + pub fn generate( + &self, + cfg: &Config, + is_top_level: bool, + top_level_modules: &[Self], + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); - // Compare the index with public attrs of the module - // Return an empty vector if they are identical (no need to generate a prelude) - { - let public_attr_names_set: HashSet<_> = module - .dir() - .iter() - .map(|attr_name| Ident::from_py(&attr_name.to_string())) - .filter(|attr_name| !attr_name.as_py().starts_with('_')) - .collect(); - let index_attr_names_set = index_attr_names.iter().cloned().collect::>(); + // Extra configuration for top-level modules + if is_top_level { + output.extend(quote::quote! { + #[allow( + clippy::all, + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused + )] + }); + } - if index_attr_names_set == public_attr_names_set { - return Vec::new(); + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! 
{ + #[doc = #docstring] + }); } } - // Retain only allowed attributes - index_attr_names.retain(|attr_name| { - let attr_module = Path::from_py(module.name().unwrap()); - if let Ok(attr) = module.getattr(attr_name.as_py()) { - let attr_type = attr.get_type(); - cfg.is_attr_allowed(attr_name, &attr_module, attr_type) - } else { - false + // Get the names of all functions to avoid name clashes + let scoped_function_idents = self + .functions + .iter() + .map(|function| function.name.name()) + .collect::>(); + + // Generate the module content + let mut module_content = proc_macro2::TokenStream::new(); + // Imports + if cfg.generate_imports { + module_content.extend( + self.imports + .iter() + .filter(|import| { + top_level_modules + .iter() + .any(|module| module.check_path_exists_recursive(&import.origin, false)) + }) + .map(|import| import.generate(cfg)) + .collect::>()?, + ); + } + // Prelude + if cfg.generate_preludes { + module_content.extend(self.generate_prelude()); + } + // Type variables + if cfg.generate_type_vars { + module_content.extend( + self.type_vars + .iter() + .map(|type_var| type_var.generate(cfg)) + .collect::>()?, + ); + } + // Classes + if cfg.generate_classes { + module_content.extend( + self.classes + .iter() + .map(|class| class.generate(cfg)) + .collect::>()?, + ); + } + // Functions + if cfg.generate_functions { + module_content.extend( + self.functions + .iter() + .map(|function| function.generate(cfg, &scoped_function_idents)) + .collect::>()?, + ); + } + // Properties + if cfg.generate_properties { + module_content.extend( + self.properties + .iter() + .map(|property| property.generate(cfg, &scoped_function_idents)) + .collect::>()?, + ); + } + // Submodules + if cfg.traverse_submodules { + module_content.extend( + self.submodules + .iter() + .map(|module| module.generate(cfg, false, top_level_modules)) + .collect::>()?, + ); + } + + // Finalize the module with its content + let module_ident: syn::Ident = self.name.name().try_into()?; + 
output.extend(quote::quote! { + pub mod #module_ident { + #module_content } }); - index_attr_names + Ok(output) } - fn extract_submodules(module: &pyo3::prelude::PyModule) -> Result> { + fn extract_submodules(cfg: &Config, module: &pyo3::types::PyModule) -> Result> { let py = module.py(); - let pkgutil = py.import(pyo3::intern!(py, "pkgutil")).unwrap(); + let pkgutil = py.import(pyo3::intern!(py, "pkgutil"))?; // Determine if the module is a package that contains submodules - let module_name = Path::from_py(module.name().unwrap()); + let module_name = Path::from_py(module.name()?); let is_pkg = module .getattr(pyo3::intern!(py, "__package__")) .map(|package| Path::from_py(&package.to_string())) @@ -250,115 +363,184 @@ impl Module { // Extract the paths of the module let module_paths = module - .getattr(pyo3::intern!(py, "__path__")) - .unwrap() - .extract::<&pyo3::types::PyList>() - .unwrap() + .getattr(pyo3::intern!(py, "__path__"))? + .extract::<&pyo3::types::PyList>()? .iter() .map(|x| std::path::PathBuf::from(x.to_string())) .collect_vec(); // Extract the names of all submodules via `pkgutil.iter_modules` pkgutil - .call_method1(pyo3::intern!(py, "iter_modules"), (module_paths,)) - .unwrap() - .iter() - .unwrap() + .call_method1(pyo3::intern!(py, "iter_modules"), (module_paths,))? + .iter()? 
.map(|submodule| { Ok(Ident::from_py( - &submodule - .unwrap() - .getattr(pyo3::intern!(py, "name")) - .unwrap() - .to_string(), + &submodule?.getattr(pyo3::intern!(py, "name"))?.to_string(), )) }) + // Filter based on various configurable conditions + .filter(|submodule_name| { + submodule_name.as_ref().is_ok_and(|submodule_name| { + cfg.is_attr_allowed( + submodule_name, + &module_name, + py.get_type::(), + ) + }) + }) .collect() } -} -impl Module { - pub fn generate(&self, cfg: &Config, is_top_level: bool) -> Result { - let mut output = proc_macro2::TokenStream::new(); + fn extract_prelude( + cfg: &Config, + module: &pyo3::types::PyModule, + module_name: &Path, + ) -> Vec { + // Extract the index (__all__) of the module if it exists + let mut index_attr_names = if let Ok(index) = module.index() { + index + .iter() + .map(|x| Ident::from_py(&x.to_string())) + .unique() + .collect() + } else { + Vec::default() + }; - // Extra configuration for top-level modules - if is_top_level { - output.extend(quote::quote! { - #[allow( - clippy::all, - clippy::nursery, - clippy::pedantic, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused - )] - }); - } + // Compare the index with public attrs of the module + // Return an empty vector if they are identical (no need to generate a prelude) + { + let public_attr_names_set: HashSet<_> = module + .dir() + .iter() + .map(|attr_name| Ident::from_py(&attr_name.to_string())) + .filter(|attr_name| !attr_name.as_py().starts_with('_')) + .collect(); + let index_attr_names_set: HashSet<_> = index_attr_names.iter().cloned().collect(); - // Documentation - if let Some(docstring) = &self.docstring { - // Trim the docstring and add a leading whitespace (looks better in the generated code) - let mut docstring = docstring.trim().to_owned(); - docstring.insert(0, ' '); + if index_attr_names_set == public_attr_names_set { + return Vec::new(); + } + } - output.extend(quote::quote! 
{ - #[doc = #docstring] + // If the generation of dependencies is disabled, retain only reexports + if !cfg.generate_dependencies { + index_attr_names.retain(|attr_name| { + if let Ok(attr) = module.getattr(attr_name.as_py()) { + let is_reexport = module_name.root().is_some_and(|root_module| { + let attr_module = Path::from_py( + &attr + .getattr(pyo3::intern!(module.py(), "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + attr_module.starts_with(&root_module) + }); + is_reexport + } else { + false + } }); } - // Generate the module content - let mut module_content = proc_macro2::TokenStream::new(); - // Imports - module_content.extend( - self.imports - .iter() - .map(|import| import.generate(cfg)) - .collect::>()?, - ); - // Prelude - module_content.extend(self.generate_prelude()); - - // Finalize the module with its content - let module_ident: syn::Ident = self.name.name().try_into()?; - output.extend(quote::quote! { - pub mod #module_ident { - #module_content + // Retain only allowed attributes + index_attr_names.retain(|attr_name| { + if let Ok(attr) = module.getattr(attr_name.as_py()) { + let attr_type = attr.get_type(); + cfg.is_attr_allowed(attr_name, module_name, attr_type) + } else { + false } }); - Ok(output) + index_attr_names } - fn generate_prelude(&self) -> proc_macro2::TokenStream { + fn generate_prelude(&self) -> Result { // Skip if the prelude is empty if self.prelude.is_empty() { - return proc_macro2::TokenStream::new(); + return Ok(proc_macro2::TokenStream::new()); } // Generate the prelude content (re-export all prelude items) - let prelude_content = self + let exports = self .prelude .iter() + // Retain only attributes that are within self.modules, self.classes, self.functions, self.type_vars, self.properties + .filter(|&ident| self.check_ident_exists_immediate(ident, false)) .map(|ident| { - let ident: syn::Ident = ident.try_into().unwrap(); - quote::quote! 
{ - pub use super::#ident; - } + let ident: syn::Ident = ident.try_into()?; + Ok(quote::quote! { + #ident, + }) }) - .collect::(); + .collect::>()?; + + // Return empty prelude if there are no exports + if exports.is_empty() { + return Ok(proc_macro2::TokenStream::new()); + } // Finalize the prelude with its content - quote::quote! { - pub mod prelude { - #prelude_content + let prelude_ident: syn::Ident = { + let mut i = 0; + loop { + let ident = Ident::from_py(&format!( + "call{}", + (i > 0).then(|| i.to_string()).unwrap_or_default() + )); + if !self.check_ident_exists_immediate(&ident, true) { + break ident; + } + i += 1; } } + .try_into()?; + Ok(quote::quote! { + pub mod #prelude_ident { + pub use super::{#exports}; + } + }) } -} -impl Module { - pub fn canonicalize(&mut self) { - todo!() + fn check_path_exists_recursive(&self, path: &Path, consider_imports: bool) -> bool { + (consider_imports && self.imports.iter().any(|import| import.target == *path)) + || self.submodules.iter().any(|module| module.name == *path) + || self.classes.iter().any(|class| class.name == *path) + || self.functions.iter().any(|function| function.name == *path) + || self.type_vars.iter().any(|type_var| type_var.name == *path) + || self + .properties + .iter() + .any(|property| property.name == *path) + || self + .submodules + .iter() + .any(|module| module.check_path_exists_recursive(path, consider_imports)) + } + + fn check_ident_exists_immediate(&self, ident: &Ident, consider_imports: bool) -> bool { + (consider_imports + && self + .imports + .iter() + .any(|import| import.target.name() == ident)) + || self + .submodules + .iter() + .any(|module| module.name.name() == ident) + || self.classes.iter().any(|class| class.name.name() == ident) + || self + .functions + .iter() + .any(|function| function.name.name() == ident) + || self + .type_vars + .iter() + .any(|type_var| type_var.name.name() == ident) + || self + .properties + .iter() + .any(|property| property.name.name() == ident) } } 
diff --git a/pyo3_bindgen_engine/src/syntax/property.rs b/pyo3_bindgen_engine/src/syntax/property.rs index 13d3196..28a2384 100644 --- a/pyo3_bindgen_engine/src/syntax/property.rs +++ b/pyo3_bindgen_engine/src/syntax/property.rs @@ -1,5 +1,7 @@ -use super::Path; -use crate::{types::Type, Config, Result}; +use super::{Ident, Path}; +use crate::{typing::Type, Config, Result}; +use itertools::Itertools; +use rustc_hash::FxHashSet as HashSet; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Property { @@ -7,8 +9,9 @@ pub struct Property { pub owner: PropertyOwner, pub is_mutable: bool, pub annotation: Type, - pub setter_annotation: Option, + pub setter_annotation: Type, pub docstring: Option, + pub setter_docstring: Option, } impl Property { @@ -23,49 +26,40 @@ impl Property { // Extract the type of the property let typ = property.get_type(); - // Extract the docstring of the property - let mut docstring = { - let docstring = property.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); - if docstring.is_empty() || docstring == "None" { - None - } else { - Some(docstring) - } - }; + // Do not extract the docstring of the property, because it would point to the docstring of the type/class itself, not this property + let mut docstring = None; // Determine the mutability and type of the property - let (is_mutable, annotation, setter_annotation); + let (is_mutable, annotation, setter_annotation, mut setter_docstring); match owner { - PropertyOwner::Module(_) => { + PropertyOwner::Module => { is_mutable = true; annotation = Type::try_from(typ)?; - setter_annotation = None; + setter_annotation = annotation.clone(); + setter_docstring = docstring.clone(); } - PropertyOwner::Class(_) => { + PropertyOwner::Class => { let signature = py - .import(pyo3::intern!(py, "inspect")) - .unwrap() - .getattr(pyo3::intern!(py, "signature")) - .unwrap(); + .import(pyo3::intern!(py, "inspect"))? 
+ .getattr(pyo3::intern!(py, "signature"))?; if let Ok(getter) = property.getattr(pyo3::intern!(py, "fget")) { - // Extract the signature of the function - let function_signature = signature.call1((getter,)).unwrap(); - - // Extract the annotation from the return of the function - annotation = { - let return_annotation = - function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; - if return_annotation.is(function_signature - .getattr(pyo3::intern!(py, "empty")) - .unwrap()) - { - None - } else { - Some(return_annotation) - } + // Extract the annotation from the return of the function (if available) + if let Ok(function_signature) = signature.call1((getter,)) { + annotation = { + let return_annotation = function_signature + .getattr(pyo3::intern!(py, "return_annotation"))?; + if return_annotation + .is(function_signature.getattr(pyo3::intern!(py, "empty"))?) + { + Type::Unknown + } else { + return_annotation.try_into()? + } + }; + } else { + annotation = Type::try_from(typ)?; } - .try_into()?; // Update the docstring if it is empty if docstring.is_none() { @@ -85,50 +79,50 @@ impl Property { match property.getattr(pyo3::intern!(py, "fset")) { Ok(setter) if !setter.is_none() => { - // Extract the signature of the function - let function_signature = signature.call1((setter,)).unwrap(); + is_mutable = true; - // Extract the annotation from the parameter of the function - setter_annotation = Some( - { + // Extract the annotation from the parameter of the function (if available) + if let Ok(function_signature) = signature.call1((setter,)) { + setter_annotation = { let param = function_signature - .getattr(pyo3::intern!(py, "parameters")) - .unwrap() - .call_method0(pyo3::intern!(py, "values")) - .unwrap() - .iter() - .unwrap() + .getattr(pyo3::intern!(py, "parameters"))? + .call_method0(pyo3::intern!(py, "values"))? + .iter()? 
.nth(1) - .unwrap() - .unwrap(); + .unwrap()?; let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; - if annotation.is(param.getattr(pyo3::intern!(py, "empty")).unwrap()) - { - None + if annotation.is(param.getattr(pyo3::intern!(py, "empty"))?) { + Type::Unknown } else { - Some(annotation) + annotation.try_into()? } + }; + } else { + setter_annotation = Type::Unknown; + } + + setter_docstring = { + let docstring = + setter.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) } - .try_into()?, - ); - is_mutable = true; + }; - // Update the docstring if it is still empty if docstring.is_none() { - docstring = { - let docstring = - setter.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); - if docstring.is_empty() || docstring == "None" { - None - } else { - Some(docstring) - } - }; + // Update the getter docstring to match setter docstring if it is still empty + docstring = setter_docstring.clone(); + } else if setter_docstring.is_none() { + // Otherwise, update the setter docstring to match the getter docstring if it is still empty + setter_docstring = docstring.clone(); } } _ => { - setter_annotation = None; is_mutable = false; + setter_annotation = Type::Unknown; + setter_docstring = None; } } } @@ -141,24 +135,199 @@ impl Property { annotation, setter_annotation, docstring, + setter_docstring, }) } } impl Property { - pub fn generate(&self, _cfg: &Config) -> Result { - todo!() + pub fn generate( + &self, + cfg: &Config, + scoped_function_idents: &[&Ident], + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Getter + output.extend(self.generate_getter(cfg, scoped_function_idents)?); + + // Setter (if mutable) + if self.is_mutable { + output.extend(self.generate_setter(cfg, scoped_function_idents)?); + } + + Ok(output) } -} -impl Property { - pub fn canonicalize(&mut self) { - todo!() + pub fn generate_getter( + &self, + cfg: &Config, + 
scoped_function_idents: &[&Ident], + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! { + #[doc = #docstring] + }); + } + } + + // Function + let function_ident: syn::Ident = { + let name = self.name.name(); + if let Ok(ident) = name.try_into() { + if scoped_function_idents.contains(&name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&name.as_py()) + { + let getter_name = Ident::from_py(&format!("get_{}", name.as_py())); + if scoped_function_idents.contains(&&getter_name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&getter_name.as_py()) + { + return Ok(proc_macro2::TokenStream::new()); + } else { + getter_name.try_into()? + } + } else { + ident + } + } else { + let getter_name = Ident::from_py(&format!("get_{}", name.as_py())); + if scoped_function_idents.contains(&&getter_name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&getter_name.as_py()) + { + return Ok(proc_macro2::TokenStream::new()); + } else { + getter_name.try_into()? + } + } + }; + let param_type = self + .annotation + .clone() + .into_rs_owned("", &HashSet::default()); + match &self.owner { + PropertyOwner::Module => { + let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); + let module_path = if self.name.len() > 1 { + &self.name[1..] + } else { + &[] + } + .iter() + .map(|ident| ident.as_py().to_owned()) + .collect_vec(); + + output.extend(quote::quote! 
{ + pub fn #function_ident<'py>( + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult<#param_type> { + py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #module_path))?)*.extract() + } + }); + } + PropertyOwner::Class => { + let param_name = self.name.name().as_py(); + + output.extend(quote::quote! { + pub fn #function_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult<#param_type> { + self.0.getattr(::pyo3::intern!(py, #param_name))? + .extract() + } + }); + } + } + + Ok(output) + } + + pub fn generate_setter( + &self, + _cfg: &Config, + scoped_function_idents: &[&Ident], + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Documentation + if let Some(docstring) = &self.setter_docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! { + #[doc = #docstring] + }); + } + + // Function + let function_ident: syn::Ident = { + let setter_name = Ident::from_py(&format!("set_{}", self.name.name().as_py())); + if scoped_function_idents.contains(&&setter_name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&setter_name.as_py()) + { + return Ok(proc_macro2::TokenStream::new()); + } else { + setter_name.try_into()? + } + }; + let param_name = self.name.name().as_py(); + let param_type = self + .annotation + .clone() + .into_rs_borrowed("", &HashSet::default()); + match &self.owner { + PropertyOwner::Module => { + let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); + let module_path = if self.name.len() > 1 { + &self.name[1..self.name.len() - 1] + } else { + &[] + } + .iter() + .map(|ident| ident.as_py().to_owned()) + .collect_vec(); + + output.extend(quote::quote! 
{ + pub fn #function_ident<'py>( + py: ::pyo3::marker::Python<'py>, + p_value: #param_type, + ) -> ::pyo3::PyResult<()> { + py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #module_path))?)*.setattr(::pyo3::intern!(py, #param_name), p_value) + } + }); + } + PropertyOwner::Class => { + output.extend(quote::quote! { + pub fn #function_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + p_value: #param_type, + ) -> ::pyo3::PyResult<()> { + self.0.setattr(::pyo3::intern!(py, #param_name), p_value) + } + }); + } + } + + Ok(output) } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum PropertyOwner { - Module(Path), - Class(Path), + Module, + Class, } diff --git a/pyo3_bindgen_engine/src/syntax/type_var.rs b/pyo3_bindgen_engine/src/syntax/type_var.rs index 00a90a0..16f9aa1 100644 --- a/pyo3_bindgen_engine/src/syntax/type_var.rs +++ b/pyo3_bindgen_engine/src/syntax/type_var.rs @@ -7,13 +7,16 @@ pub struct TypeVar { } impl TypeVar { - pub fn new(name: Path) -> Result { - Ok(Self { name }) + pub fn new(name: Path) -> Self { + Self { name } } } impl TypeVar { pub fn generate(&self, _cfg: &Config) -> Result { - todo!() + let typevar_ident: syn::Ident = self.name.name().try_into()?; + Ok(quote::quote! { + pub type #typevar_ident<'py> = &'py ::pyo3::types::PyAny; + }) } } diff --git a/pyo3_bindgen_engine/src/types/mod.rs b/pyo3_bindgen_engine/src/typing/mod.rs similarity index 97% rename from pyo3_bindgen_engine/src/types/mod.rs rename to pyo3_bindgen_engine/src/typing/mod.rs index 3fb888c..8c693b7 100644 --- a/pyo3_bindgen_engine/src/types/mod.rs +++ b/pyo3_bindgen_engine/src/typing/mod.rs @@ -1,8 +1,10 @@ //! Module for handling Rust, Python and `PyO3` types. -// TODO: Remove allow once impl is finished #![allow(unused)] +// TODO: Refactor typing + use itertools::Itertools; +use rustc_hash::FxHashSet as HashSet; use std::str::FromStr; /// Enum that maps Python types to Rust types. 
@@ -73,6 +75,7 @@ pub enum Type { #[cfg(not(PyPy))] PySuper, PyTraceback, + #[allow(clippy::enum_variant_names)] PyType, } @@ -81,7 +84,7 @@ impl TryFrom> for Type { fn try_from(value: Option<&pyo3::types::PyAny>) -> Result { Ok(match value { Some(t) => Self::try_from(t)?, - None => Self::PyNone, + None => Self::Unknown, }) } } @@ -439,7 +442,7 @@ impl Type { _ => { // Noop - processed as string below // eprintln!( - // "Warning: Unexpected type encountered: {value}\n \ + // "WARN: Unexpected type encountered: {value}\n \ // Bindings could be improved by handling the type here \ // Please report this as a bug. [scope: Type::from_typing()]", // ); @@ -522,11 +525,11 @@ impl Type { } #[must_use] - pub fn into_rs( + pub fn into_rs( self, owned: bool, module_name: &str, - all_types: &std::collections::HashSet, + all_types: &HashSet, ) -> proc_macro2::TokenStream { if owned { self.into_rs_owned(module_name, all_types) @@ -536,10 +539,10 @@ impl Type { } #[must_use] - pub fn into_rs_owned( + pub fn into_rs_owned( self, module_name: &str, - all_types: &std::collections::HashSet, + all_types: &HashSet, ) -> proc_macro2::TokenStream { match self { Self::PyAny => { @@ -682,7 +685,9 @@ impl Type { quote::quote! {&'py ::pyo3::types::PyDateTime} } Self::PyDelta => { - quote::quote! {::std::time::Duration} + // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet + // quote::quote! {::std::time::Duration} + quote::quote! {&'py ::pyo3::types::PyAny} } #[cfg(not(Py_LIMITED_API))] Self::PyTime => { @@ -739,10 +744,10 @@ impl Type { } #[must_use] - pub fn into_rs_borrowed( + pub fn into_rs_borrowed( self, module_name: &str, - all_types: &std::collections::HashSet, + all_types: &HashSet, ) -> proc_macro2::TokenStream { match self { Self::PyAny => { @@ -808,7 +813,7 @@ impl Type { if t.is_owned_hashable() { let t = t.into_rs_owned(module_name, all_types); quote::quote! { - &::std::collections::HashSet<#t> + &::HashSet<#t> } } else { quote::quote! 
{ @@ -826,7 +831,7 @@ impl Type { if t.is_owned_hashable() { let t = t.into_rs_owned(module_name, all_types); quote::quote! { - &::std::collections::HashSet<#t> + &::HashSet<#t> } } else { quote::quote! { @@ -884,7 +889,9 @@ impl Type { quote::quote! {&'py ::pyo3::types::PyDateTime} } Self::PyDelta => { - quote::quote! {::std::time::Duration} + // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet + // quote::quote! {::std::time::Duration} + quote::quote! {&'py ::pyo3::types::PyAny} } #[cfg(not(Py_LIMITED_API))] Self::PyTime => { @@ -940,10 +947,10 @@ impl Type { } } - fn try_into_module_path( + fn try_into_module_path( self, module_name: &str, - all_types: &std::collections::HashSet, + all_types: &HashSet, ) -> proc_macro2::TokenStream { let Self::Unhandled(value) = self else { unreachable!() @@ -969,8 +976,6 @@ impl Type { return quote::quote! {&'py ::pyo3::types::PyAny}; } - let value_name = module_member_full.split('.').last().unwrap(); - let n_common_ancestors = module_name .split('.') .zip(module_member_full.split('.')) @@ -1032,7 +1037,7 @@ impl Type { // Approach: Find the shallowest match that contains the value // TODO: Fix this! The matching might be wrong in many cases - let mut possible_matches = std::collections::HashSet::::default(); + let mut possible_matches = HashSet::default(); for i in 0..n_module_scopes { let module_member_scopes_end = module_scopes.clone().skip(i).join("."); all_types diff --git a/pyo3_bindgen_engine/src/utils/error.rs b/pyo3_bindgen_engine/src/utils/error.rs index d82cc2d..d803043 100644 --- a/pyo3_bindgen_engine/src/utils/error.rs +++ b/pyo3_bindgen_engine/src/utils/error.rs @@ -1,18 +1,10 @@ /// Error type for `pyo3_bindgen` operations. 
-#[derive(Debug, thiserror::Error)] +#[derive(thiserror::Error, Debug)] pub enum PyBindgenError { - #[error(transparent)] - PyError(#[from] pyo3::PyErr), - #[error("Failed to convert `pyo3::PyAny` to a more specific Python type: {0}")] - PyDowncastError(String), #[error(transparent)] IoError(#[from] std::io::Error), #[error(transparent)] + PyError(#[from] pyo3::PyErr), + #[error(transparent)] SynError(#[from] syn::Error), } - -impl<'py> From> for PyBindgenError { - fn from(value: pyo3::PyDowncastError) -> Self { - PyBindgenError::PyDowncastError(value.to_string()) - } -} diff --git a/pyo3_bindgen_engine/src/utils/mod.rs b/pyo3_bindgen_engine/src/utils/mod.rs index f708c24..b6398da 100644 --- a/pyo3_bindgen_engine/src/utils/mod.rs +++ b/pyo3_bindgen_engine/src/utils/mod.rs @@ -1,6 +1,5 @@ //! Various utilities. -pub mod build; pub mod error; -pub(crate) mod io; +pub mod io; pub mod result; diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index 620f3ad..55c6cf3 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ b/pyo3_bindgen_engine/tests/bindgen.rs @@ -1,9 +1,9 @@ macro_rules! test_bindgen { { $(#[$meta:meta])* - $test_name:ident $(,)? - $(py)?$(python)? $(:)? $code_py:literal $(,)? - $(rs)?$(rust)? $(:)? $code_rs:literal $(,)? + $test_name:ident $(,)? + $(py)?$(python)?$(:)? $code_py:literal $(,)? + $(rs)?$(rust)?$(:)? $code_rs:literal $(,)? } => { #[test] $(#[$meta])* @@ -13,13 +13,16 @@ macro_rules! test_bindgen { const CODE_RS: &str = indoc::indoc! 
{ $code_rs }; // Act - let bindings = pyo3_bindgen_engine::generate_bindings_from_str( - CODE_PY, - concat!("t_mod_", stringify!($test_name)), - ) - .unwrap(); + let bindings = pyo3_bindgen_engine::Codegen::default() + .module_from_str(CODE_PY, concat!("t_mod_", stringify!($test_name))) + .unwrap() + .generate() + .unwrap(); // Assert + fn format_code(input: &str) -> String { + prettyplease::unparse(&syn::parse_str(input).unwrap()) + } let generated_code = format_code(&bindings.to_string()); let target_code = format_code(CODE_RS); assert_eq!( @@ -30,19 +33,14 @@ macro_rules! test_bindgen { }; } -fn format_code(input: &str) -> String { - prettyplease::unparse(&syn::parse_str(input).unwrap()) -} - test_bindgen! { test_bindgen_attribute - py:r#" + py: r#" t_const_float: float = 0.42 "# - rs:r#" - /// + rs: r#" #[allow( clippy::all, clippy::nursery, @@ -52,21 +50,18 @@ test_bindgen! { non_upper_case_globals, unused )] - mod t_mod_test_bindgen_attribute { - ///Getter for the `t_const_float` attribute + pub mod t_mod_test_bindgen_attribute { pub fn t_const_float<'py>(py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { py.import(::pyo3::intern!(py, "t_mod_test_bindgen_attribute"))? .getattr(::pyo3::intern!(py, "t_const_float"))? .extract() } - ///Setter for the `t_const_float` attribute pub fn set_t_const_float<'py>( py: ::pyo3::marker::Python<'py>, - value: f64, + p_value: f64, ) -> ::pyo3::PyResult<()> { py.import(::pyo3::intern!(py, "t_mod_test_bindgen_attribute"))? - .setattr(::pyo3::intern!(py, "t_const_float"), value)?; - Ok(()) + .setattr(::pyo3::intern!(py, "t_const_float"), p_value) } } "# @@ -75,39 +70,40 @@ test_bindgen! { test_bindgen! { test_bindgen_function - py:r#" + py: r#" def t_fn(t_arg1: str) -> int: """t_docs""" ... 
"# - rs:r#" - /// + rs: r#" #[allow( clippy::all, - clippy::nursery, - clippy::pedantic, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused )] - mod t_mod_test_bindgen_function { - ///t_docs + pub mod t_mod_test_bindgen_function { + /// t_docs pub fn t_fn<'py>( py: ::pyo3::marker::Python<'py>, - t_arg1: &str, + p_t_arg1: &str, ) -> ::pyo3::PyResult { - let __internal_args = (); - let __internal_kwargs = ::pyo3::types::PyDict::new(py); - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_function"))? - .call_method( - ::pyo3::intern!(py, "t_fn"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() + ::pyo3::FromPyObject::extract( + py + .import(::pyo3::intern!(py, "t_mod_test_bindgen_function"))? + .getattr(::pyo3::intern!(py, "t_fn"))? + .call_method1( + ::pyo3::intern!(py, "t_fn"), + ::pyo3::types::PyTuple::new( + py, + [::pyo3::ToPyObject::to_object(&p_t_arg1, py)], + ), + )?, + ) } } "# @@ -116,7 +112,7 @@ test_bindgen! { test_bindgen! { test_bindgen_class - py:r#" + py: r#" from typing import Dict, Optional class t_class: """t_docs""" @@ -134,8 +130,7 @@ test_bindgen! { ... "# - rs:r#" - /// + rs: r#" #[allow( clippy::all, clippy::nursery, @@ -145,8 +140,8 @@ test_bindgen! { non_upper_case_globals, unused )] - mod t_mod_test_bindgen_class { - ///t_docs + pub mod t_mod_test_bindgen_class { + /// t_docs #[repr(transparent)] pub struct t_class(::pyo3::PyAny); ::pyo3::pyobject_native_type_named!(t_class); @@ -158,64 +153,58 @@ test_bindgen! 
{ ::pyo3::pyobject_native_type_extract!(t_class); #[automatically_derived] impl t_class { - ///t_docs_init + /// t_docs_init pub fn new<'py>( py: ::pyo3::marker::Python<'py>, - t_arg1: &str, - t_arg2: ::std::option::Option, + p_t_arg1: &str, + p_t_arg2: ::std::option::Option, ) -> ::pyo3::PyResult<&'py Self> { - let __internal_args = (); - let __internal_kwargs = ::pyo3::types::PyDict::new(py); - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg2"), t_arg2)?; - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? - .getattr(::pyo3::intern!(py, "t_class"))? - .call(__internal_args, Some(__internal_kwargs))? - .extract() - } - ///Call self as a function. - pub fn call<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - args: impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>, - kwargs: &'py ::pyo3::types::PyDict, - ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - let __internal_args = args; - self.call_method1(::pyo3::intern!(py, "__call__"), __internal_args)? - .extract() + ::pyo3::FromPyObject::extract( + py + .import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? + .getattr(::pyo3::intern!(py, "t_class"))? 
+ .call1( + ::pyo3::types::PyTuple::new( + py, + [ + ::pyo3::ToPyObject::to_object(&p_t_arg1, py), + ::pyo3::ToPyObject::to_object(&p_t_arg2, py), + ], + ), + )?, + ) } - ///t_docs_method + /// t_docs_method pub fn t_method<'py>( &'py self, py: ::pyo3::marker::Python<'py>, - t_arg1: &::std::collections::HashMap<::std::string::String, i64>, - kwargs: &'py ::pyo3::types::PyDict, + p_t_arg1: &::std::collections::HashMap<::std::string::String, i64>, + p_kwargs: &'py ::pyo3::types::PyDict, ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - let __internal_args = (); - let __internal_kwargs = kwargs; - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - self.call_method( - ::pyo3::intern!(py, "t_method"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() + ::pyo3::FromPyObject::extract( + self + .0 + .call( + ::pyo3::types::PyTuple::new( + py, + [::pyo3::ToPyObject::to_object(&p_t_arg1, py)], + ), + Some(p_kwargs), + )?, + ) } - ///Getter for the `t_prop` attribute pub fn t_prop<'py>( &'py self, py: ::pyo3::marker::Python<'py>, ) -> ::pyo3::PyResult { - self.getattr(::pyo3::intern!(py, "t_prop"))?.extract() + self.0.getattr(::pyo3::intern!(py, "t_prop"))?.extract() } - ///Setter for the `t_prop` attribute pub fn set_t_prop<'py>( &'py self, py: ::pyo3::marker::Python<'py>, - value: i64, + p_value: i64, ) -> ::pyo3::PyResult<()> { - self.setattr(::pyo3::intern!(py, "t_prop"), value)?; - Ok(()) + self.0.setattr(::pyo3::intern!(py, "t_prop"), p_value) } } } diff --git a/pyo3_bindgen_macros/src/lib.rs b/pyo3_bindgen_macros/src/lib.rs index 5fad22a..bd589db 100644 --- a/pyo3_bindgen_macros/src/lib.rs +++ b/pyo3_bindgen_macros/src/lib.rs @@ -11,21 +11,30 @@ mod parser; /// # Example /// /// ```ignore -/// // use pyo3_bindgen::import_python; -/// use pyo3_bindgen_macros::import_python; -/// /// import_python!("sys"); /// pub use sys::*; /// +/// // The top-level package is always included in the generated bindings for consistency +/// 
import_python!("mod.submod.subsubmod"); +/// pub use mod::submod::subsubmod::*; +/// +/// // The actual name of the package is always used, regardless of how it is aliased /// import_python!("os.path"); -/// pub use path::*; +/// pub use posixpath::*; /// ``` #[proc_macro] pub fn import_python(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let parser::Args { module_name_py } = syn::parse_macro_input!(input as parser::Args); + let parser::Args { module_name } = syn::parse_macro_input!(input as parser::Args); // Generate the bindings - pyo3_bindgen_engine::generate_bindings(&module_name_py) - .unwrap_or_else(|_| panic!("Failed to generate bindings for module: {module_name_py}")) + pyo3_bindgen_engine::Codegen::default() + .module_name(&module_name) + .unwrap_or_else(|err| { + panic!("Failed to parse the content of '{module_name}' Python module:\n{err}") + }) + .generate() + .unwrap_or_else(|err| { + panic!("Failed to generate bindings for '{module_name}' Python module:\n{err}") + }) .into() } diff --git a/pyo3_bindgen_macros/src/parser.rs b/pyo3_bindgen_macros/src/parser.rs index 1110419..5ed890e 100644 --- a/pyo3_bindgen_macros/src/parser.rs +++ b/pyo3_bindgen_macros/src/parser.rs @@ -5,16 +5,16 @@ use syn::{ LitStr, }; -/// Arguments for the `import_python` procedural macro +/// Arguments for the `import_python` procedural macro. pub struct Args { - /// Name of the Python module to generate bindings for - pub module_name_py: String, + /// Name of the Python module for which to generate the bindings. 
+ pub module_name: String, } impl Parse for Args { fn parse(input: ParseStream) -> Result { // Python module name might contain dots, so it is parsed as a string literal - let module_name_py = input.parse::()?.value(); - Ok(Args { module_name_py }) + let module_name = input.parse::()?.value(); + Ok(Args { module_name }) } } From 49445df829a04fdfd80281d833f72a2f5f1e7459 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Mon, 4 Mar 2024 22:13:44 +0100 Subject: [PATCH 11/13] Refactoring: Add type mapping Signed-off-by: Andrej Orsula --- .github/workflows/dependabot.yml | 18 +- README.md | 8 +- pyo3_bindgen_engine/src/codegen.rs | 41 +- pyo3_bindgen_engine/src/config.rs | 12 + pyo3_bindgen_engine/src/syntax/class.rs | 24 +- pyo3_bindgen_engine/src/syntax/common/mod.rs | 6 +- pyo3_bindgen_engine/src/syntax/common/path.rs | 70 +- pyo3_bindgen_engine/src/syntax/function.rs | 156 ++- pyo3_bindgen_engine/src/syntax/import.rs | 34 +- pyo3_bindgen_engine/src/syntax/mod.rs | 14 +- pyo3_bindgen_engine/src/syntax/module.rs | 46 +- pyo3_bindgen_engine/src/syntax/property.rs | 45 +- pyo3_bindgen_engine/src/syntax/type_var.rs | 4 +- pyo3_bindgen_engine/src/typing/from_py.rs | 501 ++++++++ pyo3_bindgen_engine/src/typing/into_rs.rs | 208 ++++ pyo3_bindgen_engine/src/typing/mod.rs | 1049 +---------------- pyo3_bindgen_engine/src/utils/error.rs | 8 + pyo3_bindgen_engine/tests/bindgen.rs | 80 +- 18 files changed, 1053 insertions(+), 1271 deletions(-) create mode 100644 pyo3_bindgen_engine/src/typing/from_py.rs create mode 100644 pyo3_bindgen_engine/src/typing/into_rs.rs diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml index 822ffc7..00b8f4b 100644 --- a/.github/workflows/dependabot.yml +++ b/.github/workflows/dependabot.yml @@ -1,15 +1,17 @@ -name: Dependabot auto-merge +name: Dependabot automation on: pull_request: + check_run: + types: [completed] permissions: contents: write pull-requests: write jobs: - dependabot: + approve: runs-on: ubuntu-latest - 
if: github.actor == 'dependabot[bot]' + if: github.actor == 'dependabot[bot]' && github.event_name == 'pull_request' steps: - name: Fetch metadata id: metadata @@ -21,6 +23,16 @@ jobs: PR_URL: ${{github.event.pull_request.html_url}} GH_TOKEN: ${{secrets.GITHUB_TOKEN}} run: gh pr review --approve "$PR_URL" + + auto_merge: + runs-on: ubuntu-latest + if: github.actor == 'dependabot[bot]' && github.event_name == 'check_run' + steps: + - name: Fetch metadata + id: metadata + uses: dependabot/fetch-metadata@v1 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" - name: Enable auto-merge if: steps.metadata.outputs.update-type == 'version-update:semver-patch' env: diff --git a/README.md b/README.md index 8be2fde..9adb71f 100644 --- a/README.md +++ b/README.md @@ -159,10 +159,10 @@ This project is in early development, and as such, the API of the generated bind - Not all Python types are mapped to their Rust equivalents yet. For this reason, some additional typecasting might be currently required when using the generated bindings (e.g. `let typed_value: py_module::MyClass = get_value()?.extract()?;`). - The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. Here are some preliminary results for version `0.3.0` (measured: parsing IO & codegen | not measured: compilation of the generated bindings, which takes much longer): - - `sys`: 1.49 ms (1.1k total LoC) - - `os`: 10.72 ms (7.7k total LoC) - - `numpy`: 1.01 s (563k total LoC) - - `torch`: 3.54 s (1.23M total LoC) + - `sys`: 1.17 ms (0.56k total LoC) + - `os`: 7.03 ms (3.30k total LoC) + - `numpy`: 819 ms (242k total LoC) + - `torch`: 6.42 s (1.02M total LoC) - The generation of bindings should never panic as long as the target Python module can be successfully imported. 
If it does, please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. - The generated bindings should always be compilable and usable in Rust. If you encounter any issues, consider manually fixing the problematic parts of the bindings and please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. - However, the generated bindings are based on the introspection of the target Python module. Therefore, the correctness of the generated bindings is directly dependent on the quality of the type annotations and docstrings in the target Python module. Ideally, the generated bindings should be considered unsafe and serve as a starting point for safe and idiomatic Rust APIs. diff --git a/pyo3_bindgen_engine/src/codegen.rs b/pyo3_bindgen_engine/src/codegen.rs index a71f448..22231fb 100644 --- a/pyo3_bindgen_engine/src/codegen.rs +++ b/pyo3_bindgen_engine/src/codegen.rs @@ -111,7 +111,7 @@ impl Codegen { // Generate the bindings for all modules self.modules .iter() - .map(|module| module.generate(&self.cfg, true, &self.modules)) + .map(|module| module.generate(&self.cfg, &self.modules, &self.get_all_types())) .collect::>() } @@ -121,19 +121,10 @@ impl Codegen { Ok(std::fs::write(output_path, self.generate()?.to_string())?) } - // pub fn all_types_in_module(&self, module_path: &Path) -> Vec { - // // self.modules - // // .iter() - // // .find(|module| module.name == *module_path) - // // .map(|module| module.retrieve_types()) - // // .unwrap_or_default() - // todo!() - // } - fn parse_dependencies(&mut self) -> Result<()> { fn get_imports_recursive(input: &[Module]) -> Vec { let mut imports = Vec::new(); - input.iter().for_each(|module| { + for module in input { imports.extend( module .imports @@ -142,14 +133,14 @@ impl Codegen { .cloned(), ); imports.extend(get_imports_recursive(&module.submodules)); - }); + } imports } // Get a unique list of all external imports (these could be modules, classes, functions, etc.) 
let external_imports = get_imports_recursive(&self.modules) .into_iter() - .filter(|import| import.is_external()) + .filter(super::syntax::import::Import::is_external) .map(|import| import.origin.clone()) .unique() .collect_vec(); @@ -169,7 +160,7 @@ impl Codegen { .as_str(), ) .unwrap(); - for path in import[1..].iter() { + for path in &import[1..] { if let Ok(attr) = last_module.getattr(path.as_py()) { if let Ok(module) = attr.extract::<&pyo3::types::PyModule>() { last_module = module; @@ -331,4 +322,26 @@ impl Codegen { self.modules.drain(range.start + 1..range.end); }); } + + fn get_all_types(&self) -> Vec { + fn get_types_recursive(input: &[Module]) -> Vec { + let mut types = Vec::new(); + for module in input { + types.extend(module.classes.iter().map(|class| class.name.clone())); + types.extend( + module + .type_vars + .iter() + .map(|type_var| type_var.name.clone()), + ); + types.extend(get_types_recursive(&module.submodules)); + } + types + } + + get_types_recursive(&self.modules) + .into_iter() + .unique() + .collect() + } } diff --git a/pyo3_bindgen_engine/src/config.rs b/pyo3_bindgen_engine/src/config.rs index 4e12da6..038e43c 100644 --- a/pyo3_bindgen_engine/src/config.rs +++ b/pyo3_bindgen_engine/src/config.rs @@ -2,6 +2,16 @@ use crate::syntax::{Ident, Path}; /// Array of forbidden attribute names that are reserved for internal use by derived traits pub const FORBIDDEN_FUNCTION_NAMES: [&str; 4] = ["get_type", "obj", "repr", "str"]; +/// Array of forbidden type names +pub const FORBIDDEN_TYPE_NAMES: [&str; 6] = [ + "_collections._tuplegetter", + "AsyncState", + "getset_descriptor", + "member_descriptor", + "method_descriptor", + "property", +]; + /// Default array of blocklisted attribute names const DEFAULT_BLOCKLIST_ATTRIBUTE_NAMES: [&str; 4] = ["builtins", "testing", "tests", "test"]; @@ -41,6 +51,8 @@ pub struct Config { /// Flag that determines whether to generate code for all dependencies of the target modules. 
/// The list of dependent modules is derived from the imports of the target modules. + /// + /// Warning: This feature is not fully supported yet. #[builder(default = false)] pub(crate) generate_dependencies: bool, diff --git a/pyo3_bindgen_engine/src/syntax/class.rs b/pyo3_bindgen_engine/src/syntax/class.rs index 7844f8c..95ff3fe 100644 --- a/pyo3_bindgen_engine/src/syntax/class.rs +++ b/pyo3_bindgen_engine/src/syntax/class.rs @@ -1,17 +1,17 @@ -use itertools::Itertools; - use super::{ AttributeVariant, Function, FunctionType, Ident, MethodType, Path, Property, PropertyOwner, }; use crate::{Config, Result}; +use itertools::Itertools; +use rustc_hash::FxHashMap as HashMap; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Class { pub name: Path, - // pub subclasses: Vec, - pub methods: Vec, - pub properties: Vec, - pub docstring: Option, + // subclasses: Vec, + methods: Vec, + properties: Vec, + docstring: Option, } impl Class { @@ -135,10 +135,12 @@ impl Class { docstring, }) } -} -impl Class { - pub fn generate(&self, cfg: &Config) -> Result { + pub fn generate( + &self, + cfg: &Config, + local_types: &HashMap, + ) -> Result { let mut output = proc_macro2::TokenStream::new(); // Documentation @@ -209,7 +211,7 @@ impl Class { struct_impl.extend( self.methods .iter() - .map(|method| method.generate(cfg, &scoped_function_idents)) + .map(|method| method.generate(cfg, &scoped_function_idents, local_types)) .collect::>()?, ); // Properties @@ -241,7 +243,7 @@ impl Class { struct_impl.extend( self.properties .iter() - .map(|property| property.generate(cfg, &scoped_function_idents)) + .map(|property| property.generate(cfg, &scoped_function_idents, local_types)) .collect::>()?, ); } diff --git a/pyo3_bindgen_engine/src/syntax/common/mod.rs b/pyo3_bindgen_engine/src/syntax/common/mod.rs index c225c84..42a109c 100644 --- a/pyo3_bindgen_engine/src/syntax/common/mod.rs +++ b/pyo3_bindgen_engine/src/syntax/common/mod.rs @@ -1,6 +1,6 @@ -mod attribute_variant; -mod ident; 
-mod path; +pub(crate) mod attribute_variant; +pub(crate) mod ident; +pub(crate) mod path; pub use attribute_variant::AttributeVariant; pub use ident::Ident; diff --git a/pyo3_bindgen_engine/src/syntax/common/path.rs b/pyo3_bindgen_engine/src/syntax/common/path.rs index e8b7417..2b4f502 100644 --- a/pyo3_bindgen_engine/src/syntax/common/path.rs +++ b/pyo3_bindgen_engine/src/syntax/common/path.rs @@ -104,13 +104,13 @@ impl Path { } pub fn root(&self) -> Option { - if !self.segments.is_empty() { + if self.segments.is_empty() { + None + } else { Some(Self { leading_colon: self.leading_colon, segments: vec![self.segments[0].clone()], }) - } else { - None } } @@ -129,11 +129,18 @@ impl Path { /// Use self if they start at the same point. /// Use super to go up the hierarchy. /// If they do not share any common prefix, use super until the nothing is reached - pub fn relative_to(&self, target: &Path) -> Self { + pub fn relative_to(&self, target: &Path, fully_unambiguous: bool) -> Self { if self == target { - return Path { - leading_colon: false, - segments: vec![Ident::from_rs("super"), target.name().clone()], + return if fully_unambiguous { + Path { + leading_colon: false, + segments: vec![Ident::from_rs("super"), target.name().clone()], + } + } else { + Path { + leading_colon: false, + segments: vec![Ident::from_rs("self")], + } }; } @@ -146,22 +153,39 @@ impl Path { .count(); // Determine the relative path - let mut relative_segments = match common_prefix_length { - n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) - .take(self.segments.len() - n) - .chain(target.segments.iter().skip(n).cloned()) - .collect_vec(), - n if n == self.segments.len() => std::iter::once(Ident::from_rs("self")) - .chain(target.segments.iter().skip(n).cloned()) - .collect_vec(), - _ => { - unreachable!() + let mut relative_segments = if fully_unambiguous { + match common_prefix_length { + n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) + 
.take(self.segments.len() - n) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + n if n == self.segments.len() => std::iter::once(Ident::from_rs("self")) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + _ => { + unreachable!() + } + } + } else { + match common_prefix_length { + n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) + .take(self.segments.len() - n) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + n if n == self.segments.len() => { + target.segments.iter().skip(n).cloned().collect_vec() + } + _ => { + unreachable!() + } } }; - // If the relative segment ends with "super", fully specify the path by adding another "super" and the name of the target - if relative_segments.last().map(Ident::as_rs) == Some("super") { - relative_segments.extend([Ident::from_rs("super"), target.name().clone()]); + if fully_unambiguous { + // If the relative segment ends with "super", fully specify the path by adding another "super" and the name of the target + if relative_segments.last().map(Ident::as_rs) == Some("super") { + relative_segments.extend([Ident::from_rs("super"), target.name().clone()]); + } } Path { @@ -222,6 +246,12 @@ impl std::ops::Deref for Path { } } +impl std::ops::DerefMut for Path { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.segments + } +} + impl std::fmt::Display for Path { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.to_py()) diff --git a/pyo3_bindgen_engine/src/syntax/function.rs b/pyo3_bindgen_engine/src/syntax/function.rs index 92f9e41..2ad3257 100644 --- a/pyo3_bindgen_engine/src/syntax/function.rs +++ b/pyo3_bindgen_engine/src/syntax/function.rs @@ -2,15 +2,32 @@ use super::{Ident, Path}; use crate::{typing::Type, Config, Result}; use itertools::Itertools; use pyo3::{types::IntoPyDict, ToPyObject}; -use rustc_hash::FxHashSet as HashSet; +use rustc_hash::FxHashMap as HashMap; #[derive(Debug, Clone, 
PartialEq, Eq, Hash)] pub struct Function { pub name: Path, pub typ: FunctionType, - pub parameters: Vec, - pub return_annotation: Type, - pub docstring: Option, + parameters: Vec, + return_annotation: Type, + docstring: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum FunctionType { + Function, + Method { class_path: Path, typ: MethodType }, + Closure, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum MethodType { + InstanceMethod, + ClassMethod, + StaticMethod, + Constructor, + Callable, + Unknown, } impl Function { @@ -53,8 +70,8 @@ impl Function { let annotation = match kind { ParameterKind::VarPositional => Type::PyTuple(vec![Type::Unknown]), ParameterKind::VarKeyword => Type::PyDict { - t_key: Box::new(Type::Unknown), - t_value: Box::new(Type::Unknown), + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), }, _ => { let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; @@ -199,8 +216,8 @@ impl Function { name: Ident::from_rs("kwargs"), kind: ParameterKind::VarKeyword, annotation: Type::PyDict { - t_key: Box::new(Type::Unknown), - t_value: Box::new(Type::Unknown), + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), }, default: None, }, @@ -257,8 +274,8 @@ impl Function { name: Ident::from_rs("kwargs"), kind: ParameterKind::VarKeyword, annotation: Type::PyDict { - t_key: Box::new(Type::Unknown), - t_value: Box::new(Type::Unknown), + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), }, default: None, }, @@ -287,8 +304,8 @@ impl Function { name: Ident::from_rs("kwargs"), kind: ParameterKind::VarKeyword, annotation: Type::PyDict { - t_key: Box::new(Type::Unknown), - t_value: Box::new(Type::Unknown), + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), }, default: None, }, @@ -298,13 +315,12 @@ impl Function { }) } } -} -impl Function { pub fn generate( &self, cfg: &Config, scoped_function_idents: &[&Ident], + local_types: 
&HashMap, ) -> Result { let mut output = proc_macro2::TokenStream::new(); @@ -361,19 +377,9 @@ impl Function { let param_types: Vec = self .parameters .iter() - .map(|param| { - Result::Ok( - param - .annotation - .clone() - .into_rs_borrowed("", &HashSet::default()), - ) - }) + .map(|param| Result::Ok(param.annotation.clone().into_rs_borrowed(local_types))) .collect::>>()?; - let return_type = self - .return_annotation - .clone() - .into_rs_owned("", &HashSet::default()); + let return_type = self.return_annotation.clone().into_rs_owned(local_types); output.extend(match &self.typ { FunctionType::Method { typ: MethodType::InstanceMethod, @@ -452,8 +458,8 @@ impl Function { let function_dispatcher = match &self.typ { FunctionType::Function | FunctionType::Closure => { let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); - let module_path = if self.name.len() > 1 { - &self.name[1..] + let module_path = if self.name.len() > 2 { + &self.name[1..self.name.len() - 1] } else { &[] } @@ -489,7 +495,10 @@ impl Function { self.0 } } - _ => { + FunctionType::Method { + typ: MethodType::Unknown, + .. + } => { eprintln!( "WARN: Method '{}' has an unknown type. Bindings will not be generated.", self.name @@ -603,39 +612,37 @@ impl Function { } }; // Function body: call - let call = match &self.typ { - FunctionType::Method { - typ: MethodType::InstanceMethod | MethodType::Constructor | MethodType::Callable, - .. - } => { - if has_keyword_args { - quote::quote! { - call(#positional_args, Some(#keyword_args)) - } - } else if has_positional_args { - quote::quote! { - call1(#positional_args) - } - } else { - quote::quote! { - call0() - } + let call = if let FunctionType::Method { + typ: MethodType::Constructor | MethodType::Callable, + .. + } = &self.typ + { + if has_keyword_args { + quote::quote! { + call(#positional_args, Some(#keyword_args)) + } + } else if has_positional_args { + quote::quote! { + call1(#positional_args) + } + } else { + quote::quote! 
{ + call0() } } - _ => { - let method_name = self.name.name().as_py(); - if has_keyword_args { - quote::quote! { - call_method(::pyo3::intern!(py, #method_name), #positional_args, Some(#keyword_args)) - } - } else if has_positional_args { - quote::quote! { - call_method1(::pyo3::intern!(py, #method_name), #positional_args) - } - } else { - quote::quote! { - call_method0(::pyo3::intern!(py, #method_name)) - } + } else { + let method_name = self.name.name().as_py(); + if has_keyword_args { + quote::quote! { + call_method(::pyo3::intern!(py, #method_name), #positional_args, Some(#keyword_args)) + } + } else if has_positional_args { + quote::quote! { + call_method1(::pyo3::intern!(py, #method_name), #positional_args) + } + } else { + quote::quote! { + call_method0(::pyo3::intern!(py, #method_name)) } } }; @@ -653,29 +660,12 @@ impl Function { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum FunctionType { - Function, - Method { class_path: Path, typ: MethodType }, - Closure, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum MethodType { - InstanceMethod, - ClassMethod, - StaticMethod, - Constructor, - Callable, - Unknown, -} - #[derive(Debug, Clone)] -pub struct Parameter { - pub name: Ident, - pub kind: ParameterKind, - pub annotation: Type, - pub default: Option>, +struct Parameter { + name: Ident, + kind: ParameterKind, + annotation: Type, + default: Option>, } impl PartialEq for Parameter { @@ -699,7 +689,7 @@ impl std::hash::Hash for Parameter { } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ParameterKind { +enum ParameterKind { PositionalOnly, PositionalOrKeyword, VarPositional, diff --git a/pyo3_bindgen_engine/src/syntax/import.rs b/pyo3_bindgen_engine/src/syntax/import.rs index 5376c06..a3e3b5b 100644 --- a/pyo3_bindgen_engine/src/syntax/import.rs +++ b/pyo3_bindgen_engine/src/syntax/import.rs @@ -9,22 +9,22 @@ pub struct Import { } impl Import { - pub fn new(origin: Path, target: Path) -> Result { + pub fn new(origin: 
Path, target: Path) -> Self { let import_type = ImportType::from_paths(&origin, &target); - Ok(Self { + Self { origin, target, import_type, - }) + } } pub fn is_external(&self) -> bool { self.import_type == ImportType::ExternalImport } - pub fn generate(&self, cfg: &Config) -> Result { - // Skip external imports if their generation is disabled - if !cfg.generate_dependencies && self.import_type == ImportType::ExternalImport { + pub fn generate(&self, _cfg: &Config) -> Result { + // For now, we only generate imports for submodule reexports + if self.import_type != ImportType::SubmoduleReexport { return Ok(proc_macro2::TokenStream::new()); } @@ -35,8 +35,10 @@ impl Import { // Determine the visibility of the import based on its type let visibility = match self.import_type { - ImportType::ExternalImport => proc_macro2::TokenStream::new(), - ImportType::Reexport | ImportType::ScopedReexport => quote::quote! { pub }, + ImportType::ExternalImport | ImportType::PackageReexport => { + proc_macro2::TokenStream::new() + } + ImportType::SubmoduleReexport => quote::quote! { pub }, }; // Generate the path to the target module @@ -44,15 +46,15 @@ impl Import { .target .parent() .unwrap_or_default() - .relative_to(&self.origin) + .relative_to(&self.origin, true) .try_into(); if let Ok(relative_path) = relative_path { // Use alias for the target module if it has a different name than the last segment of its path - let maybe_alias = if self.origin.name() != self.target.name() { + let maybe_alias = if self.origin.name() == self.target.name() { + proc_macro2::TokenStream::new() + } else { let alias: syn::Ident = self.target.name().try_into()?; quote::quote! { as #alias } - } else { - proc_macro2::TokenStream::new() }; Ok(quote::quote! 
{ @@ -67,8 +69,8 @@ impl Import { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ImportType { ExternalImport, - Reexport, - ScopedReexport, + PackageReexport, + SubmoduleReexport, } impl ImportType { @@ -82,8 +84,8 @@ impl ImportType { .is_some_and(|parent_module| origin.starts_with(&parent_module)); match (is_package_reexport, is_submodule_reexport) { (false, false) => Self::ExternalImport, - (true, false) => Self::Reexport, - (true, true) => Self::ScopedReexport, + (true, false) => Self::PackageReexport, + (true, true) => Self::SubmoduleReexport, _ => unreachable!(), } } diff --git a/pyo3_bindgen_engine/src/syntax/mod.rs b/pyo3_bindgen_engine/src/syntax/mod.rs index 08cc875..2ad8f1e 100644 --- a/pyo3_bindgen_engine/src/syntax/mod.rs +++ b/pyo3_bindgen_engine/src/syntax/mod.rs @@ -1,10 +1,10 @@ -mod class; -mod common; -mod function; -mod import; -mod module; -mod property; -mod type_var; +pub(crate) mod class; +pub(crate) mod common; +pub(crate) mod function; +pub(crate) mod import; +pub(crate) mod module; +pub(crate) mod property; +pub(crate) mod type_var; pub use class::Class; pub use common::{AttributeVariant, Ident, Path}; diff --git a/pyo3_bindgen_engine/src/syntax/module.rs b/pyo3_bindgen_engine/src/syntax/module.rs index ac06f8b..09ea250 100644 --- a/pyo3_bindgen_engine/src/syntax/module.rs +++ b/pyo3_bindgen_engine/src/syntax/module.rs @@ -138,7 +138,7 @@ impl Module { }); if is_origin_attr_allowed { - let import = Import::new(origin, attr_name_full)?; + let import = Import::new(origin, attr_name_full); imports.push(import); } } @@ -226,12 +226,13 @@ impl Module { pub fn generate( &self, cfg: &Config, - is_top_level: bool, top_level_modules: &[Self], + all_types: &[Path], ) -> Result { let mut output = proc_macro2::TokenStream::new(); // Extra configuration for top-level modules + let is_top_level = top_level_modules.contains(self); if is_top_level { output.extend(quote::quote! 
{ #[allow( @@ -268,6 +269,39 @@ impl Module { .map(|function| function.name.name()) .collect::>(); + // Get all local types mapped to the full path + let local_types = all_types + .iter() + .cloned() + .map(|path| { + let relative_path = self.name.relative_to(&path, false); + (path, relative_path) + }) + .chain(self.imports.iter().flat_map(|import| { + all_types + .iter() + .filter(|&path| path.starts_with(&import.origin)) + .cloned() + .map(|path| { + let imported_path = { + if let Some(stripped_path) = path + .to_py() + .strip_prefix(&format!("{}.", import.origin.to_py())) + { + let mut path = Path::from_py(stripped_path); + // Overwrite the first segment with the target name to support aliasing + path[0] = import.target.name().to_owned(); + path + } else { + import.target.name().to_owned().into() + } + }; + let relative_path = self.name.relative_to(&path, false); + (imported_path, relative_path) + }) + })) + .collect(); + // Generate the module content let mut module_content = proc_macro2::TokenStream::new(); // Imports @@ -302,7 +336,7 @@ impl Module { module_content.extend( self.classes .iter() - .map(|class| class.generate(cfg)) + .map(|class| class.generate(cfg, &local_types)) .collect::>()?, ); } @@ -311,7 +345,7 @@ impl Module { module_content.extend( self.functions .iter() - .map(|function| function.generate(cfg, &scoped_function_idents)) + .map(|function| function.generate(cfg, &scoped_function_idents, &local_types)) .collect::>()?, ); } @@ -320,7 +354,7 @@ impl Module { module_content.extend( self.properties .iter() - .map(|property| property.generate(cfg, &scoped_function_idents)) + .map(|property| property.generate(cfg, &scoped_function_idents, &local_types)) .collect::>()?, ); } @@ -329,7 +363,7 @@ impl Module { module_content.extend( self.submodules .iter() - .map(|module| module.generate(cfg, false, top_level_modules)) + .map(|module| module.generate(cfg, top_level_modules, all_types)) .collect::>()?, ); } diff --git 
a/pyo3_bindgen_engine/src/syntax/property.rs b/pyo3_bindgen_engine/src/syntax/property.rs index 28a2384..e709530 100644 --- a/pyo3_bindgen_engine/src/syntax/property.rs +++ b/pyo3_bindgen_engine/src/syntax/property.rs @@ -1,17 +1,23 @@ use super::{Ident, Path}; use crate::{typing::Type, Config, Result}; use itertools::Itertools; -use rustc_hash::FxHashSet as HashSet; +use rustc_hash::FxHashMap as HashMap; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Property { pub name: Path, - pub owner: PropertyOwner, - pub is_mutable: bool, - pub annotation: Type, - pub setter_annotation: Type, - pub docstring: Option, - pub setter_docstring: Option, + owner: PropertyOwner, + is_mutable: bool, + annotation: Type, + setter_annotation: Type, + docstring: Option, + setter_docstring: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PropertyOwner { + Module, + Class, } impl Property { @@ -138,22 +144,21 @@ impl Property { setter_docstring, }) } -} -impl Property { pub fn generate( &self, cfg: &Config, scoped_function_idents: &[&Ident], + local_types: &HashMap, ) -> Result { let mut output = proc_macro2::TokenStream::new(); // Getter - output.extend(self.generate_getter(cfg, scoped_function_idents)?); + output.extend(self.generate_getter(cfg, scoped_function_idents, local_types)?); // Setter (if mutable) if self.is_mutable { - output.extend(self.generate_setter(cfg, scoped_function_idents)?); + output.extend(self.generate_setter(cfg, scoped_function_idents, local_types)?); } Ok(output) @@ -163,6 +168,7 @@ impl Property { &self, cfg: &Config, scoped_function_idents: &[&Ident], + local_types: &HashMap, ) -> Result { let mut output = proc_macro2::TokenStream::new(); @@ -210,10 +216,7 @@ impl Property { } } }; - let param_type = self - .annotation - .clone() - .into_rs_owned("", &HashSet::default()); + let param_type = self.annotation.clone().into_rs_owned(local_types); match &self.owner { PropertyOwner::Module => { let package = 
self.name.root().unwrap_or_else(|| unreachable!()).to_py(); @@ -256,6 +259,7 @@ impl Property { &self, _cfg: &Config, scoped_function_idents: &[&Ident], + local_types: &HashMap, ) -> Result { let mut output = proc_macro2::TokenStream::new(); @@ -284,10 +288,7 @@ impl Property { } }; let param_name = self.name.name().as_py(); - let param_type = self - .annotation - .clone() - .into_rs_borrowed("", &HashSet::default()); + let param_type = self.annotation.clone().into_rs_borrowed(local_types); match &self.owner { PropertyOwner::Module => { let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); @@ -325,9 +326,3 @@ impl Property { Ok(output) } } - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum PropertyOwner { - Module, - Class, -} diff --git a/pyo3_bindgen_engine/src/syntax/type_var.rs b/pyo3_bindgen_engine/src/syntax/type_var.rs index 16f9aa1..7813c58 100644 --- a/pyo3_bindgen_engine/src/syntax/type_var.rs +++ b/pyo3_bindgen_engine/src/syntax/type_var.rs @@ -10,13 +10,11 @@ impl TypeVar { pub fn new(name: Path) -> Self { Self { name } } -} -impl TypeVar { pub fn generate(&self, _cfg: &Config) -> Result { let typevar_ident: syn::Ident = self.name.name().try_into()?; Ok(quote::quote! { - pub type #typevar_ident<'py> = &'py ::pyo3::types::PyAny; + pub type #typevar_ident = ::pyo3::types::PyAny; }) } } diff --git a/pyo3_bindgen_engine/src/typing/from_py.rs b/pyo3_bindgen_engine/src/typing/from_py.rs new file mode 100644 index 0000000..3d224a3 --- /dev/null +++ b/pyo3_bindgen_engine/src/typing/from_py.rs @@ -0,0 +1,501 @@ +use super::Type; +use crate::{PyBindgenError, Result}; +use itertools::Itertools; +use std::str::FromStr; + +impl TryFrom<&pyo3::types::PyAny> for Type { + type Error = PyBindgenError; + fn try_from(value: &pyo3::types::PyAny) -> Result { + match value { + // None -> Unknown type + none if none.is_none() => Ok(Self::Unknown), + // Handle PyType + t if t.is_instance_of::() => { + Self::try_from(t.downcast::()?) 
+ } + // Handle typing + typing + if typing + .get_type() + .getattr(pyo3::intern!(value.py(), "__module__"))? + .to_string() + == "typing" => + { + Self::from_typing(typing) + } + // Handle everything else as string + _ => { + if value.is_instance_of::() { + Self::from_str(value.downcast::()?.to_str()?) + } else { + Self::from_str(&value.to_string()) + } + } + } + } +} + +impl TryFrom<&pyo3::types::PyType> for Type { + type Error = PyBindgenError; + fn try_from(value: &pyo3::types::PyType) -> Result { + Ok(match value { + // Primitives + t if t.is_subclass_of::()? => Self::PyBool, + t if t.is_subclass_of::()? => Self::PyByteArray, + t if t.is_subclass_of::()? => Self::PyBytes, + t if t.is_subclass_of::()? => Self::PyFloat, + t if t.is_subclass_of::()? => Self::PyLong, + t if t.is_subclass_of::()? => Self::PyString, + + // Collections + t if t.is_subclass_of::()? => Self::PyDict { + key_type: Box::new(Self::Unknown), + value_type: Box::new(Self::Unknown), + }, + t if t.is_subclass_of::()? => { + Self::PyFrozenSet(Box::new(Self::Unknown)) + } + t if t.is_subclass_of::()? => { + Self::PyList(Box::new(Self::Unknown)) + } + t if t.is_subclass_of::()? => Self::PySet(Box::new(Self::Unknown)), + t if t.is_subclass_of::()? => Self::PyTuple(vec![Self::Unknown]), + + // Additional types - std + t if t.is_subclass_of::()? => Self::PySlice, + + // Additional types - num-complex + t if t.is_subclass_of::()? => Self::PyComplex, + + // Additional types - datetime + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyDate, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyDateTime, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyDelta, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyTime, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyTzInfo, + + // Python-specific types + t if t.is_subclass_of::()? => Self::PyCapsule, + t if t.is_subclass_of::()? 
=> Self::PyCFunction, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyCode, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + t if t.is_subclass_of::()? => Self::PyFrame, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + t if t.is_subclass_of::()? => Self::PyFunction { + param_types: vec![Self::PyEllipsis], + return_annotation: Box::new(Self::Unknown), + }, + t if t.is_subclass_of::()? => Self::PyModule, + #[cfg(not(PyPy))] + t if t.is_subclass_of::()? => Self::PySuper, + t if t.is_subclass_of::()? => Self::PyTraceback, + t if t.is_subclass_of::()? => Self::PyType, + + // Handle everything else as string + _ => Self::from_str(&value.to_string())?, + }) + } +} + +impl Type { + fn from_typing(value: &pyo3::types::PyAny) -> Result { + let py = value.py(); + debug_assert_eq!( + value + .get_type() + .getattr(pyo3::intern!(py, "__module__"))? + .to_string(), + "typing" + ); + + if let Ok(wrapping_type) = value.getattr(pyo3::intern!(py, "__origin__")) { + let wrapping_type = Self::try_from(wrapping_type)?; + Ok( + if let Ok(inner_types) = value + .getattr(pyo3::intern!(py, "__args__")) + .and_then(|inner_types| Ok(inner_types.downcast::()?)) + { + let inner_types = inner_types + .iter() + .map(Self::try_from) + .collect::>>()?; + match wrapping_type { + Self::Union(..) => { + if inner_types.len() == 2 && inner_types.contains(&Self::PyNone) { + Self::Optional(Box::new( + inner_types + .iter() + .find(|x| **x != Self::PyNone) + .unwrap_or_else(|| unreachable!()) + .to_owned(), + )) + } else { + Self::Union(inner_types) + } + } + Self::Optional(..) => { + debug_assert_eq!(inner_types.len(), 1); + Self::Optional(Box::new(inner_types[0].clone())) + } + Self::PyDict { .. } => { + debug_assert_eq!(inner_types.len(), 2); + Self::PyDict { + key_type: Box::new(inner_types[0].clone()), + value_type: Box::new(inner_types[1].clone()), + } + } + Self::PyFrozenSet(..) 
=> { + debug_assert_eq!(inner_types.len(), 1); + Self::PyFrozenSet(Box::new(inner_types[0].clone())) + } + Self::PyList(..) => { + debug_assert_eq!(inner_types.len(), 1); + Self::PyList(Box::new(inner_types[0].clone())) + } + Self::PySet(..) => { + debug_assert_eq!(inner_types.len(), 1); + Self::PySet(Box::new(inner_types[0].clone())) + } + Self::PyTuple(..) => Self::PyTuple(inner_types), + Self::PyFunction { .. } => { + debug_assert!(!inner_types.is_empty()); + Self::PyFunction { + param_types: match inner_types.len() { + 1 => Vec::default(), + _ => inner_types[..inner_types.len() - 1].to_owned(), + }, + return_annotation: Box::new( + inner_types + .last() + .unwrap_or_else(|| unreachable!()) + .to_owned(), + ), + } + } + Self::PyType => { + debug_assert_eq!(inner_types.len(), 1); + inner_types[0].clone() + } + _ => { + // TODO: Handle other types with inner types if useful (e.g. Generator) + wrapping_type + } + } + } else { + // If there are no inner types, return just the wrapping type + wrapping_type + }, + ) + } else { + // Handle everything else as string + Type::from_str(&value.to_string()) + } + } +} + +impl std::str::FromStr for Type { + type Err = PyBindgenError; + fn from_str(value: &str) -> Result { + Ok(match value { + "Any" => Self::PyAny, + + // Primitives + "bool" => Self::PyBool, + "bytearray" => Self::PyByteArray, + "bytes" => Self::PyBytes, + "float" => Self::PyFloat, + "int" => Self::PyLong, + "str" => Self::PyString, + + // Enums + optional + if optional.matches('|').count() == 1 && optional.matches("None").count() == 1 => + { + let inner_type = Self::from_str( + optional + .split('|') + .map(str::trim) + .find(|x| *x != "None") + .unwrap_or_else(|| unreachable!()), + )?; + Self::Optional(Box::new(inner_type)) + } + r#union if r#union.contains('|') => { + let mut inner_types = r#union + .split('|') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + let inner_types = inner_types + .iter() + 
.map(|x| Self::from_str(x)) + .collect::>()?; + Self::Union(inner_types) + } + "Union" => Self::Union(vec![Self::Unknown]), + "" | "None" | "NoneType" => Self::PyNone, + + // Collections + dict if dict.starts_with("dict[") && dict.ends_with(']') => { + let mut inner_types = dict + .strip_prefix("dict[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()) + .split(',') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + debug_assert_eq!(inner_types.len(), 2); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>>()?; + Self::PyDict { + key_type: Box::new(inner_types[0].clone()), + value_type: Box::new(inner_types[1].clone()), + } + } + "dict" | "Dict" | "Mapping" => Self::PyDict { + key_type: Box::new(Self::Unknown), + value_type: Box::new(Self::Unknown), + }, + frozenset if frozenset.starts_with("frozenset[") && frozenset.ends_with(']') => { + let inner_type = Self::from_str( + frozenset + .strip_prefix("frozenset[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyFrozenSet(Box::new(inner_type)) + } + list if list.starts_with("list[") && list.ends_with(']') => { + let inner_type = Self::from_str( + list.strip_prefix("list[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + "list" => Self::PyList(Box::new(Self::Unknown)), + sequence if sequence.starts_with("Sequence[") && sequence.ends_with(']') => { + let inner_type = Self::from_str( + sequence + .strip_prefix("Sequence[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + "Sequence" | "Iterable" | "Iterator" => Self::PyList(Box::new(Self::Unknown)), + set if set.starts_with("set[") && set.ends_with(']') => { + let inner_type = Self::from_str( 
+ set.strip_prefix("set[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PySet(Box::new(inner_type)) + } + tuple if tuple.starts_with("tuple[") && tuple.ends_with(']') => { + let mut inner_types = tuple + .strip_prefix("tuple[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()) + .split(',') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>()?; + Self::PyTuple(inner_types) + } + + // Additional types - std + "ipaddress.IPv4Address" => Self::IpV4Addr, + "ipaddress.IPv6Address" => Self::IpV6Addr, + "os.PathLike" | "pathlib.Path" => Self::Path, + "slice" => Self::PySlice, + + // Additional types - num-complex + "complex" => Self::PyComplex, + + // Additional types - datetime + #[cfg(not(Py_LIMITED_API))] + "datetime.date" => Self::PyDate, + #[cfg(not(Py_LIMITED_API))] + "datetime.datetime" => Self::PyDateTime, + "timedelta" => Self::PyDelta, + #[cfg(not(Py_LIMITED_API))] + "datetime.time" => Self::PyTime, + #[cfg(not(Py_LIMITED_API))] + "datetime.tzinfo" => Self::PyTzInfo, + + // Python-specific types + "capsule" => Self::PyCapsule, + "cfunction" => Self::PyCFunction, + #[cfg(not(Py_LIMITED_API))] + "code" => Self::PyCode, + "Ellipsis" | "..." 
=> Self::PyEllipsis, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + "frame" => Self::PyFrame, + "function" => Self::PyFunction { + param_types: vec![Self::PyEllipsis], + return_annotation: Box::new(Self::Unknown), + }, + callable if callable.starts_with("Callable[") && callable.ends_with(']') => { + let mut inner_types = callable + .strip_prefix("Callable[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()) + .split(',') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + debug_assert!(!inner_types.is_empty()); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>>()?; + Self::PyFunction { + param_types: match inner_types.len() { + 1 => Vec::default(), + _ => inner_types[..inner_types.len() - 1].to_owned(), + }, + return_annotation: Box::new( + inner_types + .last() + .unwrap_or_else(|| unreachable!()) + .to_owned(), + ), + } + } + "Callable" | "callable" => Self::PyFunction { + param_types: vec![Self::PyEllipsis], + return_annotation: Box::new(Self::Unknown), + }, + "module" => Self::PyModule, + #[cfg(not(PyPy))] + "super" => Self::PySuper, + "traceback" => Self::PyTraceback, + typ if typ.starts_with("type[") && typ.ends_with(']') => Self::from_str( + typ.strip_prefix("type[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?, + + // classes + class if class.starts_with("") => Self::from_str( + class + .strip_prefix("") + .unwrap_or_else(|| unreachable!()), + )?, + + // enums + enume if enume.starts_with("") => Self::from_str( + enume + .strip_prefix("") + .unwrap_or_else(|| unreachable!()), + )?, + + // typing + typing if typing.starts_with("typing.") => Self::from_str( + typing + .strip_prefix("typing.") + .unwrap_or_else(|| unreachable!()), + )?, + + // collections.abc + collections_abc if collections_abc.starts_with("collections.abc.") => Self::from_str( + collections_abc + 
.strip_prefix("collections.abc.") + .unwrap_or_else(|| unreachable!()), + )?, + // collections + collections if collections.starts_with("collections.") => Self::from_str( + collections + .strip_prefix("collections.") + .unwrap_or_else(|| unreachable!()), + )?, + + // Other types, that might be known (custom types of modules) + other => Self::Other(other.to_owned()), + }) + } +} + +// TODO: Refactor `repair_complex_sequence()` into something more sensible +/// Repairs complex wrapped sequences. +fn repair_complex_sequence(sequence: &mut Vec, separator: char) { + debug_assert!(!sequence.is_empty()); + debug_assert!({ + let merged_sequence = sequence.iter().join(""); + merged_sequence.matches('[').count() == merged_sequence.matches(']').count() + }); + + let mut traversed_all_elements = false; + let mut start_index = 0; + 'outer: while !traversed_all_elements { + traversed_all_elements = true; + 'inner: for i in start_index..(sequence.len() - 1) { + let mut n_scopes = sequence[i].matches('[').count() - sequence[i].matches(']').count(); + if n_scopes == 0 { + continue; + } + for j in (i + 1)..sequence.len() { + n_scopes += sequence[j].matches('[').count(); + n_scopes -= sequence[j].matches(']').count(); + if n_scopes == 0 { + let mut new_element = sequence[i].clone(); + for relevant_element in sequence.iter().take(j + 1).skip(i + 1) { + new_element = format!("{new_element}{separator}{relevant_element}"); + } + sequence[i] = new_element; + sequence.drain((i + 1)..=j); + if j < sequence.len() - 1 { + traversed_all_elements = false; + start_index = i; + break 'inner; + } else { + break 'outer; + } + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_repair_complex_sequence() { + // Arrange + let mut sequence = vec!["dict[str".to_string(), "Any]".to_string()]; + + // Act + repair_complex_sequence(&mut sequence, ','); + + // Assert + assert_eq!(sequence, vec!["dict[str,Any]".to_string()]); + } +} diff --git 
a/pyo3_bindgen_engine/src/typing/into_rs.rs b/pyo3_bindgen_engine/src/typing/into_rs.rs new file mode 100644 index 0000000..83f8eff --- /dev/null +++ b/pyo3_bindgen_engine/src/typing/into_rs.rs @@ -0,0 +1,208 @@ +use super::Type; +use crate::syntax::Path; +use itertools::Itertools; +use quote::quote; +use rustc_hash::FxHashMap as HashMap; +use std::rc::Rc; + +impl Type { + pub fn into_rs_owned(self, local_types: &HashMap) -> proc_macro2::TokenStream { + let owned = self.into_rs(local_types).owned; + Rc::into_inner(owned).unwrap_or_else(|| unreachable!()) + } + + pub fn into_rs_borrowed(self, local_types: &HashMap) -> proc_macro2::TokenStream { + let borrowed = self.into_rs(local_types).borrowed; + Rc::into_inner(borrowed).unwrap_or_else(|| unreachable!()) + } + + fn into_rs(self, local_types: &HashMap) -> OutputType { + match self { + Self::PyAny | Self::Unknown => { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + } + Self::Other(..) => self.map_local_type(local_types), + + // Primitives + Self::PyBool => OutputType::new_identical(quote!(bool)), + Self::PyByteArray | Self::PyBytes => OutputType::new(quote!(Vec), quote!(&[u8])), + Self::PyFloat => OutputType::new_identical(quote!(f64)), + Self::PyLong => OutputType::new_identical(quote!(i64)), + Self::PyString => OutputType::new(quote!(::std::string::String), quote!(&str)), + + // Enums + Self::Optional(inner_type) => { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new_identical(quote!(::std::option::Option<#inner_type>)) + } + Self::Union(_inner_types) => { + // TODO: Support Rust enums where possible | alternatively, overload functions for each variant + OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + } + Self::PyNone => { + // TODO: Determine if PyNone is even possible + OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + } + + // Collections + Self::PyDict { + key_type, + value_type, + } => { + if key_type.is_hashable() { + let key_type = 
key_type.into_rs(local_types).owned; + let value_type = value_type.into_rs(local_types).owned; + OutputType::new( + quote!(::std::collections::HashMap<#key_type, #value_type>), + quote!(&::std::collections::HashMap<#key_type, #value_type>), + ) + } else { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyDict)) + } + } + Self::PyFrozenSet(inner_type) => { + if inner_type.is_hashable() { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new( + quote!(::std::collections::HashSet<#inner_type>), + quote!(&::std::collections::HashSet<#inner_type>), + ) + } else { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyFrozenSet)) + } + } + Self::PyList(inner_type) => { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new(quote!(Vec<#inner_type>), quote!(&[#inner_type])) + } + Self::PySet(inner_type) => { + if inner_type.is_hashable() { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new( + quote!(::std::collections::HashSet<#inner_type>), + quote!(&::std::collections::HashSet<#inner_type>), + ) + } else { + OutputType::new_identical(quote!(&'py ::pyo3::types::PySet)) + } + } + Self::PyTuple(inner_types) => { + if inner_types.len() < 2 { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyTuple)) + } else if inner_types.len() == 2 + && *inner_types.last().unwrap_or_else(|| unreachable!()) == Self::PyEllipsis + { + Self::PyList(Box::new(inner_types[0].clone())).into_rs(local_types) + } else { + let inner_types = inner_types + .into_iter() + .map(|inner_type| inner_type.into_rs(local_types).owned) + .collect_vec(); + OutputType::new_identical(quote!((#(#inner_types),*))) + } + } + + // Additional types - std + Self::IpV4Addr => OutputType::new_identical(quote!(::std::net::IpV4Addr)), + Self::IpV6Addr => OutputType::new_identical(quote!(::std::net::IpV6Addr)), + Self::Path => OutputType::new(quote!(::std::path::PathBuf), quote!(&::std::path::Path)), + // TODO: Map `PySlice` to 
`std::ops::Range` if possible + Self::PySlice => OutputType::new_identical(quote!(&'py ::pyo3::types::PySlice)), + + // Additional types - num-complex + // TODO: Support conversion of `PyComplex` to `num_complex::Complex` if enabled via `num-complex` feature + Self::PyComplex => OutputType::new_identical(quote!(&'py ::pyo3::types::PyComplex)), + + // Additional types - datetime + #[cfg(not(Py_LIMITED_API))] + Self::PyDate => OutputType::new_identical(quote!(&'py ::pyo3::types::PyDate)), + #[cfg(not(Py_LIMITED_API))] + Self::PyDateTime => OutputType::new_identical(quote!(&'py ::pyo3::types::PyDateTime)), + Self::PyDelta => { + // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet + // OutputType::new_identical(quote!(::std::time::Duration)) + OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + } + #[cfg(not(Py_LIMITED_API))] + Self::PyTime => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTime)), + #[cfg(not(Py_LIMITED_API))] + Self::PyTzInfo => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTzInfo)), + + // Python-specific types + Self::PyCapsule => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCapsule)), + Self::PyCFunction => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCFunction)), + #[cfg(not(Py_LIMITED_API))] + Self::PyCode => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCode)), + Self::PyEllipsis => { + // TODO: Determine if PyEllipsis is even possible + OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + } + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + Self::PyFrame => OutputType::new_identical(quote!(&'py ::pyo3::types::PyFrame)), + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + Self::PyFunction { .. } => { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyFunction)) + } + #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] + Self::PyFunction { .. 
} => OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)), + Self::PyModule => OutputType::new_identical(quote!(&'py ::pyo3::types::PyModule)), + #[cfg(not(PyPy))] + Self::PySuper => OutputType::new_identical(quote!(&'py ::pyo3::types::PySuper)), + Self::PyTraceback => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTraceback)), + Self::PyType => OutputType::new_identical(quote!(&'py ::pyo3::types::PyType)), + } + } + + fn map_local_type(self, local_types: &HashMap) -> OutputType { + // Get the inner name of the type + let Self::Other(type_name) = self else { + unreachable!() + }; + + // Ignore forbidden types + if crate::config::FORBIDDEN_TYPE_NAMES.contains(&type_name.as_str()) { + return OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)); + } + + // Try to map the external types + if let Some(external_type) = Self::try_map_external_type(&type_name) { + return external_type; + } + + // Try to map the local types + if let Some(relative_path) = local_types.get(&Path::from_py(&type_name)) { + let relative_path: syn::Path = relative_path.try_into().unwrap(); + return OutputType::new_identical(quote!(&'py #relative_path)); + } + + OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + } + + fn try_map_external_type(_type_name: &str) -> Option { + // TODO: Handle types from other packages with Rust bindings here (e.g. 
NumPy) + None + } +} + +#[derive(Debug, Clone)] +struct OutputType { + owned: Rc, + borrowed: Rc, +} + +impl OutputType { + fn new(own: proc_macro2::TokenStream, bor: proc_macro2::TokenStream) -> Self { + Self { + owned: Rc::new(own), + borrowed: Rc::new(bor), + } + } + + fn new_identical(output_type: proc_macro2::TokenStream) -> Self { + let output_type = Rc::new(output_type); + Self { + owned: output_type.clone(), + borrowed: output_type, + } + } +} diff --git a/pyo3_bindgen_engine/src/typing/mod.rs b/pyo3_bindgen_engine/src/typing/mod.rs index 8c693b7..5922240 100644 --- a/pyo3_bindgen_engine/src/typing/mod.rs +++ b/pyo3_bindgen_engine/src/typing/mod.rs @@ -1,20 +1,11 @@ -//! Module for handling Rust, Python and `PyO3` types. -#![allow(unused)] - -// TODO: Refactor typing - -use itertools::Itertools; -use rustc_hash::FxHashSet as HashSet; -use std::str::FromStr; +mod from_py; +mod into_rs; /// Enum that maps Python types to Rust types. -/// -/// Note that this is not a complete mapping at the moment. The public API is -/// subject to large changes. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Type { PyAny, - Unhandled(String), + Other(String), Unknown, // Primitives @@ -32,8 +23,8 @@ pub enum Type { // Collections PyDict { - t_key: Box, - t_value: Box, + key_type: Box, + value_type: Box, }, PyFrozenSet(Box), PyList(Box), @@ -44,11 +35,9 @@ pub enum Type { IpV4Addr, IpV6Addr, Path, - // TODO: Map `PySlice` to `std::ops::Range` if possible PySlice, // Additional types - num-complex - // TODO: Support conversion of `PyComplex`` to `num_complex::Complex` if enabled via `num-complex` feature PyComplex, // Additional types - datetime @@ -70,7 +59,10 @@ pub enum Type { PyEllipsis, #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] PyFrame, - PyFunction, + PyFunction { + param_types: Vec, + return_annotation: Box, + }, PyModule, #[cfg(not(PyPy))] PySuper, @@ -79,992 +71,8 @@ pub enum Type { PyType, } -impl TryFrom> for Type { - type Error = pyo3::PyErr; - fn try_from(value: Option<&pyo3::types::PyAny>) -> Result { - Ok(match value { - Some(t) => Self::try_from(t)?, - None => Self::Unknown, - }) - } -} - -impl TryFrom<&pyo3::types::PyAny> for Type { - type Error = pyo3::PyErr; - fn try_from(value: &pyo3::types::PyAny) -> Result { - Ok(match value { - t if t.is_instance_of::() => { - let t = t.downcast::()?; - Self::try_from(t)? - } - s if s.is_instance_of::() => { - let s = s.downcast::()?; - Self::from_str(s.to_str()?)? - } - typing if typing.get_type().getattr("__module__")?.to_string() == "typing" => { - Self::from_typing(typing)? - } - none if none.is_none() => Self::Unknown, - // Unknown | Handle as string if possible - _ => { - let value = value.to_string(); - match &value { - _class if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? - } - _enum if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? 
- } - _ => Self::from_str(&value)?, - } - } - }) - } -} - -impl TryFrom<&pyo3::types::PyType> for Type { - type Error = pyo3::PyErr; - fn try_from(value: &pyo3::types::PyType) -> Result { - Ok(match value { - // Primitives - t if t.is_subclass_of::()? => Self::PyBool, - t if t.is_subclass_of::()? => Self::PyByteArray, - t if t.is_subclass_of::()? => Self::PyBytes, - t if t.is_subclass_of::()? => Self::PyFloat, - t if t.is_subclass_of::()? => Self::PyLong, - t if t.is_subclass_of::()? => Self::PyString, - - // Collections - t if t.is_subclass_of::()? => Self::PyDict { - t_key: Box::new(Self::Unknown), - t_value: Box::new(Self::Unknown), - }, - t if t.is_subclass_of::()? => { - Self::PyFrozenSet(Box::new(Self::Unknown)) - } - t if t.is_subclass_of::()? => { - Self::PyList(Box::new(Self::Unknown)) - } - t if t.is_subclass_of::()? => Self::PySet(Box::new(Self::Unknown)), - t if t.is_subclass_of::()? => Self::PyTuple(vec![Self::Unknown]), - - // Additional types - std - t if t.is_subclass_of::()? => Self::PySlice, - - // Additional types - num-complex - t if t.is_subclass_of::()? => Self::PyComplex, - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyDate, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyDateTime, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyDelta, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyTime, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyTzInfo, - - // Python-specific types - t if t.is_subclass_of::()? => Self::PyCapsule, - t if t.is_subclass_of::()? => Self::PyCFunction, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyCode, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - t if t.is_subclass_of::()? => Self::PyFrame, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - t if t.is_subclass_of::()? => Self::PyFunction, - t if t.is_subclass_of::()? 
=> Self::PyModule, - #[cfg(not(PyPy))] - t if t.is_subclass_of::()? => Self::PySuper, - t if t.is_subclass_of::()? => Self::PyTraceback, - t if t.is_subclass_of::()? => Self::PyType, - - // Unknown | Handle as string if possible - _ => { - let value = value.to_string(); - match &value { - _class if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? - } - _enum if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? - } - _ => Self::Unhandled(value), - } - } - }) - } -} - -impl std::str::FromStr for Type { - type Err = pyo3::PyErr; - fn from_str(value: &str) -> Result { - Ok(match value { - "Any" => Self::PyAny, - - // Primitives - "bool" => Self::PyBool, - "bytearray" => Self::PyByteArray, - "bytes" => Self::PyBytes, - "float" => Self::PyFloat, - "int" => Self::PyLong, - "str" => Self::PyString, - - // Enums - optional - if optional.matches('|').count() == 1 && optional.matches("None").count() == 1 => - { - let t = optional - .split('|') - .map(str::trim) - .find(|x| *x != "None") - .unwrap(); - Self::Optional(Box::new(Self::from_str(t)?)) - } - r#union if r#union.contains('|') => { - let mut t_sequence = r#union - .split('|') - .map(|x| x.trim().to_string()) - .collect::>(); - ugly_hack_repair_complex_split_sequence(&mut t_sequence); - Self::Union( - t_sequence - .iter() - .map(|x| Self::from_str(x)) - .collect::, _>>()?, - ) - } - "None" | "NoneType" => Self::PyNone, - - // Collections - dict if dict.starts_with("dict[") && dict.ends_with(']') => { - let (key, value) = dict - .strip_prefix("dict[") - .unwrap() - .strip_suffix(']') - .unwrap() - .split_once(',') - .unwrap(); - let key = key.trim(); - let value = value.trim(); - Self::PyDict { - t_key: Box::new(Self::from_str(key)?), - t_value: Box::new(Self::from_str(value)?), - } - } - "dict" | "Dict" => Self::PyDict { - t_key: Box::new(Self::Unknown), - t_value: Box::new(Self::Unknown), - }, - frozenset if 
frozenset.starts_with("frozenset[") && frozenset.ends_with(']') => { - let t = frozenset - .strip_prefix("frozenset[") - .unwrap() - .strip_suffix(']') - .unwrap(); - Self::PyFrozenSet(Box::new(Self::from_str(t)?)) - } - list if list.starts_with("list[") && list.ends_with(']') => { - let t = list - .strip_prefix("list[") - .unwrap() - .strip_suffix(']') - .unwrap(); - Self::PyList(Box::new(Self::from_str(t)?)) - } - "list" => Self::PyList(Box::new(Self::Unknown)), - sequence if sequence.starts_with("Sequence[") && sequence.ends_with(']') => { - let t = sequence - .strip_prefix("Sequence[") - .unwrap() - .strip_suffix(']') - .unwrap(); - Self::PyList(Box::new(Self::from_str(t)?)) - } - set if set.starts_with("set[") && set.ends_with(']') => { - let t = set.strip_prefix("set[").unwrap().strip_suffix(']').unwrap(); - Self::PySet(Box::new(Self::from_str(t)?)) - } - tuple if tuple.starts_with("tuple[") && tuple.ends_with(']') => { - let mut t_sequence = tuple - .strip_prefix("tuple[") - .unwrap() - .strip_suffix(']') - .unwrap() - .split(',') - .map(|x| x.trim().to_string()) - .collect::>(); - ugly_hack_repair_complex_split_sequence(&mut t_sequence); - Self::PyTuple( - t_sequence - .iter() - .map(|x| Self::from_str(x)) - .collect::, _>>()?, - ) - } - - // Additional types - std - "ipaddress.IPv4Address" => Self::IpV4Addr, - "ipaddress.IPv6Address" => Self::IpV6Addr, - "os.PathLike" | "pathlib.Path" => Self::Path, - "slice" => Self::PySlice, - - // Additional types - num-complex - "complex" => Self::PyComplex, - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - "datetime.date" => Self::PyDate, - #[cfg(not(Py_LIMITED_API))] - "datetime.datetime" => Self::PyDateTime, - "timedelta" => Self::PyDelta, - #[cfg(not(Py_LIMITED_API))] - "datetime.time" => Self::PyTime, - #[cfg(not(Py_LIMITED_API))] - "datetime.tzinfo" => Self::PyTzInfo, - - // Python-specific types - "capsule" => Self::PyCapsule, - "cfunction" => Self::PyCFunction, - #[cfg(not(Py_LIMITED_API))] - 
"code" => Self::PyCode, - "Ellipsis" | "..." => Self::PyEllipsis, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - "frame" => Self::PyFrame, - "function" => Self::PyFunction, - callable if callable.starts_with("Callable[") && callable.ends_with(']') => { - // TODO: Use callable types for something if useful - // let (args, return_value) = callable - // .strip_prefix("Callable[") - // .unwrap() - // .strip_suffix(']') - // .unwrap() - // .split_once(',') - // .unwrap(); - // let args = args - // .strip_prefix("[") - // .unwrap() - // .strip_suffix("]") - // .unwrap() - // .split(',') - // .map(|x| x.trim()) - // .collect::>(); - // let return_value = return_value.trim(); - Self::PyFunction - } - "Callable" | "callable" => Self::PyFunction, - "module" => Self::PyModule, - #[cfg(not(PyPy))] - "super" => Self::PySuper, - "traceback" => Self::PyTraceback, - typ if typ.starts_with("type[") && typ.ends_with(']') => { - // TODO: Use inner type for something if useful - // let t = typ - // .strip_prefix("type[") - // .unwrap() - // .strip_suffix(']') - // .unwrap(); - Self::PyType - } - - // typing - typing if typing.starts_with("typing.") => { - let s = typing.strip_prefix("typing.").unwrap(); - Self::from_str(s)? - } - - // collection.abc - collection if collection.starts_with("collection.abc.") => { - let s = collection.strip_prefix("collection.abc.").unwrap(); - Self::from_str(s)? - } - - unhandled => Self::Unhandled(unhandled.to_owned()), - }) - } -} - impl Type { - pub fn from_typing(value: &pyo3::types::PyAny) -> pyo3::PyResult { - if let (Ok(t), Ok(t_inner)) = (value.getattr("__origin__"), value.getattr("__args__")) { - let t_inner = t_inner.downcast::()?; - - if t.is_instance_of::() { - let t = t.downcast::()?; - match Self::try_from(t)? { - Self::PyDict { .. 
} => { - let (t_key, t_value) = ( - Self::try_from(t_inner.get_item(0)?)?, - Self::try_from(t_inner.get_item(1)?)?, - ); - return Ok(Self::PyDict { - t_key: Box::new(t_key), - t_value: Box::new(t_value), - }); - } - Self::PyList(..) => { - let t_inner = Self::try_from(t_inner.get_item(0)?)?; - return Ok(Self::PyList(Box::new(t_inner))); - } - Self::PyTuple(..) => { - let t_sequence = t_inner - .iter() - .map(Self::try_from) - .collect::, _>>()?; - return Ok(Self::PyTuple(t_sequence)); - } - Self::PyType => { - // TODO: See if the inner type is useful for something here - return Ok(Self::PyType); - } - _ => { - // Noop - processed as string below - // eprintln!( - // "WARN: Unexpected type encountered: {value}\n \ - // Bindings could be improved by handling the type here \ - // Please report this as a bug. [scope: Type::from_typing()]", - // ); - } - } - } - - let t = t.to_string(); - Ok(match &t { - _typing if t.starts_with("typing.") => { - let t = t.strip_prefix("typing.").unwrap(); - match t { - "Union" => { - let t_sequence = t_inner - .iter() - .map(Self::try_from) - .collect::, _>>()?; - - if t_sequence.len() == 2 && t_sequence.contains(&Self::PyNone) { - let t = t_sequence - .iter() - .find(|x| **x != Self::PyNone) - .unwrap() - .clone(); - Self::Optional(Box::new(t)) - } else { - Self::Union(t_sequence) - } - } - _ => Self::Unhandled(value.to_string()), - } - } - _collections if t.starts_with("") => { - let t = t - .strip_prefix("") - .unwrap(); - match t { - "Iterable" | "Sequence" => { - let t_inner = Self::try_from(t_inner.get_item(0)?)?; - Self::PyList(Box::new(t_inner)) - } - "Callable" => { - // TODO: Use callable types for something if useful (t_inner) - Self::PyFunction - } - _ => Self::Unhandled(value.to_string()), - } - } - // Unknown | Handle the type as string if possible - _ => { - // TODO: Handle also the inner type here if possible - let t = t.to_string(); - match &t { - _class if t.starts_with("") => { - let t = t - .strip_prefix("") - 
.unwrap(); - Self::from_str(t)? - } - _enum if t.starts_with("") => { - let t = t - .strip_prefix("") - .unwrap(); - Self::from_str(t)? - } - _ => Self::from_str(&t)?, - } - } - }) - } else { - let value = value.to_string(); - Type::from_str(&value) - } - } - - #[must_use] - pub fn into_rs( - self, - owned: bool, - module_name: &str, - all_types: &HashSet, - ) -> proc_macro2::TokenStream { - if owned { - self.into_rs_owned(module_name, all_types) - } else { - self.into_rs_borrowed(module_name, all_types) - } - } - - #[must_use] - pub fn into_rs_owned( - self, - module_name: &str, - all_types: &HashSet, - ) -> proc_macro2::TokenStream { - match self { - Self::PyAny => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::Unhandled(..) => self.try_into_module_path(module_name, all_types), - - Self::Unknown => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - - // Primitives - Self::PyBool => { - quote::quote! {bool} - } - Self::PyByteArray | Self::PyBytes => { - quote::quote! {Vec} - } - Self::PyFloat => { - quote::quote! {f64} - } - Self::PyLong => { - quote::quote! {i64} - } - Self::PyString => { - quote::quote! {::std::string::String} - } - - // Enums - Self::Optional(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::option::Option<#inner> - } - } - Self::Union(t_alternatives) => { - // TODO: Support Rust enum where possible - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - Self::PyNone => { - // TODO: Not sure what to do with None - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - - // Collections - Self::PyDict { t_key, t_value } => { - if t_key.is_owned_hashable() { - let t_key = t_key.into_rs_owned(module_name, all_types); - let t_value = t_value.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::collections::HashMap<#t_key, #t_value> - } - } else { - quote::quote! 
{ - &'py ::pyo3::types::PyDict - } - } - } - Self::PyFrozenSet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::collections::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PyFrozenSet - } - } - } - Self::PyList(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - Vec<#inner> - } - } - Self::PySet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::collections::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PySet - } - } - } - Self::PyTuple(t_sequence) => { - if t_sequence.is_empty() - || (t_sequence.len() == 1 && t_sequence[0] == Self::Unknown) - { - quote::quote! { - &'py ::pyo3::types::PyTuple - } - } else if t_sequence.len() == 2 && t_sequence.last().unwrap() == &Self::PyEllipsis { - Self::PyList(Box::new(t_sequence[0].clone())) - .into_rs_owned(module_name, all_types) - } else { - let inner = t_sequence - .into_iter() - .map(|x| x.into_rs_owned(module_name, all_types)) - .collect::>(); - quote::quote! { - (#(#inner),*) - } - } - } - - // Additional types - std - Self::IpV4Addr => { - quote::quote! {::std::net::IpV4Addr} - } - Self::IpV6Addr => { - quote::quote! {::std::net::IpV6Addr} - } - Self::Path => { - quote::quote! {::std::path::PathBuf} - } - Self::PySlice => { - quote::quote! {&'py ::pyo3::types::PySlice} - } - - // Additional types - num-complex - Self::PyComplex => { - quote::quote! {&'py ::pyo3::types::PyComplex} - } - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - Self::PyDate => { - quote::quote! {&'py ::pyo3::types::PyDate} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyDateTime => { - quote::quote! {&'py ::pyo3::types::PyDateTime} - } - Self::PyDelta => { - // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet - // quote::quote! {::std::time::Duration} - quote::quote! 
{&'py ::pyo3::types::PyAny} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTime => { - quote::quote! {&'py ::pyo3::types::PyTime} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTzInfo => { - quote::quote! {&'py ::pyo3::types::PyTzInfo} - } - - // Python-specific types - Self::PyCapsule => { - quote::quote! {&'py ::pyo3::types::PyCapsule} - } - Self::PyCFunction => { - quote::quote! {&'py ::pyo3::types::PyCFunction} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyCode => { - quote::quote! {&'py ::pyo3::types::PyCode} - } - Self::PyEllipsis => { - // TODO: Not sure what to do with ellipsis - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFrame => { - quote::quote! {&'py ::pyo3::types::PyFrame} - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyFunction} - } - #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::PyModule => { - quote::quote! {&'py ::pyo3::types::PyModule} - } - #[cfg(not(PyPy))] - Self::PySuper => { - quote::quote! {&'py ::pyo3::types::PySuper} - } - Self::PyTraceback => { - quote::quote! {&'py ::pyo3::types::PyTraceback} - } - Self::PyType => { - quote::quote! {&'py ::pyo3::types::PyType} - } - } - } - - #[must_use] - pub fn into_rs_borrowed( - self, - module_name: &str, - all_types: &HashSet, - ) -> proc_macro2::TokenStream { - match self { - Self::PyAny => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::Unhandled(..) => self.try_into_module_path(module_name, all_types), - Self::Unknown => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - - // Primitives - Self::PyBool => { - quote::quote! {bool} - } - Self::PyByteArray | Self::PyBytes => { - quote::quote! {&[u8]} - } - Self::PyFloat => { - quote::quote! {f64} - } - Self::PyLong => { - quote::quote! {i64} - } - Self::PyString => { - quote::quote! 
{&str} - } - - // Enums - Self::Optional(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::option::Option<#inner> - } - } - Self::Union(t_alternatives) => { - // TODO: Support Rust enum where possible - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - Self::PyNone => { - // TODO: Not sure what to do with None - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - - // Collections - Self::PyDict { t_key, t_value } => { - if t_key.is_owned_hashable() { - let t_key = t_key.into_rs_owned(module_name, all_types); - let t_value = t_value.into_rs_owned(module_name, all_types); - quote::quote! { - &::std::collections::HashMap<#t_key, #t_value> - } - } else { - quote::quote! { - &'py ::pyo3::types::PyDict - } - } - } - Self::PyFrozenSet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - &::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PyFrozenSet - } - } - } - Self::PyList(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - &[#inner] - } - } - Self::PySet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - &::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PySet - } - } - } - Self::PyTuple(t_sequence) => { - if t_sequence.is_empty() - || (t_sequence.len() == 1 && t_sequence[0] == Self::Unknown) - { - quote::quote! { - &'py ::pyo3::types::PyTuple - } - } else if t_sequence.len() == 2 && t_sequence.last().unwrap() == &Self::PyEllipsis { - Self::PyList(Box::new(t_sequence[0].clone())) - .into_rs_borrowed(module_name, all_types) - } else { - let inner = t_sequence - .into_iter() - .map(|x| x.into_rs_owned(module_name, all_types)) - .collect::>(); - quote::quote! { - (#(#inner),*) - } - } - } - - // Additional types - std - Self::IpV4Addr => { - quote::quote! {::std::net::IpV4Addr} - } - Self::IpV6Addr => { - quote::quote! 
{::std::net::IpV6Addr} - } - Self::Path => { - quote::quote! {::std::path::PathBuf} - } - Self::PySlice => { - quote::quote! {&'py ::pyo3::types::PySlice} - } - - // Additional types - num-complex - Self::PyComplex => { - quote::quote! {&'py ::pyo3::types::PyComplex} - } - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - Self::PyDate => { - quote::quote! {&'py ::pyo3::types::PyDate} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyDateTime => { - quote::quote! {&'py ::pyo3::types::PyDateTime} - } - Self::PyDelta => { - // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet - // quote::quote! {::std::time::Duration} - quote::quote! {&'py ::pyo3::types::PyAny} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTime => { - quote::quote! {&'py ::pyo3::types::PyTime} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTzInfo => { - quote::quote! {&'py ::pyo3::types::PyTzInfo} - } - - // Python-specific types - Self::PyCapsule => { - quote::quote! {&'py ::pyo3::types::PyCapsule} - } - Self::PyCFunction => { - quote::quote! {&'py ::pyo3::types::PyCFunction} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyCode => { - quote::quote! {&'py ::pyo3::types::PyCode} - } - Self::PyEllipsis => { - // TODO: Not sure what to do with ellipsis - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFrame => { - quote::quote! {&'py ::pyo3::types::PyFrame} - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyFunction} - } - #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::PyModule => { - quote::quote! {&'py ::pyo3::types::PyModule} - } - #[cfg(not(PyPy))] - Self::PySuper => { - quote::quote! {&'py ::pyo3::types::PySuper} - } - Self::PyTraceback => { - quote::quote! {&'py ::pyo3::types::PyTraceback} - } - Self::PyType => { - quote::quote! 
{&'py ::pyo3::types::PyType} - } - } - } - - fn try_into_module_path( - self, - module_name: &str, - all_types: &HashSet, - ) -> proc_macro2::TokenStream { - let Self::Unhandled(value) = self else { - unreachable!() - }; - let module_root = if module_name.contains('.') { - module_name.split('.').next().unwrap() - } else { - module_name - }; - match value.as_str() { - // Ignorelist - "property" - | "member_descriptor" - | "method_descriptor" - | "getset_descriptor" - | "_collections._tuplegetter" - | "AsyncState" => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - module_member_full if module_member_full.starts_with(module_root) => { - // Ignore unknown types - if !all_types.contains(module_member_full) { - return quote::quote! {&'py ::pyo3::types::PyAny}; - } - - let n_common_ancestors = module_name - .split('.') - .zip(module_member_full.split('.')) - .take_while(|(a, b)| a == b) - .count(); - let current_module_depth = module_name.split('.').count(); - let reexport_path = if (current_module_depth - n_common_ancestors) > 0 { - std::iter::repeat("super".to_string()) - .take(current_module_depth - n_common_ancestors) - } else { - std::iter::repeat("self".to_string()).take(1) - }; - let reexport_path: String = reexport_path - .chain( - module_member_full - .split('.') - .skip(n_common_ancestors) - .map(|s| { - if syn::parse_str::(s).is_ok() { - s.to_owned() - } else { - format!("r#{s}") - } - }), - ) - .join("::"); - - // The path contains both ident and "::", combine into something that can be quoted - let reexport_path = syn::parse_str::(&reexport_path).unwrap(); - quote::quote! 
{ - &'py #reexport_path - } - } - _ => { - let value_without_brackets = value.split_once('[').unwrap_or((&value, "")).0; - let module_scopes = value_without_brackets.split('.'); - let n_module_scopes = module_scopes.clone().count(); - - // Approach: Find types without a module scope (no dot) and check if the type is local (or imported in the current module) - if !value_without_brackets.contains('.') { - if let Some(member) = all_types - .iter() - .filter(|member| { - member - .split('.') - .take(member.split('.').count() - 1) - .join(".") - == module_name - }) - .find(|&member| { - member.trim_start_matches(&format!("{module_name}.")) - == value_without_brackets - }) - { - return Self::Unhandled(member.to_owned()) - .try_into_module_path(module_name, all_types); - } - } - - // Approach: Find the shallowest match that contains the value - // TODO: Fix this! The matching might be wrong in many cases - let mut possible_matches = HashSet::default(); - for i in 0..n_module_scopes { - let module_member_scopes_end = module_scopes.clone().skip(i).join("."); - all_types - .iter() - .filter(|member| member.ends_with(&module_member_scopes_end)) - .for_each(|member| { - possible_matches.insert(member.to_owned()); - }); - if !possible_matches.is_empty() { - let shallowest_match = possible_matches - .iter() - .min_by(|m1, m2| m1.split('.').count().cmp(&m2.split('.').count())) - .unwrap(); - return Self::Unhandled(shallowest_match.to_owned()) - .try_into_module_path(module_name, all_types); - } - } - - // Unsupported - // TODO: Support more types - // dbg!(value); - quote::quote! 
{&'py ::pyo3::types::PyAny} - } - } - } - - fn is_owned_hashable(&self) -> bool { + fn is_hashable(&self) -> bool { matches!( self, Self::PyBool @@ -1080,40 +88,3 @@ impl Type { ) } } - -// TODO: Replace this with something more sensible -fn ugly_hack_repair_complex_split_sequence(sequence: &mut Vec) { - let mut traversed_all_elements = false; - let mut start_index = 0; - 'outer: while !traversed_all_elements { - traversed_all_elements = true; - 'inner: for i in start_index..(sequence.len() - 1) { - let mut n_scopes = sequence[i].matches('[').count() - sequence[i].matches(']').count(); - if n_scopes == 0 { - continue; - } - for j in (i + 1)..sequence.len() { - n_scopes += sequence[j].matches('[').count(); - n_scopes -= sequence[j].matches(']').count(); - if n_scopes == 0 { - let mut new_element = sequence[i].clone(); - for relevant_element in sequence.iter().take(j + 1).skip(i + 1) { - new_element = format!("{new_element},{relevant_element}"); - } - - // Update sequence and remove the elements that were merged - sequence[i] = new_element; - sequence.drain((i + 1)..=j); - - if j < sequence.len() - 1 { - traversed_all_elements = false; - start_index = i; - break 'inner; - } else { - break 'outer; - } - } - } - } - } -} diff --git a/pyo3_bindgen_engine/src/utils/error.rs b/pyo3_bindgen_engine/src/utils/error.rs index d803043..f1f445e 100644 --- a/pyo3_bindgen_engine/src/utils/error.rs +++ b/pyo3_bindgen_engine/src/utils/error.rs @@ -5,6 +5,14 @@ pub enum PyBindgenError { IoError(#[from] std::io::Error), #[error(transparent)] PyError(#[from] pyo3::PyErr), + #[error("Failed to downcast Python object")] + PyDowncastError, #[error(transparent)] SynError(#[from] syn::Error), } + +impl From> for PyBindgenError { + fn from(value: pyo3::PyDowncastError) -> Self { + pyo3::PyErr::from(value).into() + } +} diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index 55c6cf3..1ccd712 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ 
b/pyo3_bindgen_engine/tests/bindgen.rs @@ -88,20 +88,12 @@ test_bindgen! { )] pub mod t_mod_test_bindgen_function { /// t_docs - pub fn t_fn<'py>( - py: ::pyo3::marker::Python<'py>, - p_t_arg1: &str, - ) -> ::pyo3::PyResult { + pub fn t_fn<'py>(py: ::pyo3::marker::Python<'py>, p_t_arg1: &str) -> ::pyo3::PyResult { ::pyo3::FromPyObject::extract( - py - .import(::pyo3::intern!(py, "t_mod_test_bindgen_function"))? - .getattr(::pyo3::intern!(py, "t_fn"))? + py.import(::pyo3::intern!(py, "t_mod_test_bindgen_function"))? .call_method1( ::pyo3::intern!(py, "t_fn"), - ::pyo3::types::PyTuple::new( - py, - [::pyo3::ToPyObject::to_object(&p_t_arg1, py)], - ), + ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_t_arg1, py)]), )?, ) } @@ -128,6 +120,12 @@ test_bindgen! { @t_prop.setter def t_prop(self, value: int): ... + + def t_fn_class_param(t_arg1: t_class): + ... + + def t_fn_class_return() -> t_class: + ... "# rs: r#" @@ -160,18 +158,15 @@ test_bindgen! { p_t_arg2: ::std::option::Option, ) -> ::pyo3::PyResult<&'py Self> { ::pyo3::FromPyObject::extract( - py - .import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? + py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? .getattr(::pyo3::intern!(py, "t_class"))? - .call1( - ::pyo3::types::PyTuple::new( - py, - [ - ::pyo3::ToPyObject::to_object(&p_t_arg1, py), - ::pyo3::ToPyObject::to_object(&p_t_arg2, py), - ], - ), - )?, + .call1(::pyo3::types::PyTuple::new( + py, + [ + ::pyo3::ToPyObject::to_object(&p_t_arg1, py), + ::pyo3::ToPyObject::to_object(&p_t_arg2, py), + ], + ))?, ) } /// t_docs_method @@ -181,22 +176,13 @@ test_bindgen! 
{ p_t_arg1: &::std::collections::HashMap<::std::string::String, i64>, p_kwargs: &'py ::pyo3::types::PyDict, ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - ::pyo3::FromPyObject::extract( - self - .0 - .call( - ::pyo3::types::PyTuple::new( - py, - [::pyo3::ToPyObject::to_object(&p_t_arg1, py)], - ), - Some(p_kwargs), - )?, - ) + ::pyo3::FromPyObject::extract(self.0.call_method( + ::pyo3::intern!(py, "t_method"), + ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_t_arg1, py)]), + Some(p_kwargs), + )?) } - pub fn t_prop<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - ) -> ::pyo3::PyResult { + pub fn t_prop<'py>(&'py self, py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { self.0.getattr(::pyo3::intern!(py, "t_prop"))?.extract() } pub fn set_t_prop<'py>( @@ -207,6 +193,26 @@ test_bindgen! { self.0.setattr(::pyo3::intern!(py, "t_prop"), p_value) } } + pub fn t_fn_class_param<'py>( + py: ::pyo3::marker::Python<'py>, + p_t_arg1: &'py t_class, + ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? + .call_method1( + ::pyo3::intern!(py, "t_fn_class_param"), + ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_t_arg1, py)]), + )?, + ) + } + pub fn t_fn_class_return<'py>( + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult<&'py t_class> { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? 
+ .call_method0(::pyo3::intern!(py, "t_fn_class_return"))?, + ) + } } "# } From 4be5967bcccb841e4b6de7f8c7d195587358043b Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Mon, 4 Mar 2024 22:18:56 +0100 Subject: [PATCH 12/13] CI: Format dependabot workflow Signed-off-by: Andrej Orsula --- .github/workflows/dependabot.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml index 00b8f4b..544bfee 100644 --- a/.github/workflows/dependabot.yml +++ b/.github/workflows/dependabot.yml @@ -11,7 +11,7 @@ permissions: jobs: approve: runs-on: ubuntu-latest - if: github.actor == 'dependabot[bot]' && github.event_name == 'pull_request' + if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request' }} steps: - name: Fetch metadata id: metadata @@ -26,14 +26,14 @@ jobs: auto_merge: runs-on: ubuntu-latest - if: github.actor == 'dependabot[bot]' && github.event_name == 'check_run' + if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'check_run' }} steps: - name: Fetch metadata id: metadata uses: dependabot/fetch-metadata@v1 with: github-token: "${{ secrets.GITHUB_TOKEN }}" - - name: Enable auto-merge + - name: Enable PR auto-merge if: steps.metadata.outputs.update-type == 'version-update:semver-patch' env: PR_URL: ${{github.event.pull_request.html_url}} From 4ff60c394b6c55e84727e654b51f5de3bd4a9b72 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Tue, 5 Mar 2024 21:57:19 +0100 Subject: [PATCH 13/13] Improve ergonomics of generated bindings Signed-off-by: Andrej Orsula --- README.md | 16 +- pyo3_bindgen_engine/src/codegen.rs | 41 +++-- pyo3_bindgen_engine/src/config.rs | 10 +- .../src/syntax/common/attribute_variant.rs | 7 +- pyo3_bindgen_engine/src/syntax/common/path.rs | 29 ++++ pyo3_bindgen_engine/src/syntax/function.rs | 47 +++--- pyo3_bindgen_engine/src/syntax/module.rs | 123 ++++++++++++--- pyo3_bindgen_engine/src/syntax/property.rs | 46 +++--- 
pyo3_bindgen_engine/src/typing/from_py.rs | 24 +++ pyo3_bindgen_engine/src/typing/into_rs.rs | 119 +++++++++++--- pyo3_bindgen_engine/src/typing/mod.rs | 4 +- pyo3_bindgen_engine/src/utils/mod.rs | 2 +- pyo3_bindgen_engine/tests/bindgen.rs | 148 ++++++++++-------- pyo3_bindgen_macros/src/lib.rs | 14 +- 14 files changed, 441 insertions(+), 189 deletions(-) diff --git a/README.md b/README.md index 9adb71f..2d0103e 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ pub fn main() -> pyo3::PyResult<()> { This project is intended to simplify the integration or transition of existing Python codebases into Rust. You, as a developer, gain immediate access to the Rust type system and countless other benefits of modern compiled languages with the generated bindings. Furthermore, the entire stock of high-quality crates from [crates.io](https://crates.io) becomes at your disposal. -On its own, the generated Rust code does not provide any performance benefits over using the Python code (it might actually be slower — yet to be benchmarked). However, it can be used as a starting point for further optimization if you decide to rewrite performance-critical parts of your codebase in pure Rust. +On its own, the generated Rust code does not provide any performance benefits over using the Python code. However, it can be used as a starting point for further optimization if you decide to rewrite performance-critical parts of your codebase in pure Rust. ## Overview @@ -107,7 +107,7 @@ fn main() -> Result<(), Box> { // Generate Rust bindings to Python modules Codegen::new(Config::default())? .module_name("py_module")? - .module_name("other_module")? + .module_names(&["other_module.core", "other_module.utils.io"])? 
.build(std::path::Path::new(&std::env::var("OUT_DIR")?).join("bindings.rs"))?; Ok(()) } @@ -132,7 +132,7 @@ Afterwards, run the `pyo3_bindgen` executable while passing the name of the targ ```bash # Pass `--help` to show the usage and available options -pyo3_bindgen -m py_module other_module -o bindings.rs +pyo3_bindgen -m py_module other_module.core -o bindings.rs ``` ### Option 3 \[Experimental\]: Procedural macros @@ -158,11 +158,11 @@ pub use py_module::*; This project is in early development, and as such, the API of the generated bindings is not yet stable. - Not all Python types are mapped to their Rust equivalents yet. For this reason, some additional typecasting might be currently required when using the generated bindings (e.g. `let typed_value: py_module::MyClass = get_value()?.extract()?;`). -- The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. Here are some preliminary results for version `0.3.0` (measured: parsing IO & codegen | not measured: compilation of the generated bindings, which takes much longer): - - `sys`: 1.17 ms (0.56k total LoC) - - `os`: 7.03 ms (3.30k total LoC) - - `numpy`: 819 ms (242k total LoC) - - `torch`: 6.42 s (1.02M total LoC) +- The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. 
Here are some preliminary results for version `0.3.0` with the default configuration (measured: parsing IO & codegen | not measured: compilation of the generated bindings, which takes much longer): + - `sys`: 1.24 ms (0.66k total LoC) + - `os`: 8.38 ms (3.88k total LoC) + - `numpy`: 1.02 s (294k total LoC) + - `torch`: 7.05 s (1.08M total LoC) - The generation of bindings should never panic as long as the target Python module can be successfully imported. If it does, please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. - The generated bindings should always be compilable and usable in Rust. If you encounter any issues, consider manually fixing the problematic parts of the bindings and please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. - However, the generated bindings are based on the introspection of the target Python module. Therefore, the correctness of the generated bindings is directly dependent on the quality of the type annotations and docstrings in the target Python module. Ideally, the generated bindings should be considered unsafe and serve as a starting point for safe and idiomatic Rust APIs. diff --git a/pyo3_bindgen_engine/src/codegen.rs b/pyo3_bindgen_engine/src/codegen.rs index 22231fb..241ff48 100644 --- a/pyo3_bindgen_engine/src/codegen.rs +++ b/pyo3_bindgen_engine/src/codegen.rs @@ -7,12 +7,15 @@ use rustc_hash::FxHashSet as HashSet; /// Engine for automatic generation of Rust FFI bindings to Python modules. /// -/// # Example +/// # Examples /// -/// ```no_run -/// // use pyo3_bindgen::{Codegen, Config}; -/// use pyo3_bindgen_engine::{Codegen, Config}; +/// Here is a simple example of how to use the `Codegen` engine to generate +/// Rust FFI bindings for the full `os` and `sys` Python modules. With the +/// default configuration, all submodules, classes, functions, and parameters +/// will be recursively parsed and included in the generated bindings. 
/// +/// ```no_run +/// # use pyo3_bindgen_engine::{Codegen, Config}; /// fn main() -> Result<(), Box<dyn std::error::Error>> { /// Codegen::new(Config::default())? /// .module_name("os")? @@ -21,6 +24,21 @@ use rustc_hash::FxHashSet as HashSet; /// Ok(()) /// } /// ``` +/// +/// For more focused generation, paths to specific submodules can be provided. +/// In the following example, only the `core` and `utils.io` submodules of the +/// `other_module` module will be included in the generated bindings alongside +/// their respective submodules, classes, functions, and parameters. +/// +/// ```no_run +/// # use pyo3_bindgen_engine::{Codegen, Config}; +/// fn main() -> Result<(), Box<dyn std::error::Error>> { +/// Codegen::new(Config::default())? +/// .module_names(&["other_module.core", "other_module.utils.io"])? +/// .generate()?; +/// Ok(()) +/// } +/// ``` #[derive(Debug, Default, Clone)] pub struct Codegen { cfg: Config, @@ -189,9 +207,15 @@ impl Codegen { ) }) .try_for_each(|module| { - Module::parse(&self.cfg, module).map(|module| { - self.modules.push(module); - }) + crate::io_utils::with_suppressed_python_output( + module.py(), + self.cfg.suppress_python_stdout, + self.cfg.suppress_python_stderr, + || { + self.modules.push(Module::parse(&self.cfg, module)?); + Ok(()) + }, + ) })?; Ok(()) }) @@ -244,7 +268,6 @@ impl Codegen { fn merge_duplicate_submodules_recursive(input: &[Module]) -> Module { Module { - name: input[0].name.clone(), prelude: input .iter() .fold(HashSet::default(), |mut prelude, module| { @@ -309,7 +332,7 @@ impl Codegen { }) .into_iter() .collect(), - docstring: input[0].docstring.clone(), + ..input[0].clone() } } diff --git a/pyo3_bindgen_engine/src/config.rs b/pyo3_bindgen_engine/src/config.rs index 038e43c..3bc7d1f 100644 --- a/pyo3_bindgen_engine/src/config.rs +++ b/pyo3_bindgen_engine/src/config.rs @@ -48,6 +48,9 @@ pub struct Config { /// List of blocklisted attribute names that are skipped during the code generation.
#[builder(default = DEFAULT_BLOCKLIST_ATTRIBUTE_NAMES.iter().map(|&s| s.to_string()).collect())] pub(crate) blocklist_names: Vec<String>, + /// Flag that determines whether private attributes are considered while parsing the Python code. + #[builder(default = false)] + pub(crate) include_private: bool, /// Flag that determines whether to generate code for all dependencies of the target modules. /// The list of dependent modules is derived from the imports of the target modules. @@ -80,9 +83,10 @@ impl Config { if // Skip always forbidden attribute names FORBIDDEN_FUNCTION_NAMES.contains(&attr_name.as_py()) || - // Skip private attributes - attr_name.as_py().starts_with('_') || - attr_module.iter().any(|segment| segment.as_py().starts_with('_')) || + // Skip private attributes if `include_private` is disabled + (!self.include_private && + (attr_name.as_py().starts_with('_') || + attr_module.iter().any(|segment| segment.as_py().starts_with('_')))) || // Skip blocklisted attributes self.blocklist_names.iter().any(|blocklist_match| { attr_name.as_py() == blocklist_match diff --git a/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs index f7f3bdb..70771fa 100644 --- a/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs +++ b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs @@ -50,7 +50,12 @@ impl AttributeVariant { let is_closure = attr_type_module.to_py().as_str() == "functools" && attr_type_name.as_py() == "partial"; let is_type = ["typing", "types"].contains(&attr_type_module.to_py().as_str()); - let is_external = attr_module != owner_name; + + // Some decorators might make a class look external, but they tend to include "<locals>" in their name + let is_in_locals = attr.to_string().contains("<locals>"); + + // Determine if the attribute is imported + let is_external = !is_in_locals && (attr_module != owner_name); let is_imported = is_external && (is_submodule || is_class || is_function || is_method);
Ok(if consider_import && is_imported { diff --git a/pyo3_bindgen_engine/src/syntax/common/path.rs b/pyo3_bindgen_engine/src/syntax/common/path.rs index 2b4f502..175df8a 100644 --- a/pyo3_bindgen_engine/src/syntax/common/path.rs +++ b/pyo3_bindgen_engine/src/syntax/common/path.rs @@ -193,6 +193,35 @@ impl Path { segments: relative_segments, } } + + pub fn import_quote(&self, py: pyo3::marker::Python) -> proc_macro2::TokenStream { + // Find the last package and import it via py.import, then get the rest of the path via getattr() + let mut package_path = self.root().unwrap_or_else(|| unreachable!()); + for i in (1..self.len()).rev() { + let module_name = Self::from(&self[..i]); + if py.import(module_name.to_py().as_str()).is_ok() { + package_path = module_name; + break; + } + } + + // Resolve the remaining path + let remaining_path = self + .strip_prefix(package_path.segments.as_slice()) + .unwrap_or_else(|| unreachable!()); + + // Convert paths to strings + let package_path = package_path.to_py(); + let remaining_path = remaining_path + .iter() + .map(|ident| ident.as_py().to_owned()) + .collect_vec(); + + // Generate the import code + quote::quote! 
{ + py.import(::pyo3::intern!(py, #package_path))?#(.getattr(::pyo3::intern!(py, #remaining_path))?)* + } + } } impl From for Path { diff --git a/pyo3_bindgen_engine/src/syntax/function.rs b/pyo3_bindgen_engine/src/syntax/function.rs index 2ad3257..93ef5d2 100644 --- a/pyo3_bindgen_engine/src/syntax/function.rs +++ b/pyo3_bindgen_engine/src/syntax/function.rs @@ -374,6 +374,17 @@ impl Function { .iter() .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) .collect::>>()?; + // Pre-process parameters that require it + let param_preprocessing: proc_macro2::TokenStream = self + .parameters + .iter() + .zip(param_idents.iter()) + .map(|(param, param_ident)| { + param + .annotation + .preprocess_borrowed(param_ident, local_types) + }) + .collect(); let param_types: Vec = self .parameters .iter() @@ -456,37 +467,16 @@ impl Function { // Function body (function dispatcher) let function_dispatcher = match &self.typ { - FunctionType::Function | FunctionType::Closure => { - let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); - let module_path = if self.name.len() > 2 { - &self.name[1..self.name.len() - 1] - } else { - &[] - } - .iter() - .map(|ident| ident.as_py().to_owned()) - .collect_vec(); - quote::quote! { - py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #module_path))?)* - } - } + FunctionType::Function | FunctionType::Closure => pyo3::Python::with_gil(|py| { + self.name + .parent() + .unwrap_or_else(|| unreachable!()) + .import_quote(py) + }), FunctionType::Method { class_path, typ: MethodType::ClassMethod | MethodType::StaticMethod | MethodType::Constructor, - } => { - let package = class_path.root().unwrap_or_else(|| unreachable!()).to_py(); - let class_path = if class_path.len() > 1 { - &class_path[1..] - } else { - &[] - } - .iter() - .map(|ident| ident.as_py().to_owned()) - .collect_vec(); - quote::quote! 
{ - py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #class_path))?)* - } - } + } => pyo3::Python::with_gil(|py| class_path.import_quote(py)), FunctionType::Method { typ: MethodType::InstanceMethod | MethodType::Callable, .. @@ -650,6 +640,7 @@ impl Function { // Function body output.extend(quote::quote! { { + #param_preprocessing ::pyo3::FromPyObject::extract( #function_dispatcher.#call? ) diff --git a/pyo3_bindgen_engine/src/syntax/module.rs b/pyo3_bindgen_engine/src/syntax/module.rs index 09ea250..038cbb7 100644 --- a/pyo3_bindgen_engine/src/syntax/module.rs +++ b/pyo3_bindgen_engine/src/syntax/module.rs @@ -17,6 +17,7 @@ pub struct Module { pub functions: Vec, pub properties: Vec, pub docstring: Option, + pub is_package: bool, } impl Module { @@ -43,6 +44,7 @@ impl Module { functions: Vec::default(), properties: Vec::default(), docstring, + is_package: true, }) } @@ -59,12 +61,20 @@ impl Module { Vec::new() }; - // Extract the list of all submodules in the module - let mut submodules_to_process = Self::extract_submodules(cfg, module)?; + // Determine if the module is a package that contains submodules + let is_package = module.hasattr(pyo3::intern!(py, "__path__"))?; + + // Extract the list of all submodules for packages + let mut submodules_to_process = if is_package { + Self::extract_submodules(cfg, module)? 
+ } else { + HashSet::default() + }; // Initialize lists for all other members of the module let mut imports = Vec::new(); - let mut classes = Vec::new(); + let mut conflicting_imports = Vec::new(); + let mut classes: Vec = Vec::new(); let mut type_vars = Vec::new(); let mut functions = Vec::new(); let mut properties = Vec::new(); @@ -118,15 +128,13 @@ impl Module { .unwrap_or(attr_name.as_py().to_owned()), )); - // Make sure the import does not create a conflict with a submodule - let does_import_conflict_with_submodule = submodules_to_process.contains(&attr_name); - if does_import_conflict_with_submodule { - eprintln!( - "WARN: Import `{origin}` -> '{attr_name_full}' would conflict with a submodule of equal name. Bindings will not be generated.", - ); + // Skip if the origin is the same as the target + if origin == attr_name_full { + return Ok(()); } + // Make sure the origin attribute is allowed (each segment of the path) - let is_origin_attr_allowed = !does_import_conflict_with_submodule && (0..origin.len()).all(|i| { + let is_origin_attr_allowed = (0..origin.len()).all(|i| { let attr_name = &origin[i]; let attr_module = origin[..i].into(); let attr_type = if i == origin.len() - 1 { @@ -136,9 +144,20 @@ impl Module { }; cfg.is_attr_allowed(attr_name, &attr_module, attr_type) }); + if !is_origin_attr_allowed { + return Ok(()); + } + + // Determine if the import overwrites a submodule + let import_overwrites_submodule = submodules_to_process.contains(&attr_name); - if is_origin_attr_allowed { - let import = Import::new(origin, attr_name_full); + // Generate the import + let import = Import::new(origin, attr_name_full); + + // Add the import to the appropriate list + if import_overwrites_submodule { + conflicting_imports.push(import); + } else { imports.push(import); } } @@ -191,7 +210,71 @@ impl Module { submodules_to_process .into_iter() .filter_map(|submodule_name| { - py.import(name.join(&submodule_name.into()).to_py().as_str()) + let full_submodule_name = 
name.join(&submodule_name.clone().into()); + + // Handle submodules that are overwritten by imports separately + if let Some(conflicting_import) = conflicting_imports + .iter() + .find(|import| import.target == full_submodule_name) + { + if let Ok(submodule) = py + .import(full_submodule_name.to_py().as_str()) + .map_err(crate::PyBindgenError::from) + .and_then(|attr| Ok(attr.downcast::<pyo3::types::PyModule>()?)) + .and_then(|module| Self::parse(cfg, module)) + { + // It could be any attribute, so all of them need to be checked + if let Some(mut import) = submodule + .imports + .into_iter() + .find(|import| import.target == conflicting_import.origin) + { + import.target = conflicting_import.target.clone(); + imports.push(import); + } + if let Some(mut class) = submodule + .classes + .into_iter() + .find(|class| class.name == conflicting_import.origin) + { + class.name = conflicting_import.target.clone(); + classes.push(class); + } + if let Some(mut type_var) = submodule + .type_vars + .into_iter() + .find(|type_var| type_var.name == conflicting_import.origin) + { + type_var.name = conflicting_import.target.clone(); + type_vars.push(type_var); + } + if let Some(mut function) = submodule + .functions + .into_iter() + .find(|function| function.name == conflicting_import.origin) + { + function.name = conflicting_import.target.clone(); + functions.push(function); + } + if let Some(mut property) = submodule + .properties + .into_iter() + .find(|property| property.name == conflicting_import.origin) + { + property.name = conflicting_import.target.clone(); + properties.push(property); + } + } + return None; + } + + // Try to import both as a package and as an attribute of the current module + py.import(full_submodule_name.to_py().as_str()) + .or_else(|_| { + module + .getattr(submodule_name.as_py()) + .and_then(|attr| Ok(attr.downcast::<pyo3::types::PyModule>()?)) + }) .ok() }) .map(|submodule| Self::parse(cfg, submodule)) @@ -220,6 +303,7 @@ impl Module { functions, properties, docstring, + is_package, }) } @@ -383,18
+467,6 @@ impl Module { let py = module.py(); let pkgutil = py.import(pyo3::intern!(py, "pkgutil"))?; - // Determine if the module is a package that contains submodules - let module_name = Path::from_py(module.name()?); - let is_pkg = module - .getattr(pyo3::intern!(py, "__package__")) - .map(|package| Path::from_py(&package.to_string())) - .is_ok_and(|package_name| package_name == module_name); - - // If the module is not a package, return an empty set - if !is_pkg { - return Ok(HashSet::default()); - } - // Extract the paths of the module let module_paths = module .getattr(pyo3::intern!(py, "__path__"))? @@ -404,6 +476,7 @@ impl Module { .collect_vec(); // Extract the names of all submodules via `pkgutil.iter_modules` + let module_name = Path::from_py(module.name()?); pkgutil .call_method1(pyo3::intern!(py, "iter_modules"), (module_paths,))? .iter()? diff --git a/pyo3_bindgen_engine/src/syntax/property.rs b/pyo3_bindgen_engine/src/syntax/property.rs index e709530..c564504 100644 --- a/pyo3_bindgen_engine/src/syntax/property.rs +++ b/pyo3_bindgen_engine/src/syntax/property.rs @@ -1,6 +1,5 @@ use super::{Ident, Path}; use crate::{typing::Type, Config, Result}; -use itertools::Itertools; use rustc_hash::FxHashMap as HashMap; #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -216,24 +215,23 @@ impl Property { } } }; + let param_name = self.name.name().as_py(); let param_type = self.annotation.clone().into_rs_owned(local_types); match &self.owner { PropertyOwner::Module => { - let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); - let module_path = if self.name.len() > 1 { - &self.name[1..] - } else { - &[] - } - .iter() - .map(|ident| ident.as_py().to_owned()) - .collect_vec(); - + let import = pyo3::Python::with_gil(|py| { + self.name + .parent() + .unwrap_or_else(|| unreachable!()) + .import_quote(py) + }); output.extend(quote::quote! 
{ pub fn #function_ident<'py>( py: ::pyo3::marker::Python<'py>, ) -> ::pyo3::PyResult<#param_type> { - py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #module_path))?)*.extract() + ::pyo3::FromPyObject::extract( + #import.getattr(::pyo3::intern!(py, #param_name))? + ) } }); } @@ -288,25 +286,26 @@ impl Property { } }; let param_name = self.name.name().as_py(); + let param_preprocessing = self.annotation.preprocess_borrowed( + &syn::Ident::new("p_value", proc_macro2::Span::call_site()), + local_types, + ); let param_type = self.annotation.clone().into_rs_borrowed(local_types); match &self.owner { PropertyOwner::Module => { - let package = self.name.root().unwrap_or_else(|| unreachable!()).to_py(); - let module_path = if self.name.len() > 1 { - &self.name[1..self.name.len() - 1] - } else { - &[] - } - .iter() - .map(|ident| ident.as_py().to_owned()) - .collect_vec(); - + let import = pyo3::Python::with_gil(|py| { + self.name + .parent() + .unwrap_or_else(|| unreachable!()) + .import_quote(py) + }); output.extend(quote::quote! 
{ pub fn #function_ident<'py>( py: ::pyo3::marker::Python<'py>, p_value: #param_type, ) -> ::pyo3::PyResult<()> { - py.import(::pyo3::intern!(py, #package))?#(.getattr(::pyo3::intern!(py, #module_path))?)*.setattr(::pyo3::intern!(py, #param_name), p_value) + #param_preprocessing + #import.setattr(::pyo3::intern!(py, #param_name), p_value) } }); } @@ -317,6 +316,7 @@ impl Property { py: ::pyo3::marker::Python<'py>, p_value: #param_type, ) -> ::pyo3::PyResult<()> { + #param_preprocessing self.0.setattr(::pyo3::intern!(py, #param_name), p_value) } }); diff --git a/pyo3_bindgen_engine/src/typing/from_py.rs b/pyo3_bindgen_engine/src/typing/from_py.rs index 3d224a3..357ae4a 100644 --- a/pyo3_bindgen_engine/src/typing/from_py.rs +++ b/pyo3_bindgen_engine/src/typing/from_py.rs @@ -297,6 +297,26 @@ impl std::str::FromStr for Type { Self::PyList(Box::new(inner_type)) } "Sequence" | "Iterable" | "Iterator" => Self::PyList(Box::new(Self::Unknown)), + iterable if iterable.starts_with("Iterable[") && iterable.ends_with(']') => { + let inner_type = Self::from_str( + iterable + .strip_prefix("Iterable[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + iterator if iterator.starts_with("Iterator[") && iterator.ends_with(']') => { + let inner_type = Self::from_str( + iterator + .strip_prefix("Iterator[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } set if set.starts_with("set[") && set.ends_with(']') => { let inner_type = Self::from_str( set.strip_prefix("set[") @@ -322,6 +342,7 @@ impl std::str::FromStr for Type { .collect::>()?; Self::PyTuple(inner_types) } + "tuple" => Self::PyTuple(vec![Self::Unknown]), // Additional types - std "ipaddress.IPv4Address" => Self::IpV4Addr, @@ -436,6 +457,9 @@ impl std::str::FromStr for Type { .unwrap_or_else(|| unreachable!()), )?, + // Forbidden types + 
forbidden if crate::config::FORBIDDEN_TYPE_NAMES.contains(&forbidden) => Self::PyAny, + // Other types, that might be known (custom types of modules) other => Self::Other(other.to_owned()), }) diff --git a/pyo3_bindgen_engine/src/typing/into_rs.rs b/pyo3_bindgen_engine/src/typing/into_rs.rs index 83f8eff..93d0e35 100644 --- a/pyo3_bindgen_engine/src/typing/into_rs.rs +++ b/pyo3_bindgen_engine/src/typing/into_rs.rs @@ -16,12 +16,71 @@ impl Type { Rc::into_inner(borrowed).unwrap_or_else(|| unreachable!()) } - fn into_rs(self, local_types: &HashMap) -> OutputType { + pub fn preprocess_borrowed( + &self, + ident: &syn::Ident, + local_types: &HashMap, + ) -> proc_macro2::TokenStream { match self { - Self::PyAny | Self::Unknown => { - OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + Self::PyDict { + key_type, + value_type, + } if !key_type.is_hashable() + || value_type + .clone() + .into_rs(local_types) + .owned + .to_string() + .contains("PyAny") => + { + quote! { + let #ident = ::pyo3::types::IntoPyDict::into_py_dict(#ident, py); + } + } + Self::PyTuple(inner_types) if inner_types.len() < 2 => { + quote! { + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyTuple>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + Self::PyAny + | Self::Unknown + | Self::Union(..) + | Self::PyNone + | Self::PyDelta + | Self::PyEllipsis => { + quote! { + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyAny>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] + Self::PyFunction { .. } => { + quote! { + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyAny>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } } - Self::Other(..) => self.map_local_type(local_types), + Self::Other(type_name) + if Self::try_map_external_type(type_name).is_none() + && !local_types.contains_key(&Path::from_py(type_name)) => + { + quote! 
{ + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyAny>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + _ => proc_macro2::TokenStream::new(), + } + } + + fn into_rs(self, local_types: &HashMap) -> OutputType { + match self { + Self::PyAny | Self::Unknown => OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ), + Self::Other(..) => self.map_type(local_types), // Primitives Self::PyBool => OutputType::new_identical(quote!(bool)), @@ -37,11 +96,17 @@ impl Type { } Self::Union(_inner_types) => { // TODO: Support Rust enums where possible | alternatively, overload functions for each variant - OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) } Self::PyNone => { // TODO: Determine if PyNone is even possible - OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) } // Collections @@ -49,15 +114,18 @@ impl Type { key_type, value_type, } => { - if key_type.is_hashable() { + let value_type = value_type.into_rs(local_types).owned; + if key_type.is_hashable() && !value_type.to_string().contains("PyAny") { let key_type = key_type.into_rs(local_types).owned; - let value_type = value_type.into_rs(local_types).owned; OutputType::new( quote!(::std::collections::HashMap<#key_type, #value_type>), quote!(&::std::collections::HashMap<#key_type, #value_type>), ) } else { - OutputType::new_identical(quote!(&'py ::pyo3::types::PyDict)) + OutputType::new( + quote!(&'py ::pyo3::types::PyDict), + quote!(impl ::pyo3::types::IntoPyDict), + ) } } Self::PyFrozenSet(inner_type) => { @@ -88,7 +156,10 @@ impl Type { } Self::PyTuple(inner_types) => { if inner_types.len() < 2 { - OutputType::new_identical(quote!(&'py 
::pyo3::types::PyTuple)) + OutputType::new( + quote!(&'py ::pyo3::types::PyTuple), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>), + ) } else if inner_types.len() == 2 && *inner_types.last().unwrap_or_else(|| unreachable!()) == Self::PyEllipsis { @@ -121,7 +192,10 @@ impl Type { Self::PyDelta => { // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet // OutputType::new_identical(quote!(::std::time::Duration)) - OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) } #[cfg(not(Py_LIMITED_API))] Self::PyTime => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTime)), @@ -135,7 +209,10 @@ impl Type { Self::PyCode => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCode)), Self::PyEllipsis => { // TODO: Determine if PyEllipsis is even possible - OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) } #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] Self::PyFrame => OutputType::new_identical(quote!(&'py ::pyo3::types::PyFrame)), @@ -144,7 +221,10 @@ impl Type { OutputType::new_identical(quote!(&'py ::pyo3::types::PyFunction)) } #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] - Self::PyFunction { .. } => OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)), + Self::PyFunction { .. 
} => OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ), Self::PyModule => OutputType::new_identical(quote!(&'py ::pyo3::types::PyModule)), #[cfg(not(PyPy))] Self::PySuper => OutputType::new_identical(quote!(&'py ::pyo3::types::PySuper)), @@ -153,17 +233,12 @@ impl Type { } } - fn map_local_type(self, local_types: &HashMap) -> OutputType { + fn map_type(self, local_types: &HashMap) -> OutputType { // Get the inner name of the type let Self::Other(type_name) = self else { unreachable!() }; - // Ignore forbidden types - if crate::config::FORBIDDEN_TYPE_NAMES.contains(&type_name.as_str()) { - return OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)); - } - // Try to map the external types if let Some(external_type) = Self::try_map_external_type(&type_name) { return external_type; @@ -175,7 +250,11 @@ impl Type { return OutputType::new_identical(quote!(&'py #relative_path)); } - OutputType::new_identical(quote!(&'py ::pyo3::types::PyAny)) + // Unhandled types + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) } fn try_map_external_type(_type_name: &str) -> Option { diff --git a/pyo3_bindgen_engine/src/typing/mod.rs b/pyo3_bindgen_engine/src/typing/mod.rs index 5922240..c5ec710 100644 --- a/pyo3_bindgen_engine/src/typing/mod.rs +++ b/pyo3_bindgen_engine/src/typing/mod.rs @@ -1,5 +1,5 @@ -mod from_py; -mod into_rs; +pub(crate) mod from_py; +pub(crate) mod into_rs; /// Enum that maps Python types to Rust types. #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/pyo3_bindgen_engine/src/utils/mod.rs b/pyo3_bindgen_engine/src/utils/mod.rs index b6398da..09821bb 100644 --- a/pyo3_bindgen_engine/src/utils/mod.rs +++ b/pyo3_bindgen_engine/src/utils/mod.rs @@ -1,5 +1,5 @@ //! Various utilities. 
pub mod error; -pub mod io; +pub(crate) mod io; pub mod result; diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index 1ccd712..d65e682 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ b/pyo3_bindgen_engine/tests/bindgen.rs @@ -14,7 +14,7 @@ macro_rules! test_bindgen { // Act let bindings = pyo3_bindgen_engine::Codegen::default() - .module_from_str(CODE_PY, concat!("t_mod_", stringify!($test_name))) + .module_from_str(CODE_PY, concat!("mod_", stringify!($test_name))) .unwrap() .generate() .unwrap(); @@ -34,10 +34,10 @@ macro_rules! test_bindgen { } test_bindgen! { - test_bindgen_attribute + bindgen_property py: r#" - t_const_float: float = 0.42 + my_property: float = 0.42 "# rs: r#" @@ -50,29 +50,30 @@ test_bindgen! { non_upper_case_globals, unused )] - pub mod t_mod_test_bindgen_attribute { - pub fn t_const_float<'py>(py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_attribute"))? - .getattr(::pyo3::intern!(py, "t_const_float"))? - .extract() + pub mod mod_bindgen_property { + pub fn my_property<'py>(py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "mod_bindgen_property"))? + .getattr(::pyo3::intern!(py, "my_property"))?, + ) } - pub fn set_t_const_float<'py>( + pub fn set_my_property<'py>( py: ::pyo3::marker::Python<'py>, p_value: f64, ) -> ::pyo3::PyResult<()> { - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_attribute"))? - .setattr(::pyo3::intern!(py, "t_const_float"), p_value) + py.import(::pyo3::intern!(py, "mod_bindgen_property"))? + .setattr(::pyo3::intern!(py, "my_property"), p_value) } } "# } test_bindgen! { - test_bindgen_function + bindgen_function py: r#" - def t_fn(t_arg1: str) -> int: - """t_docs""" + def my_function(my_arg1: str) -> int: + """My docstring for `my_function`""" ... "# @@ -86,14 +87,20 @@ test_bindgen! 
{ non_upper_case_globals, unused )] - pub mod t_mod_test_bindgen_function { - /// t_docs - pub fn t_fn<'py>(py: ::pyo3::marker::Python<'py>, p_t_arg1: &str) -> ::pyo3::PyResult { + pub mod mod_bindgen_function { + /// My docstring for `my_function` + pub fn my_function<'py>( + py: ::pyo3::marker::Python<'py>, + p_my_arg1: &str, + ) -> ::pyo3::PyResult { ::pyo3::FromPyObject::extract( - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_function"))? + py.import(::pyo3::intern!(py, "mod_bindgen_function"))? .call_method1( - ::pyo3::intern!(py, "t_fn"), - ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_t_arg1, py)]), + ::pyo3::intern!(py, "my_function"), + ::pyo3::types::PyTuple::new( + py, + [::pyo3::ToPyObject::to_object(&p_my_arg1, py)], + ), )?, ) } @@ -102,29 +109,29 @@ test_bindgen! { } test_bindgen! { - test_bindgen_class + bindgen_class py: r#" from typing import Dict, Optional - class t_class: - """t_docs""" - def __init__(self, t_arg1: str, t_arg2: Optional[int] = None): - """t_docs_init""" + class MyClass: + """My docstring for `MyClass`""" + def __init__(self, my_arg1: str, my_arg2: Optional[int] = None): + """My docstring for __init__""" ... - def t_method(self, t_arg1: Dict[str, int], **kwargs): - """t_docs_method""" + def my_method(self, my_arg1: Dict[str, int], **kwargs): + """My docstring for `my_method`""" ... @property - def t_prop(self) -> int: + def my_property(self) -> int: ... - @t_prop.setter - def t_prop(self, value: int): + @my_property.setter + def my_property(self, value: int): ... - def t_fn_class_param(t_arg1: t_class): + def my_function_with_class_param(my_arg1: MyClass): ... - def t_fn_class_return() -> t_class: + def my_function_with_class_return() -> MyClass: ... "# @@ -138,79 +145,88 @@ test_bindgen! 
{ non_upper_case_globals, unused )] - pub mod t_mod_test_bindgen_class { - /// t_docs + pub mod mod_bindgen_class { + /// My docstring for `MyClass` #[repr(transparent)] - pub struct t_class(::pyo3::PyAny); - ::pyo3::pyobject_native_type_named!(t_class); + pub struct MyClass(::pyo3::PyAny); + ::pyo3::pyobject_native_type_named!(MyClass); ::pyo3::pyobject_native_type_info!( - t_class, + MyClass, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), - ::std::option::Option::Some("t_mod_test_bindgen_class.t_class") + ::std::option::Option::Some("mod_bindgen_class.MyClass") ); - ::pyo3::pyobject_native_type_extract!(t_class); + ::pyo3::pyobject_native_type_extract!(MyClass); #[automatically_derived] - impl t_class { - /// t_docs_init + impl MyClass { + /// My docstring for __init__ pub fn new<'py>( py: ::pyo3::marker::Python<'py>, - p_t_arg1: &str, - p_t_arg2: ::std::option::Option, + p_my_arg1: &str, + p_my_arg2: ::std::option::Option, ) -> ::pyo3::PyResult<&'py Self> { ::pyo3::FromPyObject::extract( - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? - .getattr(::pyo3::intern!(py, "t_class"))? + py.import(::pyo3::intern!(py, "mod_bindgen_class"))? + .getattr(::pyo3::intern!(py, "MyClass"))? 
.call1(::pyo3::types::PyTuple::new( py, [ - ::pyo3::ToPyObject::to_object(&p_t_arg1, py), - ::pyo3::ToPyObject::to_object(&p_t_arg2, py), + ::pyo3::ToPyObject::to_object(&p_my_arg1, py), + ::pyo3::ToPyObject::to_object(&p_my_arg2, py), ], ))?, ) } - /// t_docs_method - pub fn t_method<'py>( + /// My docstring for `my_method` + pub fn my_method<'py>( &'py self, py: ::pyo3::marker::Python<'py>, - p_t_arg1: &::std::collections::HashMap<::std::string::String, i64>, - p_kwargs: &'py ::pyo3::types::PyDict, + p_my_arg1: &::std::collections::HashMap<::std::string::String, i64>, + p_kwargs: impl ::pyo3::types::IntoPyDict, ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { + let p_kwargs = ::pyo3::types::IntoPyDict::into_py_dict(p_kwargs, py); ::pyo3::FromPyObject::extract(self.0.call_method( - ::pyo3::intern!(py, "t_method"), - ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_t_arg1, py)]), + ::pyo3::intern!(py, "my_method"), + ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_my_arg1, py)]), Some(p_kwargs), )?) } - pub fn t_prop<'py>(&'py self, py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { - self.0.getattr(::pyo3::intern!(py, "t_prop"))?.extract() + pub fn my_property<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult { + self.0 + .getattr(::pyo3::intern!(py, "my_property"))? + .extract() } - pub fn set_t_prop<'py>( + pub fn set_my_property<'py>( &'py self, py: ::pyo3::marker::Python<'py>, p_value: i64, ) -> ::pyo3::PyResult<()> { - self.0.setattr(::pyo3::intern!(py, "t_prop"), p_value) + self.0.setattr(::pyo3::intern!(py, "my_property"), p_value) } } - pub fn t_fn_class_param<'py>( + pub fn my_function_with_class_param<'py>( py: ::pyo3::marker::Python<'py>, - p_t_arg1: &'py t_class, + p_my_arg1: &'py MyClass, ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { ::pyo3::FromPyObject::extract( - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? + py.import(::pyo3::intern!(py, "mod_bindgen_class"))? 
.call_method1( - ::pyo3::intern!(py, "t_fn_class_param"), - ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_t_arg1, py)]), + ::pyo3::intern!(py, "my_function_with_class_param"), + ::pyo3::types::PyTuple::new( + py, + [::pyo3::ToPyObject::to_object(&p_my_arg1, py)], + ), )?, ) } - pub fn t_fn_class_return<'py>( + pub fn my_function_with_class_return<'py>( py: ::pyo3::marker::Python<'py>, - ) -> ::pyo3::PyResult<&'py t_class> { + ) -> ::pyo3::PyResult<&'py MyClass> { ::pyo3::FromPyObject::extract( - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_class"))? - .call_method0(::pyo3::intern!(py, "t_fn_class_return"))?, + py.import(::pyo3::intern!(py, "mod_bindgen_class"))? + .call_method0(::pyo3::intern!(py, "my_function_with_class_return"))?, ) } } diff --git a/pyo3_bindgen_macros/src/lib.rs b/pyo3_bindgen_macros/src/lib.rs index bd589db..fe6954b 100644 --- a/pyo3_bindgen_macros/src/lib.rs +++ b/pyo3_bindgen_macros/src/lib.rs @@ -8,17 +8,25 @@ mod parser; /// /// Panics if the bindings cannot be generated. /// -/// # Example +/// # Examples +/// +/// Here is a simple example of how to use the macro to generate bindings for the `sys` module. /// /// ```ignore /// import_python!("sys"); /// pub use sys::*; +/// ``` /// -/// // The top-level package is always included in the generated bindings for consistency +/// For consistency, the top-level package is always included in the generated bindings. +/// +/// ```ignore /// import_python!("mod.submod.subsubmod"); /// pub use mod::submod::subsubmod::*; +/// ``` +/// +/// Furthermore, the actual name of the package is always used regardless of how it is aliased. /// -/// // The actual name of the package is always used, regardless of how it is aliased +/// ```ignore /// import_python!("os.path"); /// pub use posixpath::*; /// ```