diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst index 816d4f6b9..14360efad 100644 --- a/docs/algorithms/mapper.rst +++ b/docs/algorithms/mapper.rst @@ -1,3 +1,87 @@ +Extended technology mapping +--------------------------- + +**Header:** ``mockturtle/algorithms/emap.hpp`` + +The command `emap` stands for extended mapper. It supports large +library cells, of more than 6 inputs, and can perform matching using 3 +different methods: Boolean, pattern, or hybrid. The current version +can map to 2-output gates, such as full adders and half adders, +and provides a 2x speedup in mapping time compared to command `map` +for similar or better quality. Similarly to `map`, the implementation +is independent of the underlying graph representation. +Additionally, `emap` supports "don't touch" white boxes (gates). + +Command `emap` can return the mapped network in two formats. +Command `emap` returns a `cell_view` that supports +multi-output cells. Command `emap_klut` returns a `binding_view` +similarly to command `map`. + +The following example shows how to perform delay-oriented technology mapping +from an and-inverter graph using large cells up to 9 inputs: + +.. code-block:: c++ + + aig_network aig = ...; + + /* read cell library in genlib format */ + std::vector gates; + std::ifstream in( ... ); + lorina::read_genlib( in, genlib_reader( gates ) ); + tech_library<9> tech_lib( gates ); + + /* perform technology mapping */ + cell_view res = emap<9>( aig, tech_lib ); + +The next example performs area-oriented graph mapping using multi-output cells: + +.. code-block:: c++ + + aig_network aig = ...; + + /* read cell library in genlib format */ + std::vector gates; + std::ifstream in( ... 
); + lorina::read_genlib( in, genlib_reader( gates ) ); + tech_library tech_lib( gates ); + + /* perform technology mapping */ + emap_params ps; + ps.area_oriented_mapping = true; + ps.map_multioutput = true; + cell_view res = emap( aig, tech_lib, ps ); + +In this case, `emap` is used to return a `block_network`, which can represent multi-output +cells as single nodes. Alternatively, `emap_klut` can also be used, but multi-output cells +would be represented by single-output nodes. + +The maximum number of cuts stored for each node is limited to 20. +To increase this limit, change `max_cut_num` in `emap`. + +You can set the input arrival times and output required times using the parameters `arrival_times` +and `required_times`. Moreover, it is possible to ask for a required time relaxation. For instance, +if we want to map a network with an increase of 10% over its minimal delay, we can set +`relax_required` to 10. + +For further details and usage scenarios of `emap`, such as white boxes, please check the +related tests. + +**Parameters and statistics** + +.. doxygenstruct:: mockturtle::emap_params + :members: + +.. doxygenstruct:: mockturtle::emap_stats + :members: + +**Algorithm** + +.. doxygenfunction:: mockturtle::emap(Ntk const&, tech_library const&, emap_params const&, emap_stats*) +.. doxygenfunction:: mockturtle::emap_klut(Ntk const&, tech_library const&, emap_params const&, emap_stats*) +.. doxygenfunction:: mockturtle::emap_node_map(Ntk const&, tech_library const&, emap_params const&, emap_stats*) +.. doxygenfunction:: mockturtle::emap_load_mapping(Ntk&) + + Technology mapping and network conversion ----------------------------------------- @@ -136,84 +220,4 @@ To increase this limit, change `max_cut_num` in `fast_network_cuts`. **Algorithm** .. doxygenfunction:: mockturtle::map(Ntk const&, tech_library const&, map_params const&, map_stats*) -.. 
doxygenfunction:: mockturtle::map(Ntk&, exact_library const&, map_params const&, map_stats*) - - - -Extended technology mapping ---------------------------- - -**Header:** ``mockturtle/algorithms/emap.hpp`` - -The command `emap` stands for extended mapper. It supports large -library cells, of more than 6 inputs, and can perform matching using 3 -different methods: Boolean, pattern, or hybrid. The current version -can map to 2-output gates, such as full adders and half adders, -and provides a 2x speedup in mapping time compared to command `map` -for similar or better quality. Similarly, to `map`, the implementation -is independent of the underlying graph representation. -Additionally, `emap` supports "don't touch" white boxes (gates). - -Command `emap` can return the mapped network in two formats. -Command `emap` returns a `cell_view` that supports -multi-output cells. Command `emap_klut` returns a `binding_view` -similarly as command `map`. - -The following example shows how to perform delay-oriented technology mapping -from an and-inverter graph using large cells up to 9 inputs: - -.. code-block:: c++ - - aig_network aig = ...; - - /* read cell library in genlib format */ - std::vector gates; - std::ifstream in( ... ); - lorina::read_genlib( in, genlib_reader( gates ) ) - tech_library<9> tech_lib( gates ); - - /* perform technology mapping */ - cell_view res = emap<9>( aig, tech_lib ); - -The next example performs area-oriented graph mapping using multi-output cells: - -.. code-block:: c++ - - aig_network aig = ...; - - /* read cell library in genlib format */ - std::vector gates; - std::ifstream in( ... 
); - lorina::read_genlib( in, genlib_reader( gates ) ) - tech_library tech_lib( gates ); - - /* perform technology mapping */ - emap_params ps; - ps.area_oriented_mapping = true; - ps.map_multioutput = true; - cell_view res = emap( aig, tech_lib, ps ); - -In this case, `emap` is used to return a `block_network`, which can respresent multi-output -cells as single nodes. Alternatively, also `emap_klut` can be used but multi-output cells -would be reporesented by single-output nodes. - -The maximum number of cuts stored for each node is limited to 32. -To increase this limit, change `max_cut_num` in `emap`. - -For further details and usage scenarios of `emap`, such as white boxes, please check the -related tests. - -**Parameters and statistics** - -.. doxygenstruct:: mockturtle::emap_params - :members: - -.. doxygenstruct:: mockturtle::emap_stats - :members: - -**Algorithm** - -.. doxygenfunction:: mockturtle::emap(Ntk const&, tech_library const&, emap_params const&, emap_stats*) -.. doxygenfunction:: mockturtle::emap_klut(Ntk const&, tech_library const&, emap_params const&, emap_stats*) -.. doxygenfunction:: mockturtle::emap_node_map(Ntk const&, tech_library const&, emap_params const&, emap_stats*) -.. doxygenfunction:: mockturtle::emap_load_mapping(Ntk&) \ No newline at end of file +.. 
doxygenfunction:: mockturtle::map(Ntk&, exact_library const&, map_params const&, map_stats*) \ No newline at end of file diff --git a/experiments/emap.cpp b/experiments/emap.cpp index 92fed626a..55ced8457 100644 --- a/experiments/emap.cpp +++ b/experiments/emap.cpp @@ -65,6 +65,7 @@ int main() } tech_library_params tps; + tps.ignore_symmetries = false; // set to true to drastically speed-up mapping with minor delay increase tps.verbose = true; tech_library<9> tech_lib( gates, tps ); @@ -91,6 +92,7 @@ int main() ps.matching_mode = emap_params::hybrid; ps.area_oriented_mapping = false; ps.map_multioutput = true; + ps.relax_required = 0; emap_stats st; cell_view res = emap<9>( aig, tech_lib, ps, &st ); diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index aaf9b7285..2ca21c5b6 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -107,14 +107,20 @@ struct emap_params hybrid } matching_mode = hybrid; - /*! \brief Required time for delay optimization. */ + /*! \brief Target required time (for each PO). */ double required_time{ 0.0f }; - /*! \brief Required time relaxation ratio. */ + /*! \brief Required time relaxation in percentage (10 = 10%). */ double relax_required{ 0.0f }; + /*! \brief Custom input arrival times. */ + std::vector arrival_times{}; + + /*! \brief Custom output required times. */ + std::vector required_times{}; + /*! \brief Number of rounds for area flow optimization. */ - uint32_t area_flow_rounds{ 2u }; + uint32_t area_flow_rounds{ 3u }; /*! \brief Number of rounds for exact area optimization. */ uint32_t ela_rounds{ 2u }; @@ -125,8 +131,8 @@ struct emap_params /*! \brief Number of patterns for switching activity computation. */ uint32_t switching_activity_patterns{ 2048u }; - /*! \brief Fast area recovery */ - bool use_fast_area_recovery{ true }; + /*! \brief Compute area-oriented alternative matches */ + bool use_match_alternatives{ true }; /*! 
\brief Remove the cuts that are contained in others */ bool remove_dominated_cuts{ false }; @@ -134,9 +140,6 @@ struct emap_params /*! \brief Remove overlapping multi-output cuts */ bool remove_overlapping_multicuts{ false }; - /*! \brief Doesn't allow node duplication */ - bool allow_node_duplication{ true }; - /*! \brief Be verbose. */ bool verbose{ false }; }; @@ -671,15 +674,29 @@ struct emap_triple_hash }; #pragma endregion +template +struct best_gate_emap +{ + supergate const* gate; + double arrival; + float area; + float flow; + unsigned phase : 16; + unsigned cut : 12; + unsigned size : 4; +}; + template struct node_match_emap { /* best gate match for positive and negative output phases */ - supergate const* best_supergate[2]; + supergate const* best_gate[2]; + /* alternative best gate for positibe and negative output phase */ + best_gate_emap best_alternative[2]; /* fanin pin phases for both output phases */ uint16_t phase[2]; /* best cut index for both phases */ - uint32_t best_cut[2]; + uint16_t best_cut[2]; /* node is mapped using only one phase */ bool same_match; /* node is mapped to a multi-output gate */ @@ -700,20 +717,32 @@ struct node_match_emap float flows[2]; }; -union multi_match_data +template +class emap_impl { - uint64_t data{ 0 }; - struct +private: + union multi_match_data { - uint64_t in_tfi : 1; - uint64_t cut_index : 31; - uint64_t node_index : 32; + uint64_t data{ 0 }; + struct + { + uint64_t in_tfi : 1; + uint64_t cut_index : 31; + uint64_t node_index : 32; + }; + }; + union multioutput_info + { + uint32_t data; + struct + { + unsigned index : 29; + unsigned lowest_index : 1; + unsigned highest_index : 1; + unsigned has_info : 1; + }; }; -}; -template -class emap_impl -{ public: static constexpr float epsilon = 0.0005; static constexpr uint32_t max_cut_num = 20; @@ -750,10 +779,11 @@ class emap_impl ps( ps ), st( st ), node_match( ntk.size() ), - node_tuple_match( ntk.size(), UINT32_MAX ), + node_tuple_match( ntk.size() ), 
switch_activity( ps.eswp_rounds ? switching_activity( ntk, ps.switching_activity_patterns ) : std::vector( 0 ) ), cuts( ntk.size() ) { + std::memset( node_tuple_match.data(), 0, sizeof( multioutput_info ) * ntk.size() ); std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info(); tmp_visited.reserve( 100 ); @@ -765,10 +795,11 @@ class emap_impl ps( ps ), st( st ), node_match( ntk.size() ), - node_tuple_match( ntk.size(), UINT32_MAX ), + node_tuple_match( ntk.size() ), switch_activity( switch_activity ), cuts( ntk.size() ) { + std::memset( node_tuple_match.data(), 0, sizeof( multioutput_info ) * ntk.size() ); std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info(); tmp_visited.reserve( 100 ); @@ -789,6 +820,10 @@ class emap_impl /* compute and save topological order */ init_topo_order(); + /* init arrival time */ + if ( !init_arrivals() ) + return res; + /* search for large matches */ if ( ps.matching_mode == emap_params::structural || CutSize > 6 ) { @@ -843,6 +878,10 @@ class emap_impl /* compute and save topological order */ init_topo_order(); + /* init arrival time */ + if ( !init_arrivals() ) + return res; + /* search for large matches */ if ( ps.matching_mode == emap_params::structural || CutSize > 6 ) { @@ -888,11 +927,15 @@ class emap_impl auto [res, old2new] = initialize_map_network(); - /* TODO: multi-output support is currently not implemented */ + /* [i] multi-output support is currently not implemented */ /* compute and save topological order */ init_topo_order(); + /* init arrival time */ + if ( !init_arrivals() ) + return res; + /* compute cuts, matches, and initial mapping */ if ( !ps.area_oriented_mapping ) { @@ -930,7 +973,6 @@ class emap_impl uint32_t i = 0; while ( i++ < ps.area_flow_rounds ) { - compute_required_time(); if ( 
!compute_mapping() ) { return false; @@ -939,54 +981,22 @@ class emap_impl /* compute mapping using exact area */ i = 0; - if ( ps.use_fast_area_recovery ) + compute_required_time( true ); + while ( i++ < ps.ela_rounds ) { - compute_required_time( true ); - reindex_multioutput_data(); - while ( i++ < ps.ela_rounds ) + if ( !compute_mapping_exact_reversed() ) { - if ( !compute_mapping_exact_reversed( i == ps.ela_rounds ) ) - { - return false; - } - } - - /* compute mapping using exact switching activity estimation */ - i = 0; - while ( i++ < ps.eswp_rounds ) - { - if ( !compute_mapping_exact_reversed( true ) ) - { - return false; - } + return false; } } - else - { - while ( i++ < ps.ela_rounds ) - { - compute_required_time(); - if ( !compute_mapping_exact( i == ps.ela_rounds ) ) - { - return false; - } - } - /* compute mapping using exact switching activity estimation */ - i = 0; - while ( i++ < ps.eswp_rounds ) - { - compute_required_time(); - if ( !compute_mapping_exact( true ) ) - { - return false; - } - } - - /* cleaning not fully utilized multi-output gates */ - if ( ps.map_multioutput ) + /* compute mapping using exact switching activity estimation */ + i = 0; + while ( i++ < ps.eswp_rounds ) + { + if ( !compute_mapping_exact_reversed() ) { - remove_unused_multioutput(); + return false; } } @@ -1008,6 +1018,12 @@ class emap_impl continue; } + /* load multi-output cuts and data */ + if ( ps.map_multioutput && node_tuple_match[index].has_info ) + { + match_multi_add_cuts( n ); + } + /* match positive phase */ match_phase( n, 0u ); @@ -1015,26 +1031,24 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); - /* load and try a multi-output matches */ - if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) - { - /* continue if matches do not fit in the cut data structure due to bad settings */ - if ( !match_multi_add_cuts( n ) ) - continue; + /* select alternative matches to use */ + 
select_alternatives( n ); - if constexpr ( DO_AREA ) + /* try multi-output matches */ + if constexpr ( DO_AREA ) + { + if ( ps.map_multioutput && node_tuple_match[index].highest_index ) { - bool multi_success = match_multioutput( n ); - if ( multi_success ) + if ( match_multioutput( n ) ) multi_node_update( n ); } } } double area_old = area; - bool success = set_mapping_refs(); + bool success = set_mapping_refs_and_req(); if ( warning_box ) { @@ -1078,7 +1092,9 @@ class emap_impl { /* all terminals have flow 0.0 */ node_data.flows[0] = node_data.flows[1] = 0.0f; + node_data.best_alternative[0].flow = node_data.best_alternative[1].flow = 0.0f; node_data.arrival[0] = node_data.arrival[1] = 0.0f; + node_data.best_alternative[0].arrival = node_data.best_alternative[1].arrival = 0.0f; /* skip if cuts have been computed before */ if ( cuts[index].size() == 0 ) { @@ -1090,10 +1106,10 @@ class emap_impl else if ( ntk.is_pi( n ) ) { node_data.flows[0] = 0.0f; - node_data.arrival[0] = 0.0f; + node_data.best_alternative[0].flow = 0.0f; /* PIs have the negative phase implemented with an inverter */ node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; - node_data.arrival[1] = lib_inv_delay; + node_data.best_alternative[1].flow = lib_inv_area / node_data.est_refs[1]; /* skip if cuts have been computed before */ if ( cuts[index].size() == 0 ) { @@ -1110,7 +1126,6 @@ class emap_impl { if ( ntk.is_dont_touch( n ) ) { - warning_box |= initialize_box( n ); return false; } @@ -1197,7 +1212,7 @@ class emap_impl compute_truth_table( index, vcuts, fanin, new_cut ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); if ( ps.remove_dominated_cuts ) rcuts.insert( new_cut, false, sort ); @@ -1284,7 +1299,7 @@ class emap_impl compute_truth_table( index, vcuts, fanin, new_cut ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); if ( ps.remove_dominated_cuts ) rcuts.insert( new_cut, false, 
sort ); @@ -1307,7 +1322,7 @@ class emap_impl compute_truth_table( index, vcuts, fanin, new_cut ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); if ( ps.remove_dominated_cuts ) rcuts.insert( new_cut, false, sort ); @@ -1481,7 +1496,7 @@ class emap_impl auto const index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - node_data.best_supergates[0] = node_data.best_supergates[1] = nullptr; + node_data.best_gate[0] = node_data.best_gate[1] = nullptr; node_data.same_match = 0; node_data.multioutput_match[0] = node_data.multioutput_match[1] = false; node_data.required[0] = node_data.required[1] = std::numeric_limits::max(); @@ -1501,10 +1516,8 @@ class emap_impl { /* all terminals have flow 0 */ node_data.flows[0] = 0.0f; - node_data.arrival[0] = 0.0f; /* PIs have the negative phase implemented with an inverter */ node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; - node_data.arrival[1] = lib_inv_delay; add_unit_cut( index ); continue; } @@ -1519,10 +1532,13 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); + + /* select alternative matches to use */ + select_alternatives( n ); } double area_old = area; - bool success = set_mapping_refs(); + bool success = set_mapping_refs_and_req(); /* round stats */ if ( ps.verbose ) @@ -1567,7 +1583,7 @@ class emap_impl new_cut->function = kitty::extend_to<6>( ntk.node_function( n ) ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); ++cuts_total; } @@ -1587,6 +1603,7 @@ class emap_impl if ( ntk.is_pi( n ) ) { node_match[index].flows[1] = lib_inv_area / node_match[index].est_refs[1]; + node_match[index].best_alternative[1].flow = lib_inv_area / node_match[index].est_refs[1]; continue; } @@ -1610,12 +1627,12 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); /* try a 
multi-output match */ if constexpr ( DO_AREA ) { - if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) + if ( ps.map_multioutput && node_tuple_match[index].highest_index ) { bool multi_success = match_multioutput( n ); if ( multi_success ) @@ -1628,7 +1645,7 @@ class emap_impl } double area_old = area; - bool success = set_mapping_refs(); + bool success = set_mapping_refs_and_req(); /* round stats */ if ( ps.verbose ) @@ -1654,85 +1671,8 @@ class emap_impl } template - bool compute_mapping_exact( bool last_round ) - { - for ( auto const& n : topo_order ) - { - if ( ntk.is_constant( n ) || ntk.is_pi( n ) ) - continue; - - /* don't touch box */ - if constexpr ( has_is_dont_touch_v ) - { - if ( ntk.is_dont_touch( n ) ) - { - if constexpr ( has_has_binding_v ) - { - propagate_data_forward_white_box( n ); - } - continue; - } - } - - auto index = ntk.node_to_index( n ); - auto& node_data = node_match[index]; - - /* recursively deselect the best cut shared between - * the two phases if in use in the cover */ - if ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) - { - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 
0 : 1; - auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; - cut_deref( best_cut, n, use_phase ); - } - - /* match positive phase */ - match_phase_exact( n, 0u ); - - /* match negative phase */ - match_phase_exact( n, 1u ); - - /* try to drop one phase */ - match_drop_phase( n, 0 ); - - /* try a multi-output match */ - if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) - { - bool multi_success = match_multioutput_exact( n, last_round ); - if ( multi_success ) - multi_node_update_exact( n ); - } - - if ( node_match[index].map_refs[0] ) - assert( node_match[index].arrival[0] < node_match[index].required[0] + epsilon ); - if ( node_match[index].map_refs[1] ) - assert( node_match[index].arrival[1] < node_match[index].required[1] + epsilon ); - } - - double area_old = area; - bool success = set_mapping_refs(); - - /* round stats */ - if ( ps.verbose ) - { - float area_gain = float( ( area_old - area ) / area_old * 100 ); - std::stringstream stats{}; - if constexpr ( SwitchActivity ) - stats << fmt::format( "[i] Switching: Delay = {:>12.2f} Area = {:>12.2f} Gain = {:>5.2f} % Inverters = {:>5} Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ); - else - stats << fmt::format( "[i] Area : Delay = {:>12.2f} Area = {:>12.2f} Gain = {:>5.2f} % Inverters = {:>5} Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ); - st.round_stats.push_back( stats.str() ); - } - - return success; - } - - template - bool compute_mapping_exact_reversed( bool last_round ) + bool compute_mapping_exact_reversed() { - /* this method works in reverse topological order: less nodes to update (faster) */ - /* instead of propagating arrival times forward, it propagates required times backwards */ - for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it ) { if ( ntk.is_constant( *it ) || ntk.is_pi( *it ) ) @@ -1761,7 +1701,7 @@ class emap_impl /* recursively deselect the best cut 
shared between * the two phases if in use in the cover */ - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 0 : 1; double old_required = -1; if ( node_data.same_match ) { @@ -1775,6 +1715,13 @@ class emap_impl node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[use_phase ^ 1] - lib_inv_delay ); } } + else if ( !node_data.map_refs[0] || !node_data.map_refs[1] ) + { + use_phase = node_data.map_refs[0] ? 0 : 1; + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + cut_deref( best_cut, *it, use_phase ); + node_data.same_match = true; + } /* match positive phase */ match_phase_exact( *it, 0u ); @@ -1789,10 +1736,10 @@ class emap_impl } /* try to drop one phase */ - match_drop_phase( *it, 0 ); + match_drop_phase( *it ); - /* try a multi-output match */ - if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 ) + /* try a multi-output match */ /* TODO: fix the required time*/ + if ( ps.map_multioutput && node_tuple_match[index].lowest_index ) { bool mapped = match_multioutput_exact( *it, true ); @@ -1850,10 +1797,10 @@ class emap_impl auto& node_data = node_match[index]; /* propagate required time through the leaves */ - unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; + unsigned use_phase = node_data.best_gate[0] == nullptr ? 
1u : 0u; unsigned other_phase = use_phase ^ 1; - assert( node_data.best_supergate[0] != nullptr || node_data.best_supergate[1] != nullptr ); + assert( node_data.best_gate[0] != nullptr || node_data.best_gate[1] != nullptr ); // assert( node_data.map_refs[0] || node_data.map_refs[1] ); /* propagate required time over the output inverter if present */ @@ -1871,7 +1818,7 @@ class emap_impl { auto ctr = 0u; auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; - auto const& supergate = node_data.best_supergate[use_phase]; + auto const& supergate = node_data.best_gate[use_phase]; for ( auto leaf : best_cut ) { auto phase = ( node_data.phase[use_phase] >> ctr ) & 1; @@ -1884,7 +1831,7 @@ class emap_impl { auto ctr = 0u; auto const& best_cut = cuts[index][node_data.best_cut[other_phase]]; - auto const& supergate = node_data.best_supergate[other_phase]; + auto const& supergate = node_data.best_gate[other_phase]; for ( auto leaf : best_cut ) { auto phase = ( node_data.phase[other_phase] >> ctr ) & 1; @@ -1930,7 +1877,162 @@ class emap_impl if ( node_data.map_refs[0] || node_data.map_refs[1] ) { /* if used and not available in the library launch a mapping error */ - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) + { + std::cerr << "[e] MAP ERROR: technology library does not contain constant gates, impossible to perform mapping" << std::endl; + st.mapping_error = true; + return false; + } + } + continue; + } + else if ( ntk.is_pi( *it ) ) + { + if ( node_match[index].map_refs[1] > 0u ) + { + /* Add inverter area over the negated fanins */ + area += lib_inv_area; + ++inv; + } + continue; + } + + /* continue if not referenced in the cover */ + if ( !node_match[index].map_refs[0] && !node_match[index].map_refs[1] ) + continue; + + /* don't touch box */ + if constexpr ( has_is_dont_touch_v ) + { + if ( ntk.is_dont_touch( *it ) ) + { + 
set_mapping_refs_dont_touch( *it ); + continue; + } + } + + unsigned use_phase = node_data.best_gate[0] == nullptr ? 1u : 0u; + + if ( node_data.best_gate[use_phase] == nullptr ) + { + /* Library is not complete, mapping is not possible */ + std::cerr << "[e] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl; + st.mapping_error = true; + return false; + } + + if ( node_data.same_match || node_data.map_refs[use_phase] > 0 ) + { + if constexpr ( !ELA ) + { + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + auto ctr = 0u; + + for ( auto const leaf : best_cut ) + { + if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 ) + node_match[leaf].map_refs[1]++; + else + node_match[leaf].map_refs[0]++; + } + } + area += node_data.area[use_phase]; + if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 ) + { + if ( iteration < ps.area_flow_rounds ) + { + ++node_data.map_refs[use_phase]; + } + area += lib_inv_area; + ++inv; + } + } + + /* invert the phase */ + use_phase = use_phase ^ 1; + + /* if both phases are implemented and used */ + if ( !node_data.same_match && node_data.map_refs[use_phase] > 0 ) + { + if constexpr ( !ELA ) + { + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + + auto ctr = 0u; + for ( auto const leaf : best_cut ) + { + if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 ) + node_match[leaf].map_refs[1]++; + else + node_match[leaf].map_refs[0]++; + } + } + area += node_data.area[use_phase]; + } + } + + ++iteration; + + if constexpr ( ELA ) + { + return true; + } + + /* blend estimated references */ + float const coef = 1.0f / ( ( iteration + 1.0f ) * ( iteration + 1.0f ) ); + for ( auto i = 0u; i < ntk.size(); ++i ) + { + node_match[i].est_refs[0] = std::max( 1.0f, coef * node_match[i].est_refs[0] + ( 1 - coef ) * node_match[i].map_refs[0] ); + node_match[i].est_refs[1] = std::max( 1.0f, coef * node_match[i].est_refs[1] + ( 1 - coef ) * node_match[i].map_refs[1] ); + } + 
+ return true; + } + + template + bool set_mapping_refs_and_req() + { + for ( auto i = 0u; i < node_match.size(); ++i ) + { + node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits::max(); + } + + /* compute the current worst delay and update the mapping refs */ + delay = 0.0f; + ntk.foreach_po( [this]( auto s ) { + const auto index = ntk.node_to_index( ntk.get_node( s ) ); + + if ( ntk.is_complemented( s ) ) + delay = std::max( delay, node_match[index].arrival[1] ); + else + delay = std::max( delay, node_match[index].arrival[0] ); + + if constexpr ( !ELA ) + { + if ( ntk.is_complemented( s ) ) + node_match[index].map_refs[1]++; + else + node_match[index].map_refs[0]++; + } + } ); + + set_output_required_time( iteration == 0 ); + + /* compute current area and update mapping refs in top-down order */ + area = 0.0f; + inv = 0; + for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it ) + { + const auto index = ntk.node_to_index( *it ); + auto& node_data = node_match[index]; + + /* skip constants and PIs */ + if ( ntk.is_constant( *it ) ) + { + if ( node_match[index].map_refs[0] || node_match[index].map_refs[1] ) + { + /* if used and not available in the library launch a mapping error */ + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) { std::cerr << "[e] MAP ERROR: technology library does not contain constant gates, impossible to perform mapping" << std::endl; st.mapping_error = true; @@ -1964,9 +2066,15 @@ class emap_impl } } - unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; + /* refine best matches with alternatives */ + if constexpr ( !DO_AREA ) + { + if ( ps.use_match_alternatives ) + refine_best_matches( *it ); + } - if ( node_data.best_supergate[use_phase] == nullptr ) + unsigned use_phase = node_data.best_gate[0] == nullptr ? 
1u : 0u; + if ( node_data.best_gate[use_phase] == nullptr ) { /* Library is not complete, mapping is not possible */ std::cerr << "[e] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl; @@ -1994,8 +2102,7 @@ class emap_impl { if ( iteration < ps.area_flow_rounds ) { - // ++node_data.map_refs[use_phase]; - node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; + ++node_data.map_refs[use_phase]; } area += lib_inv_area; ++inv; @@ -2023,6 +2130,11 @@ class emap_impl } area += node_data.area[use_phase]; } + + if ( !ps.area_oriented_mapping ) + { + match_propagate_required( index ); + } } ++iteration; @@ -2033,10 +2145,11 @@ class emap_impl } /* blend estimated references */ + float const coef = 1.0f / ( ( iteration + 1.0f ) * ( iteration + 1.0f ) ); for ( auto i = 0u; i < ntk.size(); ++i ) { - node_match[i].est_refs[0] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[0] + 2.0f * node_match[i].map_refs[0] ) / 3.0 ); - node_match[i].est_refs[1] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 ); + node_match[i].est_refs[0] = std::max( 1.0f, coef * node_match[i].est_refs[0] + ( 1 - coef ) * node_match[i].map_refs[0] ); + node_match[i].est_refs[1] = std::max( 1.0f, coef * node_match[i].est_refs[1] + ( 1 - coef ) * node_match[i].map_refs[1] ); } return true; @@ -2065,8 +2178,7 @@ class emap_impl { if ( iteration < ps.area_flow_rounds ) { - // ++node_match[index].map_refs[0]; - node_match[index].map_refs[0] += node_match[index].map_refs[1]; + ++node_match[index].map_refs[0]; } area += lib_inv_area; ++inv; @@ -2074,47 +2186,73 @@ class emap_impl } } - void compute_required_time( bool exit_early = false ) + void set_output_required_time( bool warning ) { - for ( auto i = 0u; i < node_match.size(); ++i ) - { - node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits::max(); - } - - /* return if mapping is area oriented */ - if ( ps.area_oriented_mapping ) - 
return; - double required = delay; - /* relax delay constraints */ - if ( iteration == 1 && ps.required_time == 0.0f && ps.relax_required > 0.0f ) + if ( iteration == 0 && ps.required_time == 0.0f && ps.required_times.empty() && ps.relax_required > 0.0f ) { required *= ( 100.0 + ps.relax_required ) / 100.0; } /* Global target time constraint */ - if ( ps.required_time != 0.0f ) + if ( ps.required_times.empty() ) { - if ( ps.required_time < delay - epsilon ) + if ( ps.required_time != 0.0f ) { - if ( !ps.area_oriented_mapping && iteration == 1 ) - std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl; - } - else - { - required = ps.required_time; + if ( ps.required_time < delay - epsilon ) + { + if ( warning ) + std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl; + } + else + { + required = ps.required_time; + } } + + /* set the required time at POs */ + ntk.foreach_po( [&]( auto const& s ) { + const auto index = ntk.node_to_index( ntk.get_node( s ) ); + if ( ntk.is_complemented( s ) ) + node_match[index].required[1] = required; + else + node_match[index].required[0] = required; + } ); + + return; } - /* set the required time at POs */ - ntk.foreach_po( [&]( auto const& s ) { + /* Output-specific target time constraint */ + ntk.foreach_po( [&]( auto const& s, uint32_t i ) { const auto index = ntk.node_to_index( ntk.get_node( s ) ); - if ( ntk.is_complemented( s ) ) - node_match[index].required[1] = required; + uint8_t phase = ntk.is_complemented( s ) ? 
1 : 0; + if ( node_match[index].arrival[phase] > ps.required_times[i] + epsilon ) + { + /* maintain the same delay */ + node_match[index].required[phase] = node_match[index].arrival[phase]; + if ( warning ) + std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f} at output {}", ps.required_times[i], i ) << std::endl; + } else - node_match[index].required[0] = required; + { + node_match[index].required[phase] = ps.required_times[i]; + } } ); + } + + void compute_required_time( bool exit_early = false ) + { + for ( auto i = 0u; i < node_match.size(); ++i ) + { + node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits::max(); + } + + /* return if mapping is area oriented */ + if ( ps.area_oriented_mapping ) + return; + + set_output_required_time( iteration == 1 ); if ( exit_early ) return; @@ -2184,16 +2322,16 @@ class emap_impl } } - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 
0 : 1; /* compute arrival of use_phase */ - supergate const* best_supergate = node_data.best_supergate[use_phase]; + supergate const* best_gate = node_data.best_gate[use_phase]; double worst_arrival = 0; uint16_t best_phase = node_data.phase[use_phase]; auto ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2219,15 +2357,15 @@ class emap_impl continue; } - assert( node_data.best_supergate[use_phase] != nullptr ); + assert( node_data.best_gate[use_phase] != nullptr ); - best_supergate = node_data.best_supergate[use_phase]; + best_gate = node_data.best_gate[use_phase]; worst_arrival = 0; best_phase = node_data.phase[use_phase]; ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2270,16 +2408,16 @@ class emap_impl { uint32_t index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 
0 : 1; /* compute arrival of use_phase */ - supergate const* best_supergate = node_data.best_supergate[use_phase]; + supergate const* best_gate = node_data.best_gate[use_phase]; double worst_arrival = 0; uint16_t best_phase = node_data.phase[use_phase]; auto ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2293,15 +2431,15 @@ class emap_impl return; } - assert( node_data.best_supergate[0] != nullptr ); + assert( node_data.best_gate[0] != nullptr ); - best_supergate = node_data.best_supergate[use_phase]; + best_gate = node_data.best_gate[use_phase]; worst_arrival = 0; best_phase = node_data.phase[use_phase]; ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2312,24 +2450,24 @@ class emap_impl template void match_phase( node const& n, uint8_t phase ) { - double best_arrival = std::numeric_limits::max(); - double best_area_flow = std::numeric_limits::max(); - float best_area = std::numeric_limits::max(); - uint32_t best_size = UINT32_MAX; - uint8_t best_cut = 0u; - uint16_t best_phase = 0u; - uint8_t cut_index = 0u; auto index = ntk.node_to_index( n ); - auto& node_data = node_match[index]; - supergate const* best_supergate = node_data.best_supergate[phase]; + uint32_t cut_index = 0u; + + node_data.best_gate[phase] = nullptr; + node_data.arrival[phase] = std::numeric_limits::max(); + node_data.flows[phase] = std::numeric_limits::max(); + node_data.area[phase] = std::numeric_limits::max(); + 
uint32_t best_size = UINT32_MAX; + + best_gate_emap& gA = node_data.best_alternative[phase]; + gA.gate = nullptr; + gA.arrival = std::numeric_limits::max(); + gA.flow = std::numeric_limits::max(); + uint32_t best_sizeA = UINT32_MAX; /* unmap multioutput */ - if ( node_data.multioutput_match[phase] ) - { - best_supergate = nullptr; - node_data.multioutput_match[phase] = false; - } + node_data.multioutput_match[phase] = false; /* foreach cut */ for ( auto& cut : cuts[index] ) @@ -2355,49 +2493,68 @@ class emap_impl { uint16_t gate_polarity = gate.polarity ^ negation; double worst_arrival = 0.0f; - double area_local = gate.area; + double worst_arrivalA = 0.0f; + float area_local = gate.area; + float area_localA = gate.area; auto ctr = 0u; - node_data.phase[phase] = gate_polarity; for ( auto l : *cut ) { - double arrival_pin = node_match[l].arrival[( gate_polarity >> ctr ) & 1] + gate.tdelay[ctr]; + uint8_t leaf_phase = ( gate_polarity >> ctr ) & 1; + + double arrival_pinA = node_match[l].best_alternative[leaf_phase].arrival + gate.tdelay[ctr]; + worst_arrivalA = std::max( worst_arrivalA, arrival_pinA ); + + // if constexpr ( DO_AREA ) + // { + // if ( worst_arrivalA > node_data.required[phase] + epsilon || worst_arrivalA >= std::numeric_limits::max() ) + // break; + // } + + double arrival_pin = node_match[l].arrival[leaf_phase] + gate.tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); - uint8_t leaf_phase = ( node_data.phase[phase] >> ctr ) & 1; area_local += node_match[l].flows[leaf_phase]; + area_localA += node_match[l].best_alternative[leaf_phase].flow; ++ctr; } + bool skip = false; if constexpr ( DO_AREA ) { - if ( worst_arrival > node_data.required[phase] + epsilon || worst_arrival >= std::numeric_limits::max() ) + if ( ctr < cut->size() ) continue; + if ( worst_arrival > node_data.required[phase] + epsilon || worst_arrival >= std::numeric_limits::max() ) + skip = true; } - node_data.phase[phase] = gate_polarity; - - if ( compare_map( 
worst_arrival, best_arrival, area_local, best_area_flow, cut->size(), best_size ) ) + if ( !skip && compare_map( worst_arrival, node_data.arrival[phase], area_local, node_data.flows[phase], cut->size(), best_size ) ) { - best_arrival = worst_arrival; - best_area_flow = area_local; + node_data.best_gate[phase] = &gate; + node_data.arrival[phase] = worst_arrival; + node_data.flows[phase] = area_local; + node_data.best_cut[phase] = cut_index; + node_data.area[phase] = gate.area; + node_data.phase[phase] = gate_polarity; best_size = cut->size(); - best_cut = cut_index; - best_area = gate.area; - best_phase = gate_polarity; - best_supergate = &gate; + } + + /* compute the alternative */ + if ( compare_map( worst_arrivalA, gA.arrival, area_localA, gA.flow, cut->size(), best_sizeA ) ) + { + gA.gate = &gate; + gA.arrival = worst_arrivalA; + gA.area = gate.area; + gA.flow = area_localA; + gA.phase = gate_polarity; + gA.cut = cut_index; + best_sizeA = cut->size(); + gA.size = cut->size(); } } ++cut_index; } - - node_data.flows[phase] = best_area_flow; - node_data.arrival[phase] = best_arrival; - node_data.area[phase] = best_area; - node_data.best_cut[phase] = best_cut; - node_data.phase[phase] = best_phase; - node_data.best_supergate[phase] = best_supergate; } template @@ -2413,23 +2570,23 @@ class emap_impl auto index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - supergate const* best_supergate = node_data.best_supergate[phase]; + supergate const* best_gate = node_data.best_gate[phase]; /* unmap multioutput */ if ( node_data.multioutput_match[phase] ) { /* dereference multi-output */ - if ( !node_data.same_match && best_supergate != nullptr && node_data.map_refs[phase] ) + if ( !node_data.same_match && best_gate != nullptr && node_data.map_refs[phase] ) { auto const& cut = multi_cut_set[node_data.best_cut[phase]][0]; cut_deref( cut, n, phase ); } - best_supergate = nullptr; + best_gate = nullptr; node_data.multioutput_match[phase] = false; } /* recompute 
best match info */ - if ( best_supergate != nullptr ) + if ( best_gate != nullptr ) { /* if cut is implemented, remove it from the cover */ if ( !node_data.same_match && node_data.map_refs[phase] ) @@ -2487,7 +2644,7 @@ class emap_impl best_size = cut->size(); best_cut = cut_index; best_phase = gate_polarity; - best_supergate = &gate; + best_gate = &gate; } } @@ -2499,7 +2656,7 @@ class emap_impl node_data.area[phase] = best_area; node_data.best_cut[phase] = best_cut; node_data.phase[phase] = best_phase; - node_data.best_supergate[phase] = best_supergate; + node_data.best_gate[phase] = best_gate; if ( !node_data.same_match && node_data.map_refs[phase] ) { @@ -2507,8 +2664,8 @@ class emap_impl } } - template - void match_drop_phase( node const& n, float required_margin_factor ) + template + void match_drop_phase( node const& n ) { auto index = ntk.node_to_index( n ); auto& node_data = node_match[index]; @@ -2520,7 +2677,7 @@ class emap_impl bool use_one = false; /* only one phase is matched */ - if ( node_data.best_supergate[0] == nullptr ) + if ( node_data.best_gate[0] == nullptr ) { set_match_complemented_phase( index, 1, worst_arrival_npos ); if constexpr ( ELA ) @@ -2530,7 +2687,7 @@ class emap_impl } return; } - else if ( node_data.best_supergate[1] == nullptr ) + else if ( node_data.best_gate[1] == nullptr ) { set_match_complemented_phase( index, 0, worst_arrival_nneg ); if constexpr ( ELA ) @@ -2557,44 +2714,41 @@ class emap_impl else { /* check if both phases + inverter meet the required time */ - use_zero = worst_arrival_nneg < ( node_data.required[1] + epsilon - required_margin_factor * lib_inv_delay ); - use_one = worst_arrival_npos < ( node_data.required[0] + epsilon - required_margin_factor * lib_inv_delay ); + use_zero = worst_arrival_nneg < ( node_data.required[1] + epsilon ); + use_one = worst_arrival_npos < ( node_data.required[0] + epsilon ); } /* condition on not used phases, evaluate a substitution during exact area recovery */ if constexpr ( ELA 
) { - if ( iteration != 0 ) + if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 ) { - if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 ) + /* select the used match */ + auto phase = 0; + auto nphase = 0; + if ( node_data.map_refs[0] == 0 ) { - /* select the used match */ - auto phase = 0; - auto nphase = 0; - if ( node_data.map_refs[0] == 0 ) - { - phase = 1; - use_one = true; - use_zero = false; - } - else - { - nphase = 1; - use_one = false; - use_zero = true; - } - /* select the not used match instead if it leads to area improvement and doesn't violate the required time */ - if ( node_data.arrival[nphase] + lib_inv_delay < node_data.required[phase] + epsilon ) - { - auto size_phase = cuts[index][node_data.best_cut[phase]].size(); - auto size_nphase = cuts[index][node_data.best_cut[nphase]].size(); + phase = 1; + use_one = true; + use_zero = false; + } + else + { + nphase = 1; + use_one = false; + use_zero = true; + } + /* select the not used match instead if it leads to area improvement and doesn't violate the required time */ + if ( node_data.arrival[nphase] + lib_inv_delay < node_data.required[phase] + epsilon ) + { + auto size_phase = cuts[index][node_data.best_cut[phase]].size(); + auto size_nphase = cuts[index][node_data.best_cut[nphase]].size(); - if ( compare_map( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) ) - { - /* invert the choice */ - use_zero = !use_zero; - use_one = !use_one; - } + if ( compare_map( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) ) + { + /* invert the choice */ + use_zero = !use_zero; + use_one = !use_one; } } } @@ -2603,21 +2757,10 @@ class emap_impl if ( ( !use_zero && !use_one ) ) { /* use both phases */ - if ( ps.allow_node_duplication ) - { - node_data.flows[0] = 
node_data.flows[0] / node_data.est_refs[0]; - node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; - node_data.same_match = false; - return; - } - - /* if node duplication is not allowed, pick one phase based on delay */ - auto size_zero = cuts[index][node_data.best_cut[0]].size(); - auto size_one = cuts[index][node_data.best_cut[1]].size(); - if ( compare_map( worst_arrival_npos, worst_arrival_nneg, node_data.flows[1], node_data.flows[0], size_one, size_zero ) ) - use_zero = true; - else - use_one = true; + node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0]; + node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; + node_data.same_match = false; + return; } /* use area flow as a tiebreaker */ @@ -2625,10 +2768,59 @@ class emap_impl { auto size_zero = cuts[index][node_data.best_cut[0]].size(); auto size_one = cuts[index][node_data.best_cut[1]].size(); - if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) - use_one = false; + + if constexpr ( ELA ) + { + if ( !node_data.same_match ) + { + /* both phases were implemented --> evaluate substitution */ + cut_deref( cuts[index][node_data.best_cut[0]], n, 0 ); + node_data.flows[1] = cut_deref( cuts[index][node_data.best_cut[1]], n, 1 ); + node_data.flows[0] = cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); + cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); + } + /* evaluate based on inverter cost */ + if constexpr ( !SwitchActivity ) + { + use_zero = lib_inv_area < node_data.flows[1] + epsilon; + use_one = lib_inv_area < node_data.flows[0] + epsilon; + } + + if ( use_one && use_zero ) + { + if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) + use_one = false; + else + use_zero = false; + } + else if ( !use_one && !use_zero && node_data.same_match ) + { + node_data.same_match = false; + cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); + 
cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); + return; + } + } else - use_zero = false; + { + /* compare flows by looking at the most convenient and referenced */ + if ( node_data.flows[0] / node_data.est_refs[0] + lib_inv_area < node_data.flows[1] / node_data.est_refs[1] + epsilon ) + { + use_one = false; + } + else if ( node_data.flows[1] / node_data.est_refs[1] + lib_inv_area < node_data.flows[0] / node_data.est_refs[0] + epsilon ) + { + use_zero = false; + } + else + { + /* delay the decision on what to keep --> wait for better estimations */ + node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0]; + node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; + node_data.same_match = false; + return; + } + } } if ( use_zero ) @@ -2676,7 +2868,7 @@ class emap_impl auto& node_data = node_match[index]; auto phase_n = phase ^ 1; node_data.same_match = true; - node_data.best_supergate[phase_n] = nullptr; + node_data.best_gate[phase_n] = nullptr; node_data.best_cut[phase_n] = node_data.best_cut[phase]; node_data.phase[phase_n] = node_data.phase[phase]; node_data.arrival[phase_n] = worst_arrival_n; @@ -2685,24 +2877,151 @@ class emap_impl node_data.flows[phase] = node_data.flows[phase] / node_data.est_refs[phase]; } - void reindex_multioutput_data() + template + inline void select_alternatives( node const& n ) { - /* re-index the multioutput list using the lowest index output instead of the greatest one */ - if ( !ps.map_multioutput ) + if constexpr ( DO_AREA ) + return; + + if ( !ps.use_match_alternatives ) + return; + + auto index = ntk.node_to_index( n ); + auto& node_data = node_match[index]; + + best_gate_emap& g0 = node_data.best_alternative[0]; + best_gate_emap& g1 = node_data.best_alternative[1]; + float g0flow = g0.flow / node_data.est_refs[0]; + float g1flow = g1.flow / node_data.est_refs[1]; + + /* process for best area */ /* removed check on required since this is executed only during a delay pass */ + if ( g0.gate != nullptr && 
g0flow + lib_inv_area < g1flow + epsilon ) + { + g1 = g0; + g1.gate = nullptr; + g1.arrival += lib_inv_delay; + g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; + g0.flow = g0flow; return; + } + else if ( g1.gate != nullptr && g1flow + lib_inv_area < g0flow + epsilon ) + { + g0 = g1; + g0.gate = nullptr; + g0.arrival += lib_inv_delay; + g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; + g1.flow = g1flow; + return; + } - for ( auto i = ntk.num_pis(); i < topo_order.size(); ++i ) + g0.flow = g0flow; + g1.flow = g1flow; + } + + inline void refine_best_matches( node const& n ) + { + auto index = ntk.node_to_index( n ); + auto& node_data = node_match[index]; + + /* evaluate to change the best matches with the best alternative */ + best_gate_emap& g0 = node_data.best_alternative[0]; + best_gate_emap& g1 = node_data.best_alternative[1]; + + if ( node_data.map_refs[0] && node_data.map_refs[1] ) { - uint32_t tuple_index = node_tuple_match[i]; - if ( tuple_index >= UINT32_MAX - 1 ) - continue; + if ( node_data.same_match ) + { + /* pick best implementation between the two alternatives */ + unsigned best_match_phase = node_data.best_gate[0] == nullptr ? 1 : 0; + unsigned use_phase = g0.gate == nullptr ? 
1 : 0; + if ( g0.gate != nullptr && g1.gate != nullptr ) + { + if ( g0.arrival > node_data.required[0] + epsilon || g1.arrival > node_data.required[1] + epsilon ) + return; - multi_match_t const& tuple_data = multi_node_match[tuple_index][0]; - node_tuple_match[i] = UINT32_MAX - 1; /* arbitrary value to skip the required time propagation */ - node_tuple_match[tuple_data[0].node_index] = tuple_index; + refine_best_matches_copy_refinement( n, 0, false ); + refine_best_matches_copy_refinement( n, 1, false ); + node_data.same_match = false; + return; + } + else + { + best_gate_emap& gUse = node_data.best_alternative[use_phase]; + if ( gUse.arrival > node_data.required[use_phase] + epsilon || gUse.arrival + lib_inv_delay > node_data.required[use_phase ^ 1] + epsilon ) + { + return; + } + refine_best_matches_copy_refinement( n, use_phase, true ); + return; + } + } + else + { + /* not same match: evaluate both zero and one phase */ + if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) + { + node_data.same_match = false; + refine_best_matches_copy_refinement( n, 0, g1.gate == nullptr && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ); + } + if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) + { + node_data.same_match = false; + refine_best_matches_copy_refinement( n, 1, g0.gate == nullptr && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ); + } + } + } + else if ( node_data.map_refs[0] ) + { + if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) + { + node_data.same_match = false; + refine_best_matches_copy_refinement( n, 0, false ); + } + else if ( g0.gate == nullptr && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ) + { + refine_best_matches_copy_refinement( n, 1, true ); + } + } + else + { + if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) + { + node_data.same_match = false; + refine_best_matches_copy_refinement( n, 1, false ); + } + 
else if ( g1.gate == nullptr && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ) + { + refine_best_matches_copy_refinement( n, 0, true ); + } } } + inline void refine_best_matches_copy_refinement( node const& n, unsigned phase, bool both_phases ) + { + auto index = ntk.node_to_index( n ); + auto& node_data = node_match[index]; + best_gate_emap& bg = node_data.best_alternative[phase]; + + node_data.best_gate[phase] = bg.gate; + node_data.phase[phase] = bg.phase; + node_data.best_cut[phase] = bg.cut; + node_data.arrival[phase] = bg.arrival; + node_data.area[phase] = bg.area; + node_data.flows[phase] = bg.flow; + + if ( !both_phases ) + return; + + node_data.same_match = true; + phase ^= 1; + node_data.best_gate[phase] = nullptr; + node_data.phase[phase] = bg.phase; + node_data.best_cut[phase] = bg.cut; + node_data.arrival[phase] = bg.arrival + lib_inv_delay; + node_data.area[phase] = bg.area; + node_data.flows[phase] = ( bg.flow * node_data.est_refs[phase ^ 1] + lib_inv_area ) / node_data.est_refs[phase]; + } + bool initialize_box( node const& n ) { uint32_t index = ntk.node_to_index( n ); @@ -2796,16 +3115,16 @@ class emap_impl /* if only one is available, the other is obtained using an inverter */ if ( supergates_zero != nullptr ) { - node_data.best_supergate[0] = &( ( *supergates_zero )[0] ); - node_data.arrival[0] = node_data.best_supergate[0]->tdelay[0]; - node_data.area[0] = node_data.best_supergate[0]->area; + node_data.best_gate[0] = &( ( *supergates_zero )[0] ); + node_data.arrival[0] = node_data.best_gate[0]->tdelay[0]; + node_data.area[0] = node_data.best_gate[0]->area; node_data.phase[0] = 0; } if ( supergates_one != nullptr ) { - node_data.best_supergate[1] = &( ( *supergates_one )[0] ); - node_data.arrival[1] = node_data.best_supergate[1]->tdelay[0]; - node_data.area[1] = node_data.best_supergate[1]->area; + node_data.best_gate[1] = &( ( *supergates_one )[0] ); + node_data.arrival[1] = node_data.best_gate[1]->tdelay[0]; + node_data.area[1] 
= node_data.best_gate[1]->area; node_data.phase[1] = 0; } else @@ -2829,7 +3148,7 @@ class emap_impl { /* extract outputs tuple */ uint32_t index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; /* get the cut */ auto const& cut0 = cuts[tuple_data[0].node_index][tuple_data[0].cut_index]; @@ -2907,7 +3226,7 @@ class emap_impl /* compute area flow */ if ( j == 0 || !node_data.multioutput_match[0] ) { - uint8_t current_phase = node_data.best_supergate[0] == nullptr ? 1 : 0; + uint8_t current_phase = node_data.best_gate[0] == nullptr ? 1 : 0; old_flow_sum += node_data.flows[current_phase]; } uint8_t old_phase = node_data.phase[phase[j]]; @@ -2916,38 +3235,6 @@ class emap_impl area_flow[j] = gate.area + cut_leaves_flow( cut, n, phase[j] ); node_data.phase[phase[j]] = old_phase; - /* local evaluation for delay (area flow improvement is approximated) */ - // if constexpr ( !DO_AREA ) - // { - // /* recompute local area flow of previous matches */ - // double mapped_flow = node_data.flows[phase[j]]; - - // if ( node_data.multioutput_match[phase[j]] ) - // { - // /* recompute estimation for multi-output gate */ - // float k_est = 0; - // for ( auto k = 0; k < max_multioutput_output_size; ++k ) - // { - // uint32_t index_k = tuple_data[k].node_index; - // auto used_phase = node_match[index_k].supergate[0] == nullptr ? 1 : 0; - // k_est += node_match[index_k].est_refs[used_phase]; /* TODO: review */ - // } - // mapped_flow *= k_est; - // } - // else - // { - // auto used_phase = node_data.supergate[0] == nullptr ? 
1 : 0; /* TODO: review */ - // mapped_flow *= node_data.est_refs[used_phase]; - // } - - // auto const& mapped_cut = cuts[node_index][node_data.best_cut[phase[j]]]; - // if ( !compare_map( arrival[j], node_data.arrival[phase[j]], area_flow[j], mapped_flow, cut.size(), mapped_cut.size() ) ) - // { - // is_best = false; - // break; - // } - // } - /* current version may lead to delay increase */ est_refs[j] = node_data.est_refs[phase[j]]; } @@ -2993,7 +3280,7 @@ class emap_impl uint8_t mapped_phase = phase[j]; node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = &gate; + node_data.best_gate[mapped_phase] = &gate; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j]; @@ -3005,11 +3292,11 @@ class emap_impl /* select opposite phase */ mapped_phase ^= 1; node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = nullptr; + node_data.best_gate[mapped_phase] = nullptr; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; - node_data.area[mapped_phase] = area[j]; /* partial area contribution */ + node_data.area[mapped_phase] = area[j]; /* partial area contribution */ node_data.flows[mapped_phase] = flow_sum_neg; assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon ); @@ -3024,7 +3311,7 @@ class emap_impl { /* extract outputs tuple */ uint32_t index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; /* local values storage */ std::array best_exact_area; @@ -3053,13 +3340,13 @@ class emap_impl for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) { uint32_t node_index = tuple_data[j].node_index; - uint8_t selected_phase = 
node_match[node_index].best_supergate[0] == nullptr ? 1 : 0; + uint8_t selected_phase = node_match[node_index].best_gate[0] == nullptr ? 1 : 0; if ( node_match[node_index].map_refs[0] || node_match[node_index].map_refs[1] ) { /* match is always single output here */ auto const& cut = cuts[node_index][node_match[node_index].best_cut[0]]; - uint8_t use_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_match[node_index].best_gate[0] != nullptr ? 0 : 1; best_exact_area[j] = cut_deref( cut, ntk.index_to_node( node_index ), use_phase ); /* mapping a non referenced phase */ @@ -3079,7 +3366,7 @@ class emap_impl if ( node_match[node_index].map_refs[0] || node_match[node_index].map_refs[1] ) { - uint8_t use_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_match[node_index].best_gate[0] != nullptr ? 0 : 1; auto const& best_cut = cuts[node_index][node_match[node_index].best_cut[use_phase]]; cut_ref( best_cut, ntk.index_to_node( node_index ), use_phase ); } @@ -3218,7 +3505,7 @@ class emap_impl /* write data */ node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = &gate; + node_data.best_gate[mapped_phase] = &gate; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j]; @@ -3228,7 +3515,7 @@ class emap_impl /* select opposite phase */ mapped_phase ^= 1; node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = nullptr; + node_data.best_gate[mapped_phase] = nullptr; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; @@ -3246,7 +3533,7 @@ class emap_impl void multi_node_update( node const& n ) { uint32_t check_index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = 
multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )].index][0]; uint64_t signature = 0; /* check if a node is in TFI: there is a path of length > 1 */ @@ -3316,7 +3603,7 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); assert( node_data.arrival[0] < node_data.required[0] + epsilon ); assert( node_data.arrival[1] < node_data.required[1] + epsilon ); @@ -3326,7 +3613,7 @@ class emap_impl void multi_node_update_exact( node const& n ) { uint32_t check_index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )].index][0]; uint64_t signature = 0; /* check if a node is in TFI: there is a path of length > 1 */ @@ -3385,7 +3672,7 @@ class emap_impl if ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) { - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 
0 : 1; auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; cut_deref( best_cut, n, use_phase ); } @@ -3397,7 +3684,7 @@ class emap_impl match_phase_exact( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); assert( node_data.arrival[0] < std::numeric_limits::max() ); assert( node_data.arrival[1] < std::numeric_limits::max() ); @@ -3407,7 +3694,7 @@ class emap_impl { /* extract outputs tuple */ uint32_t index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) { @@ -3416,78 +3703,67 @@ class emap_impl } } - template - bool match_multi_add_cuts( node const& n ) + void match_multi_add_cuts( node const& n ) { + /* assume a single cut (current version) */ uint32_t index = ntk.node_to_index( n ); - auto& matches = multi_node_match[node_tuple_match[index]]; - - /* get the cuts */ - auto tuple_data_it = matches.begin(); - while ( tuple_data_it != matches.end() ) - { - multi_match_t& tuple_data = *tuple_data_it; - uint32_t cut_index = tuple_data[0].cut_index; - auto& cut_pair = multi_cut_set[cut_index]; - bool remove_entry = false; + multi_match_t& matches = multi_node_match[node_tuple_match[index].index][0]; - /* insert multi-output cuts into the standard cut set */ - for ( auto i = 0; i < max_multioutput_output_size; ++i ) - { - uint64_t node_index = tuple_data[i].node_index; - auto& cut = cut_pair[i]; - auto single_cut = cut_pair[i]; + /* find the corresponding cut */ + uint32_t cut_p = 0; + while ( matches[cut_p].node_index != index ) + ++cut_p; - auto& rcuts = cuts[node_index]; + assert( cut_p < matches.size() ); + uint32_t cut_index = matches[cut_p].cut_index; + auto& cut = multi_cut_set[cut_index][cut_p]; + auto single_cut = multi_cut_set[cut_index][cut_p]; + auto& rcuts = cuts[index]; - /* not enough space in the 
data structure: abort */ - if ( rcuts.size() == max_cut_num ) - { - remove_entry = true; - break; - } + /* not enough space in the data structure: abort */ + if ( rcuts.size() == max_cut_num ) + { + match_multi_add_cuts_remove_entry( matches ); + return; + } - /* insert single cut variation if unique (for delay preservation) */ - if ( !rcuts.is_contained( single_cut ) ) - { - compute_cut_data( single_cut, ntk.index_to_node( node_index ) ); - rcuts.append_cut( single_cut ); + /* insert single cut variation if unique (for delay preservation) */ + if ( !rcuts.is_contained( single_cut ) ) + { + single_cut->pattern_index = 0; + compute_cut_data( single_cut, ntk.index_to_node( index ) ); + rcuts.append_cut( single_cut ); - /* not enough space in the data structure: abort */ - if ( rcuts.size() == max_cut_num ) - { - rcuts.limit( rcuts.size() - 1 ); - remove_entry = true; - break; - } - } + /* not enough space in the data structure: abort */ + if ( rcuts.size() == max_cut_num ) + { + rcuts.limit( rcuts.size() - 1 ); + match_multi_add_cuts_remove_entry( matches ); + return; + } + } - /* add multi-output cut */ - uint32_t num_cuts_pre = rcuts.size(); - cut->ignore = true; - rcuts.append_cut( cut ); + /* add multi-output cut */ + uint32_t num_cuts_pre = rcuts.size(); + cut->ignore = true; + rcuts.append_cut( cut ); - uint32_t num_cuts_after = rcuts.size(); - assert( num_cuts_after == num_cuts_pre + 1 ); + uint32_t num_cuts_after = rcuts.size(); + assert( num_cuts_after == num_cuts_pre + 1 ); - rcuts.limit( num_cuts_pre ); + rcuts.limit( num_cuts_pre ); - /* update tuple data */ - tuple_data[i].cut_index = num_cuts_pre; - } + /* update tuple data */ + matches[cut_p].cut_index = num_cuts_pre; + } - if ( remove_entry ) - matches.erase( tuple_data_it ); - else - ++tuple_data_it; + inline void match_multi_add_cuts_remove_entry( multi_match_t const& matches ) + { + /* reset matches */ + for ( multi_match_data const& entry : matches ) + { + node_tuple_match[entry.node_index].data = 
0; } - - /* matches do not fit in the data structure, remove multi-output option */ - if ( matches.empty() ) - node_tuple_match[index] = UINT32_MAX; - - /* return if the insertion is (partially) successful */ - return !matches.empty(); } inline bool multi_node_update_cut_check( uint32_t index, uint64_t signature, uint8_t phase ) @@ -3499,99 +3775,6 @@ class emap_impl return false; } - - bool remove_unused_multioutput() - { - /* TODO: update required times */ - for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it ) - { - if ( ntk.is_constant( *it ) || ntk.is_pi( *it ) ) - continue; - - auto index = ntk.node_to_index( *it ); - - /* get used multi-output gates */ - if ( node_tuple_match[index] == UINT32_MAX ) - continue; - - if ( node_match[index].same_match && !node_match[index].multioutput_match[0] ) - continue; - - if ( !node_match[index].same_match && !( node_match[index].multioutput_match[0] || node_match[index].multioutput_match[1] ) ) - continue; - - /* check if mapped to multi-output with unused outputs */ - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; - - bool used = false; - bool unused = false; - for ( auto j = 0; j < max_multioutput_output_size; ++j ) - { - uint32_t node_index = tuple_data[j].node_index; - auto& node_data = node_match[node_index]; - - if ( node_data.best_supergate[0] != nullptr && node_data.multioutput_match[0] ) - { - if ( node_data.map_refs[0] > 0 || ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) ) - used = true; - else - unused = true; - } - else if ( node_data.best_supergate[1] != nullptr && node_data.multioutput_match[1] ) - { - if ( node_data.map_refs[1] > 0 || ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) ) - used = true; - else - unused = true; - } - } - - if ( !used || !unused ) - continue; - - /* remap connected outputs (reverse topo order)*/ - for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) - { - 
uint32_t node_index = tuple_data[j].node_index; - auto& node_data = node_match[node_index]; - auto const n = ntk.index_to_node( node_index ); - - if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) - continue; - - /* recursively deselect the best cut shared between - * the two phases if in use in the cover */ - if ( node_data.same_match ) - { - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; - auto const& best_cut = cuts[node_index][node_data.best_cut[use_phase]]; - cut_deref( best_cut, n, use_phase ); - } - - /* match positive phase */ - match_phase_exact( n, 0u ); - - /* match negative phase */ - match_phase_exact( n, 1u ); - - /* try to drop one phase */ - match_drop_phase( n, 0 ); - } - } - - double area_old = area; - bool success = set_mapping_refs(); - - /* round stats */ - if ( ps.verbose ) - { - float area_gain = float( ( area_old - area ) / area_old * 100 ); - std::string stats = fmt::format( "[i] Cleaning : Delay = {:>12.2f} Area = {:>12.2f} Gain = {:>5.2f} % Inverters = {:>5} Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ); - st.round_stats.push_back( stats ); - } - - return success; - } #pragma endregion #pragma region Mapping utils @@ -3670,7 +3853,7 @@ class emap_impl } /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */ - if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) + if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_gate[leaf_phase] == nullptr ) { if constexpr ( SwitchActivity ) count += switch_activity[leaf]; @@ -3743,7 +3926,7 @@ class emap_impl if ( node_match[leaf].same_match ) { /* Add inverter area if it is used only by the current gate and leaf node is implemented in the opposite phase */ - if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) + if ( --node_match[leaf].map_refs[leaf_phase] 
== 0u && node_match[leaf].best_gate[leaf_phase] == nullptr ) { if constexpr ( SwitchActivity ) count += switch_activity[leaf]; @@ -3850,7 +4033,7 @@ class emap_impl } /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */ - if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) + if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_gate[leaf_phase] == nullptr ) { if constexpr ( SwitchActivity ) count += switch_activity[leaf]; @@ -3946,6 +4129,41 @@ class emap_impl } ); } + bool init_arrivals() + { + if ( ps.required_times.size() && ps.required_times.size() != ntk.num_pos() ) + { + std::cerr << "[e] MAP ERROR: required time vector does not match the output size of the network" << std::endl; + st.mapping_error = true; + return false; + } + + if ( ps.arrival_times.empty() ) + { + ntk.foreach_pi( [&]( auto const& n ) { + auto& node_data = node_match[ntk.node_to_index( n )]; + node_data.arrival[0] = node_data.best_alternative[0].arrival = 0; + node_data.arrival[1] = node_data.best_alternative[1].arrival = lib_inv_delay; + } ); + return true; + } + + if ( ps.arrival_times.size() != ntk.num_pis() ) + { + std::cerr << "[e] MAP ERROR: arrival time vector does not match the input size of the network" << std::endl; + st.mapping_error = true; + return false; + } + + ntk.foreach_pi( [&]( auto const& n, uint32_t i ) { + auto& node_data = node_match[ntk.node_to_index( n )]; + node_data.arrival[0] = node_data.best_alternative[0].arrival = ps.arrival_times[i]; + node_data.arrival[1] = node_data.best_alternative[1].arrival = ps.arrival_times[i] + lib_inv_delay; + } ); + + return true; + } + void finalize_cover( binding_view& res, klut_map& old2new ) { uint32_t multioutput_count = 0; @@ -3958,7 +4176,7 @@ class emap_impl /* add inverter at PI if needed */ if ( ntk.is_constant( n ) ) { - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == 
nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) continue; } else if ( ntk.is_pi( n ) ) @@ -3985,7 +4203,7 @@ class emap_impl } } - unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1; + unsigned phase = ( node_data.best_gate[0] != nullptr ) ? 0 : 1; /* add used cut */ if ( node_data.same_match || node_data.map_refs[phase] > 0 ) @@ -4000,7 +4218,7 @@ class emap_impl } /* count multioutput gates */ - if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 && node_data.multioutput_match[phase] ) + if ( ps.map_multioutput && node_tuple_match[index].lowest_index && node_data.multioutput_match[phase] ) { ++multioutput_count; } @@ -4013,7 +4231,7 @@ class emap_impl create_lut_for_gate( res, old2new, index, phase ); /* count multioutput gates */ - if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 && node_data.multioutput_match[phase] ) + if ( ps.map_multioutput && node_tuple_match[index].lowest_index && node_data.multioutput_match[phase] ) { ++multioutput_count; } @@ -4076,7 +4294,7 @@ class emap_impl /* add inverter at PI if needed */ if ( ntk.is_constant( n ) ) { - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) continue; } else if ( ntk.is_pi( n ) ) @@ -4103,7 +4321,7 @@ class emap_impl } } - unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1; + unsigned phase = ( node_data.best_gate[0] != nullptr ) ? 
0 : 1; /* add used cut */ if ( node_data.same_match || node_data.map_refs[phase] > 0 ) @@ -4113,11 +4331,10 @@ class emap_impl { assert( node_data.same_match == true ); - if ( node_tuple_match[index] < UINT32_MAX - 1 ) + if ( node_tuple_match[index].has_info && node_tuple_match[index].lowest_index ) { ++multioutput_count; create_block_for_gate( res, old2new, index, phase, genlib_to_cell ); - /* TODO: implement */ } continue; } @@ -4175,7 +4392,7 @@ class emap_impl { auto const& node_data = node_match[index]; auto const& best_cut = cuts[index][node_data.best_cut[phase]]; - auto const& gate = node_data.best_supergate[phase]->root; + auto const& gate = node_data.best_gate[phase]->root; /* permutate and negate to obtain the matched gate truth table */ std::vector> children( gate->num_vars ); @@ -4185,7 +4402,7 @@ class emap_impl { if ( ctr >= gate->num_vars ) break; - children[node_data.best_supergate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; + children[node_data.best_gate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; ++ctr; } @@ -4236,7 +4453,7 @@ class emap_impl { auto const& node_data = node_match[index]; auto const& best_cut = cuts[index][node_data.best_cut[phase]]; - auto const& gate = node_data.best_supergate[phase]->root; + auto const& gate = node_data.best_gate[phase]->root; /* permutate and negate to obtain the matched gate truth table */ std::vector> children( gate->num_vars ); @@ -4246,7 +4463,7 @@ class emap_impl { if ( ctr >= gate->num_vars ) break; - children[node_data.best_supergate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; + children[node_data.best_gate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; ++ctr; } @@ -4296,7 +4513,7 @@ class emap_impl void create_block_for_gate( cell_view& res, block_map& old2new, uint32_t index, unsigned phase, std::vector const& genlib_to_cell ) { std::vector const& lib = 
res.get_library(); - composed_gate const* local_gate = node_match[index].best_supergate[phase]->root; + composed_gate const* local_gate = node_match[index].best_gate[phase]->root; standard_cell const& cell = lib[genlib_to_cell.at( local_gate->root->id )]; assert( !local_gate->is_super ); @@ -4311,11 +4528,11 @@ class emap_impl { if ( ctr >= local_gate->num_vars ) break; - children[node_match[index].best_supergate[phase]->permutation[ctr]] = old2new[l][( node_match[index].phase[phase] >> ctr ) & 1]; + children[node_match[index].best_gate[phase]->permutation[ctr]] = old2new[l][( node_match[index].phase[phase] >> ctr ) & 1]; ++ctr; } - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; std::vector outputs; std::vector functions; @@ -4327,10 +4544,10 @@ class emap_impl { uint32_t node_index = tuple_data[j].node_index; assert( node_match[node_index].same_match ); - uint8_t node_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1; + uint8_t node_phase = node_match[node_index].best_gate[0] != nullptr ? 0 : 1; assert( node_match[node_index].multioutput_match[node_phase] ); - gate const* node_gate = node_match[node_index].best_supergate[node_phase]->root->root; + gate const* node_gate = node_match[node_index].best_gate[node_phase]->root->root; /* wrong output */ if ( node_gate->id != g.id ) @@ -4350,7 +4567,7 @@ class emap_impl for ( uint32_t s : outputs ) { /* add inverted version if used */ - uint8_t node_phase = node_match[s].best_supergate[0] != nullptr ? 0 : 1; + uint8_t node_phase = node_match[s].best_gate[0] != nullptr ? 
0 : 1; assert( node_match[s].same_match ); /* add the node in the data structure */ @@ -4467,7 +4684,6 @@ class emap_impl #pragma endregion #pragma region Cuts and matching utils - template void compute_cut_data( cut_t& cut, node const& n ) { cut->delay = std::numeric_limits::max(); @@ -4707,7 +4923,7 @@ class emap_impl #pragma endregion template - inline bool compare_map( double arrival, double best_arrival, double area_flow, double best_area_flow, uint32_t size, uint32_t best_size ) + inline bool compare_map( double arrival, double best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size ) { if constexpr ( DO_AREA ) { @@ -4762,7 +4978,7 @@ class emap_impl if ( ntk.is_constant( n ) ) { - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) continue; } else if ( ntk.is_pi( n ) ) @@ -4776,7 +4992,7 @@ class emap_impl if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; - unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1; + unsigned phase = ( node_data.best_gate[0] != nullptr ) ? 
0 : 1; if ( node_data.same_match || node_data.map_refs[phase] > 0 ) { @@ -5007,7 +5223,12 @@ class emap_impl if constexpr ( OverlapFilter ) { multi_gate_mark_visited( index1, index2, cut1 ); - node_tuple_match[index2] = multi_node_match.size(); + node_tuple_match[index1].has_info = 1; + node_tuple_match[index1].lowest_index = 1; + node_tuple_match[index1].index = multi_node_match.size(); + node_tuple_match[index2].has_info = 1; + node_tuple_match[index2].highest_index = 1; + node_tuple_match[index2].index = multi_node_match.size(); } else { @@ -5036,16 +5257,6 @@ class emap_impl multi_node_match[insertion_index].push_back( p ); } } - - /* remove indexing for lower index for compatible overlapping cuts */ - if constexpr ( !OverlapFilter ) - { - for ( auto const& entry : multi_node_match ) - { - multi_match_t const& p = entry[0]; - node_tuple_match[p[0].node_index] = UINT32_MAX; - } - } } bool multi_compute_cut_data( std::array& cut_tuple ) @@ -5152,24 +5363,30 @@ class emap_impl inline bool multi_gate_check_incompatible( uint32_t index1, uint32_t index2, bool& is_new, uint32_t& data_index ) { /* check cut assigned cut outputs, specialized code for 2 outputs */ - uint32_t current_assignment = node_tuple_match[index1]; - if ( current_assignment != node_tuple_match[index2] ) - return true; + if ( !node_tuple_match[index1].has_info && !node_tuple_match[index2].has_info ) + return false; - /* load data */ - if ( current_assignment != UINT32_MAX ) + if ( node_tuple_match[index1].has_info && node_tuple_match[index2].has_info ) { + uint32_t current_assignment = node_tuple_match[index1].index; + if ( current_assignment != node_tuple_match[index2].index ) + return true; is_new = false; data_index = current_assignment; + return false; } - return false; + return true; } inline void multi_gate_mark_compatibility( uint32_t index1, uint32_t index2, uint32_t mark_value ) { - node_tuple_match[index1] = mark_value; - node_tuple_match[index2] = mark_value; + 
node_tuple_match[index1].has_info = 1; + node_tuple_match[index1].lowest_index = 1; + node_tuple_match[index1].index = mark_value; + node_tuple_match[index2].has_info = 1; + node_tuple_match[index2].highest_index = 1; + node_tuple_match[index2].index = mark_value; } inline void multi_gate_mark_visited( uint32_t index1, uint32_t index2, multi_cut_t const& cut ) @@ -5346,8 +5563,11 @@ class emap_impl ntk.set_visited( g, ntk.trav_id() - 2 ); if ( i > 0 && n == repr ) { - /* fix cycle: remove multi-output match; TODO: extend for more than 2 outputs */ - node_tuple_match[ntk.node_to_index( g )] = UINT32_MAX; + /* fix cycle: remove multi-output match */ + choice_ntk.foreach_choice( repr, [&]( auto const& p ) { + node_tuple_match[ntk.node_to_index( p )].data = 0; + return true; + } ); choice_ntk.remove_choice( g ); check = true; } @@ -5479,7 +5699,7 @@ class emap_impl std::vector> topo_order; node_match_t node_match; - std::vector node_tuple_match; + std::vector node_tuple_match; std::vector switch_activity; std::vector tmp_visited; @@ -5507,7 +5727,7 @@ class emap_impl * The function takes the size of the cuts in the template parameter `CutSize`. * * The function returns a block network that supports multi-output cells. - * + * * The novelties of this mapper are contained in 2 publications: * - A. Tempia Calvino and G. De Micheli, "Technology Mapping Using Multi-Output Library Cells," ICCAD, 2023. * - G. Radi, A. Tempia Calvino, and G. De Micheli, "In Medio Stat Virtus: Combining Boolean and Pattern Matching," ASP-DAC, 2024. @@ -5566,7 +5786,7 @@ cell_view emap( Ntk const& ntk, tech_library 0 ) diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp index a671e7a25..cb3c83e33 100644 --- a/include/mockturtle/utils/tech_library.hpp +++ b/include/mockturtle/utils/tech_library.hpp @@ -109,6 +109,9 @@ struct tech_library_params /*! \brief Loads multioutput gates in the library */ bool load_multioutput_gates{ true }; + /*! 
\brief Don't load symmetrical permutations of gate pins (drastically speeds-up mapping) */ + bool ignore_symmetries{ false }; + /*! \brief Load gates with minimum size only */ bool load_minimum_size_only{ true }; @@ -473,7 +476,7 @@ class tech_library if ( sg.root->id == it->root->id ) { /* if already in the library exit, else ignore permutations if with equal delay cost */ - if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay ) + if ( sg.polarity == it->polarity && ( _ps.ignore_symmetries || sg.tdelay == it->tdelay ) ) { to_add = false; break; @@ -534,7 +537,7 @@ class tech_library if ( sg.root->id == it->root->id ) { /* if already in the library exit, else ignore permutations if with equal delay cost */ - if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay ) + if ( sg.polarity == it->polarity && ( _ps.ignore_symmetries || sg.tdelay == it->tdelay ) ) { to_add = false; break; diff --git a/test/algorithms/emap.cpp b/test/algorithms/emap.cpp index 0e95a7a26..514c405e8 100644 --- a/test/algorithms/emap.cpp +++ b/test/algorithms/emap.cpp @@ -170,8 +170,7 @@ TEST_CASE( "Emap on full adder 2", "[emap]" ) emap_params ps; ps.cut_enumeration_ps.minimize_truth_table = false; - ps.use_fast_area_recovery = false; - ps.ela_rounds = 0; + ps.ela_rounds = 1; ps.eswp_rounds = 2; emap_stats st; binding_view luts = emap_klut( aig, lib, ps, &st ); @@ -244,8 +243,7 @@ TEST_CASE( "Emap on full adder 2 with cells", "[emap]" ) emap_params ps; ps.cut_enumeration_ps.minimize_truth_table = false; - ps.use_fast_area_recovery = false; - ps.ela_rounds = 0; + ps.ela_rounds = 1; ps.eswp_rounds = 2; emap_stats st; cell_view luts = emap( aig, lib, ps, &st ); @@ -382,12 +380,12 @@ TEST_CASE( "Emap on multiplier with multi-output gates", "[emap]" ) const float eps{ 0.005f }; - CHECK( luts.size() == 233u ); + CHECK( luts.size() == 235u ); CHECK( luts.num_pis() == 16u ); CHECK( luts.num_pos() == 16u ); - CHECK( luts.num_gates() == 215u ); - CHECK( st.area > 575.0f - eps ); - CHECK( 
st.area < 575.0f + eps ); + CHECK( luts.num_gates() == 217u ); + CHECK( st.area > 612.0f - eps ); + CHECK( st.area < 612.0f + eps ); CHECK( st.delay > 33.60f - eps ); CHECK( st.delay < 33.60f + eps ); CHECK( st.multioutput_gates == 40 ); @@ -654,6 +652,185 @@ TEST_CASE( "Emap with hybrid matching", "[emap]" ) CHECK( st.delay < 5.8f + eps ); } +TEST_CASE( "Emap with arrival times", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( large_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + const auto a = aig.create_pi(); + const auto b = aig.create_pi(); + const auto c = aig.create_pi(); + const auto d = aig.create_pi(); + const auto e = aig.create_pi(); + const auto f = aig.create_pi(); + const auto g = aig.create_pi(); + const auto h = aig.create_pi(); + + const auto f1 = aig.create_and( !a, b ); + const auto f2 = aig.create_and( f1, !c ); + const auto f3 = aig.create_and( d, e ); + const auto f4 = aig.create_and( f, !g ); + const auto f5 = aig.create_and( f4, h ); + const auto f6 = aig.create_and( f2, f3 ); + const auto f7 = aig.create_and( f5, f6 ); + + aig.create_po( f7 ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + emap_stats st; + + ps.arrival_times = std::vector( 8 ); + ps.arrival_times[0] = 0.0; + ps.arrival_times[1] = 1.0; + ps.arrival_times[2] = 2.0; + ps.arrival_times[3] = 3.0; + ps.arrival_times[4] = 4.0; + ps.arrival_times[5] = 5.0; + ps.arrival_times[6] = 6.0; + ps.arrival_times[7] = 7.0; + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 27u ); + CHECK( ntk.num_pis() == 8u ); + CHECK( ntk.num_pos() == 1u ); + CHECK( ntk.num_gates() == 17u ); + CHECK( st.area > 24.0f - eps ); + CHECK( st.area < 24.0f + eps ); + CHECK( st.delay > 12.6f - eps ); + CHECK( st.delay < 12.6f + eps ); +} + +TEST_CASE( "Emap with global required times", 
"[emap]" ) +{ + std::vector gates; + + std::istringstream in( test_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + + std::vector a( 8 ), b( 8 ); + std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } ); + std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } ); + auto carry = aig.get_constant( false ); + + carry_ripple_adder_inplace( aig, a, b, carry ); + + std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); } ); + aig.create_po( carry ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + ps.required_time = 20.0; // real delay 15.7 + emap_stats st; + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 34 ); + CHECK( ntk.num_pis() == 16u ); + CHECK( ntk.num_pos() == 9u ); + CHECK( ntk.num_gates() == 16u ); + CHECK( st.area > 63.0f - eps ); + CHECK( st.area < 63.0f + eps ); + CHECK( st.delay < 20.0f + eps ); +} + +TEST_CASE( "Emap with required times", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( test_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + + std::vector a( 8 ), b( 8 ); + std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } ); + std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } ); + auto carry = aig.get_constant( false ); + + carry_ripple_adder_inplace( aig, a, b, carry ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + // ps.required_time = 20.0; // real delay 15.7 + emap_stats st; + + std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); ps.required_times.push_back( 19.0 ); } ); + aig.create_po( carry ); + ps.required_times.push_back( 20.0 ); + + cell_view ntk = emap<6>( aig, 
lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 34 ); + CHECK( ntk.num_pis() == 16u ); + CHECK( ntk.num_pos() == 9u ); + CHECK( ntk.num_gates() == 16u ); + CHECK( st.area > 63.0f - eps ); + CHECK( st.area < 63.0f + eps ); + CHECK( st.delay < 20.0f + eps ); +} + +TEST_CASE( "Emap with required time relaxation", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( test_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + + std::vector a( 8 ), b( 8 ); + std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } ); + std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } ); + auto carry = aig.get_constant( false ); + + carry_ripple_adder_inplace( aig, a, b, carry ); + + std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); } ); + aig.create_po( carry ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + ps.relax_required = 27.5; // real delay 15.7 + emap_stats st; + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 34 ); + CHECK( ntk.num_pis() == 16u ); + CHECK( ntk.num_pos() == 9u ); + CHECK( ntk.num_gates() == 16u ); + CHECK( st.area > 63.0f - eps ); + CHECK( st.area < 63.0f + eps ); + CHECK( st.delay < 20.0f + eps ); +} + TEST_CASE( "Emap with supergates", "[emap]" ) { std::vector gates;