From 4d29ec04bc4388d705f340acdec9b463064f18e0 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 24 Apr 2024 18:03:35 +0200 Subject: [PATCH 01/27] First incomplete tuned emap --- include/mockturtle/algorithms/emap.hpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index bf557860a..13b46c8f2 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -1089,10 +1089,10 @@ class emap_impl { if ( cuts[index].size() != 0 ) return false; - /* all terminals have flow 0.0 */ - node_data.flows[0] = node_data.flows[1] = 0.0f; + node_data.flows[0] = 0.0f; node_data.arrival[0] = 0.0f; /* PIs have the negative phase implemented with an inverter */ + node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; node_data.arrival[1] = lib_inv_delay; add_unit_cut( index ); return false; @@ -1594,8 +1594,13 @@ class emap_impl /* reset mapping */ node_match[index].map_refs[0] = node_match[index].map_refs[1] = node_match[index].map_refs[2] = 0u; - if ( ntk.is_constant( n ) || ntk.is_pi( n ) ) + if ( ntk.is_constant( n ) ) continue; + if ( ntk.is_pi( n ) ) + { + node_match[index].flows[1] = lib_inv_area / node_match[index].est_refs[1]; + continue; + } /* don't touch box */ if constexpr ( has_is_dont_touch_v ) @@ -2001,6 +2006,10 @@ class emap_impl area += node_data.area[use_phase]; if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 ) { + if ( iteration < ps.area_flow_rounds ) + { + ++node_data.map_refs[use_phase]; + } area += lib_inv_area; ++inv; } @@ -2683,8 +2692,8 @@ class emap_impl node_data.phase[phase_n] = node_data.phase[phase]; node_data.arrival[phase_n] = worst_arrival_n; node_data.area[phase_n] = node_data.area[phase]; - node_data.flows[phase] = node_data.flows[phase] / node_data.est_refs[2]; - node_data.flows[phase_n] = node_data.flows[phase] + lib_inv_area; + node_data.flows[phase_n] = ( 
node_data.flows[phase] + lib_inv_area ) / node_data.est_refs[phase_n]; + node_data.flows[phase] = node_data.flows[phase] / node_data.est_refs[phase]; } void reindex_multioutput_data() From ac376e83d0d501e532ca5608cebe8129e23e59c7 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 24 Apr 2024 19:35:12 +0200 Subject: [PATCH 02/27] Fixes for tuning estimated references and inverters --- include/mockturtle/algorithms/emap.hpp | 69 ++++++++++++-------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 13b46c8f2..9541540a9 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -82,7 +82,7 @@ struct emap_params /*! \brief Parameters for cut enumeration * * The default cut limit is 16. - * The maximum cut limit is 15. + * The maximum cut limit is 31. * By default, truth table minimization * is performed. */ @@ -1076,25 +1076,29 @@ class emap_impl if ( ntk.is_constant( n ) ) { - if ( cuts[index].size() != 0 ) - return false; /* all terminals have flow 0.0 */ node_data.flows[0] = node_data.flows[1] = 0.0f; node_data.arrival[0] = node_data.arrival[1] = 0.0f; - add_zero_cut( index ); - match_constants( index ); + /* skip if cuts have been computed before */ + if ( cuts[index].size() == 0 ) + { + add_zero_cut( index ); + match_constants( index ); + } return false; } else if ( ntk.is_pi( n ) ) { - if ( cuts[index].size() != 0 ) - return false; node_data.flows[0] = 0.0f; node_data.arrival[0] = 0.0f; /* PIs have the negative phase implemented with an inverter */ node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; node_data.arrival[1] = lib_inv_delay; - add_unit_cut( index ); + /* skip if cuts have been computed before */ + if ( cuts[index].size() == 0 ) + { + add_unit_cut( index ); + } return false; } @@ -1106,14 +1110,8 @@ class emap_impl { if ( ntk.is_dont_touch( n ) ) { - if ( cuts[index].size() != 0 ) - { - 
propagate_data_forward_white_box( n ); - } - else - { - warning_box |= initialize_box( n ); - } + + warning_box |= initialize_box( n ); return false; } } @@ -1358,26 +1356,14 @@ class emap_impl auto const index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast( ntk.fanout_size( n ) ); - node_data.map_refs[0] = node_data.map_refs[1] = node_data.map_refs[2] = 0; - node_data.required[0] = node_data.required[1] = std::numeric_limits::max(); - if ( ntk.is_constant( n ) ) { - /* all terminals have flow 0.0 */ - node_data.flows[0] = node_data.flows[1] = 0.0f; - node_data.arrival[0] = node_data.arrival[1] = 0.0f; add_zero_cut( index ); match_constants( index ); continue; } else if ( ntk.is_pi( n ) ) { - /* all terminals have flow 0.0 */ - node_data.flows[0] = node_data.flows[1] = 0.0f; - node_data.arrival[0] = 0.0f; - /* PIs have the negative phase implemented with an inverter */ - node_data.arrival[1] = lib_inv_delay; add_unit_cut( index ); continue; } @@ -1387,7 +1373,7 @@ class emap_impl { if ( ntk.is_dont_touch( n ) ) { - warning_box |= initialize_box( n ); + add_unit_cut( index ); continue; } } @@ -1513,9 +1499,10 @@ class emap_impl else if ( ntk.is_pi( n ) ) { /* all terminals have flow 0 */ - node_data.flows[0] = node_data.flows[1] = 0.0f; + node_data.flows[0] = 0.0f; node_data.arrival[0] = 0.0f; /* PIs have the negative phase implemented with an inverter */ + node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; node_data.arrival[1] = lib_inv_delay; add_unit_cut( index ); continue; @@ -2008,7 +1995,8 @@ class emap_impl { if ( iteration < ps.area_flow_rounds ) { - ++node_data.map_refs[use_phase]; + // ++node_data.map_refs[use_phase]; + node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; } area += lib_inv_area; ++inv; @@ -2079,6 +2067,11 @@ class emap_impl area += node_match[index].area[0]; if ( node_match[index].map_refs[1] ) { + if ( iteration < 
ps.area_flow_rounds ) + { + // ++node_match[index].map_refs[0]; + node_match[index].map_refs[0] += node_match[index].map_refs[1]; + } area += lib_inv_area; ++inv; } @@ -2717,9 +2710,11 @@ class emap_impl bool initialize_box( node const& n ) { uint32_t index = ntk.node_to_index( n ); - auto& node_data = node_match[index]; - add_unit_cut( index ); + if ( cuts[index].size() == 0 ) + add_unit_cut( index ); + + auto& node_data = node_match[index]; node_data.same_match = true; /* if it has mapping data propagate the delays and measure the data */ @@ -2730,11 +2725,11 @@ class emap_impl } /* consider as a black box */ - node_data.flows[0] = node_data.flows[1] = 0.0f; + node_data.flows[0] = 0.0f; + node_data.flows[1] = lib_inv_area / node_data.est_ref[1]; node_data.arrival[0] = 0.0f; node_data.arrival[1] = lib_inv_delay; node_data.area[0] = node_data.area[1] = 0; - node_data.flows[0] = 0; return true; } @@ -2758,8 +2753,8 @@ class emap_impl node_data.arrival[0] = arrival; node_data.arrival[1] = arrival + lib_inv_delay; node_data.area[0] = node_data.area[1] = gate.area; - node_data.flows[0] = node_data.area[0] / node_data.est_refs[2]; - node_data.flows[1] = node_data.flows[0] + lib_inv_area; + node_data.flows[1] = ( node_data.flows[0] + lib_inv_area ) / node_data.est_refs[1]; + node_data.flows[0] = node_data.area[0] / node_data.est_refs[0]; } void propagate_data_backward_white_box( node const& n ) From 28091a09bedf5bdc9f7f1a475cc161b9502aedb7 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 25 Apr 2024 10:08:14 +0200 Subject: [PATCH 03/27] Removing collective references and collective estimations, removing default value initilization for cuts --- include/mockturtle/algorithms/emap.hpp | 148 ++++++++++++------------- 1 file changed, 71 insertions(+), 77 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 9541540a9..2e61319a5 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ 
b/include/mockturtle/algorithms/emap.hpp @@ -202,20 +202,20 @@ template struct cut_enumeration_emap_cut { /* stats */ - double delay{ 0 }; - double flow{ 0 }; - bool ignore{ false }; + uint32_t delay; + float flow; + bool ignore; /* pattern index for structural matching*/ - uint32_t pattern_index{ 0 }; + uint32_t pattern_index; /* function */ kitty::static_truth_table<6> function; /* list of supergates matching the cut for positive and negative output phases */ - std::array> const*, 2> supergates = { nullptr, nullptr }; + std::array> const*, 2> supergates; /* input negations, 0: pos, 1: neg */ - std::array negations{ 0, 0 }; + std::array negations; }; struct cut_enumeration_emap_multi_cut @@ -692,10 +692,10 @@ struct node_match_emap /* area of the best matches */ float area[2]; - /* number of references in the cover 0: pos, 1: neg, 2: pos+neg */ - uint32_t map_refs[3]; + /* number of references in the cover 0: pos, 1: neg */ + uint32_t map_refs[2]; /* references estimation */ - float est_refs[3]; + float est_refs[2]; /* area flow */ float flows[2]; }; @@ -1070,8 +1070,8 @@ class emap_impl auto const index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast( ntk.fanout_size( n ) ); - node_data.map_refs[0] = node_data.map_refs[1] = node_data.map_refs[2] = 0; + node_data.est_refs[0] = node_data.est_refs[1] = static_cast( ntk.fanout_size( n ) ); + node_data.map_refs[0] = node_data.map_refs[1] = 0; node_data.required[0] = node_data.required[1] = std::numeric_limits::max(); if ( ntk.is_constant( n ) ) @@ -1390,7 +1390,7 @@ class emap_impl /* round stats */ if ( ps.verbose ) { - st.round_stats.push_back( fmt::format( "[i] SCuts : Cuts = {:>12d} Time = {:>5.2f}\n", cuts_total, to_seconds( clock::now() - time_begin ) ) ); + st.round_stats.push_back( fmt::format( "[i] SCuts : Cuts = {:>12d} Time = {:>5.2f}\n", cuts_total, to_seconds( clock::now() - time_begin ) ) ); } return 
true; @@ -1484,8 +1484,8 @@ class emap_impl node_data.same_match = 0; node_data.multioutput_match[0] = node_data.multioutput_match[1] = false; node_data.required[0] = node_data.required[1] = std::numeric_limits::max(); - node_data.map_refs[0] = node_data.map_refs[1] = node_data.map_refs[2] = 0; - node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast( ntk.fanout_size( n ) ); + node_data.map_refs[0] = node_data.map_refs[1] = 0; + node_data.est_refs[0] = node_data.est_refs[1] = static_cast( ntk.fanout_size( n ) ); if ( ntk.is_constant( n ) ) { @@ -1579,7 +1579,7 @@ class emap_impl uint32_t index = ntk.node_to_index( n ); /* reset mapping */ - node_match[index].map_refs[0] = node_match[index].map_refs[1] = node_match[index].map_refs[2] = 0u; + node_match[index].map_refs[0] = node_match[index].map_refs[1] = 0u; if ( ntk.is_constant( n ) ) continue; @@ -1678,7 +1678,7 @@ class emap_impl /* recursively deselect the best cut shared between * the two phases if in use in the cover */ - if ( node_data.same_match && node_data.map_refs[2] != 0 ) + if ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) { uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 
0 : 1; auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; @@ -1741,7 +1741,7 @@ class emap_impl auto& node_data = node_match[index]; /* skip not mapped nodes */ - if ( node_match[index].map_refs[2] == 0 ) + if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; /* don't touch box */ @@ -1908,7 +1908,6 @@ class emap_impl if constexpr ( !ELA ) { - node_match[index].map_refs[2]++; if ( ntk.is_complemented( s ) ) node_match[index].map_refs[1]++; else @@ -1927,7 +1926,7 @@ class emap_impl /* skip constants and PIs */ if ( ntk.is_constant( *it ) ) { - if ( node_match[index].map_refs[2] > 0u ) + if ( node_data.map_refs[0] || node_data.map_refs[1] ) { /* if used and not available in the library launch a mapping error */ if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) @@ -1951,7 +1950,7 @@ class emap_impl } /* continue if not referenced in the cover */ - if ( node_match[index].map_refs[2] == 0u ) + if ( !node_match[index].map_refs[0] && !node_match[index].map_refs[1] ) continue; /* don't touch box */ @@ -1983,7 +1982,6 @@ class emap_impl for ( auto const leaf : best_cut ) { - node_match[leaf].map_refs[2]++; if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 ) node_match[leaf].map_refs[1]++; else @@ -2016,7 +2014,6 @@ class emap_impl auto ctr = 0u; for ( auto const leaf : best_cut ) { - node_match[leaf].map_refs[2]++; if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 ) node_match[leaf].map_refs[1]++; else @@ -2037,9 +2034,8 @@ class emap_impl /* blend estimated references */ for ( auto i = 0u; i < ntk.size(); ++i ) { - node_match[i].est_refs[2] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[2] + 2.0f * node_match[i].map_refs[2] ) / 3.0 ); - node_match[i].est_refs[1] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 ); node_match[i].est_refs[0] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[0] + 2.0f * node_match[i].map_refs[0] ) / 3.0 ); + node_match[i].est_refs[1] = 
std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 ); } return true; @@ -2054,7 +2050,6 @@ class emap_impl ntk.foreach_fanin( n, [&]( auto const& f ) { uint32_t leaf = ntk.node_to_index( ntk.get_node( f ) ); uint8_t phase = ntk.is_complemented( f ) ? 1 : 0; - node_match[leaf].map_refs[2]++; node_match[leaf].map_refs[phase]++; } ); } @@ -2131,7 +2126,7 @@ class emap_impl const auto index = ntk.node_to_index( *it ); - if ( node_match[index].map_refs[2] == 0 ) + if ( !node_match[index].map_refs[0] && !node_match[index].map_refs[1] ) continue; match_propagate_required( index ); @@ -2176,7 +2171,7 @@ class emap_impl if constexpr ( has_has_binding_v ) { propagate_data_forward_white_box( n ); - if ( node_data.map_refs[2] ) + if ( node_match[index].map_refs[0] || node_match[index].map_refs[1] ) area += node_data.area[0]; if ( node_data.map_refs[1] ) { @@ -2205,7 +2200,7 @@ class emap_impl node_data.arrival[use_phase] = worst_arrival; /* compute area */ - if ( ( node_data.map_refs[2] && node_data.same_match ) || node_data.map_refs[use_phase] > 0 ) + if ( node_data.map_refs[use_phase] > 0 || ( node_data.same_match && ( node_match[index].map_refs[0] || node_match[index].map_refs[1] ) ) ) { area += node_data.area[use_phase]; if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 ) @@ -2529,7 +2524,7 @@ class emap_impl set_match_complemented_phase( index, 1, worst_arrival_npos ); if constexpr ( ELA ) { - if ( node_data.map_refs[2] ) + if ( node_data.map_refs[0] || node_data.map_refs[1] ) cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); } return; @@ -2539,7 +2534,7 @@ class emap_impl set_match_complemented_phase( index, 0, worst_arrival_nneg ); if constexpr ( ELA ) { - if ( node_data.map_refs[2] ) + if ( node_data.map_refs[0] || node_data.map_refs[1] ) cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); } return; @@ -2646,10 +2641,10 @@ class emap_impl if ( node_data.map_refs[1] > 0 ) cut_deref( 
cuts[index][node_data.best_cut[1]], n, 1 ); /* reference the positive cut if not in use before */ - if ( node_data.map_refs[0] == 0 && node_data.map_refs[2] ) + if ( node_data.map_refs[0] == 0 && node_data.map_refs[1] > 0 ) cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); } - else if ( node_data.map_refs[2] ) + else if ( node_data.map_refs[0] || node_data.map_refs[1] ) cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); } set_match_complemented_phase( index, 0, worst_arrival_nneg ); @@ -2665,10 +2660,10 @@ class emap_impl if ( node_data.map_refs[0] > 0 ) cut_deref( cuts[index][node_data.best_cut[0]], n, 0 ); /* reference the negative cut if not in use before */ - if ( node_data.map_refs[1] == 0 && node_data.map_refs[2] ) + if ( node_data.map_refs[1] == 0 && node_data.map_refs[0] > 0 ) cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); } - else if ( node_data.map_refs[2] ) + else if ( node_data.map_refs[0] || node_data.map_refs[1] ) cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); } set_match_complemented_phase( index, 1, worst_arrival_npos ); @@ -2929,13 +2924,15 @@ class emap_impl for ( auto k = 0; k < max_multioutput_output_size; ++k ) { uint32_t index_k = tuple_data[k].node_index; - k_est += node_match[index_k].est_refs[2]; + auto used_phase = node_match[index_k].supergate[0] == nullptr ? 1 : 0; + k_est += node_match[index_k].est_refs[used_phase]; /* TODO: review */ } mapped_flow *= k_est; } else { - mapped_flow *= node_data.est_refs[2]; + auto used_phase = node_data.supergate[0] == nullptr ? 
1 : 0; /* TODO: review */ + mapped_flow *= node_data.est_refs[used_phase]; } auto const& mapped_cut = cuts[node_index][node_data.best_cut[phase[j]]]; @@ -2948,7 +2945,7 @@ class emap_impl /* quit exit to not unmap phases, TODO: implement it well */ /* current version may lead to delay increase */ - est_refs[j] = node_data.est_refs[2]; + est_refs[j] = node_data.est_refs[phase[j]]; } /* not better than individual gates */ @@ -2969,7 +2966,7 @@ class emap_impl flow_sum += area_flow[j]; combined_est_refs += est_refs[j]; } - flow_sum = flow_sum / combined_est_refs; + flow_sum = flow_sum ; /* not better than individual gates */ if ( respects_required && ( flow_sum > old_flow_sum + epsilon ) ) @@ -3038,7 +3035,7 @@ class emap_impl for ( uint32_t j = 0; j < max_multioutput_output_size; ++j ) { uint32_t node_index = tuple_data[j].node_index; - if ( node_match[node_index].map_refs[2] == 0 ) + if ( !node_match[node_index].map_refs[0] && !node_match[node_index].map_refs[1] ) { return false; } @@ -3049,10 +3046,9 @@ class emap_impl for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) { uint32_t node_index = tuple_data[j].node_index; - best_exact_area[j] = node_match[node_index].flows[0] * node_match[node_index].est_refs[2]; uint8_t selected_phase = node_match[node_index].best_supergate[0] == nullptr ? 1 : 0; - if ( node_match[node_index].map_refs[2] != 0 ) + if ( node_match[node_index].map_refs[0] || node_match[node_index].map_refs[1] ) { /* match is always single output here */ auto const& cut = cuts[node_index][node_match[node_index].best_cut[0]]; @@ -3074,7 +3070,7 @@ class emap_impl { uint32_t node_index = tuple_data[j].node_index; - if ( node_match[node_index].map_refs[2] != 0 ) + if ( node_match[node_index].map_refs[0] || node_match[node_index].map_refs[1] ) { uint8_t use_phase = node_match[node_index].best_supergate[0] != nullptr ? 
0 : 1; auto const& best_cut = cuts[node_index][node_match[node_index].best_cut[use_phase]]; @@ -3221,7 +3217,7 @@ class emap_impl node_data.arrival[mapped_phase] = arrival[j]; node_data.area[mapped_phase] = area[j]; /* partial area contribution */ - node_data.flows[mapped_phase] = area_exact[j] / node_data.est_refs[2]; /* partial exact area contribution */ + node_data.flows[mapped_phase] = area_exact[j]; /* partial exact area contribution */ /* select opposite phase */ mapped_phase ^= 1; node_data.multioutput_match[mapped_phase] = true; @@ -3230,7 +3226,7 @@ class emap_impl node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; node_data.area[mapped_phase] = area[j]; /* partial area contribution */ - node_data.flows[mapped_phase] = area_exact[j] / node_data.est_refs[2]; + node_data.flows[mapped_phase] = area_exact[j]; assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon ); } @@ -3380,7 +3376,7 @@ class emap_impl assert( !node_data.multioutput_match[0] ); assert( !node_data.multioutput_match[1] ); - if ( node_data.same_match && node_data.map_refs[2] != 0 ) + if ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) { uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 
0 : 1; auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; @@ -3529,14 +3525,14 @@ class emap_impl if ( node_data.best_supergate[0] != nullptr && node_data.multioutput_match[0] ) { - if ( node_data.map_refs[0] > 0 || ( node_data.same_match && node_data.map_refs[2] > 0 ) ) + if ( node_data.map_refs[0] > 0 || ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) ) used = true; else unused = true; } else if ( node_data.best_supergate[1] != nullptr && node_data.multioutput_match[1] ) { - if ( node_data.map_refs[1] > 0 || ( node_data.same_match && node_data.map_refs[2] > 0 ) ) + if ( node_data.map_refs[1] > 0 || ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) ) used = true; else unused = true; @@ -3553,12 +3549,12 @@ class emap_impl auto& node_data = node_match[node_index]; auto const n = ntk.index_to_node( node_index ); - if ( node_data.map_refs[2] == 0 ) + if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; /* recursively deselect the best cut shared between * the two phases if in use in the cover */ - if ( node_data.same_match && node_data.map_refs[2] != 0 ) + if ( node_data.same_match ) { uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 
0 : 1; auto const& best_cut = cuts[node_index][node_data.best_cut[use_phase]]; @@ -3649,18 +3645,23 @@ class emap_impl else count += lib_inv_area; } - ++node_match[leaf].map_refs[2]; } else { ++node_match[leaf].map_refs[0]; - ++node_match[leaf].map_refs[2]; } continue; } if ( node_match[leaf].same_match ) { + /* Recursive referencing if leaf was not referenced */ + if ( !node_match[leaf].map_refs[0] && !node_match[leaf].map_refs[1] ) + { + auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; + count += cut_ref( best_cut, ntk.index_to_node( leaf ), leaf_phase ); + } + /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */ if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) { @@ -3669,16 +3670,9 @@ class emap_impl else count += lib_inv_area; } - /* Recursive referencing if leaf was not referenced */ - if ( node_match[leaf].map_refs[2]++ == 0u ) - { - auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; - count += cut_ref( best_cut, ntk.index_to_node( leaf ), leaf_phase ); - } } else { - ++node_match[leaf].map_refs[2]; if ( node_match[leaf].map_refs[leaf_phase]++ == 0u ) { auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; @@ -3731,12 +3725,10 @@ class emap_impl else count += lib_inv_area; } - --node_match[leaf].map_refs[2]; } else { --node_match[leaf].map_refs[0]; - --node_match[leaf].map_refs[2]; } continue; } @@ -3752,7 +3744,7 @@ class emap_impl count += lib_inv_area; } /* Recursive dereferencing */ - if ( --node_match[leaf].map_refs[2] == 0u ) + if ( !node_match[leaf].map_refs[0] && !node_match[leaf].map_refs[1] ) { auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; count += cut_deref( best_cut, ntk.index_to_node( leaf ), leaf_phase ); @@ -3760,7 +3752,6 @@ class emap_impl } else { - --node_match[leaf].map_refs[2]; if ( --node_match[leaf].map_refs[leaf_phase] == 0u ) { auto const& 
best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; @@ -3782,7 +3773,6 @@ class emap_impl for ( auto s : tmp_visited ) { uint32_t leaf = s >> 1; - --node_match[leaf].map_refs[2]; --node_match[leaf].map_refs[s & 1]; } @@ -3835,18 +3825,23 @@ class emap_impl else count += lib_inv_area; } - ++node_match[leaf].map_refs[2]; } else { ++node_match[leaf].map_refs[0]; - ++node_match[leaf].map_refs[2]; } continue; } if ( node_match[leaf].same_match ) { + /* Recursive referencing if leaf was not referenced */ + if ( !node_match[leaf].map_refs[0] && !node_match[leaf].map_refs[1] ) + { + auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; + count += cut_ref_visit( best_cut, ntk.index_to_node( leaf ), leaf_phase ); + } + /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */ if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) { @@ -3855,16 +3850,9 @@ class emap_impl else count += lib_inv_area; } - /* Recursive referencing if leaf was not referenced */ - if ( node_match[leaf].map_refs[2]++ == 0u ) - { - auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; - count += cut_ref_visit( best_cut, ntk.index_to_node( leaf ), leaf_phase ); - } } else { - ++node_match[leaf].map_refs[2]; if ( node_match[leaf].map_refs[leaf_phase]++ == 0u ) { auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]]; @@ -3977,7 +3965,7 @@ class emap_impl } /* continue if cut is not in the cover */ - if ( node_data.map_refs[2] == 0u ) + if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; /* don't touch box */ @@ -4095,7 +4083,7 @@ class emap_impl } /* continue if cut is not in the cover */ - if ( node_data.map_refs[2] == 0u ) + if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; /* don't touch box */ @@ -4562,8 +4550,8 @@ class emap_impl void recompute_cut_data( cut_t& cut, node const& n ) { /* compute cut cost based 
on LUT area */ - double best_arrival = 0; - double best_area_flow = cut.size() > 1 ? cut.size() : 0; + uint32_t best_arrival = 0; + float best_area_flow = cut.size() > 1 ? cut.size() : 0; for ( auto leaf : cut ) { @@ -4604,7 +4592,10 @@ class emap_impl { auto& cut = cuts[index].add_cut( &index, &index ); /* fake iterator for emptyness */ cut->ignore = true; + cut->delay = 0; + cut->flow = 0; cut->pattern_index = 0; + cut->negations[0] = cut->negations[1] = 0; } void add_unit_cut( uint32_t index ) @@ -4613,7 +4604,10 @@ class emap_impl kitty::create_nth_var( cut->function, 0 ); cut->ignore = true; + cut->delay = 0; + cut->flow = 0; cut->pattern_index = 1; + cut->negations[0] = cut->negations[1] = 0; } inline void create_structural_cut( cut_t& new_cut, std::vector const& vcuts, uint32_t new_pattern, uint32_t pattern_id1, uint32_t pattern_id2 ) @@ -4772,7 +4766,7 @@ class emap_impl } /* continue if cut is not in the cover */ - if ( node_match[index].map_refs[2] == 0u ) + if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 
0 : 1; From 0a8f4b8ddd1497c9523b4168532bcd87028ae493 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 25 Apr 2024 11:37:01 +0200 Subject: [PATCH 04/27] Experiment file --- experiments/emap.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/experiments/emap.cpp b/experiments/emap.cpp index 5b1bc2f10..424d96d6a 100644 --- a/experiments/emap.cpp +++ b/experiments/emap.cpp @@ -66,9 +66,9 @@ int main() tech_library_params tps; tps.verbose = true; - tech_library<9> tech_lib( gates, tps ); + tech_library<6> tech_lib( gates, tps ); - for ( auto const& benchmark : epfl_benchmarks() ) + for ( auto const& benchmark : iwls_benchmarks() ) { fmt::print( "[i] processing {}\n", benchmark ); @@ -78,21 +78,25 @@ int main() continue; } + if ( aig.num_gates() > 100000 ) + continue; + /* remove structural redundancies */ aig_balancing_params bps; bps.minimize_levels = false; - bps.fast_mode = false; + bps.fast_mode = true; aig_balance( aig, bps ); const uint32_t size_before = aig.num_gates(); const uint32_t depth_before = depth_view( aig ).depth(); emap_params ps; - ps.matching_mode = emap_params::hybrid; + ps.matching_mode = emap_params::boolean; ps.area_oriented_mapping = false; ps.map_multioutput = true; + ps.verbose = true; emap_stats st; - cell_view res = emap<9>( aig, tech_lib, ps, &st ); + cell_view res = emap<6>( aig, tech_lib, ps, &st ); names_view res_names{ res }; restore_network_name( aig, res_names ); From 25b0bf1cbbad26d333ef1d89ebf83249297182ec Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 25 Apr 2024 16:15:44 +0200 Subject: [PATCH 05/27] Add new multi-output cut initialization --- include/mockturtle/algorithms/emap.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 2e61319a5..ef1f68237 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -5082,6 +5082,9 @@ class emap_impl /* add cut 
matches */ for ( auto i = 0; i < max_multioutput_output_size; ++i ) { + cut_tuple[order[i]]->supergates[0] = nullptr; + cut_tuple[order[i]]->supergates[1] = nullptr; + cut_tuple[order[i]]->ignore = false; std::vector> const* multigate = &( ( *multigates_match )[i] ); cut_tuple[order[i]]->supergates[phase_order[i]] = multigate; } From 996c06854b8148b6e89238bdc4b544aae0b4b1a9 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 25 Apr 2024 19:23:58 +0200 Subject: [PATCH 06/27] Fixes to emap --- include/mockturtle/algorithms/emap.hpp | 87 ++++++++++++++------------ 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index ef1f68237..88c64a722 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -1166,6 +1166,7 @@ class emap_impl rcuts.set_cut_limit( ps.cut_enumeration_ps.cut_limit ); cut_t new_cut; + new_cut->pattern_index = 0; fanin_cut_t vcuts; for ( auto const& c1 : *lcuts[0] ) @@ -2871,7 +2872,6 @@ class emap_impl /* get the output phase */ pin_phase[j] = gate.polarity; phase[j] = ( gate.polarity >> NInputs ) ^ phase_inverted; - uint8_t old_phase = node_data.phase[phase[j]]; /* compute arrival */ arrival[j] = 0.0; @@ -2905,45 +2905,49 @@ class emap_impl respects_required = false; /* compute area flow */ - old_flow_sum += node_data.flows[phase[j]]; + if ( j == 0 || !node_data.multioutput_match[0] ) + { + uint8_t current_phase = node_data.best_supergate[0] == nullptr ? 
1 : 0; + old_flow_sum += node_data.flows[current_phase]; + } + uint8_t old_phase = node_data.phase[phase[j]]; node_data.phase[phase[j]] = gate.polarity; area[j] = gate.area; area_flow[j] = gate.area + cut_leaves_flow( cut, n, phase[j] ); node_data.phase[phase[j]] = old_phase; /* local evaluation for delay (area flow improvement is approximated) */ - if constexpr ( !DO_AREA ) - { - /* recompute local area flow of previous matches */ - double mapped_flow = node_data.flows[phase[j]]; - - if ( node_data.multioutput_match[phase[j]] ) - { - /* recompute estimation for multi-output gate */ - float k_est = 0; - for ( auto k = 0; k < max_multioutput_output_size; ++k ) - { - uint32_t index_k = tuple_data[k].node_index; - auto used_phase = node_match[index_k].supergate[0] == nullptr ? 1 : 0; - k_est += node_match[index_k].est_refs[used_phase]; /* TODO: review */ - } - mapped_flow *= k_est; - } - else - { - auto used_phase = node_data.supergate[0] == nullptr ? 1 : 0; /* TODO: review */ - mapped_flow *= node_data.est_refs[used_phase]; - } - - auto const& mapped_cut = cuts[node_index][node_data.best_cut[phase[j]]]; - if ( !compare_map( arrival[j], node_data.arrival[phase[j]], area_flow[j], mapped_flow, cut.size(), mapped_cut.size() ) ) - { - is_best = false; - break; - } - } + // if constexpr ( !DO_AREA ) + // { + // /* recompute local area flow of previous matches */ + // double mapped_flow = node_data.flows[phase[j]]; + + // if ( node_data.multioutput_match[phase[j]] ) + // { + // /* recompute estimation for multi-output gate */ + // float k_est = 0; + // for ( auto k = 0; k < max_multioutput_output_size; ++k ) + // { + // uint32_t index_k = tuple_data[k].node_index; + // auto used_phase = node_match[index_k].supergate[0] == nullptr ? 1 : 0; + // k_est += node_match[index_k].est_refs[used_phase]; /* TODO: review */ + // } + // mapped_flow *= k_est; + // } + // else + // { + // auto used_phase = node_data.supergate[0] == nullptr ? 
1 : 0; /* TODO: review */ + // mapped_flow *= node_data.est_refs[used_phase]; + // } + + // auto const& mapped_cut = cuts[node_index][node_data.best_cut[phase[j]]]; + // if ( !compare_map( arrival[j], node_data.arrival[phase[j]], area_flow[j], mapped_flow, cut.size(), mapped_cut.size() ) ) + // { + // is_best = false; + // break; + // } + // } - /* quit exit to not unmap phases, TODO: implement it well */ /* current version may lead to delay increase */ est_refs[j] = node_data.est_refs[phase[j]]; } @@ -2959,20 +2963,23 @@ class emap_impl } /* combine evaluation for precise area flow estimantion */ - double flow_sum = 0; - double combined_est_refs = 0; + /* compute equation AF(n) = ( Area(G) + |roots| * SUM_{l in leaves} AF(l) ) / SUM_{p in roots} est_refs( p ) */ + float flow_sum_pos = 0, flow_sum_neg; + float combined_est_refs = 0; for ( auto j = 0; j < max_multioutput_output_size; ++j ) { - flow_sum += area_flow[j]; + flow_sum_pos += area_flow[j]; combined_est_refs += est_refs[j]; } - flow_sum = flow_sum ; + flow_sum_neg = flow_sum_pos; + flow_sum_pos /= combined_est_refs; /* not better than individual gates */ - if ( respects_required && ( flow_sum > old_flow_sum + epsilon ) ) + if ( respects_required && ( flow_sum_pos > old_flow_sum + epsilon ) ) continue; mapped_multioutput = true; + flow_sum_neg = ( flow_sum_neg + lib_inv_area ) / combined_est_refs; /* commit multi-output gate */ for ( uint32_t j = 0; j < max_multioutput_output_size; ++j ) @@ -2991,7 +2998,7 @@ class emap_impl node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j]; node_data.area[mapped_phase] = area[j]; /* partial area contribution */ - node_data.flows[mapped_phase] = flow_sum; + node_data.flows[mapped_phase] = flow_sum_pos; assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon ); @@ -3003,7 +3010,7 @@ class emap_impl node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; 
node_data.area[mapped_phase] = area[j]; /* partial area contribution */ - node_data.flows[mapped_phase] = flow_sum + lib_inv_area; /* TODO: check quality */ + node_data.flows[mapped_phase] = flow_sum_neg; assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon ); } From 4d6ba961ce42656809a44ee57a4043f9e17ceb7a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 26 Apr 2024 09:50:37 +0200 Subject: [PATCH 07/27] Lowering the default memory usage and formatting the code --- include/mockturtle/algorithms/emap.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 88c64a722..aaf9b7285 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -82,7 +82,7 @@ struct emap_params /*! \brief Parameters for cut enumeration * * The default cut limit is 16. - * The maximum cut limit is 31. + * The maximum cut limit is 19. * By default, truth table minimization * is performed. 
*/ @@ -716,7 +716,7 @@ class emap_impl { public: static constexpr float epsilon = 0.0005; - static constexpr uint32_t max_cut_num = 32; + static constexpr uint32_t max_cut_num = 20; using cut_t = cut>; using cut_set_t = emap_cut_set; using cut_merge_t = typename std::array; @@ -1391,7 +1391,7 @@ class emap_impl /* round stats */ if ( ps.verbose ) { - st.round_stats.push_back( fmt::format( "[i] SCuts : Cuts = {:>12d} Time = {:>5.2f}\n", cuts_total, to_seconds( clock::now() - time_begin ) ) ); + st.round_stats.push_back( fmt::format( "[i] SCuts : Cuts = {:>12d} Time = {:>12.2f}\n", cuts_total, to_seconds( clock::now() - time_begin ) ) ); } return true; From 72e7d7e1a6fe16002620a833bd8ffc3c3b0de15f Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 26 Apr 2024 09:54:29 +0200 Subject: [PATCH 08/27] Reverting emap experiment to default --- experiments/emap.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/experiments/emap.cpp b/experiments/emap.cpp index 424d96d6a..92fed626a 100644 --- a/experiments/emap.cpp +++ b/experiments/emap.cpp @@ -66,9 +66,9 @@ int main() tech_library_params tps; tps.verbose = true; - tech_library<6> tech_lib( gates, tps ); + tech_library<9> tech_lib( gates, tps ); - for ( auto const& benchmark : iwls_benchmarks() ) + for ( auto const& benchmark : epfl_benchmarks() ) { fmt::print( "[i] processing {}\n", benchmark ); @@ -78,9 +78,6 @@ int main() continue; } - if ( aig.num_gates() > 100000 ) - continue; - /* remove structural redundancies */ aig_balancing_params bps; bps.minimize_levels = false; @@ -91,12 +88,11 @@ int main() const uint32_t depth_before = depth_view( aig ).depth(); emap_params ps; - ps.matching_mode = emap_params::boolean; + ps.matching_mode = emap_params::hybrid; ps.area_oriented_mapping = false; ps.map_multioutput = true; - ps.verbose = true; emap_stats st; - cell_view res = emap<6>( aig, tech_lib, ps, &st ); + cell_view res = emap<9>( aig, tech_lib, ps, &st ); names_view res_names{ res }; 
restore_network_name( aig, res_names ); From ecb1746aa9a03a98ab7d7dd97da4c917d271b16b Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 26 Apr 2024 10:12:14 +0200 Subject: [PATCH 09/27] Add updated tests --- test/algorithms/emap.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/algorithms/emap.cpp b/test/algorithms/emap.cpp index b25dce881..0e95a7a26 100644 --- a/test/algorithms/emap.cpp +++ b/test/algorithms/emap.cpp @@ -382,15 +382,15 @@ TEST_CASE( "Emap on multiplier with multi-output gates", "[emap]" ) const float eps{ 0.005f }; - CHECK( luts.size() == 234u ); + CHECK( luts.size() == 233u ); CHECK( luts.num_pis() == 16u ); CHECK( luts.num_pos() == 16u ); - CHECK( luts.num_gates() == 216u ); - CHECK( st.area > 577.0f - eps ); - CHECK( st.area < 577.0f + eps ); + CHECK( luts.num_gates() == 215u ); + CHECK( st.area > 575.0f - eps ); + CHECK( st.area < 575.0f + eps ); CHECK( st.delay > 33.60f - eps ); CHECK( st.delay < 33.60f + eps ); - CHECK( st.multioutput_gates == 39 ); + CHECK( st.multioutput_gates == 40 ); } TEST_CASE( "Emap with inverters", "[emap]" ) From f75f399bcda3674e517b4419dfa56b8da42cc95c Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 29 Apr 2024 21:26:50 +0200 Subject: [PATCH 10/27] Improving phase dropping heuristics and first implementation of alternative matches --- experiments/emap.cpp | 28 +- include/mockturtle/algorithms/emap.hpp | 568 +++++++++++++++++++++++-- 2 files changed, 540 insertions(+), 56 deletions(-) diff --git a/experiments/emap.cpp b/experiments/emap.cpp index 92fed626a..fa2bb17c1 100644 --- a/experiments/emap.cpp +++ b/experiments/emap.cpp @@ -55,7 +55,7 @@ int main() /* library to map to technology */ fmt::print( "[i] processing technology library\n" ); - std::string library = "multioutput"; + std::string library = "asap7"; std::vector gates; std::ifstream in( cell_libraries_path( library ) ); @@ -66,9 +66,9 @@ int main() tech_library_params tps; tps.verbose = true; - tech_library<9> 
tech_lib( gates, tps ); + tech_library<6> tech_lib( gates, tps ); - for ( auto const& benchmark : epfl_benchmarks() ) + for ( auto const& benchmark : iwls_benchmarks() ) { fmt::print( "[i] processing {}\n", benchmark ); @@ -78,26 +78,32 @@ int main() continue; } + // if ( aig.num_gates() > 100000 ) + // continue; + /* remove structural redundancies */ - aig_balancing_params bps; - bps.minimize_levels = false; - bps.fast_mode = true; - aig_balance( aig, bps ); + // aig_balancing_params bps; + // bps.minimize_levels = false; + // bps.fast_mode = true; + // aig_balance( aig, bps ); const uint32_t size_before = aig.num_gates(); const uint32_t depth_before = depth_view( aig ).depth(); emap_params ps; - ps.matching_mode = emap_params::hybrid; + ps.matching_mode = emap_params::boolean; ps.area_oriented_mapping = false; - ps.map_multioutput = true; + ps.map_multioutput = false; + ps.verbose = true; emap_stats st; - cell_view res = emap<9>( aig, tech_lib, ps, &st ); + cell_view res = emap<6>( aig, tech_lib, ps, &st ); names_view res_names{ res }; restore_network_name( aig, res_names ); restore_pio_names_by_order( aig, res_names ); - const auto cec = benchmark == "hyp" ? true : abc_cec_mapped_cell( res_names, benchmark, library ); + // const auto cec = benchmark == "hyp" ? true : abc_cec_mapped_cell( res_names, benchmark, library ); + // std::cout << fmt::format( "[i] CEC = {}\n", cec ); + const auto cec = false; /* don't run CEC */ /* write verilog netlist */ // write_verilog_with_cell( res_names, benchmark + "_mapped.v" ); diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index aaf9b7285..3badd8e7a 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -128,6 +128,9 @@ struct emap_params /*! \brief Fast area recovery */ bool use_fast_area_recovery{ true }; + /*! \brief Compute alternatives using a different cost functions */ + bool use_match_alternatives{ true }; + /*! 
\brief Remove the cuts that are contained in others */ bool remove_dominated_cuts{ false }; @@ -671,17 +674,33 @@ struct emap_triple_hash }; #pragma endregion +template +struct best_gate_emap +{ + supergate const* gate; + double arrival; + float area; + float flow; + unsigned phase : 16; + unsigned cut : 12; + unsigned size : 4; +}; + template struct node_match_emap { /* best gate match for positive and negative output phases */ supergate const* best_supergate[2]; + /* alternative best gate for positibe and negative output phase */ + best_gate_emap best_alternative[2]; /* fanin pin phases for both output phases */ uint16_t phase[2]; /* best cut index for both phases */ - uint32_t best_cut[2]; + uint16_t best_cut[2]; /* node is mapped using only one phase */ bool same_match; + /* node alternative uses only one phase */ + bool same_match_alternative; /* node is mapped to a multi-output gate */ bool multioutput_match[2]; @@ -1017,6 +1036,8 @@ class emap_impl /* try to drop one phase */ match_drop_phase( n, 0 ); + select_alternatives( n ); + /* load and try a multi-output matches */ if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) { @@ -1034,7 +1055,7 @@ class emap_impl } double area_old = area; - bool success = set_mapping_refs(); + bool success = set_mapping_refs2(); if ( warning_box ) { @@ -1078,7 +1099,9 @@ class emap_impl { /* all terminals have flow 0.0 */ node_data.flows[0] = node_data.flows[1] = 0.0f; + node_data.best_alternative[0].flow = node_data.best_alternative[1].flow = 0.0f; node_data.arrival[0] = node_data.arrival[1] = 0.0f; + node_data.best_alternative[0].arrival = node_data.best_alternative[1].arrival = 0.0f; /* skip if cuts have been computed before */ if ( cuts[index].size() == 0 ) { @@ -1090,10 +1113,14 @@ class emap_impl else if ( ntk.is_pi( n ) ) { node_data.flows[0] = 0.0f; + node_data.best_alternative[0].flow = 0.0f; node_data.arrival[0] = 0.0f; + node_data.best_alternative[0].arrival = 0.0f; /* PIs have the negative phase 
implemented with an inverter */ node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; + node_data.best_alternative[1].flow = lib_inv_area / node_data.est_refs[1]; node_data.arrival[1] = lib_inv_delay; + node_data.best_alternative[1].arrival = lib_inv_delay; /* skip if cuts have been computed before */ if ( cuts[index].size() == 0 ) { @@ -1587,6 +1614,7 @@ class emap_impl if ( ntk.is_pi( n ) ) { node_match[index].flows[1] = lib_inv_area / node_match[index].est_refs[1]; + node_match[index].best_alternative[1].flow = lib_inv_area / node_match[index].est_refs[1]; continue; } @@ -1994,8 +2022,195 @@ class emap_impl { if ( iteration < ps.area_flow_rounds ) { - // ++node_data.map_refs[use_phase]; - node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; + ++node_data.map_refs[use_phase]; + // node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; + } + area += lib_inv_area; + ++inv; + } + } + + /* invert the phase */ + use_phase = use_phase ^ 1; + + /* if both phases are implemented and used */ + if ( !node_data.same_match && node_data.map_refs[use_phase] > 0 ) + { + if constexpr ( !ELA ) + { + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + + auto ctr = 0u; + for ( auto const leaf : best_cut ) + { + if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 ) + node_match[leaf].map_refs[1]++; + else + node_match[leaf].map_refs[0]++; + } + } + area += node_data.area[use_phase]; + } + } + + ++iteration; + + if constexpr ( ELA ) + { + return true; + } + + /* blend estimated references */ + for ( auto i = 0u; i < ntk.size(); ++i ) + { + node_match[i].est_refs[0] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[0] + 2.0f * node_match[i].map_refs[0] ) / 3.0 ); + node_match[i].est_refs[1] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 ); + } + + return true; + } + + template + bool set_mapping_refs2() + { + for ( auto i = 0u; i < node_match.size(); ++i ) + { + node_match[i].required[0] = 
node_match[i].required[1] = std::numeric_limits::max(); + } + + /* compute the current worst delay and update the mapping refs */ + delay = 0.0f; + ntk.foreach_po( [this]( auto s ) { + const auto index = ntk.node_to_index( ntk.get_node( s ) ); + + if ( ntk.is_complemented( s ) ) + delay = std::max( delay, node_match[index].arrival[1] ); + else + delay = std::max( delay, node_match[index].arrival[0] ); + + if constexpr ( !ELA ) + { + if ( ntk.is_complemented( s ) ) + node_match[index].map_refs[1]++; + else + node_match[index].map_refs[0]++; + } + } ); + + double required = delay; + /* relax delay constraints */ + if ( iteration == 0 && ps.required_time == 0.0f && ps.relax_required > 0.0f ) + { + required *= ( 100.0 + ps.relax_required ) / 100.0; + } + + /* Global target time constraint */ + if ( ps.required_time != 0.0f ) + { + if ( ps.required_time < delay - epsilon ) + { + if ( !ps.area_oriented_mapping && iteration == 1 ) + std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl; + } + else + { + required = ps.required_time; + } + } + + /* set the required time at POs */ + ntk.foreach_po( [&]( auto const& s ) { + const auto index = ntk.node_to_index( ntk.get_node( s ) ); + if ( ntk.is_complemented( s ) ) + node_match[index].required[1] = required; + else + node_match[index].required[0] = required; + } ); + + /* compute current area and update mapping refs in top-down order */ + area = 0.0f; + inv = 0; + for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it ) + { + const auto index = ntk.node_to_index( *it ); + auto& node_data = node_match[index]; + + /* skip constants and PIs */ + if ( ntk.is_constant( *it ) ) + { + if ( node_match[index].map_refs[0] || node_match[index].map_refs[1] ) + { + /* if used and not available in the library launch a mapping error */ + if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + { + std::cerr << "[e] MAP ERROR: 
technology library does not contain constant gates, impossible to perform mapping" << std::endl; + st.mapping_error = true; + return false; + } + } + continue; + } + else if ( ntk.is_pi( *it ) ) + { + if ( node_match[index].map_refs[1] > 0u ) + { + /* Add inverter area over the negated fanins */ + area += lib_inv_area; + ++inv; + } + continue; + } + + /* continue if not referenced in the cover */ + if ( !node_match[index].map_refs[0] && !node_match[index].map_refs[1] ) + continue; + + /* don't touch box */ + if constexpr ( has_is_dont_touch_v ) + { + if ( ntk.is_dont_touch( *it ) ) + { + set_mapping_refs_dont_touch( *it ); + continue; + } + } + + unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; + + if ( node_data.best_supergate[use_phase] == nullptr ) + { + /* Library is not complete, mapping is not possible */ + std::cerr << "[e] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl; + st.mapping_error = true; + return false; + } + + /* refine best mathes looking at alternatives */ + if ( ps.use_match_alternatives) + refine_best_matches( *it ); + + if ( node_data.same_match || node_data.map_refs[use_phase] > 0 ) + { + if constexpr ( !ELA ) + { + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + auto ctr = 0u; + + for ( auto const leaf : best_cut ) + { + if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 ) + node_match[leaf].map_refs[1]++; + else + node_match[leaf].map_refs[0]++; + } + } + area += node_data.area[use_phase]; + if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 ) + { + if ( iteration < ps.area_flow_rounds ) + { + ++node_data.map_refs[use_phase]; + // node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; } area += lib_inv_area; ++inv; @@ -2023,6 +2238,11 @@ class emap_impl } area += node_data.area[use_phase]; } + + if ( !ps.area_oriented_mapping ) + { + match_propagate_required( index ); + } } ++iteration; @@ -2065,8 +2285,8 @@ class emap_impl { 
if ( iteration < ps.area_flow_rounds ) { - // ++node_match[index].map_refs[0]; - node_match[index].map_refs[0] += node_match[index].map_refs[1]; + ++node_match[index].map_refs[0]; + // node_match[index].map_refs[0] += node_match[index].map_refs[1]; } area += lib_inv_area; ++inv; @@ -2312,24 +2532,24 @@ class emap_impl template void match_phase( node const& n, uint8_t phase ) { - double best_arrival = std::numeric_limits::max(); - double best_area_flow = std::numeric_limits::max(); - float best_area = std::numeric_limits::max(); - uint32_t best_size = UINT32_MAX; - uint8_t best_cut = 0u; - uint16_t best_phase = 0u; - uint8_t cut_index = 0u; auto index = ntk.node_to_index( n ); - auto& node_data = node_match[index]; - supergate const* best_supergate = node_data.best_supergate[phase]; + uint32_t cut_index = 0u; + + node_data.best_supergate[phase] = nullptr; + node_data.arrival[phase] = std::numeric_limits::max(); + node_data.flows[phase] = std::numeric_limits::max(); + node_data.area[phase] = std::numeric_limits::max(); + uint32_t best_size = UINT32_MAX; + + best_gate_emap& gA = node_data.best_alternative[phase]; + gA.gate = nullptr; + gA.arrival = std::numeric_limits::max(); + gA.flow = std::numeric_limits::max(); + uint32_t best_sizeA = UINT32_MAX; /* unmap multioutput */ - if ( node_data.multioutput_match[phase] ) - { - best_supergate = nullptr; - node_data.multioutput_match[phase] = false; - } + node_data.multioutput_match[phase] = false; /* foreach cut */ for ( auto& cut : cuts[index] ) @@ -2355,49 +2575,68 @@ class emap_impl { uint16_t gate_polarity = gate.polarity ^ negation; double worst_arrival = 0.0f; - double area_local = gate.area; + double worst_arrivalA = 0.0f; + float area_local = gate.area; + float area_localA = gate.area; auto ctr = 0u; - node_data.phase[phase] = gate_polarity; for ( auto l : *cut ) { - double arrival_pin = node_match[l].arrival[( gate_polarity >> ctr ) & 1] + gate.tdelay[ctr]; + uint8_t leaf_phase = ( gate_polarity >> ctr ) & 1; + + 
double arrival_pinA = node_match[l].best_alternative[leaf_phase].arrival + gate.tdelay[ctr]; + worst_arrivalA = std::max( worst_arrivalA, arrival_pinA ); + + // if constexpr ( DO_AREA ) + // { + // if ( worst_arrivalA > node_data.required[phase] + epsilon || worst_arrivalA >= std::numeric_limits::max() ) + // break; + // } + + double arrival_pin = node_match[l].arrival[leaf_phase] + gate.tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); - uint8_t leaf_phase = ( node_data.phase[phase] >> ctr ) & 1; area_local += node_match[l].flows[leaf_phase]; + area_localA += node_match[l].best_alternative[leaf_phase].flow; ++ctr; } + bool skip = false; if constexpr ( DO_AREA ) { - if ( worst_arrival > node_data.required[phase] + epsilon || worst_arrival >= std::numeric_limits::max() ) + if ( ctr < cut->size() ) continue; + if ( worst_arrival > node_data.required[phase] + epsilon || worst_arrival >= std::numeric_limits::max() ) + skip = true; } - node_data.phase[phase] = gate_polarity; - - if ( compare_map( worst_arrival, best_arrival, area_local, best_area_flow, cut->size(), best_size ) ) + if ( !skip && compare_map( worst_arrival, node_data.arrival[phase], area_local, node_data.flows[phase], cut->size(), best_size ) ) { - best_arrival = worst_arrival; - best_area_flow = area_local; + node_data.best_supergate[phase] = &gate; + node_data.arrival[phase] = worst_arrival; + node_data.flows[phase] = area_local; + node_data.best_cut[phase] = cut_index; + node_data.area[phase] = gate.area; + node_data.phase[phase] = gate_polarity; best_size = cut->size(); - best_cut = cut_index; - best_area = gate.area; - best_phase = gate_polarity; - best_supergate = &gate; + } + + /* compute the alternative */ + if ( compare_map( worst_arrivalA, gA.arrival, area_localA, gA.flow, cut->size(), best_sizeA ) ) + { + gA.gate = &gate; + gA.arrival = worst_arrivalA; + gA.area = gate.area; + gA.flow = area_localA; + gA.phase = gate_polarity; + gA.cut = cut_index; + best_sizeA = cut->size(); 
+ gA.size = cut->size(); } } ++cut_index; } - - node_data.flows[phase] = best_area_flow; - node_data.arrival[phase] = best_arrival; - node_data.area[phase] = best_area; - node_data.best_cut[phase] = best_cut; - node_data.phase[phase] = best_phase; - node_data.best_supergate[phase] = best_supergate; } template @@ -2625,10 +2864,50 @@ class emap_impl { auto size_zero = cuts[index][node_data.best_cut[0]].size(); auto size_one = cuts[index][node_data.best_cut[1]].size(); - if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) - use_one = false; + + if constexpr ( ELA ) + { + if ( !node_data.same_match ) + { + /* both phases were implemented --> evaluate substitution */ + cut_deref( cuts[index][node_data.best_cut[0]], n, 0 ); + node_data.flows[1] = cut_deref( cuts[index][node_data.best_cut[1]], n, 1 ); + node_data.flows[0] = cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); + cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); + } + if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) + use_one = false; + else + use_zero = false; + } else - use_zero = false; + { + /* compare flows by looking at the most convinient and referenced */ + if ( node_data.flows[0] / node_data.est_refs[0] + lib_inv_area < node_data.flows[1] / node_data.est_refs[1] + epsilon ) + { + use_one = false; + } + else if ( node_data.flows[1] / node_data.est_refs[1] + lib_inv_area < node_data.flows[0] / node_data.est_refs[0] + epsilon ) + { + use_zero = false; + } + else + { + if ( iteration < ps.area_flow_rounds ) + { + /* delay the decision on what to keep --> wait for better estimations */ + node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0]; + node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; + node_data.same_match = false; + return; + } + /* commit to one of the two before going to exact area */ + if ( compare_map( 
worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) + use_one = false; + else + use_zero = false; + } + } } if ( use_zero ) @@ -2685,6 +2964,205 @@ class emap_impl node_data.flows[phase] = node_data.flows[phase] / node_data.est_refs[phase]; } + template + inline void select_alternatives( node const& n ) + { + if ( !ps.use_match_alternatives ) + return; + + auto index = ntk.node_to_index( n ); + auto& node_data = node_match[index]; + + best_gate_emap& g0 = node_data.best_alternative[0]; + best_gate_emap& g1 = node_data.best_alternative[1]; + + if constexpr ( DO_AREA ) + { + /* process for best delay */ + if ( g0.arrival + lib_inv_delay < g1.arrival + epsilon ) + { + node_data.same_match_alternative = true; + g1 = g0; + g1.gate = nullptr; + g1.arrival += lib_inv_delay; + g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; + g0.flow /= node_data.est_refs[0]; + return; + } + else if ( g1.arrival + lib_inv_delay < g0.arrival + epsilon ) + { + node_data.same_match_alternative = true; + g0 = g1; + g0.gate = nullptr; + g0.arrival += lib_inv_delay; + g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; + g1.flow /= node_data.est_refs[1]; + return; + } + } + else + { + /* process for best area */ /* removed check on required since this is executed only during a delay pass */ + if ( g0.gate != nullptr && g0.flow + lib_inv_area < g1.flow + epsilon ) + { + node_data.same_match_alternative = true; + g1 = g0; + g1.gate = nullptr; + g1.arrival += lib_inv_delay; + g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; + g0.flow /= node_data.est_refs[0]; + return; + } + else if ( g1.gate != nullptr && g1.flow + lib_inv_area < g0.flow + epsilon ) + { + node_data.same_match_alternative = true; + g0 = g1; + g0.gate = nullptr; + g0.arrival += lib_inv_delay; + g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; + g1.flow /= node_data.est_refs[1]; + return; + } + } + + node_data.same_match_alternative 
= false; + g0.flow /= node_data.est_refs[0]; + g1.flow /= node_data.est_refs[1]; + } + + inline void refine_best_matches( node const& n ) + { + auto index = ntk.node_to_index( n ); + auto& node_data = node_match[index]; + + /* evaluate to change the best matches with the best alternative */ + best_gate_emap& g0 = node_data.best_alternative[0]; + best_gate_emap& g1 = node_data.best_alternative[1]; + + /* if same match, try to keep it that way */ + if ( node_data.same_match ) + { + /* pick best implementation between the two alternatives */ + unsigned best_match_phase = node_data.best_supergate[0] == nullptr ? 1 : 0; + unsigned use_phase = g0.gate == nullptr ? 1 : 0; + if ( !node_data.same_match_alternative ) + { + int valid = 0; + float flow0 = g0.flow + ( node_data.map_refs[0] ? 0 : lib_inv_area ); + float flow1 = g0.flow + ( node_data.map_refs[1] ? 0 : lib_inv_area ); + if ( g0.arrival < node_data.required[0] + epsilon && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ) + valid = 1; + if ( g1.arrival < node_data.required[1] + epsilon && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ) + valid |= 2; + + if ( valid == 0 ) + return; + else if ( valid != 3 ) + use_phase = valid >> 1; + else if ( compare_map( g1.arrival, g0.arrival, flow1, flow0, g1.size, g0.size ) ) + use_phase = 1; + } + else + { + best_gate_emap& gUse = node_data.best_alternative[use_phase]; + if ( gUse.arrival > node_data.required[use_phase] + epsilon || gUse.arrival + lib_inv_delay > node_data.required[use_phase ^ 1] + epsilon ) + { + return; + } + } + + best_gate_emap& gUse = node_data.best_alternative[use_phase]; + float flowUse = gUse.flow * node_data.est_refs[use_phase] + ( node_data.map_refs[use_phase] ? 0 : lib_inv_area ); + float flowCurrent = node_data.flows[best_match_phase] * node_data.est_refs[best_match_phase] + ( node_data.map_refs[best_match_phase] ? 
0 : lib_inv_area ); + if ( flowUse < flowCurrent ) + { + refine_best_matches_copy_refinement( n, use_phase, true ); + } + return; + } + + /* TODO: should I check the potential gain of merging the two current implementations before replacing them? */ + + /* not same match: evaluate both zero and one phase */ + if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) + { + if ( compare_map( g0.arrival, node_data.arrival[0], g0.flow, node_data.flows[0], g0.size, cuts[index][node_data.best_cut[0]].size() ) ) + { + refine_best_matches_copy_refinement( n, 0, false ); + } + } + if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) + { + if ( compare_map( g1.arrival, node_data.arrival[1], g1.flow, node_data.flows[1], g1.size, cuts[index][node_data.best_cut[1]].size() ) ) + { + refine_best_matches_copy_refinement( n, 1, false ); + } + } + + /* evaluate change of phase + inverter */ + if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 ) + { + unsigned phase = node_data.map_refs[0] == 0 ? 1 : 0; + bool valid = node_data.arrival[phase ^ 1] + lib_inv_delay < node_data.required[phase]; + if ( valid && compare_map( node_data.arrival[phase ^ 1], node_data.arrival[phase], node_data.flows[phase ^ 1], node_data.flows[phase], cuts[index][node_data.best_cut[phase ^ 1]].size(), cuts[index][node_data.best_cut[phase]].size() ) ) + { + set_match_complemented_phase( index, phase ^ 1, node_data.arrival[phase ^ 1] + lib_inv_delay ); + } + } + + /* TODO: check if it is possible to merge the gates? 
*/ + // if ( node_data.map_refs[0] && node_data.map_refs[1] ) + // { + // bool use_zero = node_data.arrival[0] + lib_inv_delay < node_data.required[1]; + // bool use_one = node_data.arrival[1] + lib_inv_delay < node_data.required[0]; + // if ( use_zero && use_one ) + // { + // if ( compare_map( node_data.arrival[0], node_data.arrival[1], node_data.flows[0], node_data.flows[1], cuts[index][node_data.best_cut[0]].size(), cuts[index][node_data.best_cut[1]].size() ) ) + // { + // use_one = false; + // } + // else + // { + // use_zero = false; + // } + // } + + // if ( use_zero ) + // { + // set_match_complemented_phase( index, 0, node_data.arrival[0] + lib_inv_delay ); + // } + // else if ( use_one ) + // { + // set_match_complemented_phase( index, 1, node_data.arrival[1] + lib_inv_delay ); + // } + // } + } + + inline void refine_best_matches_copy_refinement( node const& n, unsigned phase, bool both_phases ) + { + auto index = ntk.node_to_index( n ); + auto& node_data = node_match[index]; + best_gate_emap& bg = node_data.best_alternative[phase]; + + node_data.best_supergate[phase] = bg.gate; + node_data.phase[phase] = bg.phase; + node_data.best_cut[phase] = bg.cut; + node_data.arrival[phase] = bg.arrival; + node_data.area[phase] = bg.area; + node_data.flows[phase] = bg.flow; + + if ( !both_phases ) + return; + + phase ^= 1; + node_data.best_supergate[phase] = nullptr; + node_data.phase[phase] = bg.phase; + node_data.best_cut[phase] = bg.cut; + node_data.arrival[phase] = bg.arrival + lib_inv_delay; + node_data.area[phase] = bg.area; + node_data.flows[phase] = ( bg.flow * node_data.est_refs[phase ^ 1] + lib_inv_area ) / node_data.est_refs[phase]; + } + void reindex_multioutput_data() { /* re-index the multioutput list using the lowest index output instead of the greatest one */ @@ -4707,7 +5185,7 @@ class emap_impl #pragma endregion template - inline bool compare_map( double arrival, double best_arrival, double area_flow, double best_area_flow, uint32_t size, uint32_t 
best_size ) + inline bool compare_map( double arrival, double best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size ) { if constexpr ( DO_AREA ) { From e82ffaf919f700b27c733886cab56dc4763bec3a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 30 Apr 2024 19:00:41 +0200 Subject: [PATCH 11/27] Improving emap and integration of alternatives --- include/mockturtle/algorithms/emap.hpp | 249 +++++++++++-------------- 1 file changed, 113 insertions(+), 136 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 3badd8e7a..b24bac77d 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -114,7 +114,7 @@ struct emap_params double relax_required{ 0.0f }; /*! \brief Number of rounds for area flow optimization. */ - uint32_t area_flow_rounds{ 2u }; + uint32_t area_flow_rounds{ 3u }; /*! \brief Number of rounds for exact area optimization. */ uint32_t ela_rounds{ 2u }; @@ -1713,6 +1713,13 @@ class emap_impl auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; cut_deref( best_cut, n, use_phase ); } + else if ( !node_data.map_refs[0] || !node_data.map_refs[1] ) + { + uint8_t use_phase = node_data.map_refs[0] ? 0 : 1; + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + cut_deref( best_cut, n, use_phase ); + node_data.same_match = true; + } /* match positive phase */ match_phase_exact( n, 0u ); @@ -1803,6 +1810,13 @@ class emap_impl node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[use_phase ^ 1] - lib_inv_delay ); } } + else if ( !node_data.map_refs[0] || !node_data.map_refs[1] ) + { + use_phase = node_data.map_refs[0] ? 
0 : 1; + auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; + cut_deref( best_cut, *it, use_phase ); + node_data.same_match = true; + } /* match positive phase */ match_phase_exact( *it, 0u ); @@ -2061,10 +2075,11 @@ class emap_impl } /* blend estimated references */ + float const coef = 1.0f / ( ( iteration + 1.0f ) * ( iteration + 1.0f ) ); for ( auto i = 0u; i < ntk.size(); ++i ) { - node_match[i].est_refs[0] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[0] + 2.0f * node_match[i].map_refs[0] ) / 3.0 ); - node_match[i].est_refs[1] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 ); + node_match[i].est_refs[0] = std::max( 1.0f, coef * node_match[i].est_refs[0] + ( 1 - coef ) * node_match[i].map_refs[0] ); + node_match[i].est_refs[1] = std::max( 1.0f, coef * node_match[i].est_refs[1] + ( 1 - coef ) * node_match[i].map_refs[1] ); } return true; @@ -2175,8 +2190,11 @@ class emap_impl } } + /* refine best mathes looking at alternatives */ + if ( ps.use_match_alternatives ) + refine_best_matches( *it ); + unsigned use_phase = node_data.best_supergate[0] == nullptr ? 
1u : 0u; - if ( node_data.best_supergate[use_phase] == nullptr ) { /* Library is not complete, mapping is not possible */ @@ -2185,10 +2203,6 @@ class emap_impl return false; } - /* refine best mathes looking at alternatives */ - if ( ps.use_match_alternatives) - refine_best_matches( *it ); - if ( node_data.same_match || node_data.map_refs[use_phase] > 0 ) { if constexpr ( !ELA ) @@ -2253,10 +2267,11 @@ class emap_impl } /* blend estimated references */ + float const coef = 1.0f / ( ( iteration + 1.0f ) * ( iteration + 1.0f ) ); for ( auto i = 0u; i < ntk.size(); ++i ) { - node_match[i].est_refs[0] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[0] + 2.0f * node_match[i].map_refs[0] ) / 3.0 ); - node_match[i].est_refs[1] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 ); + node_match[i].est_refs[0] = std::max( 1.0f, coef * node_match[i].est_refs[0] + ( 1 - coef ) * node_match[i].map_refs[0] ); + node_match[i].est_refs[1] = std::max( 1.0f, coef * node_match[i].est_refs[1] + ( 1 - coef ) * node_match[i].map_refs[1] ); } return true; @@ -2803,37 +2818,34 @@ class emap_impl /* condition on not used phases, evaluate a substitution during exact area recovery */ if constexpr ( ELA ) { - if ( iteration != 0 ) + if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 ) { - if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 ) + /* select the used match */ + auto phase = 0; + auto nphase = 0; + if ( node_data.map_refs[0] == 0 ) { - /* select the used match */ - auto phase = 0; - auto nphase = 0; - if ( node_data.map_refs[0] == 0 ) - { - phase = 1; - use_one = true; - use_zero = false; - } - else - { - nphase = 1; - use_one = false; - use_zero = true; - } - /* select the not used match instead if it leads to area improvement and doesn't violate the required time */ - if ( node_data.arrival[nphase] + lib_inv_delay < node_data.required[phase] + epsilon ) - { - auto size_phase = 
cuts[index][node_data.best_cut[phase]].size(); - auto size_nphase = cuts[index][node_data.best_cut[nphase]].size(); + phase = 1; + use_one = true; + use_zero = false; + } + else + { + nphase = 1; + use_one = false; + use_zero = true; + } + /* select the not used match instead if it leads to area improvement and doesn't violate the required time */ + if ( node_data.arrival[nphase] + lib_inv_delay < node_data.required[phase] + epsilon ) + { + auto size_phase = cuts[index][node_data.best_cut[phase]].size(); + auto size_nphase = cuts[index][node_data.best_cut[nphase]].size(); - if ( compare_map( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) ) - { - /* invert the choice */ - use_zero = !use_zero; - use_one = !use_one; - } + if ( compare_map( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) ) + { + /* invert the choice */ + use_zero = !use_zero; + use_one = !use_one; } } } @@ -2893,19 +2905,11 @@ class emap_impl } else { - if ( iteration < ps.area_flow_rounds ) - { - /* delay the decision on what to keep --> wait for better estimations */ - node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0]; - node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; - node_data.same_match = false; - return; - } - /* commit to one of the two before going to exact area */ - if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) - use_one = false; - else - use_zero = false; + /* delay the decision on what to keep --> wait for better estimations */ + node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0]; + node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; + node_data.same_match = false; + return; } } } @@ -2975,6 +2979,8 @@ class emap_impl best_gate_emap& g0 = 
node_data.best_alternative[0]; best_gate_emap& g1 = node_data.best_alternative[1]; + float g0flow = g0.flow / node_data.est_refs[0]; + float g1flow = g1.flow / node_data.est_refs[1]; if constexpr ( DO_AREA ) { @@ -2986,7 +2992,7 @@ class emap_impl g1.gate = nullptr; g1.arrival += lib_inv_delay; g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; - g0.flow /= node_data.est_refs[0]; + g0.flow = g0flow; return; } else if ( g1.arrival + lib_inv_delay < g0.arrival + epsilon ) @@ -2996,38 +3002,38 @@ class emap_impl g0.gate = nullptr; g0.arrival += lib_inv_delay; g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; - g1.flow /= node_data.est_refs[1]; + g1.flow = g1flow; return; } } else { /* process for best area */ /* removed check on required since this is executed only during a delay pass */ - if ( g0.gate != nullptr && g0.flow + lib_inv_area < g1.flow + epsilon ) + if ( g0.gate != nullptr && g0flow + lib_inv_area < g1flow + epsilon ) { node_data.same_match_alternative = true; g1 = g0; g1.gate = nullptr; g1.arrival += lib_inv_delay; g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; - g0.flow /= node_data.est_refs[0]; + g0.flow = g0flow; return; } - else if ( g1.gate != nullptr && g1.flow + lib_inv_area < g0.flow + epsilon ) + else if ( g1.gate != nullptr && g1flow + lib_inv_area < g0flow + epsilon ) { node_data.same_match_alternative = true; g0 = g1; g0.gate = nullptr; g0.arrival += lib_inv_delay; g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; - g1.flow /= node_data.est_refs[1]; + g1.flow = g1flow; return; } } node_data.same_match_alternative = false; - g0.flow /= node_data.est_refs[0]; - g1.flow /= node_data.est_refs[1]; + g0.flow = g0flow; + g1.flow = g1flow; } inline void refine_best_matches( node const& n ) @@ -3039,103 +3045,73 @@ class emap_impl best_gate_emap& g0 = node_data.best_alternative[0]; best_gate_emap& g1 = node_data.best_alternative[1]; - /* if same match, try to keep it that way */ - if ( 
node_data.same_match ) + if ( node_data.map_refs[0] && node_data.map_refs[1] ) { - /* pick best implementation between the two alternatives */ - unsigned best_match_phase = node_data.best_supergate[0] == nullptr ? 1 : 0; - unsigned use_phase = g0.gate == nullptr ? 1 : 0; - if ( !node_data.same_match_alternative ) + if ( node_data.same_match ) { - int valid = 0; - float flow0 = g0.flow + ( node_data.map_refs[0] ? 0 : lib_inv_area ); - float flow1 = g0.flow + ( node_data.map_refs[1] ? 0 : lib_inv_area ); - if ( g0.arrival < node_data.required[0] + epsilon && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ) - valid = 1; - if ( g1.arrival < node_data.required[1] + epsilon && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ) - valid |= 2; + /* pick best implementation between the two alternatives */ + unsigned best_match_phase = node_data.best_supergate[0] == nullptr ? 1 : 0; + unsigned use_phase = g0.gate == nullptr ? 1 : 0; + if ( !node_data.same_match_alternative ) + { + if ( g0.arrival > node_data.required[0] + epsilon || g1.arrival > node_data.required[1] + epsilon ) + return; - if ( valid == 0 ) + refine_best_matches_copy_refinement( n, 0, false ); + refine_best_matches_copy_refinement( n, 1, false ); + node_data.same_match = false; return; - else if ( valid != 3 ) - use_phase = valid >> 1; - else if ( compare_map( g1.arrival, g0.arrival, flow1, flow0, g1.size, g0.size ) ) - use_phase = 1; - } - else - { - best_gate_emap& gUse = node_data.best_alternative[use_phase]; - if ( gUse.arrival > node_data.required[use_phase] + epsilon || gUse.arrival + lib_inv_delay > node_data.required[use_phase ^ 1] + epsilon ) + } + else { + best_gate_emap& gUse = node_data.best_alternative[use_phase]; + if ( gUse.arrival > node_data.required[use_phase] + epsilon || gUse.arrival + lib_inv_delay > node_data.required[use_phase ^ 1] + epsilon ) + { + return; + } + refine_best_matches_copy_refinement( n, use_phase, true ); return; } } - - best_gate_emap& gUse = 
node_data.best_alternative[use_phase]; - float flowUse = gUse.flow * node_data.est_refs[use_phase] + ( node_data.map_refs[use_phase] ? 0 : lib_inv_area ); - float flowCurrent = node_data.flows[best_match_phase] * node_data.est_refs[best_match_phase] + ( node_data.map_refs[best_match_phase] ? 0 : lib_inv_area ); - if ( flowUse < flowCurrent ) + else { - refine_best_matches_copy_refinement( n, use_phase, true ); + /* not same match: evaluate both zero and one phase */ + if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) + { + node_data.same_match = false; + refine_best_matches_copy_refinement( n, 0, node_data.same_match_alternative && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ); + } + if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) + { + node_data.same_match = false; + refine_best_matches_copy_refinement( n, 1, node_data.same_match_alternative && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ); + } } - return; } - - /* TODO: should I check the potential gain of merging the two current implementations before replacing them? 
*/ - - /* not same match: evaluate both zero and one phase */ - if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) + else if ( node_data.map_refs[0] ) { - if ( compare_map( g0.arrival, node_data.arrival[0], g0.flow, node_data.flows[0], g0.size, cuts[index][node_data.best_cut[0]].size() ) ) + if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) { + node_data.same_match = false; refine_best_matches_copy_refinement( n, 0, false ); } + else if ( g0.gate == nullptr && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ) + { + refine_best_matches_copy_refinement( n, 1, true ); + } } - if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) + else { - if ( compare_map( g1.arrival, node_data.arrival[1], g1.flow, node_data.flows[1], g1.size, cuts[index][node_data.best_cut[1]].size() ) ) + if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) { + node_data.same_match = false; refine_best_matches_copy_refinement( n, 1, false ); } + else if ( g1.gate == nullptr && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ) + { + refine_best_matches_copy_refinement( n, 0, true ); + } } - - /* evaluate change of phase + inverter */ - if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 ) - { - unsigned phase = node_data.map_refs[0] == 0 ? 1 : 0; - bool valid = node_data.arrival[phase ^ 1] + lib_inv_delay < node_data.required[phase]; - if ( valid && compare_map( node_data.arrival[phase ^ 1], node_data.arrival[phase], node_data.flows[phase ^ 1], node_data.flows[phase], cuts[index][node_data.best_cut[phase ^ 1]].size(), cuts[index][node_data.best_cut[phase]].size() ) ) - { - set_match_complemented_phase( index, phase ^ 1, node_data.arrival[phase ^ 1] + lib_inv_delay ); - } - } - - /* TODO: check if it is possible to merge the gates? 
*/ - // if ( node_data.map_refs[0] && node_data.map_refs[1] ) - // { - // bool use_zero = node_data.arrival[0] + lib_inv_delay < node_data.required[1]; - // bool use_one = node_data.arrival[1] + lib_inv_delay < node_data.required[0]; - // if ( use_zero && use_one ) - // { - // if ( compare_map( node_data.arrival[0], node_data.arrival[1], node_data.flows[0], node_data.flows[1], cuts[index][node_data.best_cut[0]].size(), cuts[index][node_data.best_cut[1]].size() ) ) - // { - // use_one = false; - // } - // else - // { - // use_zero = false; - // } - // } - - // if ( use_zero ) - // { - // set_match_complemented_phase( index, 0, node_data.arrival[0] + lib_inv_delay ); - // } - // else if ( use_one ) - // { - // set_match_complemented_phase( index, 1, node_data.arrival[1] + lib_inv_delay ); - // } - // } } inline void refine_best_matches_copy_refinement( node const& n, unsigned phase, bool both_phases ) @@ -3153,7 +3129,8 @@ class emap_impl if ( !both_phases ) return; - + + node_data.same_match = true; phase ^= 1; node_data.best_supergate[phase] = nullptr; node_data.phase[phase] = bg.phase; From ec071e45b43203bb3490d0595c4eed8febcdaee9 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 11:21:57 +0200 Subject: [PATCH 12/27] Performance improvements --- include/mockturtle/algorithms/emap.hpp | 160 +++++++++++-------------- 1 file changed, 68 insertions(+), 92 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index b24bac77d..5221703cd 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -110,7 +110,7 @@ struct emap_params /*! \brief Required time for delay optimization. */ double required_time{ 0.0f }; - /*! \brief Required time relaxation ratio. */ + /*! \brief Required time relaxation in percentage (10 = 10%). */ double relax_required{ 0.0f }; /*! \brief Number of rounds for area flow optimization. */ @@ -137,9 +137,6 @@ struct emap_params /*! 
\brief Remove overlapping multi-output cuts */ bool remove_overlapping_multicuts{ false }; - /*! \brief Doesn't allow node duplication */ - bool allow_node_duplication{ true }; - /*! \brief Be verbose. */ bool verbose{ false }; }; @@ -949,7 +946,6 @@ class emap_impl uint32_t i = 0; while ( i++ < ps.area_flow_rounds ) { - compute_required_time(); if ( !compute_mapping() ) { return false; @@ -964,7 +960,7 @@ class emap_impl reindex_multioutput_data(); while ( i++ < ps.ela_rounds ) { - if ( !compute_mapping_exact_reversed( i == ps.ela_rounds ) ) + if ( !compute_mapping_exact_reversed() ) { return false; } @@ -974,7 +970,7 @@ class emap_impl i = 0; while ( i++ < ps.eswp_rounds ) { - if ( !compute_mapping_exact_reversed( true ) ) + if ( !compute_mapping_exact_reversed() ) { return false; } @@ -1034,8 +1030,9 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); + /* select alternative matches to use */ select_alternatives( n ); /* load and try a multi-output matches */ @@ -1055,7 +1052,7 @@ class emap_impl } double area_old = area; - bool success = set_mapping_refs2(); + bool success = set_mapping_refs_and_req(); if ( warning_box ) { @@ -1546,7 +1543,7 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); } double area_old = area; bool success = set_mapping_refs(); @@ -1638,7 +1635,7 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); /* try a multi-output match */ if constexpr ( DO_AREA ) @@ -1656,7 +1653,7 @@ class emap_impl } double area_old = area; - bool success = set_mapping_refs(); + bool success = set_mapping_refs_and_req(); /* round stats */ if ( ps.verbose ) @@ -1728,7 +1725,7 @@ class emap_impl match_phase_exact( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); /* try a multi-output match */ if ( ps.map_multioutput 
&& node_tuple_match[index] != UINT32_MAX ) @@ -1763,7 +1760,7 @@ class emap_impl } template - bool compute_mapping_exact_reversed( bool last_round ) + bool compute_mapping_exact_reversed() { /* this method works in reverse topological order: less nodes to update (faster) */ /* instead of propagating arrival times forward, it propagates required times backwards */ @@ -1831,7 +1828,7 @@ class emap_impl } /* try to drop one phase */ - match_drop_phase( *it, 0 ); + match_drop_phase( *it ); /* try a multi-output match */ if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 ) @@ -2037,7 +2034,6 @@ class emap_impl if ( iteration < ps.area_flow_rounds ) { ++node_data.map_refs[use_phase]; - // node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; } area += lib_inv_area; ++inv; @@ -2085,8 +2081,8 @@ class emap_impl return true; } - template - bool set_mapping_refs2() + template + bool set_mapping_refs_and_req() { for ( auto i = 0u; i < node_match.size(); ++i ) { @@ -2190,9 +2186,12 @@ class emap_impl } } - /* refine best mathes looking at alternatives */ - if ( ps.use_match_alternatives ) - refine_best_matches( *it ); + /* refine best matches with alternatives */ + if constexpr ( !DO_AREA ) + { + if ( ps.use_match_alternatives ) + refine_best_matches( *it ); + } unsigned use_phase = node_data.best_supergate[0] == nullptr ? 
1u : 0u; if ( node_data.best_supergate[use_phase] == nullptr ) @@ -2224,7 +2223,6 @@ class emap_impl if ( iteration < ps.area_flow_rounds ) { ++node_data.map_refs[use_phase]; - // node_data.map_refs[use_phase] += node_data.map_refs[use_phase ^ 1]; } area += lib_inv_area; ++inv; @@ -2301,7 +2299,6 @@ class emap_impl if ( iteration < ps.area_flow_rounds ) { ++node_match[index].map_refs[0]; - // node_match[index].map_refs[0] += node_match[index].map_refs[1]; } area += lib_inv_area; ++inv; @@ -2762,7 +2759,7 @@ class emap_impl } template - void match_drop_phase( node const& n, float required_margin_factor ) + void match_drop_phase( node const& n ) { auto index = ntk.node_to_index( n ); auto& node_data = node_match[index]; @@ -2811,8 +2808,8 @@ class emap_impl else { /* check if both phases + inverter meet the required time */ - use_zero = worst_arrival_nneg < ( node_data.required[1] + epsilon - required_margin_factor * lib_inv_delay ); - use_one = worst_arrival_npos < ( node_data.required[0] + epsilon - required_margin_factor * lib_inv_delay ); + use_zero = worst_arrival_nneg < ( node_data.required[1] + epsilon ); + use_one = worst_arrival_npos < ( node_data.required[0] + epsilon ); } /* condition on not used phases, evaluate a substitution during exact area recovery */ @@ -2854,21 +2851,10 @@ class emap_impl if ( ( !use_zero && !use_one ) ) { /* use both phases */ - if ( ps.allow_node_duplication ) - { - node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0]; - node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; - node_data.same_match = false; - return; - } - - /* if node duplication is not allowed, pick one phase based on delay */ - auto size_zero = cuts[index][node_data.best_cut[0]].size(); - auto size_one = cuts[index][node_data.best_cut[1]].size(); - if ( compare_map( worst_arrival_npos, worst_arrival_nneg, node_data.flows[1], node_data.flows[0], size_one, size_zero ) ) - use_zero = true; - else - use_one = true; + node_data.flows[0] = 
node_data.flows[0] / node_data.est_refs[0]; + node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1]; + node_data.same_match = false; + return; } /* use area flow as a tiebreaker */ @@ -2887,10 +2873,24 @@ class emap_impl node_data.flows[0] = cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); } - if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) - use_one = false; - else - use_zero = false; + /* evaluate based on inverter cost */ + // use_zero = lib_inv_area < node_data.flows[1] + epsilon; + // use_one = lib_inv_area < node_data.flows[0] + epsilon; + + if ( use_one && use_zero ) + { + if ( compare_map( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) ) + use_one = false; + else + use_zero = false; + } + else if ( !use_one && !use_zero && node_data.same_match ) + { + node_data.same_match = false; + cut_ref( cuts[index][node_data.best_cut[0]], n, 0 ); + cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); + return; + } } else { @@ -2971,6 +2971,9 @@ class emap_impl template inline void select_alternatives( node const& n ) { + if constexpr ( DO_AREA ) + return; + if ( !ps.use_match_alternatives ) return; @@ -2982,53 +2985,26 @@ class emap_impl float g0flow = g0.flow / node_data.est_refs[0]; float g1flow = g1.flow / node_data.est_refs[1]; - if constexpr ( DO_AREA ) + /* process for best area */ /* removed check on required since this is executed only during a delay pass */ + if ( g0.gate != nullptr && g0flow + lib_inv_area < g1flow + epsilon ) { - /* process for best delay */ - if ( g0.arrival + lib_inv_delay < g1.arrival + epsilon ) - { - node_data.same_match_alternative = true; - g1 = g0; - g1.gate = nullptr; - g1.arrival += lib_inv_delay; - g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; - g0.flow = g0flow; - return; - } - else if ( g1.arrival + lib_inv_delay < 
g0.arrival + epsilon ) - { - node_data.same_match_alternative = true; - g0 = g1; - g0.gate = nullptr; - g0.arrival += lib_inv_delay; - g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; - g1.flow = g1flow; - return; - } + node_data.same_match_alternative = true; + g1 = g0; + g1.gate = nullptr; + g1.arrival += lib_inv_delay; + g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; + g0.flow = g0flow; + return; } - else + else if ( g1.gate != nullptr && g1flow + lib_inv_area < g0flow + epsilon ) { - /* process for best area */ /* removed check on required since this is executed only during a delay pass */ - if ( g0.gate != nullptr && g0flow + lib_inv_area < g1flow + epsilon ) - { - node_data.same_match_alternative = true; - g1 = g0; - g1.gate = nullptr; - g1.arrival += lib_inv_delay; - g1.flow = ( g1.flow + lib_inv_area ) / node_data.est_refs[1]; - g0.flow = g0flow; - return; - } - else if ( g1.gate != nullptr && g1flow + lib_inv_area < g0flow + epsilon ) - { - node_data.same_match_alternative = true; - g0 = g1; - g0.gate = nullptr; - g0.arrival += lib_inv_delay; - g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; - g1.flow = g1flow; - return; - } + node_data.same_match_alternative = true; + g0 = g1; + g0.gate = nullptr; + g0.arrival += lib_inv_delay; + g0.flow = ( g0.flow + lib_inv_area ) / node_data.est_refs[0]; + g1.flow = g1flow; + return; } node_data.same_match_alternative = false; @@ -3771,7 +3747,7 @@ class emap_impl match_phase( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); assert( node_data.arrival[0] < node_data.required[0] + epsilon ); assert( node_data.arrival[1] < node_data.required[1] + epsilon ); @@ -3852,7 +3828,7 @@ class emap_impl match_phase_exact( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); assert( node_data.arrival[0] < std::numeric_limits::max() ); assert( node_data.arrival[1] < std::numeric_limits::max() ); @@ -4030,7 +4006,7 
@@ class emap_impl match_phase_exact( n, 1u ); /* try to drop one phase */ - match_drop_phase( n, 0 ); + match_drop_phase( n ); } } From c2ca9a871a1017082c24741936df788edf626d8a Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 11:22:43 +0200 Subject: [PATCH 13/27] Add inverter cost evaluation in exact area (high inverter cost optimization) --- include/mockturtle/algorithms/emap.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 5221703cd..5efc6d563 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -2874,8 +2874,8 @@ class emap_impl cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); } /* evaluate based on inverter cost */ - // use_zero = lib_inv_area < node_data.flows[1] + epsilon; - // use_one = lib_inv_area < node_data.flows[0] + epsilon; + use_zero = lib_inv_area < node_data.flows[1] + epsilon; + use_one = lib_inv_area < node_data.flows[0] + epsilon; if ( use_one && use_zero ) { From b9fb641e8a79539bd30e0c24d163da9bf013820d Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 14:26:11 +0200 Subject: [PATCH 14/27] Add pin-specific input arrival times and required time constraints to emap --- include/mockturtle/algorithms/emap.hpp | 206 +++++++++++++------------ 1 file changed, 111 insertions(+), 95 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 5efc6d563..c7bd54191 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -107,12 +107,18 @@ struct emap_params hybrid } matching_mode = hybrid; - /*! \brief Required time for delay optimization. */ + /*! \brief Target required time (for each PO). */ double required_time{ 0.0f }; /*! \brief Required time relaxation in percentage (10 = 10%). */ double relax_required{ 0.0f }; + /*! \brief Custom input arrival times. 
*/ + std::vector arrival_times{}; + + /*! \brief Custom output required times. */ + std::vector required_times{}; + /*! \brief Number of rounds for area flow optimization. */ uint32_t area_flow_rounds{ 3u }; @@ -805,6 +811,10 @@ class emap_impl /* compute and save topological order */ init_topo_order(); + /* init arrival time */ + if ( !init_arrivals() ) + return res; + /* search for large matches */ if ( ps.matching_mode == emap_params::structural || CutSize > 6 ) { @@ -859,6 +869,10 @@ class emap_impl /* compute and save topological order */ init_topo_order(); + /* init arrival time */ + if ( !init_arrivals() ) + return res; + /* search for large matches */ if ( ps.matching_mode == emap_params::structural || CutSize > 6 ) { @@ -904,11 +918,15 @@ class emap_impl auto [res, old2new] = initialize_map_network(); - /* TODO: multi-output support is currently not implemented */ + /* multi-output support is currently not implemented */ /* compute and save topological order */ init_topo_order(); + /* init arrival time */ + if ( !init_arrivals() ) + return res; + /* compute cuts, matches, and initial mapping */ if ( !ps.area_oriented_mapping ) { @@ -1111,13 +1129,9 @@ class emap_impl { node_data.flows[0] = 0.0f; node_data.best_alternative[0].flow = 0.0f; - node_data.arrival[0] = 0.0f; - node_data.best_alternative[0].arrival = 0.0f; /* PIs have the negative phase implemented with an inverter */ node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; node_data.best_alternative[1].flow = lib_inv_area / node_data.est_refs[1]; - node_data.arrival[1] = lib_inv_delay; - node_data.best_alternative[1].arrival = lib_inv_delay; /* skip if cuts have been computed before */ if ( cuts[index].size() == 0 ) { @@ -1525,10 +1539,8 @@ class emap_impl { /* all terminals have flow 0 */ node_data.flows[0] = 0.0f; - node_data.arrival[0] = 0.0f; /* PIs have the negative phase implemented with an inverter */ node_data.flows[1] = lib_inv_area / node_data.est_refs[1]; - node_data.arrival[1] = 
lib_inv_delay; add_unit_cut( index ); continue; } @@ -1544,9 +1556,12 @@ class emap_impl /* try to drop one phase */ match_drop_phase( n ); + + /* select alternative matches to use */ + select_alternatives( n ); } double area_old = area; - bool success = set_mapping_refs(); + bool success = set_mapping_refs_and_req(); /* round stats */ if ( ps.verbose ) @@ -2108,35 +2123,7 @@ class emap_impl } } ); - double required = delay; - /* relax delay constraints */ - if ( iteration == 0 && ps.required_time == 0.0f && ps.relax_required > 0.0f ) - { - required *= ( 100.0 + ps.relax_required ) / 100.0; - } - - /* Global target time constraint */ - if ( ps.required_time != 0.0f ) - { - if ( ps.required_time < delay - epsilon ) - { - if ( !ps.area_oriented_mapping && iteration == 1 ) - std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl; - } - else - { - required = ps.required_time; - } - } - - /* set the required time at POs */ - ntk.foreach_po( [&]( auto const& s ) { - const auto index = ntk.node_to_index( ntk.get_node( s ) ); - if ( ntk.is_complemented( s ) ) - node_match[index].required[1] = required; - else - node_match[index].required[0] = required; - } ); + set_output_required_time( iteration == 0 ); /* compute current area and update mapping refs in top-down order */ area = 0.0f; @@ -2306,47 +2293,73 @@ class emap_impl } } - void compute_required_time( bool exit_early = false ) + void set_output_required_time( bool warning ) { - for ( auto i = 0u; i < node_match.size(); ++i ) - { - node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits::max(); - } - - /* return if mapping is area oriented */ - if ( ps.area_oriented_mapping ) - return; - double required = delay; - /* relax delay constraints */ - if ( iteration == 1 && ps.required_time == 0.0f && ps.relax_required > 0.0f ) + if ( iteration == 0 && ps.required_time == 0.0f && ps.required_times.empty() && ps.relax_required > 0.0f ) 
{ required *= ( 100.0 + ps.relax_required ) / 100.0; } /* Global target time constraint */ - if ( ps.required_time != 0.0f ) + if ( ps.required_times.empty() ) { - if ( ps.required_time < delay - epsilon ) - { - if ( !ps.area_oriented_mapping && iteration == 1 ) - std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl; - } - else + if ( ps.required_time != 0.0f ) { - required = ps.required_time; + if ( ps.required_time < delay - epsilon ) + { + if ( warning ) + std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl; + } + else + { + required = ps.required_time; + } } + + /* set the required time at POs */ + ntk.foreach_po( [&]( auto const& s ) { + const auto index = ntk.node_to_index( ntk.get_node( s ) ); + if ( ntk.is_complemented( s ) ) + node_match[index].required[1] = required; + else + node_match[index].required[0] = required; + } ); + + return; } - /* set the required time at POs */ - ntk.foreach_po( [&]( auto const& s ) { + /* Output-specific target time constraint */ + ntk.foreach_po( [&]( auto const& s, uint32_t i ) { const auto index = ntk.node_to_index( ntk.get_node( s ) ); - if ( ntk.is_complemented( s ) ) - node_match[index].required[1] = required; + uint8_t phase = ntk.is_complemented( s ) ? 
1 : 0; + if ( node_match[index].arrival[phase] > ps.required_times[i] + epsilon ) + { + /* maintain the same delay */ + node_match[index].required[phase] = node_match[index].arrival[phase]; + if ( warning ) + std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f} at output {}", ps.required_times[i], i ) << std::endl; + } else - node_match[index].required[0] = required; + { + node_match[index].required[phase] = ps.required_times[i]; + } } ); + } + + void compute_required_time( bool exit_early = false ) + { + for ( auto i = 0u; i < node_match.size(); ++i ) + { + node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits::max(); + } + + /* return if mapping is area oriented */ + if ( ps.area_oriented_mapping ) + return; + + set_output_required_time( iteration == 1 ); if ( exit_early ) return; @@ -3347,38 +3360,6 @@ class emap_impl area_flow[j] = gate.area + cut_leaves_flow( cut, n, phase[j] ); node_data.phase[phase[j]] = old_phase; - /* local evaluation for delay (area flow improvement is approximated) */ - // if constexpr ( !DO_AREA ) - // { - // /* recompute local area flow of previous matches */ - // double mapped_flow = node_data.flows[phase[j]]; - - // if ( node_data.multioutput_match[phase[j]] ) - // { - // /* recompute estimation for multi-output gate */ - // float k_est = 0; - // for ( auto k = 0; k < max_multioutput_output_size; ++k ) - // { - // uint32_t index_k = tuple_data[k].node_index; - // auto used_phase = node_match[index_k].supergate[0] == nullptr ? 1 : 0; - // k_est += node_match[index_k].est_refs[used_phase]; /* TODO: review */ - // } - // mapped_flow *= k_est; - // } - // else - // { - // auto used_phase = node_data.supergate[0] == nullptr ? 
1 : 0; /* TODO: review */ - // mapped_flow *= node_data.est_refs[used_phase]; - // } - - // auto const& mapped_cut = cuts[node_index][node_data.best_cut[phase[j]]]; - // if ( !compare_map( arrival[j], node_data.arrival[phase[j]], area_flow[j], mapped_flow, cut.size(), mapped_cut.size() ) ) - // { - // is_best = false; - // break; - // } - // } - /* current version may lead to delay increase */ est_refs[j] = node_data.est_refs[phase[j]]; } @@ -4377,6 +4358,41 @@ class emap_impl } ); } + bool init_arrivals() + { + if ( ps.required_times.size() && ps.required_times.size() != ntk.num_pos() ) + { + std::cerr << "[e] MAP ERROR: required time vector does not match the output size of the network" << std::endl; + st.mapping_error = true; + return false; + } + + if ( ps.arrival_times.empty() ) + { + ntk.foreach_pi( [&]( auto const& n ) { + auto& node_data = node_match[ntk.node_to_index( n )]; + node_data.arrival[0] = node_data.best_alternative[0].arrival = 0; + node_data.arrival[1] = node_data.best_alternative[1].arrival = lib_inv_delay; + } ); + return true; + } + + if ( ps.arrival_times.size() != ntk.num_pis() ) + { + std::cerr << "[e] MAP ERROR: arrival time vector does not match the input size of the network" << std::endl; + st.mapping_error = true; + return false; + } + + ntk.foreach_pi( [&]( auto const& n, uint32_t i ) { + auto& node_data = node_match[ntk.node_to_index( n )]; + node_data.arrival[0] = node_data.best_alternative[0].arrival = ps.arrival_times[i]; + node_data.arrival[1] = node_data.best_alternative[1].arrival = ps.arrival_times[i] + lib_inv_delay; + } ); + + return true; + } + void finalize_cover( binding_view& res, klut_map& old2new ) { uint32_t multioutput_count = 0; From f720bead12ca1d515e7c75aed0a2931803a9694b Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 14:37:37 +0200 Subject: [PATCH 15/27] removing standard foward exact area method (completely replaced by exact reversed) --- include/mockturtle/algorithms/emap.hpp | 244 
++----------------------- 1 file changed, 18 insertions(+), 226 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index c7bd54191..9b1a3be72 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -131,9 +131,6 @@ struct emap_params /*! \brief Number of patterns for switching activity computation. */ uint32_t switching_activity_patterns{ 2048u }; - /*! \brief Fast area recovery */ - bool use_fast_area_recovery{ true }; - /*! \brief Compute alternatives using a different cost functions */ bool use_match_alternatives{ true }; @@ -972,54 +969,23 @@ class emap_impl /* compute mapping using exact area */ i = 0; - if ( ps.use_fast_area_recovery ) + compute_required_time( true ); + reindex_multioutput_data(); + while ( i++ < ps.ela_rounds ) { - compute_required_time( true ); - reindex_multioutput_data(); - while ( i++ < ps.ela_rounds ) - { - if ( !compute_mapping_exact_reversed() ) - { - return false; - } - } - - /* compute mapping using exact switching activity estimation */ - i = 0; - while ( i++ < ps.eswp_rounds ) + if ( !compute_mapping_exact_reversed() ) { - if ( !compute_mapping_exact_reversed() ) - { - return false; - } + return false; } } - else - { - while ( i++ < ps.ela_rounds ) - { - compute_required_time(); - if ( !compute_mapping_exact( i == ps.ela_rounds ) ) - { - return false; - } - } - - /* compute mapping using exact switching activity estimation */ - i = 0; - while ( i++ < ps.eswp_rounds ) - { - compute_required_time(); - if ( !compute_mapping_exact( true ) ) - { - return false; - } - } - /* cleaning not fully utilized multi-output gates */ - if ( ps.map_multioutput ) + /* compute mapping using exact switching activity estimation */ + i = 0; + while ( i++ < ps.eswp_rounds ) + { + if ( !compute_mapping_exact_reversed() ) { - remove_unused_multioutput(); + return false; } } @@ -1693,93 +1659,9 @@ class emap_impl return success; } - template - bool 
compute_mapping_exact( bool last_round ) - { - for ( auto const& n : topo_order ) - { - if ( ntk.is_constant( n ) || ntk.is_pi( n ) ) - continue; - - /* don't touch box */ - if constexpr ( has_is_dont_touch_v ) - { - if ( ntk.is_dont_touch( n ) ) - { - if constexpr ( has_has_binding_v ) - { - propagate_data_forward_white_box( n ); - } - continue; - } - } - - auto index = ntk.node_to_index( n ); - auto& node_data = node_match[index]; - - /* recursively deselect the best cut shared between - * the two phases if in use in the cover */ - if ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) - { - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; - auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; - cut_deref( best_cut, n, use_phase ); - } - else if ( !node_data.map_refs[0] || !node_data.map_refs[1] ) - { - uint8_t use_phase = node_data.map_refs[0] ? 0 : 1; - auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; - cut_deref( best_cut, n, use_phase ); - node_data.same_match = true; - } - - /* match positive phase */ - match_phase_exact( n, 0u ); - - /* match negative phase */ - match_phase_exact( n, 1u ); - - /* try to drop one phase */ - match_drop_phase( n ); - - /* try a multi-output match */ - if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) - { - bool multi_success = match_multioutput_exact( n, last_round ); - if ( multi_success ) - multi_node_update_exact( n ); - } - - if ( node_match[index].map_refs[0] ) - assert( node_match[index].arrival[0] < node_match[index].required[0] + epsilon ); - if ( node_match[index].map_refs[1] ) - assert( node_match[index].arrival[1] < node_match[index].required[1] + epsilon ); - } - - double area_old = area; - bool success = set_mapping_refs(); - - /* round stats */ - if ( ps.verbose ) - { - float area_gain = float( ( area_old - area ) / area_old * 100 ); - std::stringstream stats{}; - if constexpr ( SwitchActivity ) - stats << fmt::format( 
"[i] Switching: Delay = {:>12.2f} Area = {:>12.2f} Gain = {:>5.2f} % Inverters = {:>5} Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ); - else - stats << fmt::format( "[i] Area : Delay = {:>12.2f} Area = {:>12.2f} Gain = {:>5.2f} % Inverters = {:>5} Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ); - st.round_stats.push_back( stats.str() ); - } - - return success; - } - template bool compute_mapping_exact_reversed() { - /* this method works in reverse topological order: less nodes to update (faster) */ - /* instead of propagating arrival times forward, it propagates required times backwards */ - for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it ) { if ( ntk.is_constant( *it ) || ntk.is_pi( *it ) ) @@ -1843,7 +1725,7 @@ class emap_impl } /* try to drop one phase */ - match_drop_phase( *it ); + match_drop_phase( *it ); /* try a multi-output match */ if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 ) @@ -2771,7 +2653,7 @@ class emap_impl } } - template + template void match_drop_phase( node const& n ) { auto index = ntk.node_to_index( n ); @@ -2887,8 +2769,11 @@ class emap_impl cut_ref( cuts[index][node_data.best_cut[1]], n, 1 ); } /* evaluate based on inverter cost */ - use_zero = lib_inv_area < node_data.flows[1] + epsilon; - use_one = lib_inv_area < node_data.flows[0] + epsilon; + if constexpr ( !SwitchActivity ) + { + use_zero = lib_inv_area < node_data.flows[1] + epsilon; + use_one = lib_inv_area < node_data.flows[0] + epsilon; + } if ( use_one && use_zero ) { @@ -3911,99 +3796,6 @@ class emap_impl return false; } - - bool remove_unused_multioutput() - { - /* TODO: update required times */ - for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it ) - { - if ( ntk.is_constant( *it ) || ntk.is_pi( *it ) ) - continue; - - auto index = ntk.node_to_index( *it ); - - /* get used multi-output gates */ - if ( node_tuple_match[index] == 
UINT32_MAX ) - continue; - - if ( node_match[index].same_match && !node_match[index].multioutput_match[0] ) - continue; - - if ( !node_match[index].same_match && !( node_match[index].multioutput_match[0] || node_match[index].multioutput_match[1] ) ) - continue; - - /* check if mapped to multi-output with unused outputs */ - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; - - bool used = false; - bool unused = false; - for ( auto j = 0; j < max_multioutput_output_size; ++j ) - { - uint32_t node_index = tuple_data[j].node_index; - auto& node_data = node_match[node_index]; - - if ( node_data.best_supergate[0] != nullptr && node_data.multioutput_match[0] ) - { - if ( node_data.map_refs[0] > 0 || ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) ) - used = true; - else - unused = true; - } - else if ( node_data.best_supergate[1] != nullptr && node_data.multioutput_match[1] ) - { - if ( node_data.map_refs[1] > 0 || ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) ) - used = true; - else - unused = true; - } - } - - if ( !used || !unused ) - continue; - - /* remap connected outputs (reverse topo order)*/ - for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) - { - uint32_t node_index = tuple_data[j].node_index; - auto& node_data = node_match[node_index]; - auto const n = ntk.index_to_node( node_index ); - - if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) - continue; - - /* recursively deselect the best cut shared between - * the two phases if in use in the cover */ - if ( node_data.same_match ) - { - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 
0 : 1; - auto const& best_cut = cuts[node_index][node_data.best_cut[use_phase]]; - cut_deref( best_cut, n, use_phase ); - } - - /* match positive phase */ - match_phase_exact( n, 0u ); - - /* match negative phase */ - match_phase_exact( n, 1u ); - - /* try to drop one phase */ - match_drop_phase( n ); - } - } - - double area_old = area; - bool success = set_mapping_refs(); - - /* round stats */ - if ( ps.verbose ) - { - float area_gain = float( ( area_old - area ) / area_old * 100 ); - std::string stats = fmt::format( "[i] Cleaning : Delay = {:>12.2f} Area = {:>12.2f} Gain = {:>5.2f} % Inverters = {:>5} Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ); - st.round_stats.push_back( stats ); - } - - return success; - } #pragma endregion #pragma region Mapping utils From 1c5284c2bae687613b21bbc8fabddabdd368bb8f Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 14:47:03 +0200 Subject: [PATCH 16/27] Updating tests --- test/algorithms/emap.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/algorithms/emap.cpp b/test/algorithms/emap.cpp index 0e95a7a26..a52717971 100644 --- a/test/algorithms/emap.cpp +++ b/test/algorithms/emap.cpp @@ -170,8 +170,7 @@ TEST_CASE( "Emap on full adder 2", "[emap]" ) emap_params ps; ps.cut_enumeration_ps.minimize_truth_table = false; - ps.use_fast_area_recovery = false; - ps.ela_rounds = 0; + ps.ela_rounds = 1; ps.eswp_rounds = 2; emap_stats st; binding_view luts = emap_klut( aig, lib, ps, &st ); @@ -244,8 +243,7 @@ TEST_CASE( "Emap on full adder 2 with cells", "[emap]" ) emap_params ps; ps.cut_enumeration_ps.minimize_truth_table = false; - ps.use_fast_area_recovery = false; - ps.ela_rounds = 0; + ps.ela_rounds = 1; ps.eswp_rounds = 2; emap_stats st; cell_view luts = emap( aig, lib, ps, &st ); @@ -382,12 +380,12 @@ TEST_CASE( "Emap on multiplier with multi-output gates", "[emap]" ) const float eps{ 0.005f }; - CHECK( luts.size() == 233u ); + CHECK( 
luts.size() == 235u ); CHECK( luts.num_pis() == 16u ); CHECK( luts.num_pos() == 16u ); - CHECK( luts.num_gates() == 215u ); - CHECK( st.area > 575.0f - eps ); - CHECK( st.area < 575.0f + eps ); + CHECK( luts.num_gates() == 217u ); + CHECK( st.area > 612.0f - eps ); + CHECK( st.area < 612.0f + eps ); CHECK( st.delay > 33.60f - eps ); CHECK( st.delay < 33.60f + eps ); CHECK( st.multioutput_gates == 40 ); From e202d8d19d7c4a10a82644b57dd947ff99e22982 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 15:19:41 +0200 Subject: [PATCH 17/27] Adding tests on custom required times, required time relaxation, and arrival times --- test/algorithms/emap.cpp | 179 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/test/algorithms/emap.cpp b/test/algorithms/emap.cpp index a52717971..514c405e8 100644 --- a/test/algorithms/emap.cpp +++ b/test/algorithms/emap.cpp @@ -652,6 +652,185 @@ TEST_CASE( "Emap with hybrid matching", "[emap]" ) CHECK( st.delay < 5.8f + eps ); } +TEST_CASE( "Emap with arrival times", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( large_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + const auto a = aig.create_pi(); + const auto b = aig.create_pi(); + const auto c = aig.create_pi(); + const auto d = aig.create_pi(); + const auto e = aig.create_pi(); + const auto f = aig.create_pi(); + const auto g = aig.create_pi(); + const auto h = aig.create_pi(); + + const auto f1 = aig.create_and( !a, b ); + const auto f2 = aig.create_and( f1, !c ); + const auto f3 = aig.create_and( d, e ); + const auto f4 = aig.create_and( f, !g ); + const auto f5 = aig.create_and( f4, h ); + const auto f6 = aig.create_and( f2, f3 ); + const auto f7 = aig.create_and( f5, f6 ); + + aig.create_po( f7 ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + emap_stats st; + + 
ps.arrival_times = std::vector( 8 ); + ps.arrival_times[0] = 0.0; + ps.arrival_times[1] = 1.0; + ps.arrival_times[2] = 2.0; + ps.arrival_times[3] = 3.0; + ps.arrival_times[4] = 4.0; + ps.arrival_times[5] = 5.0; + ps.arrival_times[6] = 6.0; + ps.arrival_times[7] = 7.0; + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 27u ); + CHECK( ntk.num_pis() == 8u ); + CHECK( ntk.num_pos() == 1u ); + CHECK( ntk.num_gates() == 17u ); + CHECK( st.area > 24.0f - eps ); + CHECK( st.area < 24.0f + eps ); + CHECK( st.delay > 12.6f - eps ); + CHECK( st.delay < 12.6f + eps ); +} + +TEST_CASE( "Emap with global required times", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( test_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + + std::vector a( 8 ), b( 8 ); + std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } ); + std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } ); + auto carry = aig.get_constant( false ); + + carry_ripple_adder_inplace( aig, a, b, carry ); + + std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); } ); + aig.create_po( carry ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + ps.required_time = 20.0; // real delay 15.7 + emap_stats st; + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 34 ); + CHECK( ntk.num_pis() == 16u ); + CHECK( ntk.num_pos() == 9u ); + CHECK( ntk.num_gates() == 16u ); + CHECK( st.area > 63.0f - eps ); + CHECK( st.area < 63.0f + eps ); + CHECK( st.delay < 20.0f + eps ); +} + +TEST_CASE( "Emap with required times", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( test_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + 
tech_library<6> lib( gates ); + + aig_network aig; + + std::vector a( 8 ), b( 8 ); + std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } ); + std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } ); + auto carry = aig.get_constant( false ); + + carry_ripple_adder_inplace( aig, a, b, carry ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + // ps.required_time = 20.0; // real delay 15.7 + emap_stats st; + + std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); ps.required_times.push_back( 19.0 ); } ); + aig.create_po( carry ); + ps.required_times.push_back( 20.0 ); + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 34 ); + CHECK( ntk.num_pis() == 16u ); + CHECK( ntk.num_pos() == 9u ); + CHECK( ntk.num_gates() == 16u ); + CHECK( st.area > 63.0f - eps ); + CHECK( st.area < 63.0f + eps ); + CHECK( st.delay < 20.0f + eps ); +} + +TEST_CASE( "Emap with required time relaxation", "[emap]" ) +{ + std::vector gates; + + std::istringstream in( test_library ); + auto result = lorina::read_genlib( in, genlib_reader( gates ) ); + CHECK( result == lorina::return_code::success ); + + tech_library<6> lib( gates ); + + aig_network aig; + + std::vector a( 8 ), b( 8 ); + std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } ); + std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } ); + auto carry = aig.get_constant( false ); + + carry_ripple_adder_inplace( aig, a, b, carry ); + + std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); } ); + aig.create_po( carry ); + + emap_params ps; + ps.matching_mode = emap_params::boolean; + ps.relax_required = 27.5; // real delay 15.7 + emap_stats st; + + cell_view ntk = emap<6>( aig, lib, ps, &st ); + + const float eps{ 0.005f }; + + CHECK( ntk.size() == 34 ); + CHECK( ntk.num_pis() == 16u ); + CHECK( ntk.num_pos() == 9u ); + CHECK( ntk.num_gates() == 16u ); + CHECK( 
st.area > 63.0f - eps ); + CHECK( st.area < 63.0f + eps ); + CHECK( st.delay < 20.0f + eps ); +} + TEST_CASE( "Emap with supergates", "[emap]" ) { std::vector gates; From 42f2e1399e27ff6da0b8335f66835c977d420053 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 15:28:41 +0200 Subject: [PATCH 18/27] Reverting experiment file --- experiments/emap.cpp | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/experiments/emap.cpp b/experiments/emap.cpp index fa2bb17c1..11d465f8b 100644 --- a/experiments/emap.cpp +++ b/experiments/emap.cpp @@ -55,7 +55,7 @@ int main() /* library to map to technology */ fmt::print( "[i] processing technology library\n" ); - std::string library = "asap7"; + std::string library = "multioutput"; std::vector gates; std::ifstream in( cell_libraries_path( library ) ); @@ -66,9 +66,9 @@ int main() tech_library_params tps; tps.verbose = true; - tech_library<6> tech_lib( gates, tps ); + tech_library<9> tech_lib( gates, tps ); - for ( auto const& benchmark : iwls_benchmarks() ) + for ( auto const& benchmark : epfl_benchmarks() ) { fmt::print( "[i] processing {}\n", benchmark ); @@ -78,32 +78,27 @@ int main() continue; } - // if ( aig.num_gates() > 100000 ) - // continue; - /* remove structural redundancies */ - // aig_balancing_params bps; - // bps.minimize_levels = false; - // bps.fast_mode = true; - // aig_balance( aig, bps ); + aig_balancing_params bps; + bps.minimize_levels = false; + bps.fast_mode = true; + aig_balance( aig, bps ); const uint32_t size_before = aig.num_gates(); const uint32_t depth_before = depth_view( aig ).depth(); emap_params ps; - ps.matching_mode = emap_params::boolean; + ps.matching_mode = emap_params::hybrid; ps.area_oriented_mapping = false; - ps.map_multioutput = false; - ps.verbose = true; + ps.map_multioutput = true; + ps.relax_required = 0; emap_stats st; - cell_view res = emap<6>( aig, tech_lib, ps, &st ); + cell_view res = emap<9>( aig, tech_lib, ps, &st ); 
names_view res_names{ res }; restore_network_name( aig, res_names ); restore_pio_names_by_order( aig, res_names ); - // const auto cec = benchmark == "hyp" ? true : abc_cec_mapped_cell( res_names, benchmark, library ); - // std::cout << fmt::format( "[i] CEC = {}\n", cec ); - const auto cec = false; /* don't run CEC */ + const auto cec = benchmark == "hyp" ? true : abc_cec_mapped_cell( res_names, benchmark, library ); /* write verilog netlist */ // write_verilog_with_cell( res_names, benchmark + "_mapped.v" ); From cc3babc2fbf805ee79a65e68ac1a543bffeaa763 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 15:31:46 +0200 Subject: [PATCH 19/27] Formatting and changing output text when loading a library --- include/mockturtle/algorithms/emap.hpp | 15 +++++++-------- include/mockturtle/utils/super_utils.hpp | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 9b1a3be72..10448cba0 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -682,8 +682,8 @@ struct best_gate_emap float area; float flow; unsigned phase : 16; - unsigned cut : 12; - unsigned size : 4; + unsigned cut : 12; + unsigned size : 4; }; template @@ -1114,7 +1114,6 @@ class emap_impl { if ( ntk.is_dont_touch( n ) ) { - warning_box |= initialize_box( n ); return false; } @@ -2061,7 +2060,7 @@ class emap_impl if ( ps.use_match_alternatives ) refine_best_matches( *it ); } - + unsigned use_phase = node_data.best_supergate[0] == nullptr ? 
1u : 0u; if ( node_data.best_supergate[use_phase] == nullptr ) { @@ -2240,7 +2239,7 @@ class emap_impl /* return if mapping is area oriented */ if ( ps.area_oriented_mapping ) return; - + set_output_required_time( iteration == 1 ); if ( exit_early ) @@ -3306,7 +3305,7 @@ class emap_impl node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; - node_data.area[mapped_phase] = area[j]; /* partial area contribution */ + node_data.area[mapped_phase] = area[j]; /* partial area contribution */ node_data.flows[mapped_phase] = flow_sum_neg; assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon ); @@ -5746,7 +5745,7 @@ class emap_impl * The function takes the size of the cuts in the template parameter `CutSize`. * * The function returns a block network that supports multi-output cells. - * + * * The novelties of this mapper are contained in 2 publications: * - A. Tempia Calvino and G. De Micheli, "Technology Mapping Using Multi-Output Library Cells," ICCAD, 2023. * - G. Radi, A. Tempia Calvino, and G. De Micheli, "In Medio Stat Virtus: Combining Boolean and Pattern Matching," ASP-DAC, 2024. @@ -5805,7 +5804,7 @@ cell_view emap( Ntk const& ntk, tech_library 0 ) From 2e8855fe18d8d352aa4668543ce4cafdfffc5602 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 15:46:19 +0200 Subject: [PATCH 20/27] Updating documentation --- docs/algorithms/mapper.rst | 166 +++++++++++++------------ include/mockturtle/algorithms/emap.hpp | 2 +- 2 files changed, 86 insertions(+), 82 deletions(-) diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst index 816d4f6b9..14360efad 100644 --- a/docs/algorithms/mapper.rst +++ b/docs/algorithms/mapper.rst @@ -1,3 +1,87 @@ +Extended technology mapping +--------------------------- + +**Header:** ``mockturtle/algorithms/emap.hpp`` + +The command `emap` stands for extended mapper. 
It supports large +library cells, of more than 6 inputs, and can perform matching using 3 +different methods: Boolean, pattern, or hybrid. The current version +can map to 2-output gates, such as full adders and half adders, +and provides a 2x speedup in mapping time compared to command `map` +for similar or better quality. Similarly to `map`, the implementation +is independent of the underlying graph representation. +Additionally, `emap` supports "don't touch" white boxes (gates). + +Command `emap` can return the mapped network in two formats. +Command `emap` returns a `cell_view` that supports +multi-output cells. Command `emap_klut` returns a `binding_view` +similarly to command `map`. + +The following example shows how to perform delay-oriented technology mapping +from an and-inverter graph using large cells up to 9 inputs: + +.. code-block:: c++ + + aig_network aig = ...; + + /* read cell library in genlib format */ + std::vector gates; + std::ifstream in( ... ); + lorina::read_genlib( in, genlib_reader( gates ) ); + tech_library<9> tech_lib( gates ); + + /* perform technology mapping */ + cell_view res = emap<9>( aig, tech_lib ); + +The next example performs area-oriented graph mapping using multi-output cells: + +.. code-block:: c++ + + aig_network aig = ...; + + /* read cell library in genlib format */ + std::vector gates; + std::ifstream in( ... ); + lorina::read_genlib( in, genlib_reader( gates ) ); + tech_library tech_lib( gates ); + + /* perform technology mapping */ + emap_params ps; + ps.area_oriented_mapping = true; + ps.map_multioutput = true; + cell_view res = emap( aig, tech_lib, ps ); + +In this case, `emap` is used to return a `block_network`, which can represent multi-output +cells as single nodes. Alternatively, `emap_klut` can also be used, but multi-output cells +would be represented by single-output nodes. + +The maximum number of cuts stored for each node is limited to 20. +To increase this limit, change `max_cut_num` in `emap`. 
+ +You can set the input arrival times and output required times using the parameters `arrival_times` +and `required_times`. Moreover, it is possible to ask for a required time relaxation. For instance, +if we want to map a network with an increase of 10% over its minimal delay, we can set +`relax_required` to 10. + +For further details and usage scenarios of `emap`, such as white boxes, please check the +related tests. + +**Parameters and statistics** + +.. doxygenstruct:: mockturtle::emap_params + :members: + +.. doxygenstruct:: mockturtle::emap_stats + :members: + +**Algorithm** + +.. doxygenfunction:: mockturtle::emap(Ntk const&, tech_library const&, emap_params const&, emap_stats*) +.. doxygenfunction:: mockturtle::emap_klut(Ntk const&, tech_library const&, emap_params const&, emap_stats*) +.. doxygenfunction:: mockturtle::emap_node_map(Ntk const&, tech_library const&, emap_params const&, emap_stats*) +.. doxygenfunction:: mockturtle::emap_load_mapping(Ntk&) + + Technology mapping and network conversion ----------------------------------------- @@ -136,84 +220,4 @@ To increase this limit, change `max_cut_num` in `fast_network_cuts`. **Algorithm** .. doxygenfunction:: mockturtle::map(Ntk const&, tech_library const&, map_params const&, map_stats*) -.. doxygenfunction:: mockturtle::map(Ntk&, exact_library const&, map_params const&, map_stats*) - - - -Extended technology mapping ---------------------------- - -**Header:** ``mockturtle/algorithms/emap.hpp`` - -The command `emap` stands for extended mapper. It supports large -library cells, of more than 6 inputs, and can perform matching using 3 -different methods: Boolean, pattern, or hybrid. The current version -can map to 2-output gates, such as full adders and half adders, -and provides a 2x speedup in mapping time compared to command `map` -for similar or better quality. Similarly, to `map`, the implementation -is independent of the underlying graph representation. 
-Additionally, `emap` supports "don't touch" white boxes (gates). - -Command `emap` can return the mapped network in two formats. -Command `emap` returns a `cell_view` that supports -multi-output cells. Command `emap_klut` returns a `binding_view` -similarly as command `map`. - -The following example shows how to perform delay-oriented technology mapping -from an and-inverter graph using large cells up to 9 inputs: - -.. code-block:: c++ - - aig_network aig = ...; - - /* read cell library in genlib format */ - std::vector gates; - std::ifstream in( ... ); - lorina::read_genlib( in, genlib_reader( gates ) ) - tech_library<9> tech_lib( gates ); - - /* perform technology mapping */ - cell_view res = emap<9>( aig, tech_lib ); - -The next example performs area-oriented graph mapping using multi-output cells: - -.. code-block:: c++ - - aig_network aig = ...; - - /* read cell library in genlib format */ - std::vector gates; - std::ifstream in( ... ); - lorina::read_genlib( in, genlib_reader( gates ) ) - tech_library tech_lib( gates ); - - /* perform technology mapping */ - emap_params ps; - ps.area_oriented_mapping = true; - ps.map_multioutput = true; - cell_view res = emap( aig, tech_lib, ps ); - -In this case, `emap` is used to return a `block_network`, which can respresent multi-output -cells as single nodes. Alternatively, also `emap_klut` can be used but multi-output cells -would be reporesented by single-output nodes. - -The maximum number of cuts stored for each node is limited to 32. -To increase this limit, change `max_cut_num` in `emap`. - -For further details and usage scenarios of `emap`, such as white boxes, please check the -related tests. - -**Parameters and statistics** - -.. doxygenstruct:: mockturtle::emap_params - :members: - -.. doxygenstruct:: mockturtle::emap_stats - :members: - -**Algorithm** - -.. doxygenfunction:: mockturtle::emap(Ntk const&, tech_library const&, emap_params const&, emap_stats*) -.. 
doxygenfunction:: mockturtle::emap_klut(Ntk const&, tech_library const&, emap_params const&, emap_stats*) -.. doxygenfunction:: mockturtle::emap_node_map(Ntk const&, tech_library const&, emap_params const&, emap_stats*) -.. doxygenfunction:: mockturtle::emap_load_mapping(Ntk&) \ No newline at end of file +.. doxygenfunction:: mockturtle::map(Ntk&, exact_library const&, map_params const&, map_stats*) \ No newline at end of file diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 10448cba0..bc29a2240 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -131,7 +131,7 @@ struct emap_params /*! \brief Number of patterns for switching activity computation. */ uint32_t switching_activity_patterns{ 2048u }; - /*! \brief Compute alternatives using a different cost functions */ + /*! \brief Compute area-oriented alternative matches */ bool use_match_alternatives{ true }; /*! \brief Remove the cuts that are contained in others */ From 653427c0ff496c40660b012bb54f5193feaaa024 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 1 May 2024 16:01:06 +0200 Subject: [PATCH 21/27] Changing names and formatting --- include/mockturtle/algorithms/emap.hpp | 155 ++++++++++++------------- 1 file changed, 75 insertions(+), 80 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index bc29a2240..b9bf5875c 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -690,7 +690,7 @@ template struct node_match_emap { /* best gate match for positive and negative output phases */ - supergate const* best_supergate[2]; + supergate const* best_gate[2]; /* alternative best gate for positibe and negative output phase */ best_gate_emap best_alternative[2]; /* fanin pin phases for both output phases */ @@ -699,8 +699,6 @@ struct node_match_emap uint16_t best_cut[2]; /* node is mapped using only one phase */ bool 
same_match; - /* node alternative uses only one phase */ - bool same_match_alternative; /* node is mapped to a multi-output gate */ bool multioutput_match[2]; @@ -1484,7 +1482,7 @@ class emap_impl auto const index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - node_data.best_supergates[0] = node_data.best_supergates[1] = nullptr; + node_data.best_gate[0] = node_data.best_gate[1] = nullptr; node_data.same_match = 0; node_data.multioutput_match[0] = node_data.multioutput_match[1] = false; node_data.required[0] = node_data.required[1] = std::numeric_limits::max(); @@ -1689,7 +1687,7 @@ class emap_impl /* recursively deselect the best cut shared between * the two phases if in use in the cover */ - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 0 : 1; double old_required = -1; if ( node_data.same_match ) { @@ -1785,10 +1783,10 @@ class emap_impl auto& node_data = node_match[index]; /* propagate required time through the leaves */ - unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; + unsigned use_phase = node_data.best_gate[0] == nullptr ? 
1u : 0u; unsigned other_phase = use_phase ^ 1; - assert( node_data.best_supergate[0] != nullptr || node_data.best_supergate[1] != nullptr ); + assert( node_data.best_gate[0] != nullptr || node_data.best_gate[1] != nullptr ); // assert( node_data.map_refs[0] || node_data.map_refs[1] ); /* propagate required time over the output inverter if present */ @@ -1806,7 +1804,7 @@ class emap_impl { auto ctr = 0u; auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; - auto const& supergate = node_data.best_supergate[use_phase]; + auto const& supergate = node_data.best_gate[use_phase]; for ( auto leaf : best_cut ) { auto phase = ( node_data.phase[use_phase] >> ctr ) & 1; @@ -1819,7 +1817,7 @@ class emap_impl { auto ctr = 0u; auto const& best_cut = cuts[index][node_data.best_cut[other_phase]]; - auto const& supergate = node_data.best_supergate[other_phase]; + auto const& supergate = node_data.best_gate[other_phase]; for ( auto leaf : best_cut ) { auto phase = ( node_data.phase[other_phase] >> ctr ) & 1; @@ -1865,7 +1863,7 @@ class emap_impl if ( node_data.map_refs[0] || node_data.map_refs[1] ) { /* if used and not available in the library launch a mapping error */ - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) { std::cerr << "[e] MAP ERROR: technology library does not contain constant gates, impossible to perform mapping" << std::endl; st.mapping_error = true; @@ -1899,9 +1897,9 @@ class emap_impl } } - unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; + unsigned use_phase = node_data.best_gate[0] == nullptr ? 
1u : 0u; - if ( node_data.best_supergate[use_phase] == nullptr ) + if ( node_data.best_gate[use_phase] == nullptr ) { /* Library is not complete, mapping is not possible */ std::cerr << "[e] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl; @@ -2020,7 +2018,7 @@ class emap_impl if ( node_match[index].map_refs[0] || node_match[index].map_refs[1] ) { /* if used and not available in the library launch a mapping error */ - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) { std::cerr << "[e] MAP ERROR: technology library does not contain constant gates, impossible to perform mapping" << std::endl; st.mapping_error = true; @@ -2061,8 +2059,8 @@ class emap_impl refine_best_matches( *it ); } - unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; - if ( node_data.best_supergate[use_phase] == nullptr ) + unsigned use_phase = node_data.best_gate[0] == nullptr ? 1u : 0u; + if ( node_data.best_gate[use_phase] == nullptr ) { /* Library is not complete, mapping is not possible */ std::cerr << "[e] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl; @@ -2310,16 +2308,16 @@ class emap_impl } } - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 
0 : 1; /* compute arrival of use_phase */ - supergate const* best_supergate = node_data.best_supergate[use_phase]; + supergate const* best_gate = node_data.best_gate[use_phase]; double worst_arrival = 0; uint16_t best_phase = node_data.phase[use_phase]; auto ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2345,15 +2343,15 @@ class emap_impl continue; } - assert( node_data.best_supergate[use_phase] != nullptr ); + assert( node_data.best_gate[use_phase] != nullptr ); - best_supergate = node_data.best_supergate[use_phase]; + best_gate = node_data.best_gate[use_phase]; worst_arrival = 0; best_phase = node_data.phase[use_phase]; ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2396,16 +2394,16 @@ class emap_impl { uint32_t index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 
0 : 1; /* compute arrival of use_phase */ - supergate const* best_supergate = node_data.best_supergate[use_phase]; + supergate const* best_gate = node_data.best_gate[use_phase]; double worst_arrival = 0; uint16_t best_phase = node_data.phase[use_phase]; auto ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2419,15 +2417,15 @@ class emap_impl return; } - assert( node_data.best_supergate[0] != nullptr ); + assert( node_data.best_gate[0] != nullptr ); - best_supergate = node_data.best_supergate[use_phase]; + best_gate = node_data.best_gate[use_phase]; worst_arrival = 0; best_phase = node_data.phase[use_phase]; ctr = 0u; for ( auto l : cuts[index][node_data.best_cut[use_phase]] ) { - double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr]; + double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_gate->tdelay[ctr]; worst_arrival = std::max( worst_arrival, arrival_pin ); ++ctr; } @@ -2442,7 +2440,7 @@ class emap_impl auto& node_data = node_match[index]; uint32_t cut_index = 0u; - node_data.best_supergate[phase] = nullptr; + node_data.best_gate[phase] = nullptr; node_data.arrival[phase] = std::numeric_limits::max(); node_data.flows[phase] = std::numeric_limits::max(); node_data.area[phase] = std::numeric_limits::max(); @@ -2518,7 +2516,7 @@ class emap_impl if ( !skip && compare_map( worst_arrival, node_data.arrival[phase], area_local, node_data.flows[phase], cut->size(), best_size ) ) { - node_data.best_supergate[phase] = &gate; + node_data.best_gate[phase] = &gate; node_data.arrival[phase] = worst_arrival; node_data.flows[phase] = area_local; node_data.best_cut[phase] = cut_index; @@ -2558,23 +2556,23 @@ class 
emap_impl auto index = ntk.node_to_index( n ); auto& node_data = node_match[index]; - supergate const* best_supergate = node_data.best_supergate[phase]; + supergate const* best_gate = node_data.best_gate[phase]; /* unmap multioutput */ if ( node_data.multioutput_match[phase] ) { /* dereference multi-output */ - if ( !node_data.same_match && best_supergate != nullptr && node_data.map_refs[phase] ) + if ( !node_data.same_match && best_gate != nullptr && node_data.map_refs[phase] ) { auto const& cut = multi_cut_set[node_data.best_cut[phase]][0]; cut_deref( cut, n, phase ); } - best_supergate = nullptr; + best_gate = nullptr; node_data.multioutput_match[phase] = false; } /* recompute best match info */ - if ( best_supergate != nullptr ) + if ( best_gate != nullptr ) { /* if cut is implemented, remove it from the cover */ if ( !node_data.same_match && node_data.map_refs[phase] ) @@ -2632,7 +2630,7 @@ class emap_impl best_size = cut->size(); best_cut = cut_index; best_phase = gate_polarity; - best_supergate = &gate; + best_gate = &gate; } } @@ -2644,7 +2642,7 @@ class emap_impl node_data.area[phase] = best_area; node_data.best_cut[phase] = best_cut; node_data.phase[phase] = best_phase; - node_data.best_supergate[phase] = best_supergate; + node_data.best_gate[phase] = best_gate; if ( !node_data.same_match && node_data.map_refs[phase] ) { @@ -2665,7 +2663,7 @@ class emap_impl bool use_one = false; /* only one phase is matched */ - if ( node_data.best_supergate[0] == nullptr ) + if ( node_data.best_gate[0] == nullptr ) { set_match_complemented_phase( index, 1, worst_arrival_npos ); if constexpr ( ELA ) @@ -2675,7 +2673,7 @@ class emap_impl } return; } - else if ( node_data.best_supergate[1] == nullptr ) + else if ( node_data.best_gate[1] == nullptr ) { set_match_complemented_phase( index, 0, worst_arrival_nneg ); if constexpr ( ELA ) @@ -2856,7 +2854,7 @@ class emap_impl auto& node_data = node_match[index]; auto phase_n = phase ^ 1; node_data.same_match = true; - 
node_data.best_supergate[phase_n] = nullptr; + node_data.best_gate[phase_n] = nullptr; node_data.best_cut[phase_n] = node_data.best_cut[phase]; node_data.phase[phase_n] = node_data.phase[phase]; node_data.arrival[phase_n] = worst_arrival_n; @@ -2885,7 +2883,6 @@ class emap_impl /* process for best area */ /* removed check on required since this is executed only during a delay pass */ if ( g0.gate != nullptr && g0flow + lib_inv_area < g1flow + epsilon ) { - node_data.same_match_alternative = true; g1 = g0; g1.gate = nullptr; g1.arrival += lib_inv_delay; @@ -2895,7 +2892,6 @@ class emap_impl } else if ( g1.gate != nullptr && g1flow + lib_inv_area < g0flow + epsilon ) { - node_data.same_match_alternative = true; g0 = g1; g0.gate = nullptr; g0.arrival += lib_inv_delay; @@ -2904,7 +2900,6 @@ class emap_impl return; } - node_data.same_match_alternative = false; g0.flow = g0flow; g1.flow = g1flow; } @@ -2923,9 +2918,9 @@ class emap_impl if ( node_data.same_match ) { /* pick best implementation between the two alternatives */ - unsigned best_match_phase = node_data.best_supergate[0] == nullptr ? 1 : 0; + unsigned best_match_phase = node_data.best_gate[0] == nullptr ? 1 : 0; unsigned use_phase = g0.gate == nullptr ? 
1 : 0; - if ( !node_data.same_match_alternative ) + if ( g0.gate != nullptr && g1.gate != nullptr ) { if ( g0.arrival > node_data.required[0] + epsilon || g1.arrival > node_data.required[1] + epsilon ) return; @@ -2952,12 +2947,12 @@ class emap_impl if ( g0.gate != nullptr && g0.arrival < node_data.required[0] + epsilon ) { node_data.same_match = false; - refine_best_matches_copy_refinement( n, 0, node_data.same_match_alternative && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ); + refine_best_matches_copy_refinement( n, 0, g1.gate == nullptr && g0.arrival + lib_inv_delay < node_data.required[1] + epsilon ); } if ( g1.gate != nullptr && g1.arrival < node_data.required[1] + epsilon ) { node_data.same_match = false; - refine_best_matches_copy_refinement( n, 1, node_data.same_match_alternative && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ); + refine_best_matches_copy_refinement( n, 1, g0.gate == nullptr && g1.arrival + lib_inv_delay < node_data.required[0] + epsilon ); } } } @@ -2993,7 +2988,7 @@ class emap_impl auto& node_data = node_match[index]; best_gate_emap& bg = node_data.best_alternative[phase]; - node_data.best_supergate[phase] = bg.gate; + node_data.best_gate[phase] = bg.gate; node_data.phase[phase] = bg.phase; node_data.best_cut[phase] = bg.cut; node_data.arrival[phase] = bg.arrival; @@ -3005,7 +3000,7 @@ class emap_impl node_data.same_match = true; phase ^= 1; - node_data.best_supergate[phase] = nullptr; + node_data.best_gate[phase] = nullptr; node_data.phase[phase] = bg.phase; node_data.best_cut[phase] = bg.cut; node_data.arrival[phase] = bg.arrival + lib_inv_delay; @@ -3124,16 +3119,16 @@ class emap_impl /* if only one is available, the other is obtained using an inverter */ if ( supergates_zero != nullptr ) { - node_data.best_supergate[0] = &( ( *supergates_zero )[0] ); - node_data.arrival[0] = node_data.best_supergate[0]->tdelay[0]; - node_data.area[0] = node_data.best_supergate[0]->area; + node_data.best_gate[0] = &( 
( *supergates_zero )[0] ); + node_data.arrival[0] = node_data.best_gate[0]->tdelay[0]; + node_data.area[0] = node_data.best_gate[0]->area; node_data.phase[0] = 0; } if ( supergates_one != nullptr ) { - node_data.best_supergate[1] = &( ( *supergates_one )[0] ); - node_data.arrival[1] = node_data.best_supergate[1]->tdelay[0]; - node_data.area[1] = node_data.best_supergate[1]->area; + node_data.best_gate[1] = &( ( *supergates_one )[0] ); + node_data.arrival[1] = node_data.best_gate[1]->tdelay[0]; + node_data.area[1] = node_data.best_gate[1]->area; node_data.phase[1] = 0; } else @@ -3235,7 +3230,7 @@ class emap_impl /* compute area flow */ if ( j == 0 || !node_data.multioutput_match[0] ) { - uint8_t current_phase = node_data.best_supergate[0] == nullptr ? 1 : 0; + uint8_t current_phase = node_data.best_gate[0] == nullptr ? 1 : 0; old_flow_sum += node_data.flows[current_phase]; } uint8_t old_phase = node_data.phase[phase[j]]; @@ -3289,7 +3284,7 @@ class emap_impl uint8_t mapped_phase = phase[j]; node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = &gate; + node_data.best_gate[mapped_phase] = &gate; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j]; @@ -3301,7 +3296,7 @@ class emap_impl /* select opposite phase */ mapped_phase ^= 1; node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = nullptr; + node_data.best_gate[mapped_phase] = nullptr; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; @@ -3349,13 +3344,13 @@ class emap_impl for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) { uint32_t node_index = tuple_data[j].node_index; - uint8_t selected_phase = node_match[node_index].best_supergate[0] == nullptr ? 1 : 0; + uint8_t selected_phase = node_match[node_index].best_gate[0] == nullptr ? 
1 : 0; if ( node_match[node_index].map_refs[0] || node_match[node_index].map_refs[1] ) { /* match is always single output here */ auto const& cut = cuts[node_index][node_match[node_index].best_cut[0]]; - uint8_t use_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_match[node_index].best_gate[0] != nullptr ? 0 : 1; best_exact_area[j] = cut_deref( cut, ntk.index_to_node( node_index ), use_phase ); /* mapping a non referenced phase */ @@ -3375,7 +3370,7 @@ class emap_impl if ( node_match[node_index].map_refs[0] || node_match[node_index].map_refs[1] ) { - uint8_t use_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_match[node_index].best_gate[0] != nullptr ? 0 : 1; auto const& best_cut = cuts[node_index][node_match[node_index].best_cut[use_phase]]; cut_ref( best_cut, ntk.index_to_node( node_index ), use_phase ); } @@ -3514,7 +3509,7 @@ class emap_impl /* write data */ node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = &gate; + node_data.best_gate[mapped_phase] = &gate; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j]; @@ -3524,7 +3519,7 @@ class emap_impl /* select opposite phase */ mapped_phase ^= 1; node_data.multioutput_match[mapped_phase] = true; - node_data.best_supergate[mapped_phase] = nullptr; + node_data.best_gate[mapped_phase] = nullptr; node_data.best_cut[mapped_phase] = cut_index[j]; node_data.phase[mapped_phase] = pin_phase[j]; node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay; @@ -3681,7 +3676,7 @@ class emap_impl if ( node_data.same_match && ( node_data.map_refs[0] || node_data.map_refs[1] ) ) { - uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; + uint8_t use_phase = node_data.best_gate[0] != nullptr ? 
0 : 1; auto const& best_cut = cuts[index][node_data.best_cut[use_phase]]; cut_deref( best_cut, n, use_phase ); } @@ -3873,7 +3868,7 @@ class emap_impl } /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */ - if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) + if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_gate[leaf_phase] == nullptr ) { if constexpr ( SwitchActivity ) count += switch_activity[leaf]; @@ -3946,7 +3941,7 @@ class emap_impl if ( node_match[leaf].same_match ) { /* Add inverter area if it is used only by the current gate and leaf node is implemented in the opposite phase */ - if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) + if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_gate[leaf_phase] == nullptr ) { if constexpr ( SwitchActivity ) count += switch_activity[leaf]; @@ -4053,7 +4048,7 @@ class emap_impl } /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */ - if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr ) + if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_gate[leaf_phase] == nullptr ) { if constexpr ( SwitchActivity ) count += switch_activity[leaf]; @@ -4196,7 +4191,7 @@ class emap_impl /* add inverter at PI if needed */ if ( ntk.is_constant( n ) ) { - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) continue; } else if ( ntk.is_pi( n ) ) @@ -4223,7 +4218,7 @@ class emap_impl } } - unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1; + unsigned phase = ( node_data.best_gate[0] != nullptr ) ? 
0 : 1; /* add used cut */ if ( node_data.same_match || node_data.map_refs[phase] > 0 ) @@ -4314,7 +4309,7 @@ class emap_impl /* add inverter at PI if needed */ if ( ntk.is_constant( n ) ) { - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) continue; } else if ( ntk.is_pi( n ) ) @@ -4341,7 +4336,7 @@ class emap_impl } } - unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1; + unsigned phase = ( node_data.best_gate[0] != nullptr ) ? 0 : 1; /* add used cut */ if ( node_data.same_match || node_data.map_refs[phase] > 0 ) @@ -4413,7 +4408,7 @@ class emap_impl { auto const& node_data = node_match[index]; auto const& best_cut = cuts[index][node_data.best_cut[phase]]; - auto const& gate = node_data.best_supergate[phase]->root; + auto const& gate = node_data.best_gate[phase]->root; /* permutate and negate to obtain the matched gate truth table */ std::vector> children( gate->num_vars ); @@ -4423,7 +4418,7 @@ class emap_impl { if ( ctr >= gate->num_vars ) break; - children[node_data.best_supergate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; + children[node_data.best_gate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; ++ctr; } @@ -4474,7 +4469,7 @@ class emap_impl { auto const& node_data = node_match[index]; auto const& best_cut = cuts[index][node_data.best_cut[phase]]; - auto const& gate = node_data.best_supergate[phase]->root; + auto const& gate = node_data.best_gate[phase]->root; /* permutate and negate to obtain the matched gate truth table */ std::vector> children( gate->num_vars ); @@ -4484,7 +4479,7 @@ class emap_impl { if ( ctr >= gate->num_vars ) break; - children[node_data.best_supergate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; + children[node_data.best_gate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) 
& 1]; ++ctr; } @@ -4534,7 +4529,7 @@ class emap_impl void create_block_for_gate( cell_view& res, block_map& old2new, uint32_t index, unsigned phase, std::vector const& genlib_to_cell ) { std::vector const& lib = res.get_library(); - composed_gate const* local_gate = node_match[index].best_supergate[phase]->root; + composed_gate const* local_gate = node_match[index].best_gate[phase]->root; standard_cell const& cell = lib[genlib_to_cell.at( local_gate->root->id )]; assert( !local_gate->is_super ); @@ -4549,7 +4544,7 @@ class emap_impl { if ( ctr >= local_gate->num_vars ) break; - children[node_match[index].best_supergate[phase]->permutation[ctr]] = old2new[l][( node_match[index].phase[phase] >> ctr ) & 1]; + children[node_match[index].best_gate[phase]->permutation[ctr]] = old2new[l][( node_match[index].phase[phase] >> ctr ) & 1]; ++ctr; } @@ -4565,10 +4560,10 @@ class emap_impl { uint32_t node_index = tuple_data[j].node_index; assert( node_match[node_index].same_match ); - uint8_t node_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1; + uint8_t node_phase = node_match[node_index].best_gate[0] != nullptr ? 0 : 1; assert( node_match[node_index].multioutput_match[node_phase] ); - gate const* node_gate = node_match[node_index].best_supergate[node_phase]->root->root; + gate const* node_gate = node_match[node_index].best_gate[node_phase]->root->root; /* wrong output */ if ( node_gate->id != g.id ) @@ -4588,7 +4583,7 @@ class emap_impl for ( uint32_t s : outputs ) { /* add inverted version if used */ - uint8_t node_phase = node_match[s].best_supergate[0] != nullptr ? 0 : 1; + uint8_t node_phase = node_match[s].best_gate[0] != nullptr ? 
0 : 1; assert( node_match[s].same_match ); /* add the node in the data structure */ @@ -5000,7 +4995,7 @@ class emap_impl if ( ntk.is_constant( n ) ) { - if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr ) + if ( node_data.best_gate[0] == nullptr && node_data.best_gate[1] == nullptr ) continue; } else if ( ntk.is_pi( n ) ) @@ -5014,7 +5009,7 @@ class emap_impl if ( !node_data.map_refs[0] && !node_data.map_refs[1] ) continue; - unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1; + unsigned phase = ( node_data.best_gate[0] != nullptr ) ? 0 : 1; if ( node_data.same_match || node_data.map_refs[phase] > 0 ) { From 406a5decf5a1aecddfd2ca49aeacc285ea7fc564 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 3 May 2024 19:05:42 +0200 Subject: [PATCH 22/27] Fixing multioutput cut insertion on cutset --- include/mockturtle/algorithms/emap.hpp | 266 ++++++++++++------------- 1 file changed, 126 insertions(+), 140 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index b9bf5875c..f6454ec2b 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -717,20 +717,31 @@ struct node_match_emap float flows[2]; }; -union multi_match_data -{ - uint64_t data{ 0 }; - struct - { - uint64_t in_tfi : 1; - uint64_t cut_index : 31; - uint64_t node_index : 32; - }; -}; - template class emap_impl { +private: + union multi_match_data + { + uint64_t data{ 0 }; + struct + { + uint64_t in_tfi : 1; + uint64_t cut_index : 31; + uint64_t node_index : 32; + }; + }; + union multioutput_info + { + uint32_t data; + struct + { + unsigned index : 29; + unsigned lowest_index : 1; + unsigned highest_index : 1; + unsigned has_info : 1; + }; + }; public: static constexpr float epsilon = 0.0005; static constexpr uint32_t max_cut_num = 20; @@ -767,10 +778,11 @@ class emap_impl ps( ps ), st( st ), node_match( ntk.size() ), - node_tuple_match( ntk.size(), UINT32_MAX 
), + node_tuple_match( ntk.size() ), switch_activity( ps.eswp_rounds ? switching_activity( ntk, ps.switching_activity_patterns ) : std::vector( 0 ) ), cuts( ntk.size() ) { + std::memset( node_tuple_match.data(), 0, sizeof( multioutput_info ) * ntk.size() ); std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info(); tmp_visited.reserve( 100 ); @@ -782,10 +794,11 @@ class emap_impl ps( ps ), st( st ), node_match( ntk.size() ), - node_tuple_match( ntk.size(), UINT32_MAX ), + node_tuple_match( ntk.size() ), switch_activity( switch_activity ), cuts( ntk.size() ) { + std::memset( node_tuple_match.data(), 0, sizeof( multioutput_info ) * ntk.size() ); std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info(); tmp_visited.reserve( 100 ); @@ -968,7 +981,6 @@ class emap_impl /* compute mapping using exact area */ i = 0; compute_required_time( true ); - reindex_multioutput_data(); while ( i++ < ps.ela_rounds ) { if ( !compute_mapping_exact_reversed() ) @@ -1005,6 +1017,12 @@ class emap_impl continue; } + /* load multi-output cuts and data */ + if ( ps.map_multioutput && node_tuple_match[index].has_info ) + { + match_multi_add_cuts( n ); + } + /* match positive phase */ match_phase( n, 0u ); @@ -1017,17 +1035,12 @@ class emap_impl /* select alternative matches to use */ select_alternatives( n ); - /* load and try a multi-output matches */ - if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) + /* try multi-output matches */ + if constexpr ( DO_AREA ) { - /* continue if matches do not fit in the cut data structure due to bad settings */ - if ( !match_multi_add_cuts( n ) ) - continue; - - if constexpr ( DO_AREA ) + if ( ps.map_multioutput && node_tuple_match[index].highest_index ) { - bool multi_success = match_multioutput( n ); - if ( multi_success ) + if ( 
match_multioutput( n ) ) multi_node_update( n ); } } @@ -1198,7 +1211,7 @@ class emap_impl compute_truth_table( index, vcuts, fanin, new_cut ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); if ( ps.remove_dominated_cuts ) rcuts.insert( new_cut, false, sort ); @@ -1285,7 +1298,7 @@ class emap_impl compute_truth_table( index, vcuts, fanin, new_cut ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); if ( ps.remove_dominated_cuts ) rcuts.insert( new_cut, false, sort ); @@ -1308,7 +1321,7 @@ class emap_impl compute_truth_table( index, vcuts, fanin, new_cut ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); if ( ps.remove_dominated_cuts ) rcuts.insert( new_cut, false, sort ); @@ -1569,7 +1582,7 @@ class emap_impl new_cut->function = kitty::extend_to<6>( ntk.node_function( n ) ); /* match cut and compute data */ - compute_cut_data( new_cut, n ); + compute_cut_data( new_cut, n ); ++cuts_total; } @@ -1618,7 +1631,7 @@ class emap_impl /* try a multi-output match */ if constexpr ( DO_AREA ) { - if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX ) + if ( ps.map_multioutput && node_tuple_match[index].highest_index ) { bool multi_success = match_multioutput( n ); if ( multi_success ) @@ -1724,8 +1737,8 @@ class emap_impl /* try to drop one phase */ match_drop_phase( *it ); - /* try a multi-output match */ - if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 ) + /* try a multi-output match */ /* TODO: fix the required time*/ + if ( ps.map_multioutput && node_tuple_match[index].lowest_index ) { bool mapped = match_multioutput_exact( *it, true ); @@ -3008,24 +3021,6 @@ class emap_impl node_data.flows[phase] = ( bg.flow * node_data.est_refs[phase ^ 1] + lib_inv_area ) / node_data.est_refs[phase]; } - void reindex_multioutput_data() - { - /* re-index the multioutput list using the lowest index 
output instead of the greatest one */ - if ( !ps.map_multioutput ) - return; - - for ( auto i = ntk.num_pis(); i < topo_order.size(); ++i ) - { - uint32_t tuple_index = node_tuple_match[i]; - if ( tuple_index >= UINT32_MAX - 1 ) - continue; - - multi_match_t const& tuple_data = multi_node_match[tuple_index][0]; - node_tuple_match[i] = UINT32_MAX - 1; /* arbitrary value to skip the required time propagation */ - node_tuple_match[tuple_data[0].node_index] = tuple_index; - } - } - bool initialize_box( node const& n ) { uint32_t index = ntk.node_to_index( n ); @@ -3152,7 +3147,7 @@ class emap_impl { /* extract outputs tuple */ uint32_t index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; /* get the cut */ auto const& cut0 = cuts[tuple_data[0].node_index][tuple_data[0].cut_index]; @@ -3315,7 +3310,7 @@ class emap_impl { /* extract outputs tuple */ uint32_t index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; /* local values storage */ std::array best_exact_area; @@ -3537,7 +3532,7 @@ class emap_impl void multi_node_update( node const& n ) { uint32_t check_index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )].index][0]; uint64_t signature = 0; /* check if a node is in TFI: there is a path of length > 1 */ @@ -3617,7 +3612,7 @@ class emap_impl void multi_node_update_exact( node const& n ) { uint32_t check_index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0]; + multi_match_t const& tuple_data = 
multi_node_match[node_tuple_match[ntk.node_to_index( n )].index][0]; uint64_t signature = 0; /* check if a node is in TFI: there is a path of length > 1 */ @@ -3698,7 +3693,7 @@ class emap_impl { /* extract outputs tuple */ uint32_t index = ntk.node_to_index( n ); - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; for ( int j = max_multioutput_output_size - 1; j >= 0; --j ) { @@ -3707,78 +3702,67 @@ class emap_impl } } - template bool match_multi_add_cuts( node const& n ) { + /* assume a single cut (current version) */ uint32_t index = ntk.node_to_index( n ); - auto& matches = multi_node_match[node_tuple_match[index]]; + multi_match_t& matches = multi_node_match[node_tuple_match[index].index][0]; + + /* find the corresponding cut */ + uint32_t cut_p = 0; + while( matches[cut_p].node_index != index ) + ++cut_p; + + assert( cut_p < matches.size() ); + uint32_t cut_index = matches[cut_p].cut_index; + auto& cut = multi_cut_set[cut_index][cut_p]; + auto single_cut = multi_cut_set[cut_index][cut_p]; + auto& rcuts = cuts[index]; + + /* not enough space in the data structure: abort */ + if ( rcuts.size() == max_cut_num ) + { + match_multi_add_cuts_remove_entry( matches ); + return false; + } - /* get the cuts */ - auto tuple_data_it = matches.begin(); - while ( tuple_data_it != matches.end() ) + /* insert single cut variation if unique (for delay preservation) */ + if ( !rcuts.is_contained( single_cut ) ) { - multi_match_t& tuple_data = *tuple_data_it; - uint32_t cut_index = tuple_data[0].cut_index; - auto& cut_pair = multi_cut_set[cut_index]; - bool remove_entry = false; + single_cut->pattern_index = 0; + compute_cut_data( single_cut, ntk.index_to_node( index ) ); + rcuts.append_cut( single_cut ); - /* insert multi-output cuts into the standard cut set */ - for ( auto i = 0; i < max_multioutput_output_size; ++i ) + /* not enough space in the data 
structure: abort */ + if ( rcuts.size() == max_cut_num ) { - uint64_t node_index = tuple_data[i].node_index; - auto& cut = cut_pair[i]; - auto single_cut = cut_pair[i]; - - auto& rcuts = cuts[node_index]; - - /* not enough space in the data structure: abort */ - if ( rcuts.size() == max_cut_num ) - { - remove_entry = true; - break; - } - - /* insert single cut variation if unique (for delay preservation) */ - if ( !rcuts.is_contained( single_cut ) ) - { - compute_cut_data( single_cut, ntk.index_to_node( node_index ) ); - rcuts.append_cut( single_cut ); - - /* not enough space in the data structure: abort */ - if ( rcuts.size() == max_cut_num ) - { - rcuts.limit( rcuts.size() - 1 ); - remove_entry = true; - break; - } - } + rcuts.limit( rcuts.size() - 1 ); + match_multi_add_cuts_remove_entry( matches ); + return false; + } + } - /* add multi-output cut */ - uint32_t num_cuts_pre = rcuts.size(); - cut->ignore = true; - rcuts.append_cut( cut ); + /* add multi-output cut */ + uint32_t num_cuts_pre = rcuts.size(); + cut->ignore = true; + rcuts.append_cut( cut ); - uint32_t num_cuts_after = rcuts.size(); - assert( num_cuts_after == num_cuts_pre + 1 ); + uint32_t num_cuts_after = rcuts.size(); + assert( num_cuts_after == num_cuts_pre + 1 ); - rcuts.limit( num_cuts_pre ); + rcuts.limit( num_cuts_pre ); - /* update tuple data */ - tuple_data[i].cut_index = num_cuts_pre; - } + /* update tuple data */ + matches[cut_p].cut_index = num_cuts_pre; + } - if ( remove_entry ) - matches.erase( tuple_data_it ); - else - ++tuple_data_it; + inline void match_multi_add_cuts_remove_entry( multi_match_t const& matches ) + { + /* reset matches */ + for ( multi_match_data const& entry : matches ) + { + node_tuple_match[entry.node_index].data = 0; } - - /* matches do not fit in the data structure, remove multi-output option */ - if ( matches.empty() ) - node_tuple_match[index] = UINT32_MAX; - - /* return if the insertion is (partially) successful */ - return !matches.empty(); } inline bool 
multi_node_update_cut_check( uint32_t index, uint64_t signature, uint8_t phase ) @@ -4233,7 +4217,7 @@ class emap_impl } /* count multioutput gates */ - if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 && node_data.multioutput_match[phase] ) + if ( ps.map_multioutput && node_tuple_match[index].lowest_index && node_data.multioutput_match[phase] ) { ++multioutput_count; } @@ -4246,7 +4230,7 @@ class emap_impl create_lut_for_gate( res, old2new, index, phase ); /* count multioutput gates */ - if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 && node_data.multioutput_match[phase] ) + if ( ps.map_multioutput && node_tuple_match[index].lowest_index && node_data.multioutput_match[phase] ) { ++multioutput_count; } @@ -4346,11 +4330,10 @@ class emap_impl { assert( node_data.same_match == true ); - if ( node_tuple_match[index] < UINT32_MAX - 1 ) + if ( node_tuple_match[index].has_info && node_tuple_match[index].lowest_index ) { ++multioutput_count; create_block_for_gate( res, old2new, index, phase, genlib_to_cell ); - /* TODO: implement */ } continue; } @@ -4548,7 +4531,7 @@ class emap_impl ++ctr; } - multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0]; + multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index].index][0]; std::vector outputs; std::vector functions; @@ -4700,7 +4683,6 @@ class emap_impl #pragma endregion #pragma region Cuts and matching utils - template void compute_cut_data( cut_t& cut, node const& n ) { cut->delay = std::numeric_limits::max(); @@ -5240,7 +5222,12 @@ class emap_impl if constexpr ( OverlapFilter ) { multi_gate_mark_visited( index1, index2, cut1 ); - node_tuple_match[index2] = multi_node_match.size(); + node_tuple_match[index1].has_info = 1; + node_tuple_match[index1].lowest_index = 1; + node_tuple_match[index1].index = multi_node_match.size(); + node_tuple_match[index2].has_info = 1; + node_tuple_match[index2].highest_index = 1; + node_tuple_match[index2].index = 
multi_node_match.size(); } else { @@ -5269,16 +5256,6 @@ class emap_impl multi_node_match[insertion_index].push_back( p ); } } - - /* remove indexing for lower index for compatible overlapping cuts */ - if constexpr ( !OverlapFilter ) - { - for ( auto const& entry : multi_node_match ) - { - multi_match_t const& p = entry[0]; - node_tuple_match[p[0].node_index] = UINT32_MAX; - } - } } bool multi_compute_cut_data( std::array& cut_tuple ) @@ -5385,24 +5362,30 @@ class emap_impl inline bool multi_gate_check_incompatible( uint32_t index1, uint32_t index2, bool& is_new, uint32_t& data_index ) { /* check cut assigned cut outputs, specialized code for 2 outputs */ - uint32_t current_assignment = node_tuple_match[index1]; - if ( current_assignment != node_tuple_match[index2] ) - return true; + if ( !node_tuple_match[index1].has_info && !node_tuple_match[index2].has_info ) + return false; - /* load data */ - if ( current_assignment != UINT32_MAX ) + if ( node_tuple_match[index1].has_info && node_tuple_match[index2].has_info ) { + uint32_t current_assignment = node_tuple_match[index1].index; + if ( current_assignment != node_tuple_match[index2].index ) + return true; is_new = false; data_index = current_assignment; + return false; } - return false; + return true; } inline void multi_gate_mark_compatibility( uint32_t index1, uint32_t index2, uint32_t mark_value ) { - node_tuple_match[index1] = mark_value; - node_tuple_match[index2] = mark_value; + node_tuple_match[index1].has_info = 1; + node_tuple_match[index1].lowest_index = 1; + node_tuple_match[index1].index = mark_value; + node_tuple_match[index2].has_info = 1; + node_tuple_match[index2].highest_index = 1; + node_tuple_match[index2].index = mark_value; } inline void multi_gate_mark_visited( uint32_t index1, uint32_t index2, multi_cut_t const& cut ) @@ -5579,8 +5562,11 @@ class emap_impl ntk.set_visited( g, ntk.trav_id() - 2 ); if ( i > 0 && n == repr ) { - /* fix cycle: remove multi-output match; TODO: extend for more 
than 2 outputs */ - node_tuple_match[ntk.node_to_index( g )] = UINT32_MAX; + /* fix cycle: remove multi-output match */ + choice_ntk.foreach_choice( repr, [&]( auto const& p ) { + node_tuple_match[ntk.node_to_index( p )].data = 0; + return true; + } ); choice_ntk.remove_choice( g ); check = true; } @@ -5712,7 +5698,7 @@ class emap_impl std::vector> topo_order; node_match_t node_match; - std::vector node_tuple_match; + std::vector node_tuple_match; std::vector switch_activity; std::vector tmp_visited; From f222053606d2e984920264b4e1a999956736ab40 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 6 May 2024 10:35:32 +0200 Subject: [PATCH 23/27] Adding option for removing symmetrical permutations of gates for faster mapping --- include/mockturtle/utils/tech_library.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp index a671e7a25..cb3c83e33 100644 --- a/include/mockturtle/utils/tech_library.hpp +++ b/include/mockturtle/utils/tech_library.hpp @@ -109,6 +109,9 @@ struct tech_library_params /*! \brief Loads multioutput gates in the library */ bool load_multioutput_gates{ true }; + /*! \brief Don't load symmetrical permutations of gate pins (drastically speeds-up mapping) */ + bool ignore_symmetries{ false }; + /*! 
\brief Load gates with minimum size only */ bool load_minimum_size_only{ true }; @@ -473,7 +476,7 @@ class tech_library if ( sg.root->id == it->root->id ) { /* if already in the library exit, else ignore permutations if with equal delay cost */ - if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay ) + if ( sg.polarity == it->polarity && ( _ps.ignore_symmetries || sg.tdelay == it->tdelay ) ) { to_add = false; break; @@ -534,7 +537,7 @@ class tech_library if ( sg.root->id == it->root->id ) { /* if already in the library exit, else ignore permutations if with equal delay cost */ - if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay ) + if ( sg.polarity == it->polarity && ( _ps.ignore_symmetries || sg.tdelay == it->tdelay ) ) { to_add = false; break; From fe15cd668c52dd3309ac26d1661b487052c6e17c Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 6 May 2024 10:41:37 +0200 Subject: [PATCH 24/27] Updating experiment emap --- experiments/emap.cpp | 1 + include/mockturtle/algorithms/emap.hpp | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/experiments/emap.cpp b/experiments/emap.cpp index 11d465f8b..55ced8457 100644 --- a/experiments/emap.cpp +++ b/experiments/emap.cpp @@ -65,6 +65,7 @@ int main() } tech_library_params tps; + tps.ignore_symmetries = false; // set to true to drastically speed-up mapping with minor delay increase tps.verbose = true; tech_library<9> tech_lib( gates, tps ); diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index f6454ec2b..30173aae2 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -742,6 +742,7 @@ class emap_impl unsigned has_info : 1; }; }; + public: static constexpr float epsilon = 0.0005; static constexpr uint32_t max_cut_num = 20; @@ -3710,9 +3711,9 @@ class emap_impl /* find the corresponding cut */ uint32_t cut_p = 0; - while( matches[cut_p].node_index != index ) + while ( matches[cut_p].node_index != 
index ) ++cut_p; - + assert( cut_p < matches.size() ); uint32_t cut_index = matches[cut_p].cut_index; auto& cut = multi_cut_set[cut_index][cut_p]; From bc32f0de62d8e58c87debb13d7625d17335f3484 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 6 May 2024 12:23:30 +0200 Subject: [PATCH 25/27] Bug fix delay for structural matches --- include/mockturtle/utils/struct_library.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/mockturtle/utils/struct_library.hpp b/include/mockturtle/utils/struct_library.hpp index 7787b906b..c239ca946 100644 --- a/include/mockturtle/utils/struct_library.hpp +++ b/include/mockturtle/utils/struct_library.hpp @@ -371,6 +371,12 @@ class struct_library perm, gate_pol }; + /* permute pin-to-pin delays */ + for ( uint32_t i = 0; i < gate.num_vars; ++i ) + { + sg.tdelay[i] = gate.tdelay[perm[i]]; + } + auto& v = _label_to_gate[index_rule.data]; auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) { From 8a10355505806c693dd0e996e5d54da3fcd099c4 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 6 May 2024 19:41:36 +0200 Subject: [PATCH 26/27] Fixes and data structure changes --- include/mockturtle/algorithms/emap.hpp | 42 +++++++++++--------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 17d34df26..1028d8e70 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -721,26 +721,18 @@ template const& n ) + void match_multi_add_cuts( node const& n ) { /* assume a single cut (current version) */ uint32_t index = ntk.node_to_index( n ); - multi_match_t& matches = multi_node_match[node_tuple_match[index].index][0]; + multi_match_t& matches = multi_node_match[node_tuple_match[index].index].at( 0 ); /* find the corresponding cut */ uint32_t cut_p = 0; @@ -3724,7 +3716,7 @@ class emap_impl if ( rcuts.size() == max_cut_num ) { 
match_multi_add_cuts_remove_entry( matches ); - return false; + return; } /* insert single cut variation if unique (for delay preservation) */ @@ -3739,7 +3731,7 @@ class emap_impl { rcuts.limit( rcuts.size() - 1 ); match_multi_add_cuts_remove_entry( matches ); - return false; + return; } } @@ -3762,7 +3754,7 @@ class emap_impl /* reset matches */ for ( multi_match_data const& entry : matches ) { - node_tuple_match[entry.node_index].data = 0; + node_tuple_match[entry.node_index] = { 0 }; } } @@ -5243,8 +5235,10 @@ class emap_impl multi_match_data new_data1, new_data2; new_data1.node_index = index1; new_data1.cut_index = multi_cut_set.size() - 1; + new_data1.in_tfi = false; new_data2.node_index = index2; new_data2.cut_index = multi_cut_set.size() - 1; + new_data2.in_tfi = false; multi_match_t p = { new_data1, new_data2 }; /* add cuts to the correct bucket */ @@ -5497,7 +5491,7 @@ class emap_impl if ( multi_is_in_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ), cut ) ) { /* if there is a path of length > 1 linking node 1 and 2, save as TFI node */ - uint32_t in_tfi = multi_is_in_direct_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ) ) ? 
0 : 1; + bool in_tfi = multi_is_in_direct_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ) ); for ( auto& match : field ) match[0].in_tfi = in_tfi; /* add a TFI dependency */ @@ -5565,7 +5559,7 @@ class emap_impl { /* fix cycle: remove multi-output match */ choice_ntk.foreach_choice( repr, [&]( auto const& p ) { - node_tuple_match[ntk.node_to_index( p )].data = 0; + node_tuple_match[ntk.node_to_index( p )] = { 0 }; return true; } ); choice_ntk.remove_choice( g ); From c13cd02917c0805dac157fee123a8ce50966e079 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 6 May 2024 19:51:57 +0200 Subject: [PATCH 27/27] revert changes --- include/mockturtle/algorithms/emap.hpp | 36 +++++++++++++++----------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/include/mockturtle/algorithms/emap.hpp b/include/mockturtle/algorithms/emap.hpp index 1028d8e70..2ca21c5b6 100644 --- a/include/mockturtle/algorithms/emap.hpp +++ b/include/mockturtle/algorithms/emap.hpp @@ -721,18 +721,26 @@ template 1 linking node 1 and 2, save as TFI node */ - bool in_tfi = multi_is_in_direct_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ) ); + uint32_t in_tfi = multi_is_in_direct_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ) ) ? 0 : 1; for ( auto& match : field ) match[0].in_tfi = in_tfi; /* add a TFI dependency */ @@ -5559,7 +5565,7 @@ class emap_impl { /* fix cycle: remove multi-output match */ choice_ntk.foreach_choice( repr, [&]( auto const& p ) { - node_tuple_match[ntk.node_to_index( p )] = { 0 }; + node_tuple_match[ntk.node_to_index( p )].data = 0; return true; } ); choice_ntk.remove_choice( g );