diff --git a/net_finder/core/base_types.py b/net_finder/core/base_types.py index db278d5..9c9c959 100644 --- a/net_finder/core/base_types.py +++ b/net_finder/core/base_types.py @@ -6,6 +6,10 @@ from amaranth.utils import ceil_log2 +def next_power_of_two(n: int): + return 1 << ceil_log2(n) + + def net_size(max_area: int): """Returns the width/height of the net.""" diff --git a/net_finder/core/core.py b/net_finder/core/core.py index 2b77cf9..1d6b94f 100644 --- a/net_finder/core/core.py +++ b/net_finder/core/core.py @@ -1,68 +1,839 @@ +import enum + from amaranth import * -from amaranth.lib import data +from amaranth.lib import data, stream, wiring +from amaranth.lib.fifo import SyncFIFO +from amaranth.lib.memory import Memory, ReadPort +from amaranth.lib.wiring import In, Out from amaranth.utils import ceil_log2 -from .base_types import instruction_layout +from .base_types import mapping_layout, next_power_of_two +from .main_pipeline import ( + FINDERS_PER_CORE, + MainPipeline, + Task, + instruction_ref_layout, + max_decisions_len, + max_potential_len, + run_stack_entry_layout, +) +from .memory import ChunkedMemory +from .neighbour_lookup import neighbour_lookup_layout +from .net import shard_depth +from .skip_checker import undo_lookup_layout +from .utils import pipe + -FINDERS_PER_CORE = 3 +def max_run_stack_len(max_area: int): + """ + Returns the maximum number of run instructions there can be at any given time. + """ + # The run stack's length can't ever actually reach `max_area`: if every other + # square is already filled, the last one to be added will always be a potential + # instruction since all its neighbouring squares will already be filled. + return max_area - 1 -def instruction_ref_layout(max_area: int): - """Returns the layout of an instruction reference.""" +def prefix_layout(cuboids: int, max_area: int): return data.StructLayout( { - # The index of the instruction's parent in the run stack. 
- "parent": ceil_log2(max_area), - # The index of this instruction in its parent's list of valid children. - # - # If the index is past the end of that list, it represents the last valid - # child. Then we always store the last valid child as 11, so that when - # backtracking we can immediately see 'oh this is the last one, so we need to - # move onto the next instruction'. - "child_index": 2, + "area": range(max_area + 1), + "start_mapping": mapping_layout(cuboids, max_area), + "start_mapping_index": (cuboids - 1) * ceil_log2(max_area), + # I think this can reach `max_decisions_len`: if you can get to a particular length, + # it had to end with 1 at some point, at which point you can split and increment + # the base decision. + "base_decision": range(max_decisions_len(max_area) + 1), } ) -def max_potential_len(max_area: int): - """ - Returns the maximum number of potential instructions there can be at any given - time. - """ +class FinderType(enum.Enum): + """The reason a finder is being emitted.""" - # The upper bound of how many potential instructions there can be is if every - # square on the surfaces, except for the ones set by the first instruction, has - # 4 potential instructions trying to set it: 1 from each direction. - # - # While this isn't actually possible, it's a nice clean upper bound. - return 4 * (max_area - 1) + # The state the finder's in has passed the initial test for likely having a + # solution (there's an instruction or potential instruction setting every square + # on the surfaces). + Solution = 0 + # The finder is a response to `req_split`. + Split = 1 -def max_decisions_len(max_area: int): - """Returns the maximum number of decisions there can be at any given time.""" + # The finder is a response to `req_pause`. + Pause = 2 - # There's always 1 decision for the first instruction, then the upper bound is - # that every square has 4 instructions setting it, 3 of which we decided not to - # run and the last one we did.
- return 1 + 4 * (max_area - 1) +def core_in_layout(): + return data.StructLayout( + { + # The bit of the finder being received. + "data": 1, + # Whether `data` is the last bit of this finder. + "last": 1, + } + ) -def run_stack_entry_layout(cuboids: int, max_area: int): - """Returns the layout of a run stack entry.""" +def core_out_layout(): return data.StructLayout( { - # The instruction that was run. - "instruction": instruction_layout(cuboids, max_area), - # A reference to where in the run stack this instruction originally came from. - "source": instruction_ref_layout(max_area), - # Whether this instruction's child in each direction was valid at the time this - # instruction was run. - "children": 4, - # The number of potential instructions there were at the point when it was run. - "potential_len": ceil_log2(max_potential_len(max_area) + 1), - # The index of the decision to run this instruction in the list of decisions. - "decision_index": ceil_log2(max_decisions_len(max_area)), + # The bit of the finder being sent. + "data": 1, + # Whether `data` is the last bit of this finder. + "last": 1, + # The reason this finder is being emitted (stays the same for the whole finder). 
+ "type": FinderType, } ) + + +class CoreInterface(wiring.Signature): + def __init__(self): + super().__init__( + { + "input": In(stream.Signature(core_in_layout())), + "output": Out(stream.Signature(core_out_layout())), + "req_pause": In(1), + "req_split": In(1), + "wants_finder": Out(1), + "stepping": Out(1), + "active": Out(1), + "base_decision": Out(1), + } + ) + + +class State(enum.Enum): + Clear = 0 + Receive = 1 + Run = 2 + Check = 3 + Solution = 4 + Pause = 5 + Split = 6 + + +class Core(wiring.Component): + def __init__(self, cuboids: int, max_area: int): + nl_layout = neighbour_lookup_layout(max_area) + ul_layout = undo_lookup_layout(max_area) + + super().__init__( + { + "interfaces": Out(CoreInterface()).array(FINDERS_PER_CORE), + # The ports this core should use to access the neighbour lookups. + "neighbour_lookups": In( + ReadPort.Signature( + addr_width=ceil_log2(nl_layout.depth), + shape=nl_layout.shape, + ) + ).array(cuboids), + # The ports this core should use to access the undo lookups. + "undo_lookups": In( + ReadPort.Signature( + addr_width=ceil_log2(ul_layout.depth), + shape=ul_layout.shape, + ) + ).array(cuboids - 1), + # The state that the finder in WB stage is in. + # + # It doesn't really matter which stage this comes from, though: the point is + # just to find out what state most of the core's time is being spent in, and all + # the finders will pass through WB stage once per iteration. + # + # I chose WB stage because its state comes out of a register, so we don't have + # to worry about critical paths extending into the core. 
+ "state": Out(State), + } + ) + + self._cuboids = cuboids + self._max_area = max_area + + def elaborate(self, platform) -> Module: + m = Module() + + run_stack = Memory( + shape=run_stack_entry_layout(self._cuboids, self._max_area), + depth=FINDERS_PER_CORE + * next_power_of_two(max_run_stack_len(self._max_area)), + init=[], + ) + m.submodules.run_stack = run_stack + + potential = ChunkedMemory( + shape=instruction_ref_layout(self._max_area), + depth=max_potential_len(self._max_area), + chunks=FINDERS_PER_CORE, + ) + m.submodules.potential = potential + potential_read, potential_write = potential.sdp_port(read_domain="comb") + + decisions = ChunkedMemory( + shape=1, depth=max_decisions_len(self._max_area), chunks=FINDERS_PER_CORE + ) + m.submodules.decisions = decisions + decisions_read, decisions_write = decisions.sdp_port() + + potential_surface_ports = [] + for i in range(self._cuboids): + surface = ChunkedMemory( + shape=1, depth=self._max_area, chunks=FINDERS_PER_CORE + ) + m.submodules[f"potential_surface_{i}"] = surface + potential_surface_ports.append(surface.sdp_port()) + + in_fifos = [] + out_fifos = [] + + for i in range(FINDERS_PER_CORE): + in_fifo = SyncFIFO(width=2, depth=1) + m.submodules[f"in_fifo_{i}"] = in_fifo + + wiring.connect( + m, wiring.flipped(self.interfaces[i].input), in_fifo.w_stream + ) + in_fifos.append(in_fifo) + + out_fifo = SyncFIFO(width=4, depth=1) + m.submodules[f"out_fifo_{i}"] = out_fifo + + wiring.connect( + m, out_fifo.r_stream, wiring.flipped(self.interfaces[i].output) + ) + out_fifos.append(out_fifo) + + wb_state = Signal(State) + # The 'normal' instruction that WB stage wanted to run next: so, not the first + # instruction or a potential instruction. + # + # `wb_target.parent` is allowed to be past the end of the run stack: that means + # that whenever this ends up getting processed, it'll actually result in a + # backtrack instead of this being run. 
+ wb_target = Signal(instruction_ref_layout(self._max_area)) + # Whether WB stage actually ended up processing `wb_target`. + wb_target_processed = Signal(1) + # Like `wb_target`, except that when `wb_target.parent` is past the end of the + # run stack, this is the instruction that was backtracked. + wb_inst_ref = Signal(instruction_ref_layout(self._max_area)) + wb_potential_index = Signal(range(max_potential_len(self._max_area))) + wb_decision_index = Signal(range(max_decisions_len(self._max_area))) + # The index we're clearing: used both for Clear state and clearing the potential + # surfaces in the background. + wb_clear_index = Signal(range(shard_depth(self._max_area))) + wb_prefix_done = Signal(1) + + # The value WB stage read from `decisions`. + wb_read_decision = Signal(1) + # If we're in Check state, whether or not we need to wait for the potential + # surfaces to finish being cleared before proceeding. + wb_clearing = Signal(1) + # If we're in Split state, whether we've already finished sending the finder and + # are now just searching for a new `base_decision`. + wb_finder_done = Signal(1) + wb_received = Signal(1) + wb_sent = Signal(1) + wb_in = Signal(core_in_layout()) + + wb_next_prefix = Signal(prefix_layout(self._cuboids, self._max_area)) + wb_next_prefix_bits_left = Signal(range(wb_next_prefix.shape().size + 1)) + wb_next_run_stack_len = Signal(range(max_run_stack_len(self._max_area) + 1)) + wb_next_potential_len = Signal(range(max_potential_len(self._max_area) + 1)) + wb_next_decisions_len = Signal(range(max_decisions_len(self._max_area) + 1)) + wb_next_potential_areas = [ + Signal(range(self._max_area + 1)) for i in range(self._cuboids) + ] + + # Where we are in the cycle of finders moving through different pipeline stages. + # + # More concretely, the pipeline stage finder 0 is in. 
+        finder_offset = Signal(range(FINDERS_PER_CORE))
+        with m.If(finder_offset == FINDERS_PER_CORE - 1):
+            m.d.sync += finder_offset.eq(0)
+        with m.Else():
+            m.d.sync += finder_offset.eq(finder_offset + 1)
+
+        # IF
+        #
+        # I considered making this pipeline stage run at the same time as WB stage, to
+        # reduce the amount of finders per core and hence resources used; but doing it
+        # that way would require implementing manual forwarding of potential
+        # instructions being written in WB stage, and while that wouldn't be
+        # particularly hard, merging the two pipeline stages together would be premature
+        # optimisation and so we shouldn't do it if it'll make the code worse.
+        #
+        # In addition, a merged WB/IF stage could very well end up being the critical
+        # path of the design, so it's not exactly as though merging them would be a
+        # guaranteed win - we may well have ended up having to split them up later
+        # anyway.
+
+        if_finder = (FINDERS_PER_CORE - finder_offset) % FINDERS_PER_CORE
+
+        if_prev_state = pipe(m, wb_state)
+        if_prev_target = pipe(m, wb_target)
+        if_prev_target_processed = pipe(m, wb_target_processed)
+        if_prev_inst_ref = pipe(m, wb_inst_ref)
+        if_prev_potential_index = pipe(m, wb_potential_index)
+        if_prev_decision_index = pipe(m, wb_decision_index)
+        if_prev_clear_index = pipe(m, wb_clear_index)
+        if_prev_prefix_done = pipe(m, wb_prefix_done)
+        if_prev_read_decision = pipe(m, wb_read_decision)
+        if_prev_clearing = pipe(m, wb_clearing)
+        if_prev_finder_done = pipe(m, wb_finder_done)
+        if_prev_received = pipe(m, wb_received)
+        if_prev_sent = pipe(m, wb_sent)
+        if_prev_in = pipe(m, wb_in)
+
+        if_initial_prefix = pipe(m, wb_next_prefix)
+        if_prefix_bits_left = pipe(
+            m, wb_next_prefix_bits_left, init=wb_next_prefix.shape().size  # `if_prefix` is only bound further down
+        )
+        if_run_stack_len = pipe(m, wb_next_run_stack_len)
+        if_potential_len = pipe(m, wb_next_potential_len)
+        if_decisions_len = pipe(m, wb_next_decisions_len)
+        if_potential_areas = [
+            pipe(m, wb_next_potential_areas[i]) for i in
range(self._cuboids) + ] + + if_req_pause = Array(self.interfaces)[if_finder].req_pause + if_req_split = Array(self.interfaces)[if_finder].req_split + + if_prefix_done = if_prefix_bits_left == 0 + + if_next_target = Signal(instruction_ref_layout(self._max_area)) + # TODO: this is the same as `if_next_inst + 1` (if we switch around the field + # order). I think this is clearer, but switch to that if it ends up improving + # performance. + with m.If(if_prev_inst_ref.child_index == 3): + m.d.comb += if_next_target.parent.eq(if_prev_inst_ref.parent + 1) + m.d.comb += if_next_target.child_index.eq(0) + with m.Else(): + m.d.comb += if_next_target.parent.eq(if_prev_inst_ref.parent) + m.d.comb += if_next_target.child_index.eq(if_prev_inst_ref.child_index + 1) + + # If WB stage processed its target, we can move on to the next one, otherwise we + # need to keep trying to process `if_prev_target`. + if_target = data.View( + instruction_ref_layout(self._max_area), + Mux(if_prev_target_processed, if_next_target, if_prev_target), + ) + + if_backtrack = (if_run_stack_len != 0) & (if_target.parent == if_run_stack_len) + + if_potential_index = Mux( + if_prev_state == State.Check, if_prev_potential_index + ~if_prev_clearing, 0 + ) + + decision_sent = if_prev_sent & if_prev_prefix_done + # This needs to be able to go up to `max_decisions_len` so that we can use + # `decision_index == decisions_len` as a check for whether we're done + # transmitting. + if_decision_index = Signal(range(max_decisions_len(self._max_area) + 1)) + with m.Switch(if_prev_state): + with m.Case(State.Solution, State.Pause): + m.d.comb += if_decision_index.eq(if_prev_decision_index + decision_sent) + with m.Case(State.Split): + m.d.comb += if_decision_index.eq( + if_prev_decision_index + (decision_sent | if_prev_finder_done) + ) + with m.Default(): + m.d.comb += if_decision_index.eq(0) + + # We only get to Clear state via. 
resetting, which will reset this to 0 anyway: + # so the only time we actually need to reset it is when exiting Check state so + # that we don't waste time clearing addresses the potential surfaces don't have. + if_clear_index = Mux(if_prev_state == State.Check, 0, if_prev_clear_index + 1) + + if_state = Signal(State) + if_prefix = Signal.like(if_initial_prefix) + m.d.comb += if_prefix.eq(if_initial_prefix) + + with m.Switch(if_prev_state): + with m.Case(State.Clear): + with m.If(if_prev_clear_index == shard_depth(self._max_area) - 1): + m.d.comb += if_state.eq(State.Receive) + with m.Else(): + m.d.comb += if_state.eq(State.Clear) + with m.Case(State.Receive): + with m.If(if_prev_received & if_prev_in.last): + m.d.comb += if_state.eq(State.Run) + with m.Else(): + m.d.comb += if_state.eq(State.Receive) + with m.Case(State.Run): + with m.If(if_req_pause): + m.d.comb += if_state.eq(State.Pause) + with m.Elif( + if_req_split & (if_initial_prefix.base_decision < if_decisions_len) + ): + m.d.comb += if_state.eq(State.Split) + # Set `base_decision` to what the base decision of the finder we're sending will + # be (1 past the end of its decisions), so that it gets sent out along with the + # rest of the prefix. + # + # However, it might not be our new base decision, since it might be a 0: we'll + # fix it up once we find the first 1 past our old base decision. + m.d.comb += if_prefix.base_decision.eq( + if_initial_prefix.base_decision + 1 + ) + with m.Elif( + if_backtrack + & (if_run_stack_len + if_potential_len >= if_initial_prefix.area) + ): + # There are enough run + potential instructions that we might have a solution, + # so check for that before we backtrack. + m.d.comb += if_state.eq(State.Check) + with m.Else(): + # Note that this also covers the case where we backtrack immediately. 
+ m.d.comb += if_state.eq(State.Run) + with m.Case(State.Check): + all_squares_filled = Cat( + if_run_stack_len + if_potential_areas[i] == if_initial_prefix.area + for i in range(self._cuboids) + ).all() + with m.If(all_squares_filled): + # All the squares are filled, which means we have a potential solution! + m.d.comb += if_state.eq(State.Solution) + # Note: although `wb_potential_index` can only go up to `max_potential_len - 1`, + # this can go all the way up to `max_potential_len` thanks to Amaranth inferring + # a shape big enough to fit all possible values of `if_prev_potential_index + + # ~if_prev_clearing`. + with m.Elif(if_potential_index == if_potential_len): + # We've run all the potential instructions and not all the squares are filled, + # so this isn't a solution. Time to backtrack. + m.d.comb += if_state.eq(State.Run) + with m.Else(): + m.d.comb += if_state.eq(State.Check) + with m.Case(State.Solution): + with m.If(if_prefix_done & (if_decision_index == if_decisions_len)): + m.d.comb += if_state.eq(State.Run) + with m.Else(): + m.d.comb += if_state.eq(State.Solution) + with m.Case(State.Pause): + # We transition out of `State.Pause` via. `local_reset`, rather than via. a + # regular state transition. + m.d.comb += if_state.eq(State.Pause) + with m.Case(State.Split): + # We don't actually stop once the finder is sent like you might expect: since + # `base_decision` can't point to a 0, we have to keep going until we find a 1 to + # set it to. 
+ with m.If(if_prev_finder_done & if_prev_read_decision): + m.d.comb += if_state.eq(State.Run) + m.d.comb += if_prefix.base_decision.eq(if_prev_decision_index) + with m.Else(): + m.d.comb += if_state.eq(State.Split) + + m.d.comb += potential_read.chunk.eq(if_finder) + m.d.comb += potential_read.addr.eq(if_potential_index) + + if_run_stack_index = Signal(range(max_run_stack_len(self._max_area))) + with m.If(if_state == State.Check): + m.d.comb += if_run_stack_index.eq(potential_read.data.parent) + with m.Elif(if_backtrack): + m.d.comb += if_run_stack_index.eq(if_run_stack_len - 1) + with m.Else(): + m.d.comb += if_run_stack_index.eq(if_target.parent) + + if_child_index = Mux( + if_state == State.Check, + potential_read.data.child_index, + if_target.child_index, + ) + + if_in_fifo = Array(in_fifos)[if_finder] + if_in_rdy = if_in_fifo.r_rdy + if_in = data.View(core_in_layout(), if_in_fifo.r_data) + + if_task = Signal(Task) + with m.Switch(if_state): + with m.Case(State.Clear): + m.d.comb += if_task.eq(Task.Clear) + with m.Case(State.Receive): + m.d.comb += if_task.eq( + # If we've received a decision of 1, we need to run the next valid instruction + # to fulfil it; otherwise, we want to not run the next valid instruction, but we + # still need to check whether it was valid so that we know whether we can move + # on to the next decision. + Mux( + if_prefix_done & if_in_rdy & if_in.data, + Task.Advance, + # This also serves as a no-op in the case where there isn't another bit + # available yet. + Task.Check, + ) + ) + with m.Case(State.Run): + m.d.comb += if_task.eq(Mux(if_backtrack, Task.Backtrack, Task.Advance)) + with m.Case(State.Check): + m.d.comb += if_task.eq(Task.Check) + with m.Default(): + # In states that don't need the main pipeline, give it the Check task, since it + # doesn't have any side effects. 
+ m.d.comb += if_task.eq(Task.Check) + + run_stack_read = run_stack.read_port() + m.d.comb += run_stack_read.addr.eq(Cat(if_run_stack_index, if_finder)) + + # NL + + nl_finder = (FINDERS_PER_CORE + 1 - finder_offset) % FINDERS_PER_CORE + + nl_initial_state = pipe(m, if_state) + nl_initial_prefix_bits_left = pipe( + m, if_prefix_bits_left, init=if_prefix.shape().size + ) + nl_initial_decisions_len = pipe(m, if_decisions_len) + nl_initial_task = pipe(m, if_task) + nl_prefix = pipe(m, if_prefix) + nl_decision_index = pipe(m, if_decision_index) + nl_entry = run_stack_read.data + local_reset = ( + (nl_initial_state == State.Pause) + & (nl_initial_prefix_bits_left == 0) + & (nl_decision_index == nl_initial_decisions_len) + ) | ( + (nl_initial_task == Task.Backtrack) + & (nl_entry.decision_index < nl_prefix.base_decision) + ) + + nl_state = Mux(local_reset, State.Clear, nl_initial_state) + nl_prefix_bits_left = Mux( + local_reset, nl_prefix.shape().size, nl_initial_prefix_bits_left + ) + nl_run_stack_len = Mux(local_reset, 0, pipe(m, if_run_stack_len)) + nl_potential_len = Mux(local_reset, 0, pipe(m, if_potential_len)) + nl_decisions_len = Mux(local_reset, 0, nl_initial_decisions_len) + nl_potential_areas = [ + Mux(local_reset, 0, pipe(m, if_potential_areas[i])) + for i in range(self._cuboids) + ] + nl_prefix_done = nl_prefix_bits_left == 0 + nl_target = pipe(m, if_target) + nl_potential_index = pipe(m, if_potential_index) + nl_clear_index = Mux(local_reset, 0, pipe(m, if_clear_index)) + nl_child_index = pipe(m, if_child_index) + nl_in_rdy = pipe(m, if_in_rdy) + nl_in = pipe(m, if_in) + nl_task = Mux(local_reset, Task.Clear, nl_initial_task) + + main_pipeline = MainPipeline(self._cuboids, self._max_area) + m.submodules.main_pipeline = main_pipeline + + m.d.comb += main_pipeline.finder.eq(nl_finder) + m.d.comb += main_pipeline.start_mapping_index.eq(nl_prefix.start_mapping_index) + m.d.comb += main_pipeline.task.eq(nl_task) + m.d.comb += main_pipeline.entry.eq(nl_entry) + 
with m.If(nl_run_stack_len == 0): + m.d.comb += main_pipeline.entry.instruction.pos.x.eq(0) + m.d.comb += main_pipeline.entry.instruction.pos.y.eq(0) + m.d.comb += main_pipeline.entry.instruction.mapping.eq( + nl_prefix.start_mapping + ) + m.d.comb += main_pipeline.child.eq( + (nl_task != Task.Backtrack) & (nl_run_stack_len != 0) + ) + m.d.comb += main_pipeline.child_index.eq(nl_child_index) + m.d.comb += main_pipeline.clear_index.eq(nl_clear_index) + for i in range(self._cuboids): + wiring.connect( + m, + main_pipeline.neighbour_lookups[i], + wiring.flipped(self.neighbour_lookups[i]), + ) + for i in range(self._cuboids - 1): + wiring.connect( + m, + main_pipeline.undo_lookups[i], + wiring.flipped(self.undo_lookups[i]), + ) + + # VC + + vc_finder = (FINDERS_PER_CORE + 2 - finder_offset) % FINDERS_PER_CORE + + vc_state = pipe(m, nl_state) + vc_prefix = pipe(m, nl_prefix) + vc_prefix_bits_left = pipe(m, nl_prefix_bits_left, init=vc_prefix.shape().size) + vc_run_stack_len = pipe(m, nl_run_stack_len) + vc_potential_len = pipe(m, nl_potential_len) + vc_decisions_len = pipe(m, nl_decisions_len) + vc_potential_areas = [ + pipe(m, nl_potential_areas[i]) for i in range(self._cuboids) + ] + vc_prefix_done = pipe(m, nl_prefix_done) + vc_target = pipe(m, nl_target) + vc_potential_index = pipe(m, nl_potential_index) + vc_decision_index = pipe(m, nl_decision_index) + vc_clear_index = pipe(m, nl_clear_index) + vc_in_rdy = pipe(m, nl_in_rdy) + vc_in = pipe(m, nl_in) + vc_task = pipe(m, nl_task) + vc_entry = pipe(m, nl_entry) + + vc_instruction = main_pipeline.instruction + + for i in range(self._cuboids): + read_port, _ = potential_surface_ports[i] + m.d.comb += read_port.chunk.eq(vc_finder) + m.d.comb += read_port.addr.eq(vc_instruction.mapping[i]) + + m.d.comb += decisions_read.chunk.eq(vc_finder) + m.d.comb += decisions_read.addr.eq(vc_decision_index) + + # WB + + wb_finder = FINDERS_PER_CORE - 1 - finder_offset + + m.d.sync += wb_state.eq(vc_state) + wb_prefix = pipe(m, 
vc_prefix) + wb_prefix_bits_left = pipe(m, vc_prefix_bits_left, init=wb_prefix.shape().size) + wb_run_stack_len = pipe(m, vc_run_stack_len) + wb_potential_len = pipe(m, vc_potential_len) + wb_decisions_len = pipe(m, vc_decisions_len) + wb_potential_areas = [ + pipe(m, vc_potential_areas[i]) for i in range(self._cuboids) + ] + m.d.sync += wb_prefix_done.eq(vc_prefix_done) + m.d.sync += wb_target.eq(vc_target) + m.d.sync += wb_potential_index.eq(vc_potential_index) + m.d.sync += wb_decision_index.eq(vc_decision_index) + m.d.sync += wb_clear_index.eq(vc_clear_index) + wb_in_rdy = pipe(m, vc_in_rdy) + m.d.sync += wb_in.eq(vc_in) + wb_task = pipe(m, vc_task) + wb_entry = pipe(m, vc_entry) + wb_instruction = pipe(m, vc_instruction) + + wb_instruction_valid = main_pipeline.instruction_valid + wb_neighbours_valid = main_pipeline.neighbours_valid + + m.d.comb += wb_read_decision.eq(decisions_read.data) + + wb_run = ( + (wb_task == Task.Advance) & wb_instruction_valid & wb_neighbours_valid.any() + ) + wb_potential = ( + (wb_task == Task.Advance) + & wb_instruction_valid + & ~wb_neighbours_valid.any() + ) + + m.d.comb += wb_received.eq( + (wb_state == State.Receive) + & wb_in_rdy + # If the instruction wasn't valid, whether or not to run it wasn't a decision. 
+ & ~(wb_prefix_done & (~wb_instruction_valid | ~wb_neighbours_valid.any())) + ) + for i in range(FINDERS_PER_CORE): + m.d.comb += in_fifos[i].r_en.eq((wb_finder == i) & wb_received) + + m.d.comb += wb_finder_done.eq( + wb_prefix_done & (wb_decision_index >= wb_prefix.base_decision) + ) + + wb_out_fifo = Array(out_fifos)[wb_finder] + m.d.comb += wb_sent.eq( + ( + (wb_state == State.Solution) + | (wb_state == State.Pause) + | ((wb_state == State.Split) & ~wb_finder_done) + ) + & wb_out_fifo.w_rdy + ) + + split_reached = (wb_state == State.Split) & ( + wb_decision_index == wb_prefix.base_decision - 1 + ) + wb_out = Signal(core_out_layout()) + m.d.comb += wb_out.data.eq( + Mux( + wb_prefix_done, + wb_read_decision & ~split_reached, + wb_prefix.as_value()[-1], + ) + ) + m.d.comb += wb_out.last.eq( + Mux( + wb_prefix_done, + (wb_decision_index == wb_decisions_len - 1) | split_reached, + (wb_decisions_len == 0) & (wb_prefix_bits_left == 1), + ) + ) + with m.Switch(wb_state): + with m.Case(State.Solution): + m.d.comb += wb_out.type.eq(FinderType.Solution) + with m.Case(State.Pause): + m.d.comb += wb_out.type.eq(FinderType.Pause) + with m.Case(State.Split): + m.d.comb += wb_out.type.eq(FinderType.Split) + # It doesn't really matter what this is in other states, leave it as 0. 
+ + for i in range(FINDERS_PER_CORE): + m.d.comb += out_fifos[i].w_data.eq(wb_out) + m.d.comb += out_fifos[i].w_en.eq((wb_finder == i) & wb_sent) + + m.d.comb += self.interfaces[i].stepping.eq( + (wb_finder == i) & (wb_run | (wb_task == Task.Backtrack)) + ) + + last_child = wb_target.child_index == sum(wb_entry.children) - 1 + normalised_target = Signal.like(wb_target) + m.d.comb += normalised_target.parent.eq(wb_target.parent) + m.d.comb += normalised_target.child_index.eq( + Mux(last_child, 3, wb_target.child_index) + ) + + run_stack_write = run_stack.write_port() + m.d.comb += run_stack_write.addr.eq(Cat(wb_run_stack_len, wb_finder)) + m.d.comb += run_stack_write.data.instruction.eq(wb_instruction) + m.d.comb += run_stack_write.data.source.eq(normalised_target) + m.d.comb += run_stack_write.data.children.eq(wb_neighbours_valid) + m.d.comb += run_stack_write.data.potential_len.eq(wb_potential_len) + m.d.comb += run_stack_write.data.decision_index.eq(wb_decisions_len) + m.d.comb += run_stack_write.en.eq(wb_run) + + m.d.comb += potential_write.chunk.eq(wb_finder) + m.d.comb += potential_write.addr.eq(wb_potential_len) + m.d.comb += potential_write.data.eq(wb_target) + m.d.comb += potential_write.en.eq(wb_potential) + + m.d.comb += decisions_write.chunk.eq(wb_finder) + m.d.comb += decisions_write.addr.eq( + Mux(wb_task == Task.Backtrack, wb_entry.decision_index, wb_decisions_len) + ) + m.d.comb += decisions_write.data.eq( + Mux(wb_state == State.Receive, wb_in.data, wb_task != Task.Backtrack) + ) + m.d.comb += decisions_write.en.eq( + (wb_received & wb_prefix_done) | wb_run | (wb_task == Task.Backtrack) + ) + + # The `wb_potential_index == 0` is necessary because otherwise Check state would + # freeze up as soon as it ran the first potential instruction and the potential + # surfaces weren't empty anymore. 
+ m.d.comb += wb_clearing.eq( + (wb_potential_index == 0) + & Cat(wb_potential_areas[i] != 0 for i in range(self._cuboids)).any() + ) + for i in range(self._cuboids): + _, write_port = potential_surface_ports[i] + m.d.comb += write_port.chunk.eq(wb_finder) + m.d.comb += write_port.addr.eq( + Mux( + wb_state == State.Check, + wb_instruction.mapping[i].square, + wb_clear_index, + ) + ) + m.d.comb += write_port.data.eq(wb_state == State.Check) + m.d.comb += write_port.en.eq( + Mux( + wb_state == State.Check, + wb_instruction_valid, + wb_clear_index < self._max_area, + ) + ) + + shift_prefix = (wb_received | wb_sent) & ~wb_prefix_done + prefix_in = Mux(wb_state == State.Receive, wb_in.data, wb_prefix.as_value()[-1]) + m.d.comb += wb_next_prefix.eq( + Mux(shift_prefix, Cat(prefix_in, wb_prefix.as_value()[:-1]), wb_prefix) + ) + m.d.comb += wb_next_prefix_bits_left.eq( + Mux( + (wb_state == State.Receive) + | (wb_state == State.Solution) + | (wb_state == State.Pause) + | (wb_state == State.Split), + wb_prefix_bits_left - shift_prefix, + wb_prefix.shape().size, + ) + ) + + with m.If( + (wb_task == Task.Backtrack) + & (wb_entry.decision_index == wb_prefix.base_decision) + ): + m.d.comb += wb_next_prefix.base_decision.eq(wb_prefix.base_decision + 1) + + with m.If(wb_task == Task.Backtrack): + m.d.comb += wb_next_run_stack_len.eq(wb_run_stack_len - 1) + m.d.comb += wb_next_potential_len.eq(wb_entry.potential_len) + with m.Else(): + m.d.comb += wb_next_run_stack_len.eq(wb_run_stack_len + wb_run) + m.d.comb += wb_next_potential_len.eq(wb_potential_len + wb_potential) + + with m.Switch(wb_state): + with m.Case(State.Receive): + m.d.comb += wb_next_decisions_len.eq( + wb_decisions_len + (wb_received & wb_prefix_done) + ) + with m.Case(State.Run): + with m.If(wb_task == Task.Backtrack): + m.d.comb += wb_next_decisions_len.eq(wb_entry.decision_index + 1) + with m.Else(): + m.d.comb += wb_next_decisions_len.eq(wb_decisions_len + wb_run) + with m.Default(): + m.d.comb += 
wb_next_decisions_len.eq(wb_decisions_len)
+
+        for i in range(self._cuboids):
+            read_port, write_port = potential_surface_ports[i]
+            with m.If(wb_state == State.Clear):
+                m.d.comb += wb_next_potential_areas[i].eq(wb_potential_areas[i])  # `.eq()` only takes effect once added to a domain
+            with m.Elif(write_port.en & (read_port.data == 0) & (write_port.data == 1)):
+                m.d.comb += wb_next_potential_areas[i].eq(wb_potential_areas[i] + 1)
+            with m.Elif(write_port.en & (read_port.data == 1) & (write_port.data == 0)):
+                m.d.comb += wb_next_potential_areas[i].eq(wb_potential_areas[i] - 1)
+            with m.Else():
+                m.d.comb += wb_next_potential_areas[i].eq(wb_potential_areas[i])
+
+        m.d.comb += wb_target_processed.eq(
+            (
+                ((wb_state == State.Receive) & wb_prefix_done & wb_in_rdy)
+                | (wb_state == State.Run)
+            )
+            & (wb_run_stack_len != 0)
+        )
+
+        # Arguably we should consider Check state here too, but this is only used for
+        # computing the next target anyway so it doesn't really matter.
+        #
+        # We use `normalised_target` here so that we can always find the next target by
+        # just adding 1 to `child_index`.
+        m.d.comb += wb_inst_ref.eq(
+            Mux(wb_task == Task.Backtrack, wb_entry.source, normalised_target)
+        )
+
+        for i in range(FINDERS_PER_CORE):
+            state = Signal(State)
+            prefix = Signal.like(if_prefix)
+            prefix_bits_left = Signal.like(if_prefix_bits_left)
+            with m.Switch((finder_offset + i) % FINDERS_PER_CORE):
+                with m.Case(0):
+                    m.d.comb += state.eq(if_state)
+                    m.d.comb += prefix.eq(if_prefix)
+                    m.d.comb += prefix_bits_left.eq(if_prefix_bits_left)
+                with m.Case(1):
+                    m.d.comb += state.eq(nl_state)
+                    m.d.comb += prefix.eq(nl_prefix)
+                    m.d.comb += prefix_bits_left.eq(nl_prefix_bits_left)
+                with m.Case(2):
+                    m.d.comb += state.eq(vc_state)
+                    m.d.comb += prefix.eq(vc_prefix)
+                    m.d.comb += prefix_bits_left.eq(vc_prefix_bits_left)
+                with m.Case(3):
+                    m.d.comb += state.eq(wb_state)
+                    m.d.comb += prefix.eq(wb_prefix)
+                    m.d.comb += prefix_bits_left.eq(wb_prefix_bits_left)
+
+            m.d.comb += self.interfaces[i].wants_finder.eq(
+                (state == State.Receive) & (prefix_bits_left == prefix.shape().size)  # parenthesised: `&` binds tighter than `==`
+            )
+            m.d.comb += self.interfaces[i].active.eq(
+                (state != State.Clear) & (state != State.Receive)
+            )
+            m.d.comb += self.interfaces[i].base_decision.eq(prefix.base_decision)  # NOTE(review): port is declared `Out(1)` but this field looks wider - confirm
+
+        m.d.comb += self.state.eq(wb_state)
+
+        return m
diff --git a/net_finder/core/main_pipeline.py b/net_finder/core/main_pipeline.py index 3acb196..7a30bef 100644 --- a/net_finder/core/main_pipeline.py +++ b/net_finder/core/main_pipeline.py @@ -1,7 +1,7 @@ import enum from amaranth import * -from amaranth.lib import wiring +from amaranth.lib import data, wiring from amaranth.lib.memory import ReadPort from amaranth.lib.wiring import In, Out from amaranth.utils import ceil_log2 @@ -10,12 +10,92 @@ from net_finder.core.net import Net, shard_depth from .base_types import instruction_layout, net_size -from .core import FINDERS_PER_CORE, run_stack_entry_layout from .memory import ChunkedMemory from .neighbour_lookup import neighbour_lookup_layout from .skip_checker import SkipChecker, undo_lookup_layout from .utils
import pipe +FINDERS_PER_CORE = 4 + + +def instruction_ref_layout(max_area: int): + """Returns the layout of an instruction reference.""" + + return data.StructLayout( + { + # The index of the instruction's parent in the run stack. + "parent": ceil_log2(max_area), + # The index of this instruction in its parent's list of valid children. + # + # If the index is past the end of that list, it represents the last valid + # child. Then we always store the last valid child as 11, so that when + # backtracking we can immediately see 'oh this is the last one, so we need to + # move onto the next instruction'. + "child_index": 2, + } + ) + + +def max_potential_len(max_area: int): + """ + Returns the maximum number of potential instructions there can be at any given + time. + """ + + # The upper bound of how many potential instructions there can be is if every + # square on the surfaces, except for the ones set by the first instruction, has + # 4 potential instructions trying to set it: 1 from each direction. + # + # While this isn't actually possible, it's a nice clean upper bound. + # + # TODO: I think we can reduce this to 4 + 2 * (max_run_stack_len - 1), since: + # - The first instruction can produce at most 4 potential instructions. + # - Each instruction after that: + # - Reduces the max. potential instructions by one (since we'd previously + # pessimistically assumed it was a potential instruction, but now clearly it + # isn't because we've run it) + # - Increases the max. potential instructions by 3. + # - So in total, it increases the maximum by 2. + return 4 * (max_area - 1) + + +def max_decisions_len(max_area: int): + """Returns the maximum number of decisions there can be at any given time.""" + + # There's always 1 decision for the first instruction, then the upper bound is + # that every square has 4 instructions setting it, 3 of which we decided not to + # run and the last one we did. + # + # TODO: smaller upper bound: + # + # Say you have a list of decisions. 
+ # + # If it's of maximal length, it should have max_run_stack_len 1s. + # + # The first 1 produces at most 4 instructions, and the rest produce at most 3: + # so then the maximum number of decisions is 4 + 3 * (max_run_stack_len - 1). + return 1 + 4 * (max_area - 1) + + +def run_stack_entry_layout(cuboids: int, max_area: int): + """Returns the layout of a run stack entry.""" + + return data.StructLayout( + { + # The instruction that was run. + "instruction": instruction_layout(cuboids, max_area), + # A reference to where in the run stack this instruction originally came from. + "source": instruction_ref_layout(max_area), + # Whether this instruction's child in each direction was valid at the time this + # instruction was run. + "children": 4, + # The number of potential instructions there were at the point when it was run. + "potential_len": ceil_log2(max_potential_len(max_area) + 1), + # The index of the decision to run this instruction in the list of decisions. + "decision_index": ceil_log2(max_decisions_len(max_area)), + } + ) + def child_index_to_direction(children: int, child_index: int) -> int | None: """ @@ -99,6 +179,15 @@ def __init__(self, cuboids: int, max_area: int): "task": In(Task), # The run stack entry we're operating on. "entry": In(run_stack_entry_layout(cuboids, max_area)), + # Whether the instruction to advance/check is a child of `entry`, not + # `entry.instruction` itself. + # + # This should always be 0 when backtracking. + # + # This is almost always 1 when advancing/checking: the only exception is when + # running the first instruction, since it gets handed to us from outside and + # doesn't have a parent. + "child": In(1), # The index of the child of `self.entry` we're operating on (if we're advancing or # checking). "child_index": In(2), @@ -118,10 +207,14 @@ def __init__(self, cuboids: int, max_area: int): shape=ul_layout.shape, ) ).array(cuboids - 1), + # Signals coming from VC stage. 
+ # # The instruction the pipeline ended up operating on - so, the neighbour of # `entry` when advancing/checking, and `entry.instruction` itself when # backtracking. "instruction": Out(instruction_layout(cuboids, max_area)), + # Signals coming from WB stage. + # # Whether or not `instruction` was valid. "instruction_valid": Out(1), # Whether or not the neighbours of `instruction` in each direction were valid. @@ -157,6 +250,7 @@ def elaborate(self, platform) -> Module: nl_start_mapping_index = self.start_mapping_index nl_task = self.task nl_entry = self.entry + nl_child = self.child nl_child_index = self.child_index nl_clear_index = self.clear_index @@ -186,7 +280,7 @@ def elaborate(self, platform) -> Module: wiring.flipped(self.neighbour_lookups[i]), ) m.d.comb += neighbour_lookup.input.eq(nl_entry.instruction) - m.d.comb += neighbour_lookup.t_mode.eq(nl_task != Task.Backtrack) + m.d.comb += neighbour_lookup.t_mode.eq(nl_child) m.d.comb += neighbour_lookup.direction.eq(nl_child_direction) # Valid check (VC) stage @@ -235,12 +329,12 @@ def elaborate(self, platform) -> Module: m.d.comb += skip_checker.fixed_family.eq(vc_fixed_family) m.d.comb += skip_checker.transform.eq(vc_transform) + m.d.comb += self.instruction.eq(vc_middle) + # Write back (WB) stage # # This is the stage where we write back any changes that were made to the net # and surfaces. - # - # This occurs at the same time as the outer pipeline's IF stage. 
wb_finder = pipe(m, vc_finder) wb_task = pipe(m, vc_task) diff --git a/net_finder/core/memory.py b/net_finder/core/memory.py index 57f21eb..9fcadcb 100644 --- a/net_finder/core/memory.py +++ b/net_finder/core/memory.py @@ -1,3 +1,5 @@ +from itertools import chain + from amaranth import * from amaranth.hdl import ShapeLike, ValueLike from amaranth.lib import wiring @@ -56,12 +58,13 @@ def __init__(self, *, shape: ShapeLike, depth: int, chunks: int): self._depth = depth self._chunks = chunks - self._read_ports: list[PureInterface] = [] - self._sdp_ports: list[tuple[PureInterface, PureInterface]] = [] + self._read_ports: list[tuple[PureInterface, str]] = [] + self._write_ports: list[PureInterface] = [] + self._sdp_ports: list[tuple[tuple[PureInterface, str], PureInterface]] = [] super().__init__({}) - def read_port(self) -> PureInterface: + def read_port(self, domain="sync") -> PureInterface: # Return a disconnected interface, which we then add to an array and hook up # during `elaborate`. port = ChunkedReadPortSignature( @@ -70,11 +73,24 @@ def read_port(self) -> PureInterface: shape=self._shape, ).create() - self._read_ports.append(port) + self._read_ports.append((port, domain)) return port - def sdp_port(self) -> tuple[PureInterface, PureInterface]: + def write_port(self) -> PureInterface: + # Return a disconnected interface, which we then add to an array and hook up + # during `elaborate`. + port = ChunkedWritePortSignature( + chunk_width=ceil_log2(self._chunks), + addr_width=ceil_log2(self._depth), + shape=self._shape, + ).create() + + self._write_ports.append(port) + + return port + + def sdp_port(self, read_domain="sync") -> tuple[PureInterface, PureInterface]: # Return disconnected interfaces, which we then add to an array and hook up # during `elaborate`. 
read_port = ChunkedReadPortSignature( @@ -88,7 +104,7 @@ def sdp_port(self) -> tuple[PureInterface, PureInterface]: shape=self._shape, ).create() - self._sdp_ports.append((read_port, write_port)) + self._sdp_ports.append(((read_port, read_domain), write_port)) return read_port, write_port @@ -106,7 +122,7 @@ def elaborate(self, platform) -> Module: # Give the chunk a port corresponding to each of our outer ports, and hook up # their inputs. - for port_index, (read_port, write_port) in enumerate(self._sdp_ports): + for port_index, ((read_port, _), write_port) in enumerate(self._sdp_ports): inner_read_port = chunk.read_port() inner_write_port = chunk.write_port() @@ -123,18 +139,25 @@ def elaborate(self, platform) -> Module: inner_sdp_read_ports[port_index].append(inner_read_port) - for port_index, port in enumerate(self._read_ports): - inner_port = chunk.read_port() + for port_index, (port, domain) in enumerate(self._read_ports): + inner_port = chunk.read_port(domain=domain) m.d.comb += inner_port.addr.eq(port.addr) inner_read_ports[port_index].append(inner_port) - # Connect up the SDP read ports' outputs. - for (port, _), inner_ports in zip(self._sdp_ports, inner_sdp_read_ports): - m.d.comb += port.data.eq(Array(inner_ports)[port.chunk].data) - - # Connect up the regular read ports' outputs. - for port, inner_ports in zip(self._read_ports, inner_read_ports): - m.d.comb += port.data.eq(Array(inner_ports)[port.chunk].data) + for port_index, port in enumerate(self._write_ports): + inner_port = chunk.write_port() + m.d.comb += inner_port.addr.eq(port.addr) + m.d.comb += inner_port.data.eq(port.data) + m.d.comb += inner_port.en.eq(port.en & (port.chunk == chunk_index)) + + # Connect up the read ports' outputs. 
+ for (port, domain), inner_ports in zip( + chain(self._read_ports, (r for r, _ in self._sdp_ports)), + chain(inner_read_ports, inner_sdp_read_ports), + ): + chunk = Signal.like(port.chunk) + m.d[domain] += chunk.eq(port.chunk) + m.d.comb += port.data.eq(Array(inner_ports)[chunk].data) return m diff --git a/net_finder/core/net.py b/net_finder/core/net.py index c3d10c4..9486450 100644 --- a/net_finder/core/net.py +++ b/net_finder/core/net.py @@ -4,7 +4,7 @@ from amaranth.lib.wiring import In, Out from amaranth.utils import ceil_log2 -from .base_types import PosLayout, PosView, net_size +from .base_types import PosLayout, PosView, net_size, next_power_of_two from .memory import ChunkedMemory from .utils import pipe @@ -13,7 +13,7 @@ def shard_depth(max_area: int): net_size_ = net_size(max_area) # We need to round one of the dimensions up to the next power of two in order # for concatenating the x and y coordinates to work properly. - return (net_size_ << ceil_log2(net_size_)) // 4 + return net_size_ * next_power_of_two(net_size_) // 4 def neighbour_shards(m: Module, pos: PosView): diff --git a/net_finder/core/utils.py b/net_finder/core/utils.py index 50c1a21..0c58ec8 100644 --- a/net_finder/core/utils.py +++ b/net_finder/core/utils.py @@ -2,10 +2,10 @@ from amaranth.hdl import ValueLike -def pipe(m: Module, input: ValueLike) -> Signal: +def pipe(m: Module, input: ValueLike, **kwargs) -> Signal: # src_loc_at tells Signal how far up in the call chain to look for what to name # the signal: so, setting it to 1 means we want it to use the name of the # variable the caller's assigning our result to. 
- output = Signal.like(input, src_loc_at=1) + output = Signal.like(input, src_loc_at=1, **kwargs) m.d.sync += output.eq(input) return output diff --git a/net_finder/soc/core.py b/net_finder/soc/core.py index 5e966ed..45ad1ab 100644 --- a/net_finder/soc/core.py +++ b/net_finder/soc/core.py @@ -305,6 +305,7 @@ def __init__(self, cuboids: list[Cuboid], n: int): # Whether or not each core is splittable (is active and has a `base_decision` of # `splittable_base`). splittable = Cat( + # TODO: base_decision is garbage while sending, so this decision-making might be a bit off. cores_active[i] & (core.base_decision == splittable_base) for i, core in enumerate(cores) ) diff --git a/shell.nix b/shell.nix index e2dbdb9..679c431 100644 --- a/shell.nix +++ b/shell.nix @@ -29,23 +29,17 @@ let in pkgs.mkShell { venvDir = ".venv"; - packages = - [ - pkgs.python311.pkgs.venvShellHook + packages = [ + pkgs.python311.pkgs.venvShellHook - openocd - pkgs.yosys + openocd + pkgs.yosys - # Needed by Verilator simulations - pkgs.json_c - pkgs.libevent - pkgs.zlib - ] - ++ pkgs.lib.optionals pkgs.stdenv.isDarwin [ - # Needed by Rust code - pkgs.libiconv - pkgs.darwin.apple_sdk.frameworks.CoreFoundation - ]; + # Needed by Verilator simulations + pkgs.json_c + pkgs.libevent + pkgs.zlib + ]; postVenvCreation = '' ${pkgs.uv}/bin/uv pip install -r requirements.txt diff --git a/src/bin/dump_neighbours.rs b/src/bin/dump_neighbours.rs index e95d3d1..6b5fbef 100644 --- a/src/bin/dump_neighbours.rs +++ b/src/bin/dump_neighbours.rs @@ -1,4 +1,5 @@ -//! Dumps the information required by `test_neighbour_lookup.py` as JSON to stdout. +//! Dumps the information required by `test_neighbour_lookup.py` as JSON to +//! stdout. 
use std::io; use std::time::Duration; diff --git a/src/geometry.rs b/src/geometry.rs index 1b7d525..9803fb4 100644 --- a/src/geometry.rs +++ b/src/geometry.rs @@ -470,6 +470,14 @@ impl Net { self.color_with_cache(cuboid, &square_cache) } + // TODO: make a variant of `color` that returns all the colorings, then filters + // them down to the ones that are actually different: that is, renumber all the + // faces in the order they occur so that mapping to different faces doesn't make + // a difference, and then return the ones that are distinct under that + // representation. I think that's a good definition of different foldings: + // if you fold along the same lines every time, you should get the same result, + // and same coloring = same lines. + /// Return a version of this net with its squares 'colored' with which faces /// they're on. /// @@ -1695,8 +1703,8 @@ impl Class { .unwrap() } - /// Returns the list of all the transformations you can perform to get from the - /// root of this class's family to this class. + /// Returns the list of all the transformations you can perform to get from + /// the root of this class's family to this class. pub fn alternate_transforms( self, cache: &SquareCache, diff --git a/src/primary/mod.rs b/src/primary/mod.rs index 14ec266..83aafda 100644 --- a/src/primary/mod.rs +++ b/src/primary/mod.rs @@ -229,7 +229,8 @@ impl FinderCtx { } } - /// Given a class on the fixed cuboid, returns whether it's in the fixed family. + /// Given a class on the fixed cuboid, returns whether it's in the fixed + /// family. pub fn fixed_family(&self, cursor: Cursor) -> bool { let index = cursor.0 as usize; (self.maybe_skipped_lookup[index >> 6] >> (index & 0x3f)) & 1 != 0 @@ -1111,7 +1112,10 @@ impl FinderCtx { // net but disagreeing on what each other should map to (which leads to a cut). for &instruction in completed.iter() { let to_check = [ - instruction, + instruction, /* TODO: is this needed? 
I think it was at the time this was + * written, since `net` hadn't been added yet, but `net` should now + * guarantee that you can't have two instructions setting the same + * net position. */ instruction.moved_in(self, Left), instruction.moved_in(self, Up), instruction.moved_in(self, Right), @@ -1600,6 +1604,13 @@ pub fn drive>( continue; } + // TODO: this is starting to get suspicious. Counting flipped versions of nets + // as well isn't doubling the number of solutions; the flipped version of the + // net will always be another valid solution, so that should mean it's happening + // because the flipped version of the net is the same as the original net and + // still doesn't get counted separately, but that doesn't seem to be the case. + // So then why isn't it double? Are we missing solutions? I think we + // established that this was due to skipping? I don't 100% remember... count += 1; progress.suspend(|| { println!(