-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy pathvid_linerenderer.v
781 lines (705 loc) · 23.5 KB
/
vid_linerenderer.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
/*
This effectively is the GPU. It has a slave interface to the CPU, for register-settings and
'on-chip' video memory, as well as a master interface for the SPI RAM, for grabbing a bitmap.
It's called a line renderer as it renders lines asynchronously from the pixel output (i.e.
as fast as possible), then writes it into the video memory. This video memory is only 4 lines
and is used as a FIFO. The downstream video hardware reads from this memory at its own pace
(namely the HDMI and/or LCD pixel clock) and spits out the pixels to the respective display
devices.
*/
/*
* Copyright (C) 2019 Jeroen Domburg <[email protected]>
* All rights reserved.
*
* BSD 3-clause, see LICENSE.bsd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the <organization> nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
Backgrounds are simple, for this: take a bunch of tiles in tilemem and a tilemap. Tilemap
resolves current tile for (x,y), tile mem resolves the pixel, palette mem resolves color.
Sprites are harder because they can be anywhere. A sane setup would probably involve
a dual memory to render in: one gets filled (random-access) by the sprite subsystem, the
other is both read and zeroed by the line renderer.
Now we have memory we can just randomly dump sprites in... how to do that? We don't have time
to scan all sprites (how many?) every pixel, unfortunately. How many sprites do we want to have
anyway?
Let's say we do it like most consoles seem to do it: have x sprites in total, with a max of y
sprites on one scanline. Take the snes: it can do max 32 sprites per scanline (or 256 pixels),
128 sprites max. In our hw, we have 4 clocks per pixel, or 1920 clocks in total to process one
line of sprites. Let's assume all sprites have one fixed size for now (although it would be
awesome if we could add scaling or rotation to the mix.) Let's assume we want to do max 32x32
sprites.
One of the ways to fix this up would be by just iterating over all 512 sprites; as soon as we find one
that is supposed to be on the current line, we draw it out. This means we can process all 512
sprites at the same time, perhaps iterating over them multiple times for priority stuff. Given we
write pixels to the line memory one by one and we have 2/4 accesses to tile memory, this
should cost us (512 + 32*2*32)=2560 cycles, with no prio and 32 sprites simultaneously. That ain't no
good...
Say, we hit this thing with 32 bit at a time. 4x per sprite, we read 32 bit from the tile memory.
Then, 5x, we read what is in the current tile memory, overlay the 32-bits we have, and write it back.
This means we need (512+5*2*32)=832 cycles. Nice, but no chance of rotation/scaling...
Maybe meet in the middle? There's nothing stopping us from fetching 32 bit (8 pixels) from tile
mem at a time. We can process these in parallel with fetching the next one. That would worst-case
cost us (512+32*32)=1536 cycles. That fits and allows us to do X-scaling as well.
*/
//Port overview:
// - clk/reset: single clock domain; reset is sampled synchronously in the always blocks below.
// - irq_copper: driven high combinationally while the copper executes an IRQ opcode.
// - Slave interface (addr/din/wstrb/ren/dout/ready): CPU access to registers and the on-chip
//   memories. 'ready' is forced low while the copper owns the internal bus.
// - Video mem interface: vid_addr/vid_data_out/vid_wen write rendered pixels to the line memory;
//   curr_vid_addr, preload and next_field tell the renderer where the pixel output side is.
//   vid_data_in is not referenced by the logic in this module, and vid_ren is only ever
//   cleared at reset.
// - Master interface (m_*): wired straight to the DMA reader that fetches framebuffer words.
//NOTE(review): m_do_read and m_addr are declared 'output reg' but are driven by the dma_rdr
//instance ports further down; strict Verilog tools may reject a reg driven by an instance
//output - confirm against the toolchain in use.
module vid_linerenderer (
input clk, reset,
output reg irq_copper,
//Slave iface from cpu
input [24:0] addr,
input [31:0] din,
input [3:0] wstrb,
input ren,
output reg [31:0] dout,
output reg ready,
//Video mem iface
output reg [19:0] vid_addr, //assume 1 meg-words linebuf mem max
input preload, //will go high 4 lines before video frame starts (with vid_address=0)
output [23:0] vid_data_out,
output reg vid_wen, vid_ren,
input [23:0] vid_data_in,
input [19:0] curr_vid_addr,
input next_field,
//Master iface to spi mem
output reg m_do_read,
input m_next_word,
output reg [23:0] m_addr,
input [31:0] m_rdata,
input m_is_idle
);
//Framebuffer configuration registers (CPU-writable) and the DMA reader that streams one
//framebuffer line at a time out of external memory.
reg [23:0] fb_addr; //start address of the framebuffer
reg [15:0] pitch; //per-line address increment; halved before use when the fb is not 8-bit
reg [3:0] layer_en; //bit 0: fb, bit 1: tile layer A, bit 2: tile layer B, bit 3: sprites
reg [23:0] dma_start_addr; //start address of the line currently being fetched
reg dma_run; //high while the DMA reader should be fetching the current line
wire dma_ready; //DMA reader has data available
reg dma_do_read; //one-cycle strobe: advance the DMA reader to the next 32-bit word
wire [31:0] dma_data; //current 32-bit word of framebuffer data
//NOTE(review): fb_is_8bit is used in the addr_end expression below but is only declared
//further down in the module; some tools require declaration before use - confirm.
qpimem_dma_rdr dma_rdr(
.clk(clk),
.rst(reset),
.addr_start(dma_start_addr),
.addr_end(dma_start_addr+(fb_is_8bit?480:(480/2))), //always only read a line
.run(dma_run),
.do_read(dma_do_read),
.ready(dma_ready),
.rdata(dma_data),
.qpi_do_read(m_do_read),
.qpi_next_word(m_next_word),
.qpi_addr(m_addr),
.qpi_rdata(m_rdata),
.qpi_is_idle(m_is_idle)
);
//One-hot-style select lines, decoded combinationally from addr_muxed: exactly the selected
//target (register file or one of the memories) sees the current bus access.
reg cpu_sel_tilemem;
reg cpu_sel_tilemap_a;
reg cpu_sel_tilemap_b;
reg cpu_sel_palette;
reg cpu_sel_regs;
reg cpu_sel_sprites;
reg cpu_sel_copper;
//Register indices within the register window; these are compared against addr_muxed[5:2],
//i.e. they are 32-bit word indices.
parameter REG_SEL_FB_ADDR = 0;
parameter REG_SEL_FB_PITCH = 1;
parameter REG_SEL_LAYER_EN = 2;
parameter REG_SEL_TILEA_OFF = 3;
parameter REG_SEL_TILEB_OFF = 4;
parameter REG_SEL_TILEA_INC_COL = 5;
parameter REG_SEL_TILEA_INC_ROW = 6;
parameter REG_SEL_TILEB_INC_COL = 7;
parameter REG_SEL_TILEB_INC_ROW = 8;
parameter REG_SEL_VIDPOS = 9; //read-only: no write handler exists for this index
parameter REG_SEL_VBLCTR = 10; //read-only: no write handler exists for this index
parameter REG_SEL_BGNDCOL = 11;
parameter REG_SEL_SPRITE_OFF = 12;
parameter REG_SEL_COPPERCTL_OFF = 13;
//Reminder: we have 64x64 tiles of 16x16 pixels, so in total a field of 1024x1024 pixels. Say we have one overflow bit, we need 11 bit
//for everything... that leaves 5 bits for sub-pixel addressing in scaling modes. That sounds OK.
//NOTE(review): the logic in this file actually treats bits [5:0] of the tile coordinates as
//the sub-pixel fraction (pixel-in-tile is [9:6], tile index is [15:10], and the reset value of
//the column/row increments is 1<<6), i.e. 6 fractional bits rather than 5.
reg [15:0] tilea_xoff;
reg [15:0] tilea_yoff;
reg [15:0] tileb_xoff;
reg [15:0] tileb_yoff;
reg [12:0] sprite_xoff;
reg [12:0] sprite_yoff;
//Read-back data from port A of the respective memories / the sprite engine.
wire [17:0] dout_tilemapa;
wire [17:0] dout_tilemapb;
wire [31:0] dout_palette;
wire [31:0] dout_tilemem;
wire [31:0] dout_sprites;
reg fb_is_8bit; //1: 8 bits per framebuffer pixel, 0: 4 bits per pixel
reg [31:0] bgnd_color; //color the first mixing step blends the framebuffer layer onto
reg [8:0] fb_pal_offset; //added to the framebuffer pixel value to form the palette address
reg [1:0] cycle; //which of the 4 per-pixel pipeline steps is active
//write_vid_addr_next runs one clock ahead of write_vid_addr, which in turn runs one clock
//ahead of vid_addr. Bits [8:0] are the x position within the line, bits [17:9] the line.
reg [19:0] write_vid_addr;
reg [19:0] write_vid_addr_next;
wire [8:0] vid_ypos;
wire [8:0] vid_xpos;
assign vid_xpos = write_vid_addr[8:0];
assign vid_ypos = write_vid_addr[17:9];
wire [8:0] vid_ypos_next;
wire [8:0] vid_xpos_next;
assign vid_xpos_next = write_vid_addr_next[8:0];
assign vid_ypos_next = write_vid_addr_next[17:9];
//Per-pixel (col) and per-line (row) increments of the tile layer source coordinates.
reg [15:0] tilea_colinc_x;
reg [15:0] tilea_colinc_y;
reg [15:0] tilea_rowinc_x;
reg [15:0] tilea_rowinc_y;
reg [15:0] tileb_colinc_x;
reg [15:0] tileb_colinc_y;
reg [15:0] tileb_rowinc_x;
reg [15:0] tileb_rowinc_y;
wire [8:0] sprite_pix; //palette index delivered by the sprite engine
reg [31:0] vblctr; //incremented once each time the renderer finishes a frame
//Copper state: program counter, pending write burst (target address and remaining words).
reg copper_run;
reg [10:0] copper_pc;
reg [10:0] copper_pc_next;
reg [31:0] copper_addr;
reg [31:0] copper_addr_next;
reg [2:0] copper_write_ct;
reg [2:0] copper_write_ct_next;
wire [31:0] copper_data; //copper memory read data (instruction word or burst data word)
reg copper_halts_gfx; //high while the copper uses the internal bus; stalls the pixel pipeline
wire [31:0] dout_copper;
//Copper opcodes, held in the top nibble of the instruction word. Any word with bit 31
//clear is decoded as a write instruction.
parameter COPPER_OP_WAIT = 'h8;
parameter COPPER_OP_RESET = 'h9;
parameter COPPER_OP_IRQ = 'hA;
parameter COPPER_OP_WRITE = 'h0;
//Internal bus mux. By default the CPU slave interface drives the internal address, data
//and write-strobe lines. While a copper write burst is pending (copper_write_ct != 0) the
//copper takes over: it has priority, always writes full words, and the CPU is held off by
//keeping 'ready' low.
reg [24:0] addr_muxed;
reg [31:0] din_muxed;
reg [3:0] wstrb_muxed;
reg ready_delayed;
always @(*) begin
	//Default: CPU owns the bus; an access is acknowledged one cycle after it was first seen.
	ready = ready_delayed & ((wstrb!=0) || ren);
	wstrb_muxed = wstrb;
	din_muxed = din;
	addr_muxed = addr;
	if (copper_write_ct != 0) begin
		//Copper override.
		ready = 0;
		wstrb_muxed = 'hf;
		din_muxed = copper_data;
		addr_muxed = copper_addr;
	end
end
//Copper next-state logic.
//copper_pc is the address of the word currently present on copper_data; copper_pc_next is
//the address presented to the copper memory for the following cycle. The other *_next
//signals follow the same convention.
always @(*) begin
	//Defaults: hold all state, no interrupt, graphics pipeline not stalled.
	copper_pc_next = copper_pc;
	copper_write_ct_next = copper_write_ct;
	copper_addr_next = copper_addr;
	copper_halts_gfx = 0;
	irq_copper = 0;
	if (!copper_run) begin
		//Copper disabled: park at the start of the program with no burst pending.
		copper_write_ct_next = 0;
		copper_pc_next = 0;
	end else if (copper_write_ct != 0) begin
		//Write burst in progress: copper_data is a data word, not an instruction.
		//Consume it, advance the target address by one word, and stall the renderer.
		copper_write_ct_next = copper_write_ct - 1;
		copper_addr_next = copper_addr + 4;
		copper_pc_next = copper_pc + 1;
		copper_halts_gfx = 1;
	end else begin
		case (copper_data[31:28])
			COPPER_OP_WAIT: begin
				//Wait for specified x/y coord; the pc holds until it matches.
				if (copper_data[24:16]==vid_ypos && copper_data[8:0]==vid_xpos) begin
					copper_pc_next = copper_pc + 1;
				end
			end
			COPPER_OP_RESET: begin
				copper_pc_next = 0;
			end
			COPPER_OP_IRQ: begin
				irq_copper = 1;
				copper_pc_next = copper_pc + 1;
			end
			default: begin
				//COPPER_OP_WRITE: any word with bit 31 clear. Bits [31:2] give the
				//word-aligned target address, bits [1:0] the number of words minus one.
				if (copper_data[31]==0) begin
					copper_pc_next = copper_pc + 1;
					copper_write_ct_next = copper_data[1:0]+1;
					copper_addr_next = {copper_data[31:2], 2'h0};
					copper_halts_gfx = 1;
				end
			end
		endcase
	end
end
//Copper state registers: cleared on synchronous reset, otherwise loaded every clock with
//the values computed by the combinational next-state logic.
always @(posedge clk) begin
	if (reset) begin
		copper_addr <= 32'h0;
		copper_write_ct <= 3'h0;
		copper_pc <= 11'h0;
	end else begin
		copper_addr <= copper_addr_next;
		copper_write_ct <= copper_write_ct_next;
		copper_pc <= copper_pc_next;
	end
end
//Address decoder and read-data mux for the internal bus (CPU, or copper when it owns the bus).
//Exactly one cpu_sel_* line is raised depending on addr_muxed[17:13]:
//  0      registers (register index is addr_muxed[5:2])
//  1      palette memory
//  2,3    tilemap A
//  4,5    tilemap B
//  6      sprite memory
//  8-15   tile memory   (addr_muxed[17:16]==1)
//  16-23  copper memory (addr_muxed[17:16]==2)
//Anything else selects nothing and reads as 0.
//Register fields that come in x/y pairs are returned in the same layout in which they are
//written: x in bits [15:0], y in bits [31:16].
always @(*) begin
	cpu_sel_tilemem = 0;
	cpu_sel_tilemap_a = 0;
	cpu_sel_tilemap_b = 0;
	cpu_sel_palette = 0;
	cpu_sel_regs = 0;
	cpu_sel_sprites = 0;
	cpu_sel_copper = 0;
	dout = 0;
	if (addr_muxed[17:13]=='h0) begin
		cpu_sel_regs = 1;
		case (addr_muxed[5:2])
			REG_SEL_FB_ADDR: dout = {8'h4, fb_addr};
			REG_SEL_FB_PITCH: dout = {7'h0, fb_pal_offset, pitch};
			REG_SEL_LAYER_EN: dout = {15'h0, fb_is_8bit, 12'h0, layer_en};
			REG_SEL_TILEA_OFF: dout = {tilea_yoff, tilea_xoff};
			REG_SEL_TILEB_OFF: dout = {tileb_yoff, tileb_xoff};
			//The increment registers used to read back with their halves swapped
			//relative to the write layout; they now mirror it.
			REG_SEL_TILEA_INC_COL: dout = {tilea_colinc_y, tilea_colinc_x};
			REG_SEL_TILEA_INC_ROW: dout = {tilea_rowinc_y, tilea_rowinc_x};
			REG_SEL_TILEB_INC_COL: dout = {tileb_colinc_y, tileb_colinc_x};
			REG_SEL_TILEB_INC_ROW: dout = {tileb_rowinc_y, tileb_rowinc_x};
			REG_SEL_VIDPOS: dout = {7'h0, vid_ypos, 7'h0, vid_xpos};
			REG_SEL_VBLCTR: dout = vblctr;
			REG_SEL_BGNDCOL: dout = bgnd_color;
			//Decoded from addr_muxed like every other register (was 'addr').
			REG_SEL_SPRITE_OFF: dout = {3'h0, sprite_yoff, 3'h0, sprite_xoff};
			REG_SEL_COPPERCTL_OFF: dout = {copper_run, 20'h0, copper_pc};
			default: dout = 0; //unimplemented register indices read as 0
		endcase
	end else if (addr_muxed[17:13]=='h1) begin
		cpu_sel_palette = 1;
		dout = dout_palette;
	end else if (addr_muxed[17:14]=='h1) begin //2,3
		cpu_sel_tilemap_a = 1;
		dout = {14'h0, dout_tilemapa};
	end else if (addr_muxed[17:14]=='h2) begin //4,5
		cpu_sel_tilemap_b = 1;
		dout = {14'h0, dout_tilemapb};
	end else if (addr_muxed[17:13]=='h6) begin
		cpu_sel_sprites = 1;
		dout = dout_sprites;
	end else if (addr_muxed[17:16]==1) begin
		cpu_sel_tilemem = 1;
		dout = dout_tilemem;
	end else if (addr_muxed[17:16]==2) begin
		cpu_sel_copper = 1;
		dout = dout_copper;
	end
end
//Tile memory: holds the tile bitmaps as 4-bit pixels, eight pixels per 32-bit word.
//Port A is the CPU/copper side; port B is the renderer's read port, shared between the two
//tile layers and the sprite engine by the per-cycle mux further down.
reg [3:0] tilepix_x; //pixel column within the selected tile
reg [3:0] tilepix_y; //pixel row within the selected tile
reg [8:0] tilemem_no; //tile number
wire [3:0] tilemem_pixel;
wire [31:0] tilemem_word;
wire [13:0] tilemem_addr;
//Word address: tile number, row, and the upper bit of the column (two words per tile row).
assign tilemem_addr = {tilemem_no, tilepix_y, tilepix_x[3]};
//The low three column bits select the nibble within the word. They are delayed by one clock,
//presumably to line up with the memory's read latency - TODO confirm against vid_tilemem.
reg [2:0] tilenib_sel;
always @(posedge clk) begin
tilenib_sel <= tilepix_x[2:0];
end
assign tilemem_pixel=tilemem_word[4*tilenib_sel+:4];
vid_tilemem tilemem(
.ClockA(clk),
.ClockB(clk),
.ResetA(reset),
.ResetB(reset),
.ClockEnA(1),
.ClockEnB(1),
.DataInA(din_muxed),
.DataInB(0),
//Only full-word writes (all four strobe bits set) to the tile memory window are accepted.
.WrA(&wstrb_muxed & cpu_sel_tilemem),
.WrB(0),
.AddressA(addr_muxed[15:2]),
.AddressB(tilemem_addr),
.QA(dout_tilemem),
.QB(tilemem_word)
);
//Tilemap data (as decoded by the per-cycle mux in this module):
// 8:0 - tile
// 9 - inv x
// 10 - inv y
// 11 - swap x and y when addressing the tile
// 17-12 - palette offset (shifted left by 3, i.e. *8, when forming the palette address)
//
//Tile layer A source coordinates. tilea_xb/tilea_yb is the running position for the current
//pixel; the CPU-programmed offset is added on top. Bits [15:10] select the tilemap entry
//(6 bits each for row and column), bits [9:6] the pixel within the tile.
reg [16:0] tilea_xb;
reg [16:0] tilea_yb;
wire [16:0] tilea_x;
wire [16:0] tilea_y;
assign tilea_x = tilea_xb + tilea_xoff;
assign tilea_y = tilea_yb + tilea_yoff;
wire [17:0] tilea_data;
wire [11:0] tilemapa_addr;
assign tilemapa_addr = {tilea_y[15:10], tilea_x[15:10]};
//Tilemap A memory: port A is the CPU/copper side, port B is read by the renderer.
vid_tilemapmem tilemapa (
.ClockA(clk),
.ClockB(clk),
.ResetA(reset),
.ResetB(reset),
.ClockEnA(1),
.ClockEnB(1),
.DataInA(din_muxed[17:0]),
.DataInB(0),
//Only full-word writes to the tilemap A window are accepted.
.WrA(&wstrb_muxed & cpu_sel_tilemap_a),
.WrB(0),
.AddressA(addr_muxed[13:2]),
.AddressB(tilemapa_addr),
.QA(dout_tilemapa),
.QB(tilea_data)
);
//Tile layer B source coordinates and tilemap memory; same structure as tile layer A.
//tileb_xb/tileb_yb is the running position, the CPU-programmed offset is added on top, and
//bits [15:10] of each resulting coordinate form the tilemap address.
reg [16:0] tileb_xb;
reg [16:0] tileb_yb;
wire [16:0] tileb_x;
wire [16:0] tileb_y;
assign tileb_x = tileb_xb + tileb_xoff;
assign tileb_y = tileb_yb + tileb_yoff;
wire [17:0] tileb_data;
wire [11:0] tilemapb_addr;
assign tilemapb_addr = {tileb_y[15:10], tileb_x[15:10]};
vid_tilemapmem tilemapb (
.ClockA(clk),
.ClockB(clk),
.ResetA(reset),
.ResetB(reset),
.ClockEnA(1),
.ClockEnB(1),
.DataInA(din_muxed[17:0]),
.DataInB(0),
//Only full-word writes to the tilemap B window are accepted.
.WrA(&wstrb_muxed & cpu_sel_tilemap_b),
.WrB(0),
.AddressA(addr_muxed[13:2]),
.AddressB(tilemapb_addr),
.QA(dout_tilemapb),
.QB(tileb_data)
);
//Sprite engine hookup. The engine has its own CPU-accessible memory, requests pixels from
//the shared tile memory, and delivers one palette index (sprite_pix) per output pixel.
reg sprite_pix_done; //asserted in cycle 2, when sprite_pix is used as the palette address
wire [3:0] sprite_tilemem_x; //tile memory coordinates/tile number the sprite engine wants
wire [3:0] sprite_tilemem_y;
wire [8:0] sprite_tilemem_no;
reg sprite_tilemem_ack; //asserted in cycles 1 and 2, when the tile memory port serves the sprite engine
vid_spriteeng spriteeng (
.clk(clk),
.reset(reset),
.cpu_addr(addr_muxed[10:2]),
.cpu_din(din_muxed),
.cpu_dout(dout_sprites),
//Write strobes only reach the sprite engine when its address window is selected.
.cpu_wstrb(cpu_sel_sprites ? wstrb_muxed : 0),
.offx(sprite_xoff),
.offy(sprite_yoff),
.vid_xpos(vid_xpos),
.vid_ypos(vid_ypos),
.sprite_pix(sprite_pix),
.pix_done(sprite_pix_done),
.tilemem_x(sprite_tilemem_x),
.tilemem_y(sprite_tilemem_y),
.tilemem_no(sprite_tilemem_no),
.tilemem_data(tilemem_pixel),
.tilemem_ack(sprite_tilemem_ack)
);
//Source coordinates the tile layers will start the next line from. The main state machine
//loads them into tile*_xb/tile*_yb at each end of line and then advances them by the
//per-row increments.
reg [16:0] tilea_linestart_x;
reg [16:0] tilea_linestart_y;
reg [16:0] tileb_linestart_x;
reg [16:0] tileb_linestart_y;
//Palette memory. Port B is the renderer's lookup: pal_addr is chosen per pipeline cycle and
//pal_data feeds the alpha mixer, with bits [31:24] used as the mix rate.
reg [8:0] pal_addr;
wire [31:0] pal_data;
vid_palettemem palettemem(
.ClockA(clk),
.ClockB(clk),
.ResetA(reset),
.ResetB(reset),
.ClockEnA(1),
.ClockEnB(1),
.DataInA(din_muxed),
.DataInB(0),
//Only full-word writes to the palette window are accepted.
.WrA((wstrb_muxed=='hf) && cpu_sel_palette),
.WrB(0),
.AddressA(addr_muxed[10:2]),
.AddressB(pal_addr),
.QA(dout_palette),
.QB(pal_data)
);
//Copper program memory. Port A is the CPU/copper-bus side (full-word writes only).
//Port B is the copper's fetch port: it is addressed with copper_pc_next so that copper_data
//corresponds to copper_pc (presumably the memory has a one-clock read latency - TODO confirm
//against ram_dp_32x2048).
ram_dp_32x2048 copper_mem(
.ClockA(clk),
.ClockB(clk),
.ResetA(reset),
.ResetB(reset),
.ClockEnA(1),
.ClockEnB(1),
.DataInA(din_muxed),
.DataInB(0),
.WrA((wstrb_muxed=='hf) && cpu_sel_copper),
.WrB(0),
.AddressA(addr_muxed[12:2]),
.AddressB(copper_pc_next),
.QA(dout_copper),
.QB(copper_data)
);
//Alpha mixer: combines the palette lookup result (in_a) with the color accumulated so far
//(in_b) according to 'rate'. The combinational result alphamixer_out_cur is registered into
//alphamixer_out by the main state machine, so each pipeline cycle stacks one more layer.
//NOTE(review): the exact blend formula lives in video_alphamixer and is not visible here.
reg [31:0] alphamixer_in_b;
reg [7:0] alphamixer_rate;
wire [31:0] alphamixer_out_cur;
reg [31:0] alphamixer_out;
video_alphamixer mixer(
.in_a(pal_data),
.in_b(alphamixer_in_b),
.rate(alphamixer_rate),
.out(alphamixer_out_cur)
);
/*
Note we have slightly more than 4 clock cycles per pixel here. This means we can have 4 layers.
Layer FB - Framebuffer, from psram
Layer TA - Tile layer A
Layer TB - Tile layer B
Layer SP - Sprite layer.
We have 4 states per pixel, 0-3 This is what happens in each state:
0:
TileA tilemem pixel -> palette
TileB tilemap -> tilemem
FB palette data -> alpha mixer
1:
TileA palette data -> alpha mixer
TileB tilemem pixel -> palette
2:
TileA X/Y -> tilemap
TileB palette data -> alpha mixer
Sprite linebuf pixel -> palette
3:
TileA tilemap -> tilemem
TileB X/Y -> tilemap
Sprite palette data -> alpha mixer
FB data -> palette
*/
//Framebuffer pixel value for the pixel being rendered; fb_pal_offset is added to it to
//form the palette address.
reg [7:0] fb_pixel;
//Per-cycle resource mux. In each of the four pipeline cycles this selects
// - who addresses the tile memory (tile layer B, the sprite engine twice, tile layer A),
// - what addresses the palette memory,
// - which layer-enable bit gates the alpha mix rate and what the mixer blends onto.
//For the tile layers, tilemap bit 9/10 mirror the pixel column/row within the tile and
//bit 11 swaps the two axes.
always @(*) begin
	sprite_tilemem_ack=0;
	sprite_pix_done=0;
	pal_addr=0;
	tilemem_no=0;
	tilepix_y=0;
	tilepix_x=0;
	case (cycle)
		2'd0: begin
			//Tile memory serves tile layer B.
			tilemem_no = tileb_data[8:0];
			if (tileb_data[11]) begin
				tilepix_x = tileb_data[10] ? (15-tileb_y[9:6]) : tileb_y[9:6];
				tilepix_y = tileb_data[9] ? (15-tileb_x[9:6]) : tileb_x[9:6];
			end else begin
				tilepix_y = tileb_data[10] ? (15-tileb_y[9:6]) : tileb_y[9:6];
				tilepix_x = tileb_data[9] ? (15-tileb_x[9:6]) : tileb_x[9:6];
			end
			pal_addr = tilemem_pixel + {tilea_data[17:12], 3'b0}; //from tilemap a
			alphamixer_in_b = bgnd_color; //background
			alphamixer_rate = layer_en[0] ? pal_data[31:24] : 0; //fb
		end
		2'd1: begin
			//Tile memory serves the sprite engine.
			sprite_tilemem_ack = 1;
			tilemem_no = sprite_tilemem_no;
			tilepix_y = sprite_tilemem_y;
			tilepix_x = sprite_tilemem_x;
			pal_addr = tilemem_pixel + {tileb_data[17:12], 3'b0}; //from tilemap b
			alphamixer_in_b = alphamixer_out; //bgnd+fb
			alphamixer_rate = layer_en[1] ? pal_data[31:24] : 0; //tilemap a
		end
		2'd2: begin
			//Tile memory serves the sprite engine again; the sprite pixel is looked up.
			sprite_tilemem_ack = 1;
			tilemem_no = sprite_tilemem_no;
			tilepix_y = sprite_tilemem_y;
			tilepix_x = sprite_tilemem_x;
			sprite_pix_done = 1;
			pal_addr = sprite_pix;
			alphamixer_in_b = alphamixer_out; //bgnd+fb+tilemap_a
			alphamixer_rate = layer_en[2] ? pal_data[31:24] : 0; //tilemap b
		end
		default: begin //cycle==3
			//Tile memory serves tile layer A.
			tilemem_no = tilea_data[8:0];
			if (tilea_data[11]) begin
				tilepix_x = tilea_data[10] ? (15-tilea_y[9:6]) : tilea_y[9:6];
				tilepix_y = tilea_data[9] ? (15-tilea_x[9:6]) : tilea_x[9:6];
			end else begin
				tilepix_y = tilea_data[10] ? (15-tilea_y[9:6]) : tilea_y[9:6];
				tilepix_x = tilea_data[9] ? (15-tilea_x[9:6]) : tilea_x[9:6];
			end
			pal_addr = {fb_pixel+fb_pal_offset}; //from fb
			alphamixer_in_b = alphamixer_out; //bgnd+fb+tilemap_a+tilemap_b
			alphamixer_rate = layer_en[3] ? pal_data[31:24] : 0; //sprite
		end
	endcase
end
//The line memory receives the low 24 bits of the alpha mixer accumulator.
assign vid_data_out = alphamixer_out[23:0];

//Set while the renderer idles after the last line of a frame; makes sure vblctr is bumped
//only once per frame.
reg in_render_vbl;

//Main clocked process: CPU/copper register writes plus the line renderer state machine.
//
//Renderer operation: every output pixel takes four 'cycle' steps (0..3). The pipeline only
//advances when the copper does not own the bus and framebuffer data is available (or not
//needed). On cycle 3 the finished pixel is written to the line memory and the position
//advances; after the last pixel of a line the tile coordinates are reloaded for the next
//line. Rendering of a line only starts when the line memory has room for it, and after the
//last line the renderer waits for next_field.
always @(posedge clk) begin
	if (reset) begin
		ready_delayed <= 0;
		fb_addr <= 'h7E0000; //top 128K of RAM
		pitch <= 512;
		fb_pal_offset <= 0;
		fb_is_8bit <= 0;
		//All layers off and copper stopped until software configures them.
		layer_en <= 0;
		copper_run <= 0;
		cycle <= 0;
		write_vid_addr_next <= 'h400; //2 lines in advance, so we start writing immediately (good for sim)
		vid_addr <= 0;
		vid_ren <= 0;
		vid_wen <= 0;
		dma_run <= 0;
		dma_do_read <= 0;
		dma_start_addr <= fb_addr;
		//Both x and y offsets are cleared (the y offsets used to be left out).
		tilea_xoff <= 0;
		tilea_yoff <= 0;
		tileb_xoff <= 0;
		tileb_yoff <= 0;
		tilea_linestart_x <= 0;
		tilea_linestart_y <= 0;
		tileb_linestart_x <= 0;
		tileb_linestart_y <= 0;
		//Default increments: 1.0 pixel per column in x and 1.0 pixel per row in y
		//(bits [5:0] are the fraction), i.e. unscaled and unrotated.
		tilea_colinc_x <= (1<<6);
		tilea_colinc_y <= 0;
		tilea_rowinc_x <= 0;
		tilea_rowinc_y <= (1<<6);
		tileb_colinc_x <= (1<<6);
		tileb_colinc_y <= 0;
		tileb_rowinc_x <= 0;
		tileb_rowinc_y <= (1<<6);
		alphamixer_out <= 0;
		bgnd_color <= 0;
		sprite_yoff <= 64;
		sprite_xoff <= 64;
		vblctr <= 0;
		in_render_vbl <= 0;
	end else begin
		/* CPU interface */
		ready_delayed <= ((wstrb!=0) | ren);
		//Register writes: full-word writes only. Paired fields take x from din[15:0] and
		//y from din[31:16]. VIDPOS and VBLCTR are read-only and have no entry here.
		if ((&wstrb_muxed) && cpu_sel_regs) begin
			case (addr_muxed[5:2])
				REG_SEL_FB_ADDR: begin
					fb_addr <= din_muxed[23:0];
				end
				REG_SEL_FB_PITCH: begin
					pitch <= din_muxed[15:0];
					//Non-blocking like every other register in this clocked block.
					fb_pal_offset <= din_muxed[24:16];
				end
				REG_SEL_LAYER_EN: begin
					layer_en <= din_muxed[3:0];
					fb_is_8bit <= din_muxed[16];
				end
				REG_SEL_TILEA_OFF: begin
					tilea_xoff <= din_muxed[15:0];
					tilea_yoff <= din_muxed[31:16];
				end
				REG_SEL_TILEB_OFF: begin
					tileb_xoff <= din_muxed[15:0];
					tileb_yoff <= din_muxed[31:16];
				end
				REG_SEL_TILEA_INC_COL: begin
					tilea_colinc_x <= din_muxed[15:0];
					tilea_colinc_y <= din_muxed[31:16];
				end
				REG_SEL_TILEA_INC_ROW: begin
					tilea_rowinc_x <= din_muxed[15:0];
					tilea_rowinc_y <= din_muxed[31:16];
				end
				REG_SEL_TILEB_INC_COL: begin
					tileb_colinc_x <= din_muxed[15:0];
					tileb_colinc_y <= din_muxed[31:16];
				end
				REG_SEL_TILEB_INC_ROW: begin
					tileb_rowinc_x <= din_muxed[15:0];
					tileb_rowinc_y <= din_muxed[31:16];
				end
				REG_SEL_BGNDCOL: begin
					bgnd_color <= din_muxed;
				end
				REG_SEL_SPRITE_OFF: begin
					sprite_xoff <= din_muxed[12:0];
					sprite_yoff <= din_muxed[28:16];
				end
				REG_SEL_COPPERCTL_OFF: begin
					copper_run <= din_muxed[31];
				end
				default: begin
					//read-only or unimplemented register: ignore the write
				end
			endcase
		end

		//vid_address is the address that is sent to the write hardware. As this actually increases
		//at the same time as we set the write strobe, we make sure it is write_vid_address delayed by
		//one cycle. write_vid_address now is the address of the pixel we're reading from the palette
		//memory and writing to the video memory.
		vid_addr <= write_vid_addr;
		//As the tile and sprite hardware needs some extra cycles to get the data from tile memory,
		//we generate write_vid_address by delaying write_vid_address_next by one. This way, if
		//write_vid_address is the current pixel position, write_vid_address_next is the future one.
		write_vid_addr <= write_vid_addr_next;

		//Strobes default to inactive; the state machine below raises them for one clock.
		dma_do_read <= 0;
		vid_wen <= 0;

		//Line renderer proper statemachine.
		if (write_vid_addr[19:9]>=320) begin
			//We're finished with this frame. Wait until the video generator starts drawing the next frame.
			if (in_render_vbl == 0) begin
				vblctr <= vblctr + 1;
			end
			dma_run <= 0;
			in_render_vbl <= 1;
			//Rewind the tile layers: the first line starts at 0, the second at one row increment.
			tilea_linestart_x <= tilea_rowinc_x;
			tilea_linestart_y <= tilea_rowinc_y;
			tilea_xb <= 0;
			tilea_yb <= 0;
			tileb_linestart_x <= tileb_rowinc_x;
			tileb_linestart_y <= tileb_rowinc_y;
			tileb_xb <= 0;
			tileb_yb <= 0;
			if (next_field) begin
				write_vid_addr_next <= 0;
				dma_start_addr <= fb_addr;
			end else begin
				//Not yet, keep idling
			end
		end else if (write_vid_addr[10:9] != curr_vid_addr[10:9] || preload) begin
			in_render_vbl <= 0;
			//If we're here, there is room in the line memory to write a new line into.
			dma_run <= layer_en[0];
			//Advance the pixel pipeline unless the copper owns the bus or we are at the first
			//pixel of a 32-bit framebuffer word (8 pixels at 4bpp, 4 pixels at 8bpp) and the
			//DMA reader has no data yet. The word-boundary test uses the same pixel-index bits
			//as the new-word request below, so every fetched word is waited for.
			if (!copper_halts_gfx && (dma_ready || (fb_is_8bit==0 && write_vid_addr[2:0]!=0) || (fb_is_8bit==1 && write_vid_addr[1:0]!=0) || layer_en[0]==0)) begin
				if (((fb_is_8bit==0 && write_vid_addr[2:0] == 7) || (fb_is_8bit==1 && write_vid_addr[1:0]==3)) && cycle==3) begin
					//We need a new word. Request it here, on cycle 3 of the last pixel of
					//the current word, because:
					// dma_do_read actually goes high next cycle
					// correct data will get returned next next cycle
					dma_do_read <= 1;
				end
				alphamixer_out <= alphamixer_out_cur;
				cycle <= cycle + 1;
				if (cycle==0) begin
					//Pick this pixel out of the current 32-bit word. Only the pixel-index
					//bits that fit inside one word are used, so the part-select can never
					//run past dma_data[31].
					if (fb_is_8bit) begin
						fb_pixel <= dma_data[vid_xpos[1:0]*8+:8];
					end else begin
						fb_pixel <= {4'h0, dma_data[vid_xpos[2:0]*4+:4]};
					end
				end else if (cycle==1) begin
					tileb_xb <= tileb_xb + tileb_colinc_x;
					tileb_yb <= tileb_yb + tileb_colinc_y;
				end else if (cycle==3) begin
					//Move to the next pixel
					vid_wen <= 1;
					if (write_vid_addr[8:0]==479) begin
						dma_run <= 0;
						//Shaos moved it here from below to fix even lines ignoring
						dma_start_addr <= dma_start_addr + (fb_is_8bit?pitch:pitch/2);
					end
					if (write_vid_addr[8:0]>479) begin
						//next line
						write_vid_addr_next[19:9] <= write_vid_addr_next[19:9] + 'h1;
						write_vid_addr_next[8:0] <= 0;
						//prepare dma for next address
						dma_run <= 1;
						tilea_xb <= tilea_linestart_x;
						tilea_yb <= tilea_linestart_y;
						tilea_linestart_x <= tilea_linestart_x + tilea_rowinc_x;
						tilea_linestart_y <= tilea_linestart_y + tilea_rowinc_y;
						tileb_xb <= tileb_linestart_x;
						tileb_yb <= tileb_linestart_y;
						tileb_linestart_x <= tileb_linestart_x + tileb_rowinc_x;
						tileb_linestart_y <= tileb_linestart_y + tileb_rowinc_y;
					end else begin
						write_vid_addr_next <= write_vid_addr_next + 'h1;
						tilea_xb <= tilea_xb + tilea_colinc_x;
						tilea_yb <= tilea_yb + tilea_colinc_y;
					end
				end
			end else begin
				//waiting for dma to have something
			end
		end else begin
			//wait for next line
			dma_run <= 0;
		end
	end
end
endmodule