; Tiled background management routines
; A "tiled" background is a background image made of rectangular bitmaps.
; The tiles used by this module are meant to look square on screen;
; they are stored as 32x26 pixel bitmaps because of the different
; aspect ratios involved
; ( 4/3 for the monitor, 320/200 for the pixel grid, and another one for
;   the maximum image on the monitor).
;
; I am used to calling these 32x26 tiles "patterns": I am a native Italian
; speaker who started designing games in high school and had never heard
; the term "tiled background" before reading it on rec.games.programmer.
; So, when I reinvented the wheel I had to give it a name :] .

       .386P                    ; accept the 386 instruction set, privileged opcodes included
       
code32 segment para public use32        ; 32-bit segment holding both code and data of this module
       assume cs:code32,ds:code32       ; variables below are addressed through ds = code32
       
; TWIDTH, THEIGHT, PXWIDTH, TILESIZE, XTILES, _RowStart and _ActiveBase are
; not defined in this file; presumably they come from this include - verify.
include 386video.inc

PXTILE  = (TWIDTH)              ; tile width, same value as TWIDTH
MAPITEM = 8                     ; size in bytes of one tile-map entry
HALFMAPITEM = (MAPITEM/2)       ; half a map entry; not referenced in this file
HALFXTILE = (PXTILE/2)          ; half the tile width; not referenced in this file

; _GetPat
; _PutPat

; 1 nudget     = 4  pixels (plane aligned, one pixel for every plane)
; 1 big nudget = 16 pixels (plane aligned, four nudgets)

; PAT format:
; 2x26 dwords for each of the 4 planes.
; Given a plane, its 52 dwords have to be blitted as follows:
;         0  1
;         2  3
;         4  5
;        ..  ..
;        48  49
;        50  51
; As you can see, a "pattern" is just a "picture" of size 32x26
; with optimized routines to blit it.
; N.B. The standard name for files of patterns is *.PTF

        public _GetPat

; _GetPat - copy one tile from a screen buffer into a pattern buffer.
; in:
;   edi = destination pattern buffer
;   esi = base address of the source screen buffer
;   eax = x in pixels, added to esi unscaled
;   edx = y in pixels, used as an index into the _RowStart dword table
; out:
;   edi = entry edi + TILESIZE (position right after the stored pattern)
;   all other general registers are restored by popad; flags are modified
; The copy uses movsd and does not touch the direction flag, so it relies
; on the caller leaving DF clear.
_GetPat:
        pushad
        add     esi,[edx*4+_RowStart]   ; offset of screen row y
        add     esi,eax                 ; plus the x offset

        mov     ebp,(PXWIDTH-PXTILE)    ; source advance from the end of one tile row to the next
        mov     ecx,THEIGHT             ; rows left to read
@getrow:
        rept    8                       ; 8 dwords = 32 bytes = one tile row
        movsd
        endm
        add     esi,ebp                 ; next screen row, same column
        dec     ecx
        jnz     @getrow

        popad
        add     edi,TILESIZE            ; edi = position AFTER the pattern just read
        ret


; patboost - emit the code that copies one 32-byte tile row.
; Expects esi -> source row, edi -> destination row and ebp = amount to add
; to edi after the row has been copied. Emits 8 movsd followed by the
; destination adjustment; esi is left right after the copied row.
patboost macro
        rept    8
        movsd
        endm
        add     edi,ebp
        endm
        

; _PutPat - write a pattern to the screen with a fully unrolled copy.
; The original author's rationale: the CPU-memory interface is the limit
; here, movsd is a small opcode, and avoiding the loop keeps the pipeline
; filled even on 386s.
; in:
;   edi = base address of the destination screen buffer
;   esi = pattern source
;   eax = horizontal offset, added to edi unscaled (the original notes call
;         it "x in nudgets"; NOTE(review): _GetPat documents the same kind
;         of value as pixels - confirm the unit)
;   edx = y in pixels, used as an index into the _RowStart dword table
; out:
;   every general register is restored by popad; flags are modified
; The direction flag is not touched; the copy relies on DF being clear.

        public _PutPat
_PutPat:
        pushad
        add     edi,[edx*4+_RowStart]   ; offset of screen row y
        add     edi,eax                 ; plus the horizontal offset

        mov     ebp,(PXWIDTH-TWIDTH)    ; gap used by patboost to reach the next screen row
@punup:
        rept    25                      ; tile rows 0..24, each followed by the row gap
        patboost
        endm
        rept    8                       ; tile row 25: no gap needed after the last row
        movsd
        endm
        popad
        ret
     
; Put "Halved" pattern to screen
; Useful to show tiles on the "scoreboard" or to display a "zoomed" map
   
; takka - shrink four source bytes to two destination bytes.
; Loads a dword from [esi] and stores a word at [edi]; the stored word holds
; source bytes 0 and 2, bytes 1 and 3 are dropped.
; Clobbers eax and the flags; lodsd/stosw move esi by 4 and edi by 2.
takka macro
        lodsd                   ; eax = b3:b2:b1:b0
        shl ax,8                ; eax = b3:b2:b0:00
        shr eax,8               ; eax = 00:b3:b2:b0, so ax = b2:b0
        stosw                   ; memory receives b0, then b2
        endm
        
; hboost - emit the code for one row of a half-size pattern.
; Eight takka expansions turn one 32-byte source row into 16 destination
; bytes (every second pixel is kept). The following source row is then
; skipped, so consecutive hboost expansions sample every second tile row.
; Without that skip the 13 rows drawn by _PutHPat would be the top 13 rows
; of the tile, i.e. only the upper half of the picture squeezed
; horizontally, instead of the whole tile at half size.
; Expects esi -> source row, edi -> destination row and ebp = amount to add
; to edi after the 16 bytes have been written.
; Clobbers eax and the flags.
hboost macro
        rept    8
        takka
        endm
        add     esi,TWIDTH      ; drop the odd source row (vertical halving)
        add     edi,ebp
        endm
        
        public _PutHPat

; _PutHPat - draw a reduced ("halved") pattern on the screen.
; Emits 13 hboost rows; each one writes 16 bytes and then moves edi to the
; same column of the next screen row. How the source pointer moves from one
; row to the next is defined by the hboost macro.
; in:
;   edi = base address of the destination screen buffer
;   esi = pattern source
;   eax = horizontal offset, added to edi unscaled (called "x in nudgets"
;         in the original notes; NOTE(review): confirm the unit)
;   edx = y in pixels, used as an index into the _RowStart dword table
; out:
;   every general register is restored by popad; flags are modified
_PutHPat:
        pushad
        add     edi,[edx*4+_RowStart]   ; offset of screen row y
        add     edi,eax                 ; plus the horizontal offset

        mov     ebp,(PXWIDTH-(TWIDTH/2)) ; gap after a half-width row
@hpunup:
        rept    13                      ; 13 output rows
        hboost
        endm
        popad
        ret
        

; UPLEFT_TIL - blit the partial tile in the top-left corner of the page.
; Copies THEIGHT-yroll rows of TWIDTH-xrock bytes, taken from the tile whose
; bitmap address is stored at [stilt], starting at tile row yroll and tile
; column xrock, to the page origin at _ActiveBase.
; Reads xrock, yroll, stilt and _ActiveBase.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags.
; The loop label is declared LOCAL so the macro can be expanded more than
; once and does not add a global symbol to the module.
UPLEFT_TIL macro
        local ulzig
        mov ebp,xrock
        mov edx,TWIDTH
        mov eax,THEIGHT
        mov edi,yroll        
        sub edx,ebp  ; edx = TWIDTH-xrock = bytes per row
        sub eax,edi  ; eax = THEIGHT-yroll = rows to copy
        mov esi,ebp  ; xrock
        mov ebx,stilt ; map start
        shl edi,5   ; yroll*32 = byte offset of tile row yroll
        add esi,[ebx] ; tilebase+xrock
        add esi,edi   ;         +yroll*32
        
        mov edi,_ActiveBase
ulzig:  mov ecx,edx ; blit TWIDTH-xrock dots
        rep movsb
        add edi,PXWIDTH
        add esi,ebp  ; skip the first xrock bytes of the next tile row
        sub edi,edx  ; same column, one screen row down
        dec eax
        jne ulzig      
        
        endm
        
; LEFT_TIL - blit the column of partial tiles along the left page edge.
; For each complete tile row it copies the rightmost TWIDTH-xrock bytes of
; every tile line. With yroll = 0 it draws 7 tiles starting at the page
; origin and at the map entry [stilt]; otherwise it draws 6 tiles, starting
; at screen row THEIGHT-yroll and one map row further down.
; Reads xrock, yroll, stilt, _ActiveBase, _RowStart; uses crick as the
; tile countdown.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags.
; Labels are LOCAL so the macro can be expanded more than once. The
; original dead "mov esi,ebp" was dropped: esi is reloaded from the map
; before its first use.
LEFT_TIL macro
        local lcrick,lzig
        mov ebp,xrock
        mov edx,TWIDTH
        mov eax,THEIGHT
        sub edx,ebp  ; edx = TWIDTH-xrock = bytes per row
        sub eax,yroll ; eax = THEIGHT-yroll
        mov ebx,stilt
        
        mov edi,_ActiveBase
        mov crick,7  ; tiles to draw when yroll = 0
        cmp eax,THEIGHT ; equal only when yroll = 0
        je lcrick
        add edi,[eax*4+_RowStart] ; start below the partial top row
        add ebx,XTILES*MAPITEM    ; and one map row further down
        dec crick                 ; only 6 complete tiles fit
lcrick:        
        mov esi,[ebx] ; tile bitmap
        mov eax,THEIGHT ; lines in a complete tile
        add ebx,XTILES*MAPITEM ; next map row for the following pass
        add esi,ebp  ; start at column xrock of the tile
lzig:   
        mov ecx,edx
        rep movsb
        add edi,PXWIDTH
        add esi,ebp  ; skip the first xrock bytes of the next tile row
        sub edi,edx  ; same column, one screen row down
        dec eax
        jne lzig      
        dec crick
        jne lcrick
        
        endm
        
; DNLEFT_TIL - blit the partial tile in the bottom-left corner of the page.
; Copies the first yroll rows of the tile referenced 7 map rows below
; [stilt], TWIDTH-xrock bytes per row starting at tile column xrock, to
; screen row (THEIGHT-yroll) + 6*THEIGHT.
; Must only be expanded on a path where yroll is not 0: the row loop counts
; yroll down with dec/jne and would wrap around otherwise.
; Reads xrock, yroll, stilt, _ActiveBase, _RowStart.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags.
; The loop label is LOCAL so the macro can be expanded more than once.
DNLEFT_TIL macro
        local dlzig
        mov ebp,xrock
        mov edx,TWIDTH
        mov ecx,THEIGHT
        mov eax,yroll        ; eax = rows to copy
        sub ecx,eax          ; ecx = THEIGHT-yroll
        sub edx,ebp     ; TWIDTH-xrock
        mov edi,[ecx*4+_RowStart]    ; offset of screen row THEIGHT-yroll
        mov esi,ebp
        add edi,PXWIDTH*6*THEIGHT    ; plus six complete tile rows
        mov ebx,stilt
        add ebx,XTILES*MAPITEM*7     ; map entry seven rows down
        add esi,[ebx] ; tilebase+xrock (copy starts at tile row 0)
        add edi,_ActiveBase
dlzig:  mov ecx,edx
        rep movsb
        add edi,PXWIDTH
        add esi,ebp     ; skip the first xrock bytes of the next tile row
        sub edi,edx     ; same column, one screen row down
        dec eax
        jne dlzig      
        endm
        
; UPRIGHT_TIL - blit the partial tile in the top-right corner of the page.
; Copies THEIGHT-yroll rows of xrock bytes, taken from the tile referenced
; 10 map entries after [stilt], starting at tile row yroll and tile column
; 0, to screen column TWIDTH*9 + (TWIDTH-xrock) of the first page row.
; Reads xrock, yroll, stilt and _ActiveBase.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags.
; The loop label is LOCAL so the macro can be expanded more than once.
UPRIGHT_TIL macro
        local urzig
        mov edi,_ActiveBase
        mov ebp,xrock        ; ebp = bytes per row
        mov edx,TWIDTH
        mov eax,THEIGHT
        mov ecx,yroll        
        add edi,TWIDTH*9     ; skip nine complete tile columns
        sub edx,ebp  ; TWIDTH-xrock
        mov ebx,MAPITEM*10
        add edi,edx          ; plus the visible part of the left column
        sub eax,ecx          ; eax = THEIGHT-yroll = rows to copy
        add ebx,stilt        ; map entry ten columns to the right
        shl ecx,5            ; yroll*32 = byte offset of tile row yroll
        mov esi,[ebx] ; tilebase
        add esi,ecx   ;         +yroll*32
urzig:  mov ecx,ebp
        rep movsb
        add edi,PXWIDTH
        add esi,edx          ; skip the rest of the tile row
        sub edi,ebp          ; same column, one screen row down
        dec eax
        jne urzig      
        endm
        
; RIGHT_TIL - blit the column of partial tiles along the right page edge.
; For each complete tile row it copies the leftmost xrock bytes of every
; tile line to screen column TWIDTH*9 + (TWIDTH-xrock). The tiles come from
; the map column 10 entries after [stilt]. With yroll = 0 it draws 7 tiles
; from the first page row; otherwise it draws 6 tiles, starting at screen
; row THEIGHT-yroll and one map row further down.
; Reads xrock, yroll, stilt, _ActiveBase, _RowStart; uses crick as the
; tile countdown.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags.
; Labels are LOCAL so the macro can be expanded more than once. The
; original dead "mov esi,ebp" was dropped: esi is reloaded from the map
; before its first use.
RIGHT_TIL macro
        local rcrick,rzig
        mov ebp,xrock        ; ebp = bytes per row
        mov edx,TWIDTH
        mov edi,_ActiveBase
        mov eax,THEIGHT
        sub edx,ebp  ; TWIDTH-xrock
        sub eax,yroll        ; eax = THEIGHT-yroll
        mov ebx,stilt
        add edi,edx          ; visible part of the left column
        add ebx,(MAPITEM*10) ; map entry ten columns to the right
        mov crick,7          ; tiles to draw when yroll = 0
        add edi,TWIDTH*9     ; plus nine complete tile columns
        cmp eax,THEIGHT      ; equal only when yroll = 0
        je rcrick
        add edi,[eax*4+_RowStart] ; start below the partial top row
        add ebx,(XTILES*MAPITEM)  ; and one map row further down
        dec crick                 ; only 6 complete tiles fit
rcrick:        
        mov esi,[ebx]        ; tile bitmap, column 0
        mov eax,THEIGHT      ; lines in a complete tile
        add ebx,XTILES*MAPITEM ; next map row for the following pass
rzig:   
        mov ecx,ebp
        rep movsb
        add edi,PXWIDTH
        add esi,edx          ; skip the rest of the tile row
        sub edi,ebp          ; same column, one screen row down
        dec eax
        jne rzig      
        dec crick
        jne rcrick

        endm  
        
; DNRIGHT_TIL - blit the partial tile in the bottom-right corner of the page.
; Copies the first yroll rows of the tile referenced 10 map columns to the
; right of and 7 map rows below [stilt], xrock bytes per row from tile
; column 0, to screen column TWIDTH*9 + (TWIDTH-xrock) and screen row
; (THEIGHT-yroll) + 6*THEIGHT.
; Must only be expanded on a path where yroll is not 0: the row loop counts
; yroll down with dec/jne and would wrap around otherwise.
; Reads xrock, yroll, stilt, _ActiveBase, _RowStart.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags.
; The loop label is LOCAL so the macro can be expanded more than once.
DNRIGHT_TIL macro
        local drzig
        mov edi,_ActiveBase
        mov ebp,xrock        ; ebp = bytes per row
        mov edx,TWIDTH
        mov eax,yroll        ; eax = rows to copy
        add edi,TWIDTH*9     ; skip nine complete tile columns
        sub edx,ebp  ; TWIDTH-xrock
        mov ebx,(MAPITEM*10)+(XTILES*MAPITEM*7) ; ten columns right, seven rows down
        add edi,edx          ; plus the visible part of the left column
        mov ecx,THEIGHT
        add ebx,stilt
        sub ecx,eax          ; ecx = THEIGHT-yroll
        add edi,PXWIDTH*THEIGHT*6 ; six complete tile rows
        mov esi,[ebx] ; tilebase (copy starts at tile row 0, column 0)
        add edi,[ecx*4+_RowStart] ; plus the partial top row
drzig:  mov ecx,ebp
        rep movsb
        add edi,PXWIDTH
        add esi,edx          ; skip the rest of the tile row
        sub edi,ebp          ; same column, one screen row down
        dec eax
        jne drzig      
        endm
        
; muvup - emit the code that copies one 32-byte tile row and then moves
; edi to the same column of the next screen row.
; Expects esi -> source row and edi -> destination row; esi is left right
; after the copied row. Emits 8 movsd plus one add (flags are modified).
muvup macro
        rept    8
        movsd
        endm
        add     edi,(PXWIDTH-TWIDTH)
        endm
    
; DNSTRIPE_TIL - blit the partial tile row along the bottom of the page.
; For every complete tile column it copies the first yroll rows of the tile
; found 7 map rows below [stilt] to screen row (THEIGHT-yroll) + 6*THEIGHT.
; With xrock = 0 ten columns are drawn from screen column 0; otherwise nine
; columns are drawn, starting at screen column TWIDTH-xrock and one map
; entry to the right.
; Must only be expanded on a path where yroll is not 0: the row loop counts
; yroll down with dec/jne and would wrap around otherwise.
; Reads xrock, yroll, stilt, _ActiveBase, _RowStart.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags; the push/pop of
; edi is balanced.
; Labels are LOCAL so the macro can be expanded more than once, and the
; column step uses TWIDTH like the rest of the macro instead of a literal 32.
DNSTRIPE_TIL macro
        local dgumba,dszip,dzip
        mov edx,yroll        ; edx = rows to copy per tile
        mov ebp,THEIGHT
        mov edi,_ActiveBase
        sub ebp,edx
        add ebp,THEIGHT*6    ; ebp = first screen row of the stripe
        mov ecx,xrock
        mov ebx,stilt
        mov eax,10           ; complete columns when xrock = 0
        add edi,[ebp*4+ _RowStart]
        test ecx,ecx
        je dgumba
        add ebx,MAPITEM      ; leftmost tile is partial: start one entry right
        add edi,TWIDTH
        dec eax              ; only nine complete columns fit
dgumba:
        sub edi,ecx          ; screen column TWIDTH-xrock (or 0)
        add ebx,XTILES*MAPITEM*7 ; map row seven rows down
dszip:        
        mov esi,[ebx]        ; tile bitmap, from its first row
        add ebx,MAPITEM
        push edi             ; remember the top of this column
        mov ecx,edx
dzip:       
        rept 8               ; one 32-byte tile row
        movsd
        endm
        add edi,(PXWIDTH-TWIDTH)
        dec ecx
        jne dzip
        
        pop edi
        add edi,TWIDTH       ; next tile column
        dec eax
        jne dszip
        endm
            
; UPSTRIPE_TIL - blit the partial tile row along the top of the page.
; For every complete tile column it copies THEIGHT-yroll rows, starting at
; tile row yroll, of the tile referenced by the map row at [stilt], to the
; first page row. With xrock = 0 ten columns are drawn from screen column
; 0; otherwise nine columns are drawn, starting at screen column
; TWIDTH-xrock and one map entry to the right.
; Reads xrock, yroll, stilt and _ActiveBase.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags; the push/pop of
; edi is balanced.
; Labels are LOCAL so the macro can be expanded more than once, and the
; column step uses TWIDTH like the rest of the macro instead of a literal 32.
UPSTRIPE_TIL macro                
        local gumba,uszip,uzip
        mov edi,_ActiveBase
        mov edx,THEIGHT
        mov ebp,yroll
        mov ecx,xrock
        mov ebx,stilt
        mov eax,10           ; complete columns when xrock = 0
        test ecx,ecx
        je gumba
        add ebx,MAPITEM      ; leftmost tile is partial: start one entry right
        add edi,TWIDTH
        dec eax              ; only nine complete columns fit
gumba:
        sub edx,ebp          ; edx = THEIGHT-yroll = rows to copy per tile
        sub edi,ecx          ; screen column TWIDTH-xrock (or 0)
        shl ebp,5 ;line ofs: yroll*32
uszip:        
        mov esi,[ebx]        ; tile bitmap
        add ebx,MAPITEM
        add esi,ebp          ; start at tile row yroll
        push edi             ; remember the top of this column
        mov ecx,edx
uzip:       
        rept 8               ; one 32-byte tile row
        movsd
        endm
        add edi,(PXWIDTH-TWIDTH)
        dec ecx
        jne uzip
        
        pop edi
        add edi,TWIDTH       ; next tile column
        dec eax
        jne uszip
        endm
        
; FULL_TIL - blit every tile that is completely visible on the page.
; Draws a grid of whole tiles with an unrolled 26-row copy per tile
; (25 muvup rows plus a final row without the screen-row advance).
;   columns: 10 starting at screen column 0 when xrock = 0, otherwise 9
;            starting at screen column TWIDTH-xrock and one map entry right;
;   rows:    7 starting at the first page row when yroll = 0, otherwise 6
;            starting at screen row THEIGHT-yroll and one map row down.
; Reads xrock, yroll, stilt, _ActiveBase, _RowStart.
; Clobbers eax, ebx, ecx, edx, esi, edi, ebp and the flags; the pushes of
; ebx and edi are balanced by the pops at the end of each tile row.
; Labels are LOCAL so the macro can be expanded more than once.
FULL_TIL macro                
        local fgumba,fuszip,fuzip
        mov edi,_ActiveBase
        mov ecx,THEIGHT
        mov ebp,yroll
        mov edx,10 ; max complete tile columns on screen
        sub ecx,ebp       ; ecx = THEIGHT-yroll
        mov eax,xrock
        mov ebx,stilt
        test eax,eax      ; test xrock
        je fgumba         ;
        add edi,TWIDTH    ; leftmost tile is partial: skip it
        dec edx           ;
        add ebx,MAPITEM   ;
fgumba:
        sub edi,eax ; xrock
        
        mov eax,7 ; max complete tile rows
        test ebp,ebp                ; test yroll
        je fuszip                   ;
        dec eax                     ; top tile row is partial: skip it
        add edi,[ecx*4+ _RowStart]  ;
        add ebx,XTILES*MAPITEM      ; 
fuszip: push ebx       
        push edi
        mov ecx,edx ; line in tiles
fuzip:  mov esi,[ebx]
        add ebx,MAPITEM 
        rept 25           ; tile rows 0..24, each ends on the next screen row
        muvup
        endm
        rept 8            ; tile row 25, edi ends 32 bytes past its start
        movsd
        endm
        sub edi,(PXWIDTH*(THEIGHT-1)) ; back up to the top row, next tile column
        dec ecx
        jne fuzip
        
        pop edi
        pop ebx
        add edi,PXWIDTH*THEIGHT  ; next tile row on screen
        add ebx,XTILES*MAPITEM   ; next map row
        dec eax
        jne fuszip
        endm
        

; TILE MAP BLITTER
; A tile map is made of 64-bit entries
; and is dimensioned as MAP[YTILES][XTILES].
; The first (lower) dword of each 64-bit entry is a code32-relative offset
; pointing to the tile bitmap (pattern),
; while the upper dword contains various info
; (flags, bitfield or a near pointer to an extra tile descriptor).
; The _PutMap routine only looks at the lower dword.
; 
; You may wonder why I imposed these XTILES,YTILES limits.
; The reason is that while the current _PutMap is quite dumb, a future release 
; will blit just the "changed" screen areas and so will be A LOT faster,
; but to do this it needs to "scroll&pan" on VRAM, and that is where the
; limits on map size come from.

; WARNING!
;       This code is highly optimized with instruction flow interleaving
;       and "look ahead" optimizations; it runs fast on 386s and takes
;       advantage of the superior capabilities of later Intel CPUs.
;       This of course makes it harder to debug or optimize further.
        
        ; Working variables shared by _PutMap and the *_TIL macros.
        ; They are stored inside code32, which is also the assumed ds.
        align dword
xrock dd 0  ; x and 31, as stored by _PutMap: pixel offset inside the first tile column
yroll dd 0  ; how many lines we rolled down ? (y mod THEIGHT, as stored by _PutMap)
stilt dd 0  ; map "hook": address of the map entry of the first visible tile
crick dd 0  ; aux counter (tile countdown in LEFT_TIL and RIGHT_TIL)

        align byte
        
        public _PutMap

; _PutMap - draw the tiled background for a given map position.
; in:
;   esi = tile map (8-byte entries, XTILES entries per map row)
;   eax = x in pixels  (0..VXWIDTH)
;   edx = y in pixels  (0..VYHEIGHT)
; out:
;   every general register is restored by popad; flags are modified
;   xrock, yroll, stilt and crick are overwritten
;
; The position is split into a tile coordinate and an offset inside the
; tile (xrock, yroll). The address of the first visible map entry is stored
; in stilt, then the page at _ActiveBase is filled by the *_TIL macros:
; corners and left/right edges when xrock is not 0, top/bottom stripes when
; yroll is not 0, and finally all the complete tiles.
;
; Notes kept from the original author:
;  - after blitting the map you have to set _DisplayStart at x,y and flip
;    to this page if you want to make it visible;
;  - when scrolling, do not move more than 4 lines vertically or 320 pixels
;    horizontally between calls (reserved for a future "smart" blitter).
; The original header also calls the routine "optimized for CPL3" by
; limiting plane switches; no plane-switch code is part of this routine.
_PutMap:
        pushad

        ; --- horizontal: tile column and pixel offset inside it ---
        mov     ebp,eax
        and     eax,31                  ; x mod 32
        shr     ebp,5                   ; x div 32 = first visible map column
        mov     xrock,eax

        ; --- vertical: tile row and line offset inside it ---
        mov     eax,edx
        xor     edx,edx                 ; edx:eax = y
        mov     ecx,THEIGHT
        div     ecx                     ; eax = y div THEIGHT, edx = y mod THEIGHT
        mov     yroll,edx

        ; --- address of the first visible map entry ---
        lea     esi,[esi+ebp*MAPITEM]           ; column offset inside a map row
        imul    eax,eax,(XTILES*MAPITEM)        ; byte offset of the map row
        add     esi,eax
        mov     stilt,esi                       ; store map "hook"

        ; --- partial tiles on the left and right edges ---
        cmp     xrock,0
        je      norock

        cmp     yroll,0
        je      noroll
        UPLEFT_TIL
        UPRIGHT_TIL
        DNLEFT_TIL
        DNRIGHT_TIL
noroll:
        LEFT_TIL
        RIGHT_TIL

        ; --- partial tiles on the top and bottom edges ---
norock:
        cmp     yroll,0
        je      onlyfull
        UPSTRIPE_TIL
        DNSTRIPE_TIL

        ; --- every completely visible tile ---
onlyfull:
        FULL_TIL

        popad
        ret

code32 ends

 END
