// Header File
// Created 11/11/2005; 9:26:49 AM

//,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`
//FUNCTIONS

// Plane pointers used by every drawing routine in this file.
// Install_Planes fills entries 0-7; entry 8 is left untouched by it.
unsigned char* Planes[9];
// Pointer to rows of 27 unsigned shorts, taken from entry 8 of the table
// handed to Install_Planes.
unsigned short (*Graphics16)[27];	//Would need to be passed to 7 fxns (Nov 2008)

// Install_Planes - remember the buffers supplied by the executable.
//   Planes_From_EXE: table of 9 pointers. The first 8 are copied into Planes;
//                    the 9th is stored (re-typed) in Graphics16.
void Install_Planes(unsigned char* Planes_From_EXE[9])
{
	// Size the copy from the element type instead of a literal byte count,
	// so it stays 8 pointers whatever the pointer width is.
	memcpy(Planes,Planes_From_EXE,8*sizeof(Planes[0]));
	Graphics16=(unsigned short (*)[27])Planes_From_EXE[8];
}

// FastCopyScreen_M - copy a 160x99 pixel area from src to dest.
//   src, dest: first byte of the area in each buffer. Both buffers must be
//              240 pixels (30 bytes) wide; 20 bytes of every row are copied.
// Each movem.l pair moves 5 longs (d1-d5 = 20 bytes = 160 pixels). The body
// is unrolled 11 rows deep and repeated 9 times (9 x 11 = 99 rows).
void __attribute__((__regparm__(2))) FastCopyScreen_M(void* src,void* dest)
{
    asm volatile (
		    "moveq #8,%%d0; 0: \n"																											//d0 = 8: dbf runs the body 9 times
				"movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(%1); lea (%0,10),%0 \n"			//row 1: src += 20 (movem) + 10 (lea) = 30
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(30,%1); lea (%0,10),%0 \n"		//row 2
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(60,%1); lea (%0,10),%0 \n"		//row 3
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(90,%1); lea (%0,10),%0 \n"		//row 4
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(120,%1); lea (%0,10),%0 \n"	//row 5
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(150,%1); lea (%0,10),%0 \n"	//row 6
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(180,%1); lea (%0,10),%0 \n"	//row 7
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(210,%1); lea (%0,10),%0 \n"	//row 8
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(240,%1); lea (%0,10),%0 \n"	//row 9
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(270,%1); lea (%0,10),%0 \n"	//row 10
		    "movem.l  (%0)+,%%d1-%%d5; movem.l  %%d1-%%d5,(300,%1); lea (%0,10),%0 \n"	//row 11
		    "lea (%1,330),%1; dbf %%d0,0b \n"																						//move dest down 11 rows (src already advanced), repeat
				: "=a" (src), "=a" (dest)
				: "0"  (src),  "1" (dest)
				// Every register the template writes must be listed (d0 is the loop
				// counter, d1-d5 carry the data); "memory" because the asm stores
				// through dest, "cc" because the instructions alter the flags.
				: "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc");
}
    

//Optimizations possible for the scroll functions: 1) Don't pass OS_Plane. 2) Pass only scroll_index, buffer (MS/BG) and h, so that the edges can be refreshed inside the scroll function.
//Note that these optimizations are hardly necessary and difficult to do: 1) hardly improves speed, and 2) creates the issue of looping over two planes inside the scroll function.
//Scroll left/right would need to handle 176 pixels (160+16), i.e. 22 bytes per line.
//Scroll up/down would handle 100 pixels/lines (99+1).
//Up/down borders get refreshed every scroll.
//Left/right borders get refreshed every 10 scrolls.

// ScrollLeft176x99_M - shift a 176x99 pixel area one bit to the left.
//   buffer: first byte of the area on a plane that is 240 pixels (30 bytes) wide.
// Every row is handled as 11 words (22 bytes), walked from the rightmost word
// to the leftmost so that the bit shifted out of one word is rotated into the
// word on its left through the X flag. Rows are processed bottom to top.
void __attribute__((__regparm__(1))) ScrollLeft176x99_M(unsigned short* buffer)
{
    asm volatile (
    		"lea 2962(%0),%0; moveq #98,%%d0; 0:\n"				//2962 = 98*30 + 22: just past the 22nd byte of the last row
        "lsl.w  -(%0); roxl.w -(%0); roxl.w -(%0) \n"	//Words 11,10,9 of the row (lsl starts the chain with a 0 bit)
        "roxl.w -(%0); roxl.w -(%0); roxl.w -(%0) \n"	//Words 8,7,6
        "roxl.w -(%0); roxl.w -(%0); roxl.w -(%0) \n"	//Words 5,4,3
        "roxl.w -(%0); roxl.w -(%0) \n"	 							//Words 2,1
        "lea -8(%0),%0; dbf %%d0,0b"									//Back 8 bytes: byte 22 of the row above. Loop 99 times.
        : "=a" (buffer)
        : "0"  (buffer)
        // "memory": the shifts rewrite the plane in place; "cc": flags are altered.
        : "d0", "memory", "cc");
}

// ScrollRight176x99_M - shift a 176x99 pixel area one bit to the right.
//   buffer: pointer into a plane that is 240 pixels (30 bytes) wide. The
//           shifted area starts ONE WORD BEFORE buffer (buffer-2 bytes) and
//           spans 11 words per row.
// Words are walked left to right so that the bit shifted out of one word is
// rotated into the word on its right through the X flag. Rows are processed
// top to bottom.
void __attribute__((__regparm__(1))) ScrollRight176x99_M(unsigned short* buffer)
{
    asm volatile (
    		"subq #2,%0; moveq #98,%%d0; 0: \n"						//Start 2 bytes before buffer; d0 = 98 so dbf runs 99 rows
        "lsr.w  (%0)+; roxr.w (%0)+; roxr.w (%0)+ \n"	//Words 1,2,3 of the row (lsr starts the chain with a 0 bit)
        "roxr.w (%0)+; roxr.w (%0)+; roxr.w (%0)+ \n"	//Words 4,5,6
        "roxr.w (%0)+; roxr.w (%0)+; roxr.w (%0)+ \n"	//Words 7,8,9
        "roxr.w (%0)+; roxr.w (%0)+ \n" 							//Words 10,11
        "lea 8(%0),%0; dbf %%d0,0b"										//22 + 8 = 30 bytes: same column of the next row. Loop 99 times.
        : "=a" (buffer)
        : "0"  (buffer)
        // "memory": the shifts rewrite the plane in place; "cc": flags are altered.
        : "d0", "memory", "cc");
}

// ScrollUp160_M - move a 160-pixel-wide area up by one row.
//   dest: first byte of the top row; rows are 30 bytes apart.
//   h:    number of rows in the area. h-1 rows are copied: each row receives
//         the 20 bytes of the row below it. The last row is left as it was.
void __attribute__((__regparm__(1))) ScrollUp160_M(unsigned short* dest,unsigned short h)
{
    // With fewer than 2 rows there is nothing to copy, and "subq #2" would
    // wrap the dbf counter into a ~65536-row copy.
    if(h<2)
        return;

    asm volatile (
    		"lea 30(%0),%%a1; subq #2,%1; 0: \n"																//a1=dest+30 (row below), h-=2 (one row less, one for dbf convention)
        "move.l (%%a1)+,(%0)+;move.l (%%a1)+,(%0)+;move.l (%%a1)+,(%0)+\n"	//Pixels 1-96
        "move.l (%%a1)+,(%0)+;move.l (%%a1)+,(%0)+\n"												//Pixels 97-160
        "lea 10(%%a1),%%a1; lea 10(%0),%0; dbf %1,0b\n"											//20 + 10 = 30: move both ptrs to the next row
        : "=a" (dest), "=d" (h)
        : "0" (dest), "1"  (h)
        // "memory": the copy stores through dest; "cc": flags are altered.
        : "a1", "memory", "cc");
}

// ScrollDown160_M - move a 160-pixel-wide area down by one row.
//   dest: first byte of the top row; rows are 30 bytes apart.
//   h:    number of rows in the area. h-1 rows are copied, starting from the
//         bottom: each row receives the 20 bytes of the row above it. The top
//         row is left as it was.
void __attribute__((__regparm__(1))) ScrollDown160_M(unsigned short* dest,unsigned short h)
{
    // With fewer than 2 rows there is nothing to copy, and "subq #2" would
    // wrap the dbf counter into a ~65536-row copy.
    if(h<2)
        return;

    asm volatile (
    		"clr.l %%d1; move.w %1,%%d1; lsl.l #4,%%d1; sub.w %1,%%d1; subq #5,%%d1; add.w %%d1,%%d1 \n"	//d1 = (16h-h-5)*2 = 30h-10 = 30(h-1)+20: end of the last row's 20 bytes
    		"lea (%%d1,%0),%0; lea -30(%0),%%a1; subq #2,%1;	0: \n"																			//Dest+=d1, a1=dest-30, h-=2 (one for loop position, one for dbf convention)
        "move.l -(%%a1),-(%0);move.l -(%%a1),-(%0);move.l -(%%a1),-(%0)\n"														//Pixels 160-65
        "move.l -(%%a1),-(%0);move.l -(%%a1),-(%0)\n"																									//Pixels 64-1
        "lea -10(%0),%0; lea -10(%%a1),%%a1; dbf %1,0b \n"																						//-20 - 10 = -30: move both ptrs to the row above
        : "=a" (dest), "=d" (h)
        : "0"  (dest), "1"  (h)
        // "memory": the copy stores through dest; "cc": flags are altered.
        : "a1", "d1", "memory", "cc");
}

// Sprite16_OR_Mh - OR one 16-bit pattern into h consecutive rows of the
// selected planes.
//   x, y:          pixel position; rows are 30 bytes apart.
//   h:             number of rows to touch.
//   OR_Value:      16-bit pattern, the same for every row.
//   Plane_Bitflag: bit i set => draw on Planes[i]. Bits at or above the number
//                  of entries in Planes are ignored.
void Sprite16_OR_Mh(unsigned short x,unsigned short y,unsigned short h,unsigned short OR_Value,unsigned short Plane_Bitflag)
{
	// Bounded by the array itself so a stray high bit cannot index past it.
	const unsigned short plane_count = sizeof(Planes)/sizeof(Planes[0]);
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const long           offset      = (y<<5)-(y<<1)+((x>>3)&0x1e);
	// The pattern is placed inside a 32-bit window starting at that column.
	const unsigned short shift       = 16-(x&15);
	const long           bits        = (long)(OR_Value)<<shift;
	unsigned short plane;
	unsigned short row;		// same width as h, so the loop ends for any h

	for(plane=0;plane<plane_count;++plane)
	{
		unsigned char* p;

		if(!(Plane_Bitflag&(1<<plane)))
			continue;

		p=Planes[plane]+offset;
		for(row=0;row<h;++row,p+=30)
			*(long*)p|=bits;
	}
}

// Sprite16_AND_Mh - AND one 16-bit pattern into h consecutive rows of the
// selected planes. Pixels outside the 16-bit pattern are left untouched.
//   x, y:          pixel position; rows are 30 bytes apart.
//   h:             number of rows to touch.
//   AND_Value:     16-bit pattern, the same for every row (0 bits clear pixels).
//   Plane_Bitflag: bit i set => draw on Planes[i]. Bits at or above the number
//                  of entries in Planes are ignored.
void Sprite16_AND_Mh(unsigned short x,unsigned short y,unsigned short h,unsigned short AND_Value,unsigned short Plane_Bitflag)
{
	// Bounded by the array itself so a stray high bit cannot index past it.
	const unsigned short plane_count = sizeof(Planes)/sizeof(Planes[0]);
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const long           offset      = (y<<5)-(y<<1)+((x>>3)&0x1e);
	const unsigned short shift       = 16-(x&15);
	// Invert to get the bits to clear, keep exactly 16 of them (independent of
	// the width of int), move them into place, and invert back to a keep-mask.
	const unsigned long  keep        = ~((unsigned long)(unsigned short)~AND_Value<<shift);
	unsigned short plane;
	unsigned short row;		// same width as h, so the loop ends for any h

	for(plane=0;plane<plane_count;++plane)
	{
		unsigned char* p;

		if(!(Plane_Bitflag&(1<<plane)))
			continue;

		p=Planes[plane]+offset;
		for(row=0;row<h;++row,p+=30)
			*(unsigned long*)p&=keep;
	}
}
	
// Replenish_BG_16 - copy pixels from Planes[6]/Planes[7] back into
// Planes[2]/Planes[3] over a 16-pixel-wide, 9-row area.
//   x, y: pixel position; rows are 30 bytes apart.
//   mask: 9 words, one per row. Where a mask bit is 0 the pixel is replaced by
//         the one from plane 6 (resp. 7); where it is 1 the pixel in plane 2
//         (resp. 3) is kept.
void Replenish_BG_16(unsigned char x,unsigned char y,unsigned short* mask)
{
	const unsigned char shift=16-(x&15);
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const unsigned long offset=(y<<5)-(y<<1)+((x>>3)&0x1E);
	unsigned char* src_a=Planes[6]+offset;
	unsigned char* src_b=Planes[7]+offset;
	unsigned char* dst_a=Planes[2]+offset;
	unsigned char* dst_b=Planes[3]+offset;
	unsigned char row;

	for(row=0;row<9;++row)
	{
		// Bits to take from the source planes. The inverted mask is cut back
		// to 16 bits so the result does not depend on the width of int.
		const unsigned long take=(unsigned long)(unsigned short)~mask[row]<<shift;
		const unsigned long from_a=*(unsigned long*)src_a&take;
		const unsigned long from_b=*(unsigned long*)src_b&take;

		*(unsigned long*)dst_a=(*(unsigned long*)dst_a&~take)|from_a;
		*(unsigned long*)dst_b=(*(unsigned long*)dst_b&~take)|from_b;

		src_a+=30;
		src_b+=30;
		dst_a+=30;
		dst_b+=30;
	}
}

// GraySprite16_BLIT_M - draw a 9-row two-plane sprite, replacing what is
// underneath.
//   x, y:          pixel position; rows are 30 bytes apart.
//   Sprite:        18 words: 9 rows for the first plane of a pair, followed by
//                  9 rows for the second plane.
//   Plane_Bitflag: bit i set => draw on the pair Planes[2i] / Planes[2i+1].
//                  Bits that do not name a complete pair inside Planes are
//                  ignored.
// Before the sprite row is OR-ed in, the 10 leftmost pixels of its 16-bit
// slot (mask 0xFFC0) are cleared.
void GraySprite16_BLIT_M(unsigned short x,unsigned short y,unsigned short* Sprite,unsigned short Plane_Bitflag)
{
	// Only pairs that lie completely inside Planes may be addressed.
	const unsigned short pair_count = (sizeof(Planes)/sizeof(Planes[0]))/2;
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const unsigned long  offset     = (y<<5)-(y<<1)+((x>>3)&0x1e);
	const unsigned short shift      = 16-(x&15);
	const unsigned long  clear      = ~((unsigned long)0x0000FFC0<<shift);
	unsigned short pair;
	unsigned char  row;

	for(pair=0;pair<pair_count;++pair)
	{
		unsigned char* p1;
		unsigned char* p2;

		if(!(Plane_Bitflag&(1<<pair)))
			continue;

		p1=Planes[pair<<1]+offset;
		p2=Planes[(pair<<1)+1]+offset;
		for(row=0;row<9;++row,p1+=30,p2+=30)
		{
			*(long*)p1&=clear;
			*(long*)p2&=clear;
			*(long*)p1|=(long)(Sprite[row])<<shift;
			*(long*)p2|=(long)(Sprite[row+9])<<shift;
		}
	}
}

// GraySprite32_BLIT_Mh - draw an h-row, 32-pixel-wide two-plane sprite,
// replacing what is underneath.
//   x, y:          pixel position; rows are 30 bytes apart.
//   h:             number of rows; nothing is drawn when h <= 0.
//   Sprite:        2*h longs: h rows for the first plane of a pair, followed
//                  by h rows for the second plane.
//   Plane_Bitflag: bit i set => draw on the pair Planes[2i] / Planes[2i+1].
//                  Bits that do not name a complete pair inside Planes are
//                  ignored.
// Each row is written into two consecutive longs: the first receives the
// sprite shifted right by (x&15), the second the bits that were shifted out.
void GraySprite32_BLIT_Mh(unsigned short x,unsigned short y,short h,unsigned long* Sprite,unsigned short Plane_Bitflag)
{
	// Only pairs that lie completely inside Planes may be addressed.
	const unsigned short pair_count = (sizeof(Planes)/sizeof(Planes[0]))/2;
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const unsigned long  offset     = (y<<5)-(y<<1)+((x>>3)&0x1e);
	const unsigned short cnt        = x&15;
	const unsigned short ccnt       = 32-cnt;
	// Bits of the first long that lie left of the sprite and must survive.
	const unsigned long  keep_first = ~(0xFFFFFFFF>>cnt);
	// Bits of the second long that lie right of the sprite. When cnt is 0 the
	// sprite does not reach the second long at all, and computing this mask
	// would need a shift by 32, so the second long is skipped in that case.
	const unsigned long  keep_second = cnt ? ~(0xFFFFFFFF<<ccnt) : 0;
	unsigned short pair;
	short          row;		// same type as h, so the loop ends for any h

	for(pair=0;pair<pair_count;++pair)
	{
		unsigned char* p1;
		unsigned char* p2;

		if(!(Plane_Bitflag&(1<<pair)))
			continue;

		p1=Planes[pair<<1]+offset;
		p2=Planes[(pair<<1)+1]+offset;

		for(row=0;row<h;++row,p1+=30,p2+=30)
		{
			unsigned long data;

			// First plane of the pair.
			data=Sprite[row];
			*(long*)p1&=keep_first;
			*(long*)p1|=(data>>cnt);
			if(cnt)
			{
				*(long*)(p1+4)&=keep_second;
				*(long*)(p1+4)|=(data<<ccnt);
			}

			// Second plane of the pair; its rows start h longs further on.
			data=Sprite[row+h];
			*(long*)p2&=keep_first;
			*(long*)p2|=(data>>cnt);
			if(cnt)
			{
				*(long*)(p2+4)&=keep_second;
				*(long*)(p2+4)|=(data<<ccnt);
			}
		}
	}
}

// GraySprite16_MASK_M - draw a 9-row, 16-pixel-wide masked two-plane sprite.
//   x, y:          pixel position; rows are 30 bytes apart.
//   Sprite:        27 words: 9 mask rows, then 9 rows for the first plane of a
//                  pair, then 9 rows for the second plane. The same mask row
//                  is applied to both planes; a 0 mask bit clears the pixel
//                  before the sprite row is OR-ed in.
//   Plane_Bitflag: bit i set => draw on the pair Planes[2i] / Planes[2i+1].
//                  Bits that do not name a complete pair inside Planes are
//                  ignored.
void GraySprite16_MASK_M(unsigned char x,unsigned char y,unsigned short* Sprite,unsigned short Plane_Bitflag)
{
	// Only pairs that lie completely inside Planes may be addressed.
	const unsigned short pair_count = (sizeof(Planes)/sizeof(Planes[0]))/2;
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const long           offset     = (y<<5)-(y<<1)+((x>>3)&0x1e);
	const unsigned short shift      = 16-(x&15);
	unsigned short pair;
	unsigned char  row;

	for(pair=0;pair<pair_count;++pair)
	{
		unsigned char* p1;
		unsigned char* p2;

		if(!(Plane_Bitflag&(1<<pair)))
			continue;

		p1=Planes[pair<<1]+offset;
		p2=Planes[(pair<<1)+1]+offset;
		for(row=0;row<9;++row,p1+=30,p2+=30)
		{
			// Inverted mask cut back to 16 bits (independent of the width of
			// int), moved into place, then inverted again into a keep-mask.
			const unsigned long keep=~((unsigned long)(unsigned short)~Sprite[row]<<shift);

			*(unsigned long*)p1&=keep;
			*(unsigned long*)p2&=keep;
			*(long*)p1|=(long)(Sprite[row+9])<<shift;
			*(long*)p2|=(long)(Sprite[row+18])<<shift;
		}
	}
}

// GraySprite16_MASK_Mh - draw the first h rows of a 16-pixel-wide masked
// two-plane sprite.
//   x, y:          pixel position; rows are 30 bytes apart.
//   h:             number of rows to draw.
//   Sprite:        mask rows start at Sprite[0], first-plane rows at Sprite[9],
//                  second-plane rows at Sprite[18]. The same mask row is
//                  applied to both planes; a 0 mask bit clears the pixel
//                  before the sprite row is OR-ed in.
//   Plane_Bitflag: bit i set => draw on the pair Planes[2i] / Planes[2i+1].
//                  Bits that do not name a complete pair inside Planes are
//                  ignored.
// NOTE(review): the 9/18 offsets do not follow h, so h > 9 reads mask rows
// from the first-plane data. Presumably callers only pass h <= 9 - confirm.
void GraySprite16_MASK_Mh(unsigned char x,unsigned char y,unsigned char h,unsigned short* Sprite,unsigned short Plane_Bitflag)
{
	// Only pairs that lie completely inside Planes may be addressed.
	const unsigned short pair_count = (sizeof(Planes)/sizeof(Planes[0]))/2;
	// y*30 (as y*32 - y*2) plus the even byte column that contains x.
	const long           offset     = (y<<5)-(y<<1)+((x>>3)&0x1e);
	const unsigned short shift      = 16-(x&15);
	unsigned short pair;
	unsigned char  row;

	for(pair=0;pair<pair_count;++pair)
	{
		unsigned char* p1;
		unsigned char* p2;

		if(!(Plane_Bitflag&(1<<pair)))
			continue;

		p1=Planes[pair<<1]+offset;
		p2=Planes[(pair<<1)+1]+offset;
		for(row=0;row<h;++row,p1+=30,p2+=30)
		{
			// Inverted mask cut back to 16 bits (independent of the width of
			// int), moved into place, then inverted again into a keep-mask.
			const unsigned long keep=~((unsigned long)(unsigned short)~Sprite[row]<<shift);

			*(unsigned long*)p1&=keep;
			*(unsigned long*)p2&=keep;
			*(long*)p1|=(long)(Sprite[row+9])<<shift;
			*(long*)p2|=(long)(Sprite[row+18])<<shift;
		}
	}
}