/* -*- mode: C; c-basic-offset:8 -*- */
/*
 * GLX Hardware Device Driver for Matrox Millennium G200
 * Copyright (C) 1999 Wittawat Yamwong
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 *    original by Wittawat Yamwong <Wittawat.Yamwong@stud.uni-hannover.de>
 *	9/20/99 rewrite by John Carmack <johnc@idsoftware.com>
 */

/* $Id: mgacnvtex_mmx.c,v 1.5 2000/03/17 11:00:43 johnc Exp $ */

#include <stdlib.h>
#include <stdio.h>

#include <GL/gl.h>

#include "xsmesaP.h"

#include "mgalib.h"
#include "mgatex.h"


#if defined(USE_MMX_ASM)
#include "mmx.h"
#endif

#include "X86/common_x86asm.h"

#if defined(USE_MMX_ASM)

/*
 * 64-bit mask constants used by mgaConvertTexture_mmx() below.
 *
 * Byte positions in these comments count from the least significant byte
 * (byte 0) of the quadword, which is the lowest-addressed byte when the
 * value is loaded from memory with movq on x86.
 *
 * NOTE(review): mmx_t is declared in mmx.h, which is not visible here; the
 * (mmx_t) cast on an integer presumably relies on the GCC cast-to-union
 * extension -- confirm before porting to another compiler.
 */

/* Selects bytes 1, 3, 5 and 7 (the odd bytes). */
static mmx_t and2   			=(mmx_t)(long long)0xFF00FF00FF00FF00LL;
/* Selects bytes 0, 2, 4 and 6 (the even bytes). */
static mmx_t and3   			=(mmx_t)(long long)0x00FF00FF00FF00FFLL;
/* Selects bytes 0..2: the first packed 3-byte RGB texel of a load. */
static mmx_t and4   			=(mmx_t)(long long)0x0000000000FFFFFFLL;
/* Selects bytes 3..5: the second packed 3-byte RGB texel of a load. */
static mmx_t and5   			=(mmx_t)(long long)0x0000FFFFFF000000LL;
/* 0xFF in byte 3 of each 32-bit half; OR-ed in by the RGB -> 32-bit path. */
static mmx_t or1    			=(mmx_t)(long long)0xFF000000FF000000LL;
/* High nibble of byte 0 of each 32-bit half (red in the RGBA -> 16-bit path). */
static mmx_t R4444    		=(mmx_t)(long long)0x000000F0000000F0LL;
/* High nibble of bytes 1 and 3 of each 32-bit half (green and alpha). */
static mmx_t GA4444    		=(mmx_t)(long long)0xF000F000F000F000LL;
/* High nibble of byte 2 of each 32-bit half (blue). */
static mmx_t B4444    		=(mmx_t)(long long)0x00F0000000F00000LL;
/* Top 5 bits of byte 0 of each 32-bit half (red in the RGB -> 16-bit path). */
static mmx_t R565    		=(mmx_t)(long long)0x000000F8000000F8LL;
/* Top 6 bits of byte 1 of each 32-bit half (green). */
static mmx_t G565    		=(mmx_t)(long long)0x0000FC000000FC00LL;
/* Top 5 bits of byte 2 of each 32-bit half (blue). */
static mmx_t B565    		=(mmx_t)(long long)0x00F8000000F80000LL;

/*
 * mgaConvertTexture_mmx
 *
 * Convert the rectangle (x, y, width, height) of a texture image into the
 * destination texel size given by texelBytes and write the result, row after
 * row with no padding, starting at destPtr.
 *
 *   destPtr     destination buffer, written sequentially
 *   texelBytes  bytes per destination texel: 1, 2 or 4
 *   image       source image; its Format, Width and Data members are read
 *   x, y        position of the rectangle inside the image, in texels
 *   width       rectangle width in texels
 *   height      number of rows to convert
 *
 * Conversions performed:
 *   texelBytes 1: index / intensity / luminance / alpha bytes copied as is
 *   texelBytes 2: GL_RGB  -> 16 bit, red 15..11, green 10..5, blue 4..0
 *                 GL_RGBA -> 16 bit, alpha 15..12, red 11..8, green 7..4,
 *                            blue 3..0
 *                 other formats via MGAPACKCOLOR565 / MGAPACKCOLOR4444
 *   texelBytes 4: GL_RGB / GL_RGBA -> bytes B, G, R, A in memory order
 *                 (alpha forced to 0xFF for GL_RGB)
 *                 other formats via MGAPACKCOLOR8888
 *
 * The inner loops handle several texels per iteration (8 for texelBytes 1,
 * 4 for the 16-bit MMX paths, 2 for the 32-bit MMX paths and the 16-bit
 * scalar paths) and the loop counts and MMX start offsets are computed with
 * right shifts, so any remainder is silently dropped.
 * NOTE(review): callers are presumably expected to pass suitably aligned
 * x / width values -- confirm.
 *
 * NOTE(review): the GL_RGB MMX paths load 8 bytes with movq but consume only
 * 6, so the final load of the image may touch up to 2 bytes beyond the last
 * texel -- confirm that image->Data has enough slack.
 *
 * Register contents in the comments below are listed lowest byte first.
 *
 * An unsupported texelBytes / image->Format combination prints a message
 * and returns without writing anything.
 */
void mgaConvertTexture_mmx( hwUI32 *destPtr, int texelBytes, 
			struct gl_texture_image *image,
			int x, int y, int width, int height ) {
	register int	i, j;
	hwUI8		*src;
	mmx_t		*src_mmx;
	mmx_t		*dest_mmx = (mmx_t *)destPtr;
	int		stride;		/* source units to skip at the end of a row */

	/* FIXME: g400 luminance_alpha internal format */
	switch (texelBytes) {
	case 1:
		switch (image->Format) {
		case GL_COLOR_INDEX:
		case GL_INTENSITY:
		case GL_LUMINANCE:
		case GL_ALPHA:
			/* straight copy, 8 one-byte texels per quadword */
			src_mmx = (mmx_t *)image->Data + (( y * image->Width + x )>>3);
			stride = (image->Width - width) >> 3;
			for ( i = height ; i ; i-- ) {
				for ( j = width >> 3 ; j ; j-- ) {
					movq_m2r(*(src_mmx++), mm0);
					movq_r2m(mm0, *(dest_mmx++));
				}
				src_mmx += stride;
			}
			break;
		default:
			goto format_error;
		}
		break;
	case 2:
		switch (image->Format) {
		case GL_RGB:
			src = (hwUI8 *)image->Data + ( y * image->Width + x ) * 3;
			/* src is a byte pointer, so the row skip is in bytes */
			stride = (image->Width - width) * 3;

			movq_m2r(R565, mm5);
			movq_m2r(G565, mm6);
			movq_m2r(B565, mm7);

			for ( i = height ; i ; i-- ) {
				/* four texels (12 source bytes) per iteration */
				for ( j = width >> 2 ; j ; j-- ) {

					/* --- texels 0 and 1 --- */
					movq_m2r(*((mmx_t *)src), mm0);	/* R0 G0 B0 R1 G1 B1 x x */
					src += 6;

					movq_r2r(mm0, mm1);

					pand_m2r(and5, mm1);		/* 0 0 0 R1 G1 B1 0 0 */
					psllq_i2r(8, mm1);		/* 0 0 0 0 R1 G1 B1 0 */
					pand_m2r(and4, mm0);		/* R0 G0 B0 0 0 0 0 0 */
					por_r2r(mm1, mm0);		/* R0 G0 B0 0 R1 G1 B1 0 */

					movq_r2r(mm0, mm1);
					pand_r2r(mm6, mm0);		/* green, top 6 bits */
					movq_r2r(mm1, mm3);
					pand_r2r(mm7, mm1);		/* blue, top 5 bits */
					pand_r2r(mm5, mm3);		/* red, top 5 bits */

					psllq_i2r(16, mm3);		/* red   -> bits 23..19 of each dword */
					psrld_i2r(14, mm1);		/* blue  -> bits 9..5 */
					por_r2r(mm1, mm0);		/* green 15..10 | blue 9..5 */
					psrld_i2r(5, mm0);		/* green 10..5  | blue 4..0 */

					/* --- texels 2 and 3 --- */
					movq_m2r(*((mmx_t *)src), mm4);	/* R2 G2 B2 R3 G3 B3 x x */

					movq_r2r(mm4, mm2);

					pand_m2r(and5, mm2);		/* 0 0 0 R3 G3 B3 0 0 */
					psllq_i2r(8, mm2);		/* 0 0 0 0 R3 G3 B3 0 */
					pand_m2r(and4, mm4);		/* R2 G2 B2 0 0 0 0 0 */
					por_r2r(mm2, mm4);		/* R2 G2 B2 0 R3 G3 B3 0 */

					movq_r2r(mm4, mm2);
					pand_r2r(mm6, mm4);		/* green, top 6 bits */
					movq_r2r(mm2, mm1);
					pand_r2r(mm7, mm2);		/* blue, top 5 bits */
					pand_r2r(mm5, mm1);		/* red, top 5 bits */

					psllq_i2r(16, mm1);		/* red   -> bits 23..19 of each dword */
					psrld_i2r(14, mm2);		/* blue  -> bits 9..5 */
					por_r2r(mm2, mm4);		/* green 15..10 | blue 9..5 */
					psrld_i2r(5, mm4);		/* green 10..5  | blue 4..0 */

					/* --- merge the four texels into one quadword --- */
					packssdw_r2r(mm4, mm0);		/* green|blue of texels 0..3 as words */
					packuswb_r2r(mm1, mm3);		/* red lands in the high byte of each word */
					por_r2r(mm3, mm0);		/* red 15..11 | green 10..5 | blue 4..0 */
					movq_r2m(mm0, *(dest_mmx));
					dest_mmx++;

					src += 6;
				}
				src += stride;
			}
			break;
		case GL_RGBA:
			src_mmx = (mmx_t *)image->Data + (( y * image->Width + x ) >> 1);
			/* src_mmx advances in quadwords holding two RGBA texels each */
			stride = (image->Width - width) >> 1;

			movq_m2r(GA4444, mm6);
			movq_m2r(R4444, mm5);
			movq_m2r(B4444, mm7);

			for ( i = height ; i ; i-- ) {
				/* four texels (two quadwords) per iteration */
				for ( j = width >> 2 ; j ; j-- ) {

					/* --- texels 0 and 1 --- */
					movq_m2r(*(src_mmx), mm0);	/* R0 G0 B0 A0 R1 G1 B1 A1 */
					src_mmx++;

					movq_r2r(mm0, mm1);
					pand_r2r(mm6, mm0);		/* high nibbles of green and alpha */
					psrlq_i2r(8, mm0);		/* green -> bits 7..4, alpha -> bits 23..20 */
					movq_r2r(mm1, mm3);
					pand_r2r(mm5, mm1);		/* high nibble of red */
					psllq_i2r(12, mm1);		/* red -> bits 19..16 */
					pand_r2r(mm7, mm3);		/* high nibble of blue */
					por_r2r(mm1, mm0);
					psrlq_i2r(20, mm3);		/* blue -> bits 3..0 */
					por_r2r(mm3, mm0);		/* words: 0x00GB, 0x00AR per texel */

					/* --- texels 2 and 3 --- */
					movq_m2r(*(src_mmx), mm4);	/* R2 G2 B2 A2 R3 G3 B3 A3 */
					src_mmx++;

					movq_r2r(mm4, mm1);
					pand_r2r(mm6, mm4);		/* high nibbles of green and alpha */
					psrlq_i2r(8, mm4);		/* green -> bits 7..4, alpha -> bits 23..20 */
					movq_r2r(mm1, mm3);
					pand_r2r(mm5, mm1);		/* high nibble of red */
					psllq_i2r(12, mm1);		/* red -> bits 19..16 */
					pand_r2r(mm7, mm3);		/* high nibble of blue */
					por_r2r(mm1, mm4);
					psrlq_i2r(20, mm3);		/* blue -> bits 3..0 */
					por_r2r(mm3, mm4);		/* words: 0x00GB, 0x00AR per texel */

					/* words -> bytes: each texel becomes GB, AR */
					packuswb_r2r(mm4, mm0);

					movq_r2m(mm0, *dest_mmx);
					dest_mmx++;
				}
				src_mmx += stride;
			}
			break;
		case GL_LUMINANCE:
			src = (hwUI8 *)image->Data + ( y * image->Width + x );
			stride = (image->Width - width);
			hwMsg(10, "GL_LUMINANCE non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				/* two 16-bit texels per 32-bit store */
				for ( j = width >> 1 ; j ; j-- ) {
					/* FIXME: should probably use 555 texture to get true grey */
					*destPtr++ = MGAPACKCOLOR565(src[0],src[0],src[0]) |  
						( MGAPACKCOLOR565(src[1],src[1],src[1]) << 16 );
					src += 2;
				}
				src += stride;
			}
			break;
		case GL_INTENSITY:
			src = (hwUI8 *)image->Data + ( y * image->Width + x );
			stride = (image->Width - width);
			hwMsg(10, "GL_INTENSITY non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				for ( j = width >> 1 ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR4444(src[0],src[0],src[0],src[0]) |  
						( MGAPACKCOLOR4444(src[1],src[1],src[1],src[1]) << 16 );
					src += 2;
				}
				src += stride;
			}
			break;
		case GL_ALPHA:
			src = (hwUI8 *)image->Data + ( y * image->Width + x );
			stride = (image->Width - width);
			hwMsg(10, "GL_ALPHA non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				for ( j = width >> 1 ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR4444(255,255,255,src[0]) |  
						( MGAPACKCOLOR4444(255,255,255,src[1]) << 16 );
					src += 2;
				}
				src += stride;
			}
			break;
		case GL_LUMINANCE_ALPHA:
			src = (hwUI8 *)image->Data + ( y * image->Width + x ) * 2;
			stride = (image->Width - width) * 2;
			hwMsg(10, "GL_LUMINANCE_ALPHA non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				for ( j = width >> 1 ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR4444(src[0],src[0],src[0],src[1]) |  
						( MGAPACKCOLOR4444(src[2],src[2],src[2],src[3]) << 16 );
					src += 4;
				}
				src += stride;
			}
			break;
		default:
			goto format_error;
		}
		break;
	case 4:
		switch (image->Format) {
		case GL_RGB:
			src = (hwUI8 *)image->Data + (  y * image->Width + x ) * 3;
			stride = (image->Width - width) * 3;

			movq_m2r(and4, mm1);
			movq_m2r(and5, mm3);
			movq_m2r(or1, mm4);
			movq_m2r(and3, mm6);
			movq_m2r(and2, mm7);

			for ( i = height ; i ; i-- ) {
				/* two texels (6 source bytes) per iteration */
				for ( j = width>>1 ; j ; j-- ) {

					movq_m2r(*((mmx_t *)src), mm0);	/* R0 G0 B0 R1 G1 B1 x x */

					movq_r2r(mm0, mm5);

					pand_r2r(mm3, mm0);		/* 0 0 0 R1 G1 B1 0 0 */
					pand_r2r(mm1, mm5);		/* R0 G0 B0 0 0 0 0 0 */

					psllq_i2r(8, mm0);		/* 0 0 0 0 R1 G1 B1 0 */
					por_r2r(mm5, mm0);		/* R0 G0 B0 0 R1 G1 B1 0 */

					movq_r2r(mm0, mm2);
					movq_r2r(mm0, mm5);
					pand_r2r(mm7, mm0);		/* 0 G0 0 0 0 G1 0 0 */

					pand_r2r(mm6, mm2);		/* R0 0 B0 0 R1 0 B1 0 */
					pand_r2r(mm6, mm5);		/* R0 0 B0 0 R1 0 B1 0 */

					psrld_i2r(16, mm2);		/* B0 0 0 0 B1 0 0 0 */
					pslld_i2r(16, mm5);		/* 0 0 R0 0 0 0 R1 0 */
					por_r2r(mm4, mm2);		/* B0 0 0 X B1 0 0 X   X=255 */

					por_r2r(mm5, mm0);		/* 0 G0 R0 0 0 G1 R1 0 */
					por_r2r(mm2, mm0);		/* B0 G0 R0 X B1 G1 R1 X */

					movq_r2m(mm0, *(dest_mmx++));

					src += 6;
				}
				src += stride;
			}
			break;
		case GL_RGBA:
			src_mmx = (mmx_t *)image->Data + ((  y * image->Width + x ) >> 1);
			/* src_mmx advances in quadwords holding two RGBA texels each */
			stride = (image->Width - width) >> 1 ;

			movq_m2r(and2, mm1);
			movq_m2r(and3, mm3);
			for ( i = height ; i ; i-- ) {
				/* two texels (one quadword) per iteration */
				for ( j = width>>1 ; j ; j-- ) {

					movq_m2r(*(src_mmx++), mm0);	/* R0 G0 B0 A0 R1 G1 B1 A1 */

					movq_r2r(mm0, mm2);
					movq_r2r(mm0, mm5);
					pand_r2r(mm1, mm0);		/* 0 G0 0 A0 0 G1 0 A1 */

					pand_r2r(mm3, mm2);		/* R0 0 B0 0 R1 0 B1 0 */
					pand_r2r(mm3, mm5);		/* R0 0 B0 0 R1 0 B1 0 */

					psrld_i2r(16, mm2);		/* B0 0 0 0 B1 0 0 0 */
					pslld_i2r(16, mm5);		/* 0 0 R0 0 0 0 R1 0 */

					por_r2r(mm2, mm0);		/* B0 G0 0 A0 B1 G1 0 A1 */
					por_r2r(mm5, mm0);		/* B0 G0 R0 A0 B1 G1 R1 A1 */

					movq_r2m(mm0, *(dest_mmx++));
				}
				src_mmx += stride;
			}
			break;
		case GL_LUMINANCE:
			hwMsg(10, "GL_LUMINANCE non MMX convert ! %d\n", texelBytes); 
			src = (hwUI8 *)image->Data + ( y * image->Width + x );
			stride = (image->Width - width);
			for ( i = height ; i ; i-- ) {
				for ( j = width ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR8888(src[0],src[0],src[0], 255);
					src += 1;
				}
				src += stride;
			}
			break;
		case GL_INTENSITY:
			src = (hwUI8 *)image->Data + (  y * image->Width + x );
			stride = (image->Width - width);
			hwMsg(10, "GL_INTENSITY non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				for ( j = width ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR8888(src[0],src[0],src[0],src[0]);
					src += 1;
				}
				src += stride;
			}
			break;
		case GL_ALPHA:
			src = (hwUI8 *)image->Data + ( y * image->Width + x );
			stride = (image->Width - width);
			hwMsg(10, "GL_ALPHA non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				for ( j = width ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR8888(255,255,255,src[0]);
					src += 1;
				}
				src += stride;
			}
			break;
		case GL_LUMINANCE_ALPHA:
			src = (hwUI8 *)image->Data + ( y * image->Width + x ) * 2;
			stride = (image->Width - width) * 2;
			hwMsg(10, "GL_LUMINANCE_ALPHA non MMX convert ! %d\n", texelBytes); 
			for ( i = height ; i ; i-- ) {
				for ( j = width ; j ; j-- ) {
					*destPtr++ = MGAPACKCOLOR8888(src[0],src[0],
							       src[0],src[1]);
					src += 2;
				}
				src += stride;
			}
			break;
		default:
			goto format_error;
		}
		break;
	default:
		goto format_error;
	}

	/* leave MMX state so that later x87 floating point code works */
	emms();

	return;

format_error:
	/* reached before any MMX instruction has run, so no emms() is needed */
	printf( "Unsupported texelBytes %i, image->Format %i\n", 
		texelBytes, image->Format );
}
#endif

