(svn r13639) -Codechange: rewrite 32bpp-anim and 32bpp-optimized drawing and encoding so they use a scheme similar to 8bpp-optimized
author smatz
Thu, 26 Jun 2008 15:46:19 +0000
changeset 11082 45ab75d184a0
parent 11081 ca5677d67314
child 11083 19aff1f3244e
(svn r13639) -Codechange: rewrite 32bpp-anim and 32bpp-optimized drawing and encoding so they use a scheme similar to 8bpp-optimized
All zoom levels are stored and a form of run-length encoding (RLE) is used. Together with further changes and a reduced number of variables, drawing is ~50% faster on average.
src/blitter/32bpp_anim.cpp
src/blitter/32bpp_anim.hpp
src/blitter/32bpp_base.hpp
src/blitter/32bpp_optimized.cpp
src/blitter/32bpp_optimized.hpp
src/core/math_func.hpp
src/gfx_type.h
--- a/src/blitter/32bpp_anim.cpp	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/blitter/32bpp_anim.cpp	Thu Jun 26 15:46:19 2008 +0000
@@ -15,6 +15,172 @@
 
 static FBlitter_32bppAnim iFBlitter_32bppAnim;
 
+template <BlitterMode mode>
+inline void Blitter_32bppAnim::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
+{
+	const SpriteData *src = (const SpriteData *)bp->sprite;
+
+	const Colour *src_px = (const Colour *)(src->data + src->offset[zoom][0]);
+	const uint8  *src_n  = (const uint8  *)(src->data + src->offset[zoom][1]);
+
+	for (uint i = bp->skip_top; i != 0; i--) {
+		src_px = (const Colour *)((const byte *)src_px + *(const uint32 *)src_px);
+		src_n += *(const uint32 *)src_n;
+	}
+
+	uint32 *dst = (uint32 *)bp->dst + bp->top * bp->pitch + bp->left;
+	uint8 *anim = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_width + bp->left;
+
+	const byte *remap = bp->remap; // store so we don't have to access it via bp everytime
+
+	for (int y = 0; y < bp->height; y++) {
+		uint32 *dst_ln = dst + bp->pitch;
+		uint8 *anim_ln = anim + this->anim_buf_width;
+
+		const Colour *src_px_ln = (const Colour *)((const byte *)src_px + *(const uint32 *)src_px);
+		src_px++;
+
+		const uint8 *src_n_ln = src_n + *(uint32 *)src_n;
+		src_n += 4;
+
+		uint32 *dst_end = dst + bp->skip_left;
+
+		uint n;
+
+		while (dst < dst_end) {
+			n = *src_n++;
+
+			if (src_px->a == 0) {
+				dst += n;
+				src_px ++;
+				src_n++;
+
+				if (dst > dst_end) anim += dst - dst_end;
+			} else {
+				if (dst + n > dst_end) {
+					uint d = dst_end - dst;
+					src_px += d;
+					src_n += d;
+
+					dst = dst_end - bp->skip_left;
+					dst_end = dst + bp->width;
+
+					n = min<uint>(n - d, (uint)bp->width);
+					goto draw;
+				}
+				dst += n;
+				src_px += n;
+				src_n += n;
+			}
+		}
+
+		dst -= bp->skip_left;
+		dst_end -= bp->skip_left;
+
+		dst_end += bp->width;
+
+		while (dst < dst_end) {
+			n = min<uint>(*src_n++, (uint)(dst_end - dst));
+
+			if (src_px->a == 0) {
+				anim += n;
+				dst += n;
+				src_px++;
+				src_n++;
+				continue;
+			}
+
+			draw:;
+
+			switch (mode) {
+				case BM_COLOUR_REMAP:
+					if (src_px->a == 255) {
+						do {
+							uint m = *src_n;
+							/* In case the m-channel is zero, do not remap this pixel in any way */
+							if (m == 0) {
+								*dst = *src_px;
+								*anim = 0;
+							} else {
+								uint r = remap[m];
+								*anim = r;
+								if (r != 0) *dst = this->LookupColourInPalette(r);
+							}
+							anim++;
+							dst++;
+							src_px++;
+							src_n++;
+						} while (--n != 0);
+					} else {
+						do {
+							uint m = *src_n;
+							if (m == 0) {
+								*dst = ComposeColourRGBANoCheck(src_px->r, src_px->g, src_px->b, src_px->a, *dst);
+								*anim = 0;
+							} else {
+								uint r = remap[m];
+								*anim = r;
+								if (r != 0) *dst = ComposeColourPANoCheck(this->LookupColourInPalette(r), src_px->a, *dst);
+							}
+							anim++;
+							dst++;
+							src_px++;
+							src_n++;
+						} while (--n != 0);
+					}
+					break;
+
+				case BM_TRANSPARENT:
+					/* TODO -- We make an assumption here that the remap in fact is transparency, not some color.
+					 *  This is never a problem with the code we produce, but newgrfs can make it fail... or at least:
+					 *  we produce a result the newgrf maker didn't expect ;) */
+
+					/* Make the current color a bit more black, so it looks like this image is transparent */
+					src_px += n;
+					src_n += n;
+
+					do {
+						*dst = MakeTransparent(*dst, 192);
+						*anim = remap[*anim];
+						anim++;
+						dst++;
+					} while (--n != 0);
+					break;
+
+				default:
+					if (src_px->a == 255) {
+						do {
+							/* Compiler assumes pointer aliasing, can't optimise this on its own */
+							uint m = *src_n++;
+							/* Above 217 is palette animation */
+							*anim++ = m;
+							*dst++ = (m >= 217) ? this->LookupColourInPalette(m) : *src_px;
+							src_px++;
+						} while (--n != 0);
+					} else {
+						do {
+							uint m = *src_n++;
+							*anim++ = m;
+							if (m >= 217) {
+								*dst = ComposeColourPANoCheck(this->LookupColourInPalette(m), src_px->a, *dst);
+							} else {
+								*dst = ComposeColourRGBANoCheck(src_px->r, src_px->g, src_px->b, src_px->a, *dst);
+							}
+							dst++;
+							src_px++;
+						} while (--n != 0);
+					}
+					break;
+			}
+		}
+
+		anim = anim_ln;
+		dst = dst_ln;
+		src_px = src_px_ln;
+		src_n  = src_n_ln;
+	}
+}
+
 void Blitter_32bppAnim::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
 {
 	if (_screen_disable_anim) {
@@ -23,10 +189,6 @@
 		return;
 	}
 
-	const SpriteLoader::CommonPixel *src, *src_line;
-	uint32 *dst, *dst_line;
-	uint8 *anim, *anim_line;
-
 	if (_screen.width != this->anim_buf_width || _screen.height != this->anim_buf_height) {
 		/* The size of the screen changed; we can assume we can wipe all data from our buffer */
 		free(this->anim_buf);
@@ -35,68 +197,11 @@
 		this->anim_buf_height = _screen.height;
 	}
 
-	/* Find where to start reading in the source sprite */
-	src_line = (const SpriteLoader::CommonPixel *)bp->sprite + (bp->skip_top * bp->sprite_width + bp->skip_left) * ScaleByZoom(1, zoom);
-	dst_line = (uint32 *)bp->dst + bp->top * bp->pitch + bp->left;
-	anim_line = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_width + bp->left;
-
-	for (int y = 0; y < bp->height; y++) {
-		dst = dst_line;
-		dst_line += bp->pitch;
-
-		src = src_line;
-		src_line += bp->sprite_width * ScaleByZoom(1, zoom);
-
-		anim = anim_line;
-		anim_line += this->anim_buf_width;
-
-		for (int x = 0; x < bp->width; x++) {
-			if (src->a == 0) {
-				/* src->r is 'misused' here to indicate how much more pixels are following with an alpha of 0 */
-				int skip = UnScaleByZoom(src->r, zoom);
-
-				dst  += skip;
-				anim += skip;
-				x    += skip - 1;
-				src  += ScaleByZoom(1, zoom) * skip;
-				continue;
-			}
-
-			switch (mode) {
-				case BM_COLOUR_REMAP:
-					/* In case the m-channel is zero, do not remap this pixel in any way */
-					if (src->m == 0) {
-						*dst = ComposeColourRGBA(src->r, src->g, src->b, src->a, *dst);
-						*anim = 0;
-					} else {
-						if (bp->remap[src->m] != 0) {
-							*dst = ComposeColourPA(this->LookupColourInPalette(bp->remap[src->m]), src->a, *dst);
-							*anim = bp->remap[src->m];
-						}
-					}
-					break;
-
-				case BM_TRANSPARENT:
-					/* TODO -- We make an assumption here that the remap in fact is transparency, not some color.
-					 *  This is never a problem with the code we produce, but newgrfs can make it fail... or at least:
-					 *  we produce a result the newgrf maker didn't expect ;) */
-
-					/* Make the current color a bit more black, so it looks like this image is transparent */
-					*dst = MakeTransparent(*dst, 192);
-					*anim = bp->remap[*anim];
-					break;
-
-				default:
-					/* Above 217 is palette animation */
-					if (src->m >= 217) *dst = ComposeColourPA(this->LookupColourInPalette(src->m), src->a, *dst);
-					else               *dst = ComposeColourRGBA(src->r, src->g, src->b, src->a, *dst);
-					*anim = src->m;
-					break;
-			}
-			dst++;
-			anim++;
-			src += ScaleByZoom(1, zoom);
-		}
+	switch (mode) {
+		default: NOT_REACHED();
+		case BM_NORMAL:       Draw<BM_NORMAL>      (bp, zoom); return;
+		case BM_COLOUR_REMAP: Draw<BM_COLOUR_REMAP>(bp, zoom); return;
+		case BM_TRANSPARENT:  Draw<BM_TRANSPARENT> (bp, zoom); return;
 	}
 }
 
--- a/src/blitter/32bpp_anim.hpp	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/blitter/32bpp_anim.hpp	Thu Jun 26 15:46:19 2008 +0000
@@ -34,6 +34,8 @@
 	/* virtual */ Blitter::PaletteAnimation UsePaletteAnimation();
 
 	/* virtual */ const char *GetName() { return "32bpp-anim"; }
+
+	template <BlitterMode mode> void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom);
 };
 
 class FBlitter_32bppAnim: public BlitterFactory<FBlitter_32bppAnim> {
--- a/src/blitter/32bpp_base.hpp	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/blitter/32bpp_base.hpp	Thu Jun 26 15:46:19 2008 +0000
@@ -30,7 +30,7 @@
 	/**
 	 * Compose a colour based on RGB values.
 	 */
-	static inline uint ComposeColour(uint a, uint r, uint g, uint b)
+	static inline uint32 ComposeColour(uint a, uint r, uint g, uint b)
 	{
 		return (((a) << 24) & 0xFF000000) | (((r) << 16) & 0x00FF0000) | (((g) << 8) & 0x0000FF00) | ((b) & 0x000000FF);
 	}
@@ -46,44 +46,60 @@
 	/**
 	 * Compose a colour based on RGBA values and the current pixel value.
 	 */
-	static inline uint ComposeColourRGBA(uint r, uint g, uint b, uint a, uint current)
+	static inline uint32 ComposeColourRGBANoCheck(uint r, uint g, uint b, uint a, uint32 current)
+	{
+		uint cr = GB(current, 16, 8);
+		uint cg = GB(current, 8,  8);
+		uint cb = GB(current, 0,  8);
+
+		/* The 256 is wrong, it should be 255, but 256 is much faster... */
+		return ComposeColour(0xFF,
+							(r * a + cr * (256 - a)) / 256,
+							(g * a + cg * (256 - a)) / 256,
+							(b * a + cb * (256 - a)) / 256);
+	}
+
+	/**
+	 * Compose a colour based on RGBA values and the current pixel value.
+	 * Handles fully transparent and solid pixels in a special (faster) way.
+	 */
+	static inline uint32 ComposeColourRGBA(uint r, uint g, uint b, uint a, uint32 current)
 	{
 		if (a == 0) return current;
 		if (a >= 255) return ComposeColour(0xFF, r, g, b);
 
-		uint cr, cg, cb;
-		cr = GB(current, 16, 8);
-		cg = GB(current, 8,  8);
-		cb = GB(current, 0,  8);
+		return ComposeColourRGBANoCheck(r, g, b, a, current);
+	}
+
+	/**
+	 * Compose a colour based on Pixel value, alpha value, and the current pixel value.
+	 */
+	static inline uint32 ComposeColourPANoCheck(uint32 colour, uint a, uint32 current)
+	{
+		uint r  = GB(colour,  16, 8);
+		uint g  = GB(colour,  8,  8);
+		uint b  = GB(colour,  0,  8);
+		uint cr = GB(current, 16, 8);
+		uint cg = GB(current, 8,  8);
+		uint cb = GB(current, 0,  8);
 
 		/* The 256 is wrong, it should be 255, but 256 is much faster... */
 		return ComposeColour(0xFF,
-												(r * a + cr * (256 - a)) / 256,
-												(g * a + cg * (256 - a)) / 256,
-												(b * a + cb * (256 - a)) / 256);
+							(r * a + cr * (256 - a)) / 256,
+							(g * a + cg * (256 - a)) / 256,
+							(b * a + cb * (256 - a)) / 256);
 	}
 
 	/**
-	* Compose a colour based on Pixel value, alpha value, and the current pixel value.
-	*/
-	static inline uint ComposeColourPA(uint colour, uint a, uint current)
+	 * Compose a colour based on Pixel value, alpha value, and the current pixel value.
+	 * Handles fully transparent and solid pixels in a special (faster) way.
+	 */
+	static inline uint32 ComposeColourPA(uint32 colour, uint a, uint32 current)
 	{
 		if (a == 0) return current;
 		if (a >= 255) return (colour | 0xFF000000);
 
-		uint r, g, b, cr, cg, cb;
-		r  = GB(colour,  16, 8);
-		g  = GB(colour,  8,  8);
-		b  = GB(colour,  0,  8);
-		cr = GB(current, 16, 8);
-		cg = GB(current, 8,  8);
-		cb = GB(current, 0,  8);
-
-		/* The 256 is wrong, it should be 255, but 256 is much faster... */
-		return ComposeColour(0xFF,
-												(r * a + cr * (256 - a)) / 256,
-												(g * a + cg * (256 - a)) / 256,
-												(b * a + cb * (256 - a)) / 256);
+		return ComposeColourPANoCheck(colour, a, current);
 	}
 
 	/**
@@ -92,12 +108,11 @@
 	* @param amount the amount of transparency, times 256.
 	* @return the new colour for the screen.
 	*/
-	static inline uint MakeTransparent(uint colour, uint amount)
+	static inline uint32 MakeTransparent(uint32 colour, uint amount)
 	{
-		uint r, g, b;
-		r = GB(colour, 16, 8);
-		g = GB(colour, 8,  8);
-		b = GB(colour, 0,  8);
+		uint r = GB(colour, 16, 8);
+		uint g = GB(colour, 8,  8);
+		uint b = GB(colour, 0,  8);
 
 		return ComposeColour(0xFF, r * amount / 256, g * amount / 256, b * amount / 256);
 	}
@@ -107,12 +122,11 @@
 	* @param colour the colour to make grey.
 	* @return the new colour, now grey.
 	*/
-	static inline uint MakeGrey(uint colour)
+	static inline uint32 MakeGrey(uint32 colour)
 	{
-		uint r, g, b;
-		r = GB(colour, 16, 8);
-		g = GB(colour, 8,  8);
-		b = GB(colour, 0,  8);
+		uint r = GB(colour, 16, 8);
+		uint g = GB(colour, 8,  8);
+		uint b = GB(colour, 0,  8);
 
 		/* To avoid doubles and stuff, multiple it with a total of 65536 (16bits), then
 		*  divide by it to normalize the value to a byte again. See heightmap.cpp for
--- a/src/blitter/32bpp_optimized.cpp	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/blitter/32bpp_optimized.cpp	Thu Jun 26 15:46:19 2008 +0000
@@ -6,44 +6,133 @@
 #include "../zoom_func.h"
 #include "../gfx_func.h"
 #include "../debug.h"
+#include "../core/math_func.hpp"
+#include "../core/alloc_func.hpp"
 #include "32bpp_optimized.hpp"
 
 static FBlitter_32bppOptimized iFBlitter_32bppOptimized;
 
-template <BlitterMode mode, ZoomLevel zoom> inline void Blitter_32bppOptimized::Draw(Blitter::BlitterParams *bp)
+/**
+ * Draws a sprite to a (screen) buffer. It is templated to allow faster operation.
+ *
+ * @param mode blitter mode
+ * @param bp further blitting parameters
+ * @param zoom zoom level at which we are drawing
+ */
+template <BlitterMode mode>
+inline void Blitter_32bppOptimized::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
 {
-	const SpriteLoader::CommonPixel *src, *src_line;
-	uint32 *dst, *dst_line;
+	const SpriteData *src = (const SpriteData *)bp->sprite;
 
-	/* Find where to start reading in the source sprite */
-	src_line = (const SpriteLoader::CommonPixel *)bp->sprite + (bp->skip_top * bp->sprite_width + bp->skip_left) * ScaleByZoom(1, zoom);
-	dst_line = (uint32 *)bp->dst + bp->top * bp->pitch + bp->left;
+	/* src_px : each line begins with uint32 n = 'number of bytes in this line',
+	 *          then n times is the Colour struct for this line */
+	const Colour *src_px = (const Colour *)(src->data + src->offset[zoom][0]);
+	/* src_n  : each line begins with uint32 n = 'number of bytes in this line',
+	 *          then interleaved stream of 'm' and 'n' channels. 'm' is remap,
+	 *          'n' is number of bytes with the same alpha channel class */
+	const uint8  *src_n  = (const uint8  *)(src->data + src->offset[zoom][1]);
+
+	/* skip upper lines in src_px and src_n */
+	for (uint i = bp->skip_top; i != 0; i--) {
+		src_px = (const Colour *)((const byte *)src_px + *(const uint32 *)src_px);
+		src_n += *(uint32 *)src_n;
+	}
+
+	/* skip lines in dst */
+	uint32 *dst = (uint32 *)bp->dst + bp->top * bp->pitch + bp->left;
+
+	/* store so we don't have to access it via bp every time (compiler assumes pointer aliasing) */
+	const byte *remap = bp->remap;
 
 	for (int y = 0; y < bp->height; y++) {
-		dst = dst_line;
-		dst_line += bp->pitch;
-
-		src = src_line;
-		src_line += bp->sprite_width * ScaleByZoom(1, zoom);
+		/* next dst line begins here */
+		uint32 *dst_ln = dst + bp->pitch;
 
-		for (int x = 0; x < bp->width; x++) {
-			if (src->a == 0) {
-				/* src->r is 'misused' here to indicate how much more pixels are following with an alpha of 0 */
-				int skip = UnScaleByZoom(src->r, zoom);
+		/* next src line begins here */
+		const Colour *src_px_ln = (const Colour *)((const byte *)src_px + *(const uint32 *)src_px);
+		src_px++;
 
-				dst += skip;
-				x   += skip - 1;
-				src += ScaleByZoom(1, zoom) * skip;
+		/* next src_n line begins here */
+		const uint8 *src_n_ln = src_n + *(uint32 *)src_n;
+		src_n += 4;
+
+		/* we will end this line when we reach this point */
+		uint32 *dst_end = dst + bp->skip_left;
+
+		/* number of pixels with the same alpha channel class */
+		uint n;
+
+		while (dst < dst_end) {
+			n = *src_n++;
+
+			if (src_px->a == 0) {
+				dst += n;
+				src_px ++;
+				src_n++;
+			} else {
+				if (dst + n > dst_end) {
+					uint d = dst_end - dst;
+					src_px += d;
+					src_n += d;
+
+					dst = dst_end - bp->skip_left;
+					dst_end = dst + bp->width;
+
+					n = min<uint>(n - d, (uint)bp->width);
+					goto draw;
+				}
+				dst += n;
+				src_px += n;
+				src_n += n;
+			}
+		}
+
+		dst -= bp->skip_left;
+		dst_end -= bp->skip_left;
+
+		dst_end += bp->width;
+
+		while (dst < dst_end) {
+			n = min<uint>(*src_n++, (uint)(dst_end - dst));
+
+			if (src_px->a == 0) {
+				dst += n;
+				src_px++;
+				src_n++;
 				continue;
 			}
 
+			draw:;
+
 			switch (mode) {
 				case BM_COLOUR_REMAP:
-					/* In case the m-channel is zero, do not remap this pixel in any way */
-					if (src->m == 0) {
-						*dst = ComposeColourRGBA(src->r, src->g, src->b, src->a, *dst);
+					if (src_px->a == 255) {
+						do {
+							uint m = *src_n;
+							/* In case the m-channel is zero, do not remap this pixel in any way */
+							if (m == 0) {
+								*dst = *src_px;
+							} else {
+								uint r = remap[m];
+								if (r != 0) *dst = this->LookupColourInPalette(r);
+							}
+							dst++;
+							src_px++;
+							src_n++;
+						} while (--n != 0);
 					} else {
-						if (bp->remap[src->m] != 0) *dst = ComposeColourPA(this->LookupColourInPalette(bp->remap[src->m]), src->a, *dst);
+						do {
+							uint m = *src_n;
+							if (m == 0) {
+								*dst = ComposeColourRGBANoCheck(src_px->r, src_px->g, src_px->b, src_px->a, *dst);
+							} else {
+								uint r = remap[m];
+								if (r != 0) *dst = ComposeColourPANoCheck(this->LookupColourInPalette(r), src_px->a, *dst);
+							}
+							dst++;
+							src_px++;
+							src_n++;
+						} while (--n != 0);
 					}
 					break;
 
@@ -53,30 +142,47 @@
 					 *  we produce a result the newgrf maker didn't expect ;) */
 
 					/* Make the current color a bit more black, so it looks like this image is transparent */
-					*dst = MakeTransparent(*dst, 192);
+					src_px += n;
+					src_n += n;
+
+					do {
+						*dst = MakeTransparent(*dst, 192);
+						dst++;
+					} while (--n != 0);
 					break;
 
 				default:
-					*dst = ComposeColourRGBA(src->r, src->g, src->b, src->a, *dst);
+					if (src_px->a == 255) {
+						/* faster than memcpy(), n is usually low */
+						src_n += n;
+						do {
+							*dst++ = *src_px++;
+						} while (--n != 0);
+					} else {
+						src_n += n;
+						do {
+							*dst = ComposeColourRGBANoCheck(src_px->r, src_px->g, src_px->b, src_px->a, *dst);
+							dst++;
+							src_px++;
+						} while (--n != 0);
+					}
 					break;
 			}
-			dst++;
-			src += ScaleByZoom(1, zoom);
 		}
+
+		dst = dst_ln;
+		src_px = src_px_ln;
+		src_n  = src_n_ln;
 	}
 }
 
-template <BlitterMode mode> inline void Blitter_32bppOptimized::Draw(Blitter::BlitterParams *bp, ZoomLevel zoom)
-{
-	switch (zoom) {
-		default: NOT_REACHED();
-		case ZOOM_LVL_NORMAL: Draw<mode, ZOOM_LVL_NORMAL>(bp); return;
-		case ZOOM_LVL_OUT_2X: Draw<mode, ZOOM_LVL_OUT_2X>(bp); return;
-		case ZOOM_LVL_OUT_4X: Draw<mode, ZOOM_LVL_OUT_4X>(bp); return;
-		case ZOOM_LVL_OUT_8X: Draw<mode, ZOOM_LVL_OUT_8X>(bp); return;
-	}
-}
-
+/**
+ * Draws a sprite to a (screen) buffer. Calls adequate templated function.
+ *
+ * @param bp further blitting parameters
+ * @param mode blitter mode
+ * @param zoom zoom level at which we are drawing
+ */
 void Blitter_32bppOptimized::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
 {
 	switch (mode) {
@@ -87,46 +193,180 @@
 	}
 }
 
+/**
+ * Resizes the sprite in a very simple way, takes every n-th pixel and every n-th row
+ *
+ * @param sprite_src sprite to resize
+ * @param zoom resizing scale
+ * @return resized sprite
+ */
+static const SpriteLoader::Sprite *ResizeSprite(const SpriteLoader::Sprite *sprite_src, ZoomLevel zoom)
+{
+	SpriteLoader::Sprite *sprite = MallocT<SpriteLoader::Sprite>(1);
+
+	if (zoom == ZOOM_LVL_NORMAL) {
+		memcpy(sprite, sprite_src, sizeof(*sprite));
+		uint size = sprite_src->height * sprite_src->width;
+		sprite->data = MallocT<SpriteLoader::CommonPixel>(size);
+		memcpy(sprite->data, sprite_src->data, size * sizeof(SpriteLoader::CommonPixel));
+		return sprite;
+	}
+
+	sprite->height = UnScaleByZoom(sprite_src->height, zoom);
+	sprite->width  = UnScaleByZoom(sprite_src->width,  zoom);
+	sprite->x_offs = UnScaleByZoom(sprite_src->x_offs, zoom);
+	sprite->y_offs = UnScaleByZoom(sprite_src->y_offs, zoom);
+
+	uint size = sprite->height * sprite->width;
+	SpriteLoader::CommonPixel *dst = sprite->data = CallocT<SpriteLoader::CommonPixel>(size);
+
+	const SpriteLoader::CommonPixel *src = (SpriteLoader::CommonPixel *)sprite_src->data;
+	const SpriteLoader::CommonPixel *src_end = src + sprite_src->height * sprite_src->width;
+
+	uint scaled_1 = ScaleByZoom(1, zoom);
+
+	for (uint y = 0; y < sprite->height; y++) {
+		if (src >= src_end) src = src_end - sprite_src->width;
+
+		const SpriteLoader::CommonPixel *src_ln = src + sprite_src->width * scaled_1;
+		for (uint x = 0; x < sprite->width; x++) {
+			if (src >= src_ln) src = src_ln - 1;
+			*dst = *src;
+			dst++;
+			src += scaled_1;
+		}
+
+		src = src_ln;
+	}
+
+	return sprite;
+}
+
 Sprite *Blitter_32bppOptimized::Encode(SpriteLoader::Sprite *sprite, Blitter::AllocatorProc *allocator)
 {
-	Sprite *dest_sprite;
-	SpriteLoader::CommonPixel *dst;
-	dest_sprite = (Sprite *)allocator(sizeof(*dest_sprite) + sprite->height * sprite->width * sizeof(SpriteLoader::CommonPixel));
+	/* streams of pixels (a, r, g, b channels)
+	 *
+	 * stored in separated stream so data are always aligned on 4B boundary */
+	Colour *dst_px_orig[ZOOM_LVL_COUNT];
+
+	/* interleaved stream of 'm' channel and 'n' channel
+	 * 'n' is the number of following pixels with the same alpha channel class
+	 * there are 3 classes: 0, 255, others
+	 *
+	 * it has to be stored in one stream so fewer registers are used -
+	 * x86 has problems with register allocation even with this solution */
+	uint8  *dst_n_orig[ZOOM_LVL_COUNT];
+
+	/* lengths of streams */
+	uint32 lengths[ZOOM_LVL_COUNT][2];
+
+	for (ZoomLevel z = ZOOM_LVL_BEGIN; z < ZOOM_LVL_END; z++) {
+		const SpriteLoader::Sprite *src_orig = ResizeSprite(sprite, z);
+
+		uint size = src_orig->height * src_orig->width;
+
+		dst_px_orig[z] = CallocT<Colour>(size + src_orig->height * 2);
+		dst_n_orig[z]  = CallocT<uint8>(size * 2 + src_orig->height * 4 * 2);
+
+		uint32 *dst_px_ln = (uint32 *)dst_px_orig[z];
+		uint32 *dst_n_ln  = (uint32 *)dst_n_orig[z];
+
+		const SpriteLoader::CommonPixel *src = (const SpriteLoader::CommonPixel *)src_orig->data;
+
+		for (uint y = src_orig->height; y > 0; y--) {
+			Colour *dst_px = (Colour *)(dst_px_ln + 1);
+			uint8 *dst_n = (uint8 *)(dst_n_ln + 1);
+
+			uint8 *dst_len = dst_n++;
+
+			uint last = 3;
+			int len = 0;
+
+			for (uint x = src_orig->width; x > 0; x--) {
+				uint8 a = src->a;
+				uint t = a > 0 && a < 255 ? 1 : a;
+
+				if (last != t || len == 255) {
+					if (last != 3) {
+						*dst_len = len;
+						dst_len = dst_n++;
+					}
+					len = 0;
+				}
+
+				last = t;
+				len++;
+
+				if (a != 0) {
+					dst_px->a = a;
+					*dst_n = src->m;
+					if (src->m != 0) {
+						/* Pre-convert the mapping channel to a RGB value */
+						uint32 colour = this->LookupColourInPalette(src->m);
+						dst_px->r = GB(colour, 16, 8);
+						dst_px->g = GB(colour, 8,  8);
+						dst_px->b = GB(colour, 0,  8);
+					} else {
+						dst_px->r = src->r;
+						dst_px->g = src->g;
+						dst_px->b = src->b;
+					}
+					dst_px++;
+					dst_n++;
+				} else if (len == 1) {
+					dst_px++;
+					*dst_n = src->m;
+					dst_n++;
+				}
+
+				src++;
+			}
+
+			if (last != 3) {
+				*dst_len = len;
+			}
+
+			dst_px = (Colour *)AlignPtr(dst_px, 4);
+			dst_n  = (uint8 *)AlignPtr(dst_n, 4);
+
+			*dst_px_ln = (uint8 *)dst_px - (uint8 *)dst_px_ln;
+			*dst_n_ln  = (uint8 *)dst_n  - (uint8 *)dst_n_ln;
+
+			dst_px_ln = (uint32 *)dst_px;
+			dst_n_ln =  (uint32 *)dst_n;
+		}
+
+		lengths[z][0] = (byte *)dst_px_ln - (byte *)dst_px_orig[z]; // all are aligned to 4B boundary
+		lengths[z][1] = (byte *)dst_n_ln  - (byte *)dst_n_orig[z];
+
+		free(src_orig->data);
+		free((void *)src_orig);
+	}
+
+	uint len = 0; // total length of data
+	for (ZoomLevel z = ZOOM_LVL_BEGIN; z < ZOOM_LVL_END; z++) {
+		len += lengths[z][0] + lengths[z][1];
+	}
+
+	Sprite *dest_sprite = (Sprite *)allocator(sizeof(*dest_sprite) + sizeof(SpriteData) + len);
 
 	dest_sprite->height = sprite->height;
 	dest_sprite->width  = sprite->width;
 	dest_sprite->x_offs = sprite->x_offs;
 	dest_sprite->y_offs = sprite->y_offs;
 
-	dst = (SpriteLoader::CommonPixel *)dest_sprite->data;
-
-	memcpy(dst, sprite->data, sprite->height * sprite->width * sizeof(SpriteLoader::CommonPixel));
-	/* Skip to the end of the array, and work backwards to find transparent blocks */
-	dst = dst + sprite->height * sprite->width - 1;
+	SpriteData *dst = (SpriteData *)dest_sprite->data;
 
-	for (uint y = sprite->height; y > 0; y--) {
-		int trans = 0;
-		/* Process sprite line backwards, to compute lengths of transparent blocks */
-		for (uint x = sprite->width; x > 0; x--) {
-			if (dst->a == 0) {
-				/* Save transparent block length in red channel; max value is 255 the red channel can contain */
-				if (trans < 255) trans++;
-				dst->r = trans;
-				dst->g = 0;
-				dst->b = 0;
-				dst->m = 0;
-			} else {
-				trans = 0;
-				if (dst->m != 0) {
-					/* Pre-convert the mapping channel to a RGB value */
-					uint color = this->LookupColourInPalette(dst->m);
-					dst->r = GB(color, 16, 8);
-					dst->g = GB(color, 8,  8);
-					dst->b = GB(color, 0,  8);
-				}
-			}
-			dst--;
-		}
+	for (ZoomLevel z = ZOOM_LVL_BEGIN; z < ZOOM_LVL_END; z++) {
+		dst->offset[z][0] = z == ZOOM_LVL_BEGIN ? 0 : lengths[z - 1][1] + dst->offset[z - 1][1];
+		dst->offset[z][1] = lengths[z][0] + dst->offset[z][0];
+
+		memcpy(dst->data + dst->offset[z][0], dst_px_orig[z], lengths[z][0]);
+		memcpy(dst->data + dst->offset[z][1], dst_n_orig[z],  lengths[z][1]);
+
+		free(dst_px_orig[z]);
+		free(dst_n_orig[z]);
 	}
+
 	return dest_sprite;
 }
--- a/src/blitter/32bpp_optimized.hpp	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/blitter/32bpp_optimized.hpp	Thu Jun 26 15:46:19 2008 +0000
@@ -10,13 +10,17 @@
 
 class Blitter_32bppOptimized : public Blitter_32bppSimple {
 public:
+	struct SpriteData {
+		uint32 offset[ZOOM_LVL_COUNT][2];
+		byte data[VARARRAY_SIZE];
+	};
+
 	/* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom);
 	/* virtual */ Sprite *Encode(SpriteLoader::Sprite *sprite, Blitter::AllocatorProc *allocator);
 
 	/* virtual */ const char *GetName() { return "32bpp-optimized"; }
 
-	template <BlitterMode mode, ZoomLevel zoom> void Draw(Blitter::BlitterParams *bp);
-	template <BlitterMode mode> void Draw(Blitter::BlitterParams *bp, ZoomLevel zoom);
+	template <BlitterMode mode> void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom);
 };
 
 class FBlitter_32bppOptimized: public BlitterFactory<FBlitter_32bppOptimized> {
--- a/src/core/math_func.hpp	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/core/math_func.hpp	Thu Jun 26 15:46:19 2008 +0000
@@ -101,8 +101,28 @@
 template <typename T>
 static FORCEINLINE T Align(const T x, uint n)
 {
+	assert((n & (n - 1)) == 0 && n != 0);
 	n--;
-	return (T)((x + n) & ~(n));
+	return (T)((x + n) & ~((T)n));
+}
+
+/**
+ * Return the smallest multiple of n equal or greater than x
+ * Applies to pointers only
+ *
+ * @note n must be a power of 2
+ * @param x The min value
+ * @param n The base of the number we are searching
+ * @return The smallest multiple of n equal or greater than x
+ * @see Align()
+ */
+
+assert_compile(sizeof(size_t) == sizeof(void *));
+
+template <typename T>
+static FORCEINLINE T *AlignPtr(T *x, uint n)
+{
+	return (T *)Align((size_t)x, n);
 }
 
 /**
--- a/src/gfx_type.h	Thu Jun 26 00:40:42 2008 +0000
+++ b/src/gfx_type.h	Thu Jun 26 15:46:19 2008 +0000
@@ -149,7 +149,7 @@
 	uint8 b, g, r, a; ///< colour channels in LE order
 #endif /* TTD_ENDIAN == TTD_BIG_ENDIAN */
 
-	operator uint32 () { return *(uint32 *)this; }
+	operator uint32 () const { return *(uint32 *)this; }
 };
 
 enum FontSize {