Optimize the api_blit inner loop.
The function takes half of the clock time.
This commit is contained in:
		
							
								
								
									
										38
									
								
								src/tic.c
									
									
									
									
									
								
							
							
						
						
									
										38
									
								
								src/tic.c
									
									
									
									
									
								
							| @@ -1665,14 +1665,40 @@ static void api_blit(tic_mem* tic, u32* out, tic_scanline scanline) | |||||||
| 	for(s32 r = 0, y = tic->ram.vram.vars.offset.y; r < TIC80_HEIGHT; r++, y++, rowPtr += TIC80_FULLWIDTH) | 	for(s32 r = 0, y = tic->ram.vram.vars.offset.y; r < TIC80_HEIGHT; r++, y++, rowPtr += TIC80_FULLWIDTH) | ||||||
| 	{ | 	{ | ||||||
| 		memset4(rowPtr, pal[tic->ram.vram.vars.border], Left); | 		memset4(rowPtr, pal[tic->ram.vram.vars.border], Left); | ||||||
| 		memset4(rowPtr + Left, pal[tic->ram.vram.vars.bg], TIC80_WIDTH); |  | ||||||
|  |  | ||||||
| 		u32* colPtr = rowPtr + Left; |  | ||||||
|  |  | ||||||
| 		if(y >= 0 && y < TIC80_HEIGHT) | 		if(y >= 0 && y < TIC80_HEIGHT) | ||||||
| 			for(s32 c = 0, x = tic->ram.vram.vars.offset.x, index = y * TIC80_WIDTH + x; c < TIC80_WIDTH; c++, colPtr++, x++, index++) | 		{ | ||||||
| 				if(x >= 0 && x < TIC80_WIDTH) | 			u32* colPtr = rowPtr + Left; | ||||||
| 					*colPtr = pal[tic_tool_peek4(tic->ram.vram.screen.data, index)]; | 			s32 offset = tic->ram.vram.vars.offset.x; | ||||||
|  | 			s32 count = TIC80_WIDTH; | ||||||
|  | 			s32 index = y * TIC80_WIDTH; | ||||||
|  | 			if (offset > 0) { | ||||||
|  | 				memset4(rowPtr + Left, pal[tic->ram.vram.vars.bg], offset); | ||||||
|  | 				count -= offset; | ||||||
|  | 				colPtr += offset; | ||||||
|  | 			} else { | ||||||
|  | 				count += offset; | ||||||
|  | 				index -= offset; | ||||||
|  | 			} | ||||||
|  | 			// copy the first pixel if the line is not aligned to bytes. | ||||||
|  | 			if (index & 1 && count > 0) { | ||||||
|  | 				*colPtr++ = pal[tic_tool_peek4(tic->ram.vram.screen.data, index)]; | ||||||
|  | 				index++; | ||||||
|  | 				count--; | ||||||
|  | 			} | ||||||
|  | 			for(s32 c = 0, di = index/2; c < count/2; c++) | ||||||
|  | 			{ | ||||||
|  | 				// copy two pixels in one cycle | ||||||
|  | 				u8 val = ((u8*)tic->ram.vram.screen.data)[di++]; | ||||||
|  | 				*colPtr++ = pal[val & 0xf]; | ||||||
|  | 				*colPtr++ = pal[val >> 4]; | ||||||
|  | 			} | ||||||
|  | 			// copy the remaining pixel | ||||||
|  | 			if (count & 1) *colPtr = pal[tic_tool_peek4(tic->ram.vram.screen.data, index + count/2*2)]; | ||||||
|  | 			if (offset < 0) memset4(rowPtr + Left + TIC80_WIDTH + offset, pal[tic->ram.vram.vars.bg], -offset); | ||||||
|  | 		} else { | ||||||
|  | 			memset4(rowPtr + Left, pal[tic->ram.vram.vars.bg], TIC80_WIDTH); | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		memset4(rowPtr + (TIC80_FULLWIDTH-Right), pal[tic->ram.vram.vars.border], Right); | 		memset4(rowPtr + (TIC80_FULLWIDTH-Right), pal[tic->ram.vram.vars.border], Right); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 František Jahoda
					František Jahoda