GLProgramming.com

home :: about :: development guides :: irc :: forums :: search :: paste :: links :: contribute :: code dump

-> Click here to learn how to get live help <-



 
 

Development Guide Library


Dynamic Branching - GPU Thinking Exercises - by DELTRON


/*
Controlling Variable Behavior Without Conditional Branching

  Example: Two Jugs

	Jug A holds 7 liters, Jug B holds 5 liters
	You are allowed to;
		fill jug A or B
		empty jug A or B
		pour jug A to B or vice versa
	The goal: Via filling/emptying the jugs, end up with one holding 6 liters.

	(see: link to flash game at the bottom of this DG)
	(Does the guy say "Oprah" when he jumps over the jug? heh)

Having a variable act like a jug is straight-forward with a conditional branch.

The pseudo code for such would be:

	Var = Var + Number
	if Var > MaxVarSize then Var = MaxVarSize

However, this generates a conditional branch.  What if your processor didn't support
conditional branching? - (like on most of the currently programmable GPU's).

So the challenge here is to create code for a variable that acts like a jug.


Example implementations with accompanying machine-code output:

  (As the var 'i' increases, it caps out at 2)

** USING A REGULAR CONDITIONAL BRANCH **
134:          result=i;
004015AB 8B 4D EC             mov         ecx,dword ptr [ebp-14h]
004015AE 89 4D F0             mov         dword ptr [ebp-10h],ecx
135:          if (i>2) result=2;
004015B1 83 7D EC 02          cmp         dword ptr [ebp-14h],2
004015B5 7E 07                jle         main+5Eh (004015be)
004015B7 C7 45 F0 02 00 00 00 mov         dword ptr [ebp-10h],2

** USING A CONDITIONAL EXPRESSION **
78:           int result = (i>2) ? 2:i;
004015A6 83 7D FC 02          cmp         dword ptr [ebp-4],2
004015AA 7E 09                jle         main+45h (004015b5)
004015AC C7 45 DC 02 00 00 00 mov         dword ptr [ebp-24h],2
004015B3 EB 06                jmp         main+4Bh (004015bb)
004015B5 8B 4D FC             mov         ecx,dword ptr [ebp-4]
004015B8 89 4D DC             mov         dword ptr [ebp-24h],ecx
004015BB 8B 55 DC             mov         edx,dword ptr [ebp-24h]
004015BE 89 55 F8             mov         dword ptr [result],edx

** SUCCESS FOR EVERY CASE (NO BRANCHING) **
72:       int result = (i&(~0 + (i>=cap)))+(i>=cap)*cap;
00401595 33 C0                xor         eax,eax
00401597 83 7D FC 02          cmp         dword ptr [ebp-4],2
0040159B 0F 9D C0             setge       al
0040159E 83 E8 01             sub         eax,1
004015A1 8B 4D FC             mov         ecx,dword ptr [ebp-4]
004015A4 23 C8                and         ecx,eax
004015A6 33 D2                xor         edx,edx
004015A8 83 7D FC 02          cmp         dword ptr [ebp-4],2
004015AC 0F 9D C2             setge       dl
004015AF 8D 04 51             lea         eax,[ecx+edx*2]
004015B2 89 45 F8             mov         dword ptr [ebp-8],eax

 Below is a simulation to solve the jugs puzzle.

*/

#include <iostream>
using namespace std;

void main(void)
{
	const int MaxA = 7;
	const int MaxB = 5;

	int stepnum = 1;

//STEP #1	(both empty)
	int a=0, b=0, tmp=0;
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #2	(fill a)
	a = MaxA;
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #3	(pour a into b)
	tmp = b;
	b = ((b+a)&(-1 + ((b+a)>=MaxB)))+((b+a)>=MaxB)*MaxB;
	a = a - (b-tmp);
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #4	(empty b)
	b = 0;
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #5	(pour a into b)
	tmp = b;
	b = ((b+a)&(-1 + ((b+a)>=MaxB)))+((b+a)>=MaxB)*MaxB;
	a = a - (b-tmp);
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #6	(fill a)
	a = MaxA;
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #7	(pour a into b)
	tmp = b;
	b = ((b+a)&(-1 + ((b+a)>=MaxB)))+((b+a)>=MaxB)*MaxB;
	a = a - (b-tmp);
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #8	(empty b)
	b = 0;
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #9	(pour a into b)
	tmp = b;
	b = ((b+a)&(-1 + ((b+a)>=MaxB)))+((b+a)>=MaxB)*MaxB;
	a = a - (b-tmp);
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #10	(fill a)
	a = MaxA;
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

//STEP #11	(pour a into b)
	tmp = b;
	b = ((b+a)&(-1 + ((b+a)>=MaxB)))+((b+a)>=MaxB)*MaxB;
	a = a - (b-tmp);
	cout << "step #" << stepnum++ << " a = " << a << ", b = " << b << endl;

	// Jug A now holds the required amount
}



/*

  So in theory, with the ideas presented above we could solve the Two Jugs puzzle completely
  on a GPU that had no dynamic branching.   - DELTRON 5/27/04

*/


------------> Link to the "Two Jugs" flash puzzle here.