PROWAREtech
C++: Array versus Binary Tree
An interesting comparison can be made between an array and a binary search tree. Both are very fast.
The array uses less memory and takes less time to add data to when it is not sorted. Arrays add data at a constant time rate or O(1) in Big O notation or at a linear rate, O(n), when sorted. Sorting a wholey unsorted array is done in O(n•log(n)) time using the quicksort algorithm. Sorting using this algorithm along with the time to add the data unsorted makes the array faster to add data to. Searching an array is done with a binary search.
The link-based binary search tree must allocate memory for its nodes slowing it down, but searches a lot faster (on a percentage basis) than the array. The search speed should be identical, O(log(n)) (best case), however it just does not work out this way. Adding a single item to a large data structure is faster with a binary tree, O(log(n)) assuming it is well balanced.
The array-based binary search tree is about 25% faster than the link-based one due to it not having the need to allocate memory on a per node basis. The entire array is allocated all at once. The array does not grow and this is its achilles heel.
Again, the search results should be the same, however, the binary search tree is faster for both the link-based and array-based binary search tree.
It is important to note that this is a well balanced binary search tree, sorting random data. This makes it faster but at best it should only be as fast as a binary search. There is code on this page to balance a binary search tree to increase its performance even more. A perfectly balanced tree should be just as fast as a binary search on a sorted array.
Notice that the code for the array has been optimized for the int
data type.
The spaghetti code for this program is as follows.
#include <iostream>
#include <string>
#include <stdlib.h>
#include <ctime>
using namespace std;
class TRandomMersenne { // encapsulate random number generator
// or constants for MT19937:
#define MERS_N 624
#define MERS_M 397
#define MERS_R 31
#define MERS_U 11
#define MERS_S 7
#define MERS_T 15
#define MERS_L 18
#define MERS_A 0x9908B0DF
#define MERS_B 0x9D2C5680
#define MERS_C 0xEFC60000
public:
TRandomMersenne(unsigned long seed) { // constructor
RandomInit(seed);
}
void RandomInit(unsigned long seed); // re-seed
void RandomInitByArray(unsigned long seeds[], int length); // seed by more than 32 bits
int IRandom(int min, int max); // output random integer
double Random(); // output random float
unsigned long BRandom(); // output random bits
private:
unsigned long mt[MERS_N]; // state vector
int mti; // index into mt
enum TArch { LITTLEENDIAN, BIGENDIAN, NONIEEE };
TArch Architecture; // conversion to float depends on computer architecture
};
void TRandomMersenne::RandomInit(unsigned long seed) {
// re-seed generator
mt[0] = seed;
for (mti = 1; mti < MERS_N; mti++) {
mt[mti] = (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
}
// detect computer architecture
union { double f; unsigned long i[2]; } convert;
convert.f = 1.0;
// Note: Old versions of the Gnu g++ compiler may make an error here,
// compile with the option -fenum-int-equiv to fix the problem
if (convert.i[1] == 0x3FF00000) Architecture = LITTLEENDIAN;
else if (convert.i[0] == 0x3FF00000) Architecture = BIGENDIAN;
else Architecture = NONIEEE;
}
void TRandomMersenne::RandomInitByArray(unsigned long seeds[], int length) {
// seed by more than 32 bits
int i, j, k;
RandomInit(19650218UL);
if (length <= 0) return;
i = 1; j = 0;
k = (MERS_N > length ? MERS_N : length);
for (; k; k--) {
mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525UL)) + seeds[j] + j;
i++; j++;
if (i >= MERS_N) { mt[0] = mt[MERS_N - 1]; i = 1; }
if (j >= length) j = 0;
}
for (k = MERS_N - 1; k; k--) {
mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941UL)) - i;
if (++i >= MERS_N) { mt[0] = mt[MERS_N - 1]; i = 1; }
}
mt[0] = 0x80000000UL;
} // MSB is 1; assuring non-zero initial array
unsigned long TRandomMersenne::BRandom() {
// generate 32 random bits
unsigned long y;
if (mti >= MERS_N) {
// generate MERS_N words at one time
const unsigned long LOWER_MASK = (1LU << MERS_R) - 1; // lower MERS_R bits
const unsigned long UPPER_MASK = -1L << MERS_R; // upper (32 - MERS_R) bits
static const unsigned long mag01[2] = { 0, MERS_A };
int kk;
for (kk = 0; kk < MERS_N - MERS_M; kk++) {
y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
mt[kk] = mt[kk + MERS_M] ^ (y >> 1) ^ mag01[y & 1];
}
for (; kk < MERS_N - 1; kk++) {
y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
mt[kk] = mt[kk + (MERS_M - MERS_N)] ^ (y >> 1) ^ mag01[y & 1];
}
y = (mt[MERS_N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
mt[MERS_N - 1] = mt[MERS_M - 1] ^ (y >> 1) ^ mag01[y & 1];
mti = 0;
}
y = mt[mti++];
// Tempering (May be omitted):
y ^= y >> MERS_U;
y ^= (y << MERS_S) & MERS_B;
y ^= (y << MERS_T) & MERS_C;
y ^= y >> MERS_L;
return y;
}
double TRandomMersenne::Random() {
// output random float number in the interval 0 <= x < 1
union { double f; unsigned long i[2]; } convert;
unsigned long r = BRandom(); // get 32 random bits
switch (Architecture) {
case LITTLEENDIAN:
convert.i[0] = r << 20;
convert.i[1] = (r >> 12) | 0x3FF00000;
return convert.f - 1.0;
case BIGENDIAN:
convert.i[1] = r << 20;
convert.i[0] = (r >> 12) | 0x3FF00000;
return convert.f - 1.0;
case NONIEEE: default:
;
}
// This somewhat slower method works for all architectures, including
// non-IEEE floating point representation:
return (double)r * (1. / ((double)(unsigned long)(-1L) + 1.));
}
int TRandomMersenne::IRandom(int min, int max) {
// output random integer in the interval min <= x <= max
int r;
r = int((max - min + 1) * Random()) + min; // multiply interval with random and truncate
if (r > max) r = max;
if (max < min) return 0x80000000;
return r;
}
template <class TData> class ArrayNode
{
public:
TData data;
int ileft, iright;
ArrayNode()
{
ileft = iright = -1;
}
~ArrayNode()
{
ileft = iright = -1;
}
};
template <class TData> class BinaryArrayTree
{
private:
ArrayNode<TData> *arraytree;
int idx;
BinaryArrayTree() {};
public:
BinaryArrayTree(const unsigned int arraySize);
~BinaryArrayTree();
void Add(const TData &data);
bool Search(const TData &data);
};
template <class TData> void BinaryArrayTree<TData>::Add(const TData &data)
{
if(idx)
{
int *p;
if (data == arraytree[0].data)
return;
if (data < arraytree[0].data)
p = &arraytree[0].ileft;
else
p = &arraytree[0].iright;
while (*p > -1)
{
if (data == arraytree[*p].data)
return;
if (data < arraytree[*p].data)
p = &arraytree[*p].ileft;
else
p = &arraytree[*p].iright;
}
*p = idx;
arraytree[idx++].data = data;
}
else
arraytree[idx++].data = data;
}
template <class TData> bool BinaryArrayTree<TData>::Search(const TData &data)
{
if (idx)
{
int i = 0;
while (i > -1)
{
if (data == arraytree[i].data)
return true;
if (data < arraytree[i].data)
i = arraytree[i].ileft;
else
i = arraytree[i].iright;
}
}
return false;
}
template <class TData> BinaryArrayTree<TData>::BinaryArrayTree(const unsigned int arraySize)
{
arraytree = new ArrayNode<TData>[arraySize];
idx = 0;
}
template <class TData> BinaryArrayTree<TData>::~BinaryArrayTree()
{
if (arraytree)
delete[]arraytree;
arraytree = nullptr;
}
template <class TData> class LinkedNode
{
private:
LinkedNode() {}
public:
TData data;
LinkedNode<TData> *pleft, *pright;
LinkedNode(TData &dataToSort)
{
pleft = nullptr;
pright = nullptr;
data = dataToSort;
}
~LinkedNode()
{
if (pleft)
{
delete pleft;
pleft = nullptr;
}
if (pright)
{
delete pright;
pright = nullptr;
}
}
};
template <class TData> class BinaryLinkedTree
{
private:
LinkedNode<TData> *ptree;
void Add(TData &data, LinkedNode<TData> **ppnode);
bool Search(const TData &data, LinkedNode<TData> **ppnode);
public:
BinaryLinkedTree();
~BinaryLinkedTree();
void Add(TData &data);
bool Search(const TData &data);
};
template <class TData> void BinaryLinkedTree<TData>::Add(TData &data, LinkedNode<TData> **ppnode)
{
if ((*ppnode) == nullptr)
*ppnode = new LinkedNode<TData>(data);
else
{
if (data == (*ppnode)->data)
return;
if (data < (*ppnode)->data)
Add(data, &(*ppnode)->pleft);
else
Add(data, &(*ppnode)->pright);
}
}
template <class TData> void BinaryLinkedTree<TData>::Add(TData &data)
{
Add(data, &ptree);
}
template <class TData> bool BinaryLinkedTree<TData>::Search(const TData &data, LinkedNode<TData> **ppnode)
{
if ((*ppnode) != nullptr)
{
if (data < (*ppnode)->data)
return Search(data, &(*ppnode)->pleft);
if (data >(*ppnode)->data)
return Search(data, &(*ppnode)->pright);
return true;
}
return false;
}
template <class TData> bool BinaryLinkedTree<TData>::Search(const TData &data)
{
return Search(data, &ptree);
}
template <class TData> BinaryLinkedTree<TData>::BinaryLinkedTree()
{
ptree = nullptr;
}
template <class TData> BinaryLinkedTree<TData>::~BinaryLinkedTree()
{
if (ptree != nullptr)
delete ptree;
ptree = nullptr;
}
#define DEFAULT_SIZE 1000
template <class TData> class Array
{
private:
TData *items;
unsigned int length, max;
bool sorted;
void Sort(TData * const pparray, const int size) const;
public:
Array();
Array(unsigned int arrayLength);
~Array();
unsigned int Length() const;
void Add(const TData data);
TData operator[](const unsigned int index);
bool Remove(const TData &data);
void Clear();
unsigned int GetIndexOf(const TData data); // returns Length() when data not found
};
template <class TData> void Array<TData>::Sort(TData * const pparray, const int size) const // quicksort
{
if (size < 2)
return;
TData pivot = pparray[size / 2];
TData *left = pparray;
TData *right = pparray + size - 1;
while (true) {
while ((left <= right) && *left < pivot) left++;
while ((left <= right) && *right > pivot) right--;
if (left > right) break;
TData temp = *left;
*left++ = *right;
*right-- = temp;
}
Sort(pparray, right - pparray + 1);
Sort(left, pparray + size - left);
}
template <class TData> Array<TData>::Array()
{
max = DEFAULT_SIZE;
length = 0;
sorted = false;
items = new TData[max];
for (unsigned int i = 0; i < max; items[i++] = nullptr);
}
template <class TData> Array<TData>::Array(unsigned int arrayLength)
{
max = arrayLength;
length = 0;
sorted = false;
if (max == 0)
max = DEFAULT_SIZE;
items = new TData[max];
for (unsigned int i = 0; i < max; items[i++] = 0);
}
template <class TData> Array<TData>::~Array()
{
Clear();
delete[]items;
items = nullptr; // clean up the memory
}
template <class TData> unsigned int Array<TData>::Length() const
{
return length;
}
template <class TData> void Array<TData>::Add(const TData data)
{
if (length == max) // then increase size of array
{
max *= 2;
TData *newArray = new TData[max];
unsigned int i;
for (i = 0; i < length; i++)
newArray[i] = items[i];
for (; i < max; newArray[i++] = 0);
for (i = 0; i < length; items[i++] = 0);
delete[]items;
items = newArray;
}
items[length++] = data;
if (sorted)
{
TData temp;
unsigned int count;
for (count = length - 1; count > 0; count--) // reversed bubblesort with break statement
{
if (items[count] < items[count - 1])
{
temp = items[count];
items[count] = items[count - 1];
items[count - 1] = temp;
}
else
break;
}
}
}
template <class TData> TData Array<TData>::operator[](const unsigned int index)
{
// make sure to use Length() to know the size of the array so as not to exceed the bounds of the array of items
return items[index];
}
template <class TData> bool Array<TData>::Remove(const TData &data)
{
unsigned int idx = GetIndexOf(data);
if (idx < length)
{
delete items[idx];
length--;
for (unsigned int i = idx; i < length; i++)
items[i] = items[i + 1];
items[length] = 0; // clean up the memory
return true;
}
return false;
}
template <class TData> unsigned int Array<TData>::GetIndexOf(const TData data)
{
// this procedure uses binary search so array must be sorted
if (!sorted)
{
Sort(items, length);
sorted = true;
}
int first = 0, last = length - 1, middle;
while (first <= last)
{
middle = (first + last) / 2;
if (items[middle] == data)
return middle;
if (data > items[middle])
first = middle + 1;
else
last = middle - 1;
}
return length;
}
template <class TData> void Array<TData>::Clear()
{
for (unsigned int i = 0; i < length; i++)
items[i] = 0;
length = 0;
}
#define ITEMS 10000000
int main()
{
BinaryLinkedTree<int> blt;
BinaryArrayTree<int> bat(ITEMS);
Array<int> a(ITEMS);
int *temp = new int[ITEMS];
int start, tim, i;
TRandomMersenne randm(time(0));
for (i = 0; i < ITEMS; i++)
temp[i] = randm.IRandom(-ITEMS, ITEMS);
start = clock();
for (i = 0; i < ITEMS; i++)
blt.Add(temp[i]);
tim = (clock() - start);
cout << fixed << ((float)tim / 1000.0) << " seconds to add " << ITEMS << " items to linked-based binary search tree" << endl << "Space used on 32-bit system: " << ((4 * 3) * ITEMS) << " bytes" << endl << endl;
start = clock();
for (i = 0; i < ITEMS; i++)
bat.Add(temp[i]);
tim = (clock() - start);
cout << fixed << ((float)tim / 1000.0) << " seconds to add " << ITEMS << " items to array-based binary search tree" << endl << "Space used on 32-bit system: " << ((4 * 3) * ITEMS) << " bytes" << endl << endl;
start = clock();
for (i = 0; i < ITEMS; i++)
a.Add(temp[i]);
a.GetIndexOf(temp[0]); // this will force a sort of the array
tim = (clock() - start);
cout << fixed << ((float)tim / 1000.0) << " seconds to add " << ITEMS << " items to array" << endl << "Space used on 32-bit system: " << (4 * ITEMS) << " bytes" << endl << endl;
start = clock();
for (i = 0; i < ITEMS; blt.Search(i++));
tim = (clock() - start);
cout << fixed << ((float)tim / 1000.0) << " seconds to search linked-based binary search tree " << ITEMS << " times" << endl << endl;
start = clock();
for (i = 0; i < ITEMS; bat.Search(i++));
tim = (clock() - start);
cout << fixed << ((float)tim / 1000.0) << " seconds to search array-based binary search tree " << ITEMS << " times" << endl << endl;
start = clock();
for (i = 0; i < ITEMS; a.GetIndexOf(i++));
tim = (clock() - start);
cout << fixed << ((float)tim / 1000.0) << " seconds to search array using binary search " << ITEMS << " times" << endl << endl;
delete[]temp;
return 0;
}