Sorted map vs hashed map
In theory, hash map structure should be very fast on retrieving by key operations, close to O(1) like for an array. A sorted map (keys are sorted) should be O(log2 N) instead. The following test checks the difference.
Common scenario
We will use one tester class per type of tested structure. The keys are string[10] and are generated randomly in prepared array to avoid additional checks whether the key exists or not.
The test creates 1 million pairs then repeat N times (variable RunsCount) following tests including memory usage statistics (how to):
- sequential access like array
- random data access by randomly selected existing key
program HashVsSortedMap;
{$MODE OBJFPC}{$H+}
uses
SysUtils,
Classes,
Math,
Contnrs,
FGL,
DateUtils;
const
MinTestValue = 1 - MaxInt;
MaxTestValue = MaxInt;
type
TKey = string[10]; // Max key "4294967296" is up to 10 characters
TMySortedMap = specialize TFPGMap<TKey, longint>;
TMyHashMap = TFPHashList;
TTester = class(TObject)
protected
FTestItemsCount: integer;
FUniqueKeysCount: integer;
FKeys: array of TKey;
public
constructor Create(const ATestItemsCount, AUniqueKeysCount: integer); virtual;
destructor Destroy; override;
procedure CreateMap; virtual; abstract;
procedure FreeMap; virtual; abstract;
procedure Clear; virtual; abstract;
procedure Fill; virtual; abstract;
procedure Sort; virtual; abstract;
function SequentialAccess: longint; virtual; abstract;
function RandomAccess: longint; virtual; abstract;
property TestItemsCount: integer read FTestItemsCount;
property UniqueKeysCount: integer read FUniqueKeysCount;
end;
{ TSortedMapTester }
TSortedMapTester = class(TTester)
private
FMap: TMySortedMap;
public
procedure CreateMap; override;
procedure FreeMap; override;
procedure Clear; override;
procedure Fill; override;
procedure Sort; override;
function SequentialAccess: longint; override;
function RandomAccess: longint; override;
end;
{ THashMapTester }
THashMapTester = class(TTester)
private
FMap: TMyHashMap;
public
procedure CreateMap; override;
procedure FreeMap; override;
procedure Clear; override;
procedure Fill; override;
procedure Sort; override;
function SequentialAccess: longint; override;
function RandomAccess: longint; override;
end;
{ TTester }
constructor TTester.Create(const ATestItemsCount, AUniqueKeysCount: integer);
var
i: integer;
begin
inherited Create;
FTestItemsCount := ATestItemsCount;
FUniqueKeysCount := AUniqueKeysCount;
SetLength(FKeys, FTestItemsCount);
for i := 0 to FTestItemsCount - 1 do
FKeys[i] := IntToStr(Math.RandomRange(1, FUniqueKeysCount));
end;
destructor TTester.Destroy;
begin
SetLength(FKeys, 0);
inherited Destroy;
end;
{ THashMapTester }
procedure THashMapTester.CreateMap;
begin
FMap := TMyHashMap.Create;
end;
procedure THashMapTester.FreeMap;
begin
FMap.Free;
end;
procedure THashMapTester.Clear;
var
i: integer;
p: ^longint;
begin
for i := 0 to FTestItemsCount - 1 do
begin
p := FMap.Items[i];
Dispose(p);
end;
FMap.Clear;
end;
procedure THashMapTester.Fill;
var
i: integer;
p: ^longint;
begin
for i := 1 to FTestItemsCount do
begin
New(p);
p^ := Math.RandomRange(MinTestValue, MaxTestValue);
FMap.Add(FKeys[i], p);
end;
end;
procedure THashMapTester.Sort;
begin
// do nothing
end;
function THashMapTester.SequentialAccess: longint;
var
i: integer;
begin
Result := 0;
for i := 0 to FMap.Count - 1 do
Result := (Result + longint(FMap.Items[i]^)) div 2;
end;
function THashMapTester.RandomAccess: longint;
var
i: integer;
Key: ShortString;
begin
Result := 0;
for i := 1 to FTestItemsCount do
begin
Key := FKeys[Math.RandomRange(1, FTestItemsCount)];
Result := (Result + longint(FMap.Find(Key)^)) div 2;
end;
end;
{ TSortedMapTester }
procedure TSortedMapTester.CreateMap;
begin
FMap := TMySortedMap.Create;
end;
procedure TSortedMapTester.FreeMap;
begin
FMap.Free;
end;
procedure TSortedMapTester.Clear;
begin
FMap.Clear;
end;
procedure TSortedMapTester.Fill;
var
i: integer;
begin
for i := 1 to FTestItemsCount do
FMap.Add(FKeys[i], Math.RandomRange(MinTestValue, MaxTestValue));
end;
procedure TSortedMapTester.Sort;
begin
FMap.Sorted := True;
end;
function TSortedMapTester.SequentialAccess: longint;
var
i: integer;
begin
Result := 0;
for i := 0 to FMap.Count - 1 do
Result := (Result + FMap.Data[i]) div 2;
end;
function TSortedMapTester.RandomAccess: longint;
var
i: integer;
Key: TKey;
begin
Result := 0;
for i := 1 to FTestItemsCount do
begin
Key := FKeys[Math.RandomRange(1, FTestItemsCount)];
Result := (Result + FMap.KeyData[Key]) div 2;
end;
end;
procedure RunTest(const Tester: TTester;
const RunsCount: integer;
const ShowDetails: boolean);
var
i, m1, m2, m3, m4, m5: integer;
t1, t2: TDateTime;
v, AvgSize, AvgSeqTime, AvgRndTime: longint;
DiffMsec: int64;
begin
writeln('-------------------');
writeln('Testing ', Tester.ClassName,
', length: ', Tester.TestItemsCount,
', unique keys: ', Tester.UniqueKeysCount,
', density: ', (Tester.UniqueKeysCount / Tester.TestItemsCount):4:2);
writeln('-------------------');
AvgSize := 0;
AvgSeqTime := 0;
AvgRndTime := 0;
for i := 1 to RunsCount do
begin
writeln('Step ', i);
m1 := GetHeapStatus.TotalAllocated;
if ShowDetails then
writeln('Total heap memory allocated, bytes: ', m1);
Tester.CreateMap;
m2 := GetHeapStatus.TotalAllocated;
if ShowDetails then
writeln('Empty. Size: ', m2 - m1);
t1 := Now;
Tester.Fill;
t2 := Now;
m3 := GetHeapStatus.TotalAllocated;
if ShowDetails then
writeln('Filled test data. Size: ', m3 - m1);
AvgSize := AvgSize + m3 - m1;
if ShowDetails then
writeln('Elapsed time, msec: ', MilliSecondsBetween(t2, t1));
t1 := Now;
Tester.Sort;
t2 := Now;
m3 := GetHeapStatus.TotalAllocated;
if ShowDetails then
begin
writeln('Sorted data. Size: ', m3 - m1);
writeln('Sort elapsed time, msec: ', MilliSecondsBetween(t2, t1));
end;
t1 := Now;
v := Tester.SequentialAccess;
t2 := Now;
DiffMsec := MilliSecondsBetween(t2, t1);
if ShowDetails then
writeln(Format(
'Sequential access test. Elapsed, msec: %d. Average, seeks/msec: %d. Result: %d',
[DiffMsec, Tester.TestItemsCount div DiffMsec, v]));
AvgSeqTime := AvgSeqTime + DiffMsec;
t1 := Now;
v := Tester.RandomAccess;
t2 := Now;
DiffMsec := MilliSecondsBetween(t2, t1);
if ShowDetails then
writeln(Format(
'Random access test. Elapsed, msec: %d. Average, seeks/msec: %d. Result: %d',
[DiffMsec, Tester.TestItemsCount div DiffMsec, v]));
AvgRndTime := AvgRndTime + DiffMsec;
Tester.Clear;
m4 := GetHeapStatus.TotalAllocated;
if ShowDetails then
writeln('Cleared. Size: ', m4 - m1);
Tester.FreeMap;
end;
writeln('---');
writeln('Avarage values. Step count: ', RunsCount);
writeln('Memory used, bytes: ', AvgSize div RunsCount);
writeln('Sequential access. Time elapsed, msec: ', AvgSeqTime div RunsCount);
writeln('Sequential access. Seeks per msec: ', Tester.TestItemsCount / (AvgSeqTime / RunsCount):10:2);
writeln('Random access. Time elapsed, msec: ', AvgRndTime div RunsCount);
writeln('Random access. Seeks per msec: ', (Tester.TestItemsCount / (AvgRndTime / RunsCount)):10:2);
Tester.Free;
m5 := GetHeapStatus.TotalAllocated;
if ShowDetails then
writeln('Total heap memory (after disposing): ', m5);
end;
var
i, TestItemsCount, UniqueKeysCount, RunsCount, StepsCount: integer;
begin
TestItemsCount := 1000000; // Total items count in map
RunsCount := 10; // Number of same test runs to get average values
StepsCount := 1; // Number of repeating with different unique keys count
for i := 1 to StepsCount do
begin
UniqueKeysCount := Round(TestItemsCount * (1.0 / (i * i)));
RunTest(TSortedMapTester.Create(TestItemsCount, UniqueKeysCount), RunsCount, false);
RunTest(THashMapTester.Create(TestItemsCount, UniqueKeysCount), RunsCount, false);
end;
end.
Test 1
Set up following values to simulate normal usage where all keys are unique (randomly generated keys are not guaranteed 100% unique values but close to it).
RunsCount := 10; StepsCount := 1;
Results 1
In spite of 3.5 times more memory used, TFPHashList is about 20% faster than TFPGMap (sorted) in sequential access and is about 50% faster in random access.
-------------------
Testing TSortedMapTester, length: 1000000, unique keys: 1000000, density: 1.00
-------------------
Avarage values. Step count: 10
Memory used, bytes: 18305440
Sequential access. Time elapsed, msec: 7
Sequential access. Seeks per msec: 140845.07
Random access. Time elapsed, msec: 831
Random access. Seeks per msec: 1202.21
-------------------
Testing THashMapTester, length: 1000000, unique keys: 1000000, density: 1.00
-------------------
Avarage values. Step count: 10
Memory used, bytes: 69613088
Sequential access. Time elapsed, msec: 4
Sequential access. Seeks per msec: 204081.63
Random access. Time elapsed, msec: 456
Random access. Seeks per msec: 2191.54
Test 2
Now repeating the same test with different number of unique items. I set up following values
RunsCount := 5; StepsCount := 5;
Results 2
Unique key density | Structure type | Memory used, bytes | Sequential access | Random access | ||
---|---|---|---|---|---|---|
Time elapsed, msec | Seeks per msec | Time elapsed, msec | Seeks per msec | |||
1,00 | TSortedMapTester | 18305440 | 7 | 135135 | 835 | 1197 |
1,00 | THashMapTester | 69599168 | 5 | 192307 | 454 | 2198 |
Difference | 3,8 | 0,7 | 1,4 | 0,5 | 1,8 | |
0,25 | TSortedMapTester | 18305440 | 7 | 142857 | 759 | 1318 |
0,25 | THashMapTester | 67889504 | 4 | 208333 | 374 | 2670 |
Difference | 3,7 | 0,6 | 1,5 | 0,5 | 2,0 | |
0,11 | TSortedMapTester | 18305440 | 7 | 142857 | 698 | 1431 |
0,11 | THashMapTester | 67878720 | 5 | 200000 | 333 | 2995 |
Difference | 3,7 | 0,7 | 1,4 | 0,5 | 2,1 | |
0,06 | TSortedMapTester | 18305440 | 7 | 138888 | 665 | 1503 |
0,06 | THashMapTester | 67832800 | 4 | 217391 | 255 | 3909 |
Difference | 3,7 | 0,6 | 1,6 | 0,4 | 2,6 | |
0,04 | TSortedMapTester | 18305440 | 7 | 142857 | 594 | 1681 |
0,04 | THashMapTester | 67833920 | 4 | 227272 | 223 | 4480 |
Difference | 3,7 | 0,6 | 1,6 | 0,4 | 2,7 |
Testing platform
- Quad core Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz (only one core is used)
- Linux Ubuntu 12.04 64 bits
- FPC 2.6.4 with Lazarus 1.4.2
- Code optimisation is "o1"
See also "Bencmark: array vs TList vs dynamic array".
blog comments powered by Disqus