Difference between revisions of "Array sort"
From Free Pascal wiki
Jump to navigationJump to search (Created page with "A typical task is to sort an array. One of the problems is that an array could be of any type. Not a simple type, but more complicated types. Prior to introducing generics in...") |
(Empty "var" section is forbidden in trunk; simpler “Median”; “avg”⇒“pivot”.) |
||
(29 intermediate revisions by 5 users not shown) | |||
Line 1: | Line 1: | ||
A typical task is to sort an array. One of the problems is that an array could be of any type. Not a simple type, but more complicated types. | A typical task is to sort an array. One of the problems is that an array could be of any type. Not a simple type, but more complicated types. | ||
− | Prior to introducing | + | ==Basic Version== |
+ | Prior to introducing [[Generics]] into Freepascal, sorting an array of arbitrary type could be achieve via untyped variables (implicit [[Pointers]]). | ||
+ | * No generics involved | ||
+ | * works on any version of FPC (or Delphi) | ||
+ | * any array data type | ||
+ | ===Source Code=== | ||
<source lang="delphi"> | <source lang="delphi"> | ||
unit anysort; | unit anysort; | ||
Line 22: | Line 27: | ||
procedure AnyQuickSort(var Arr; idxL, idxH: Integer; | procedure AnyQuickSort(var Arr; idxL, idxH: Integer; | ||
− | Stride: Integer; CompareFunc: TCompareFunc; var SwapBuf); | + | Stride: Integer; CompareFunc: TCompareFunc; var SwapBuf, MedBuf); |
var | var | ||
ls,hs : Integer; | ls,hs : Integer; | ||
Line 37: | Line 42: | ||
hs:=hi*Stride; | hs:=hi*Stride; | ||
ms:=mi*Stride; | ms:=mi*Stride; | ||
+ | Move(pb[ms], medBuf, Stride); | ||
repeat | repeat | ||
− | while CompareFunc( pb[ls], | + | while CompareFunc( pb[ls], medBuf) < 0 do begin |
inc(ls, Stride); | inc(ls, Stride); | ||
inc(li); | inc(li); | ||
end; | end; | ||
− | while CompareFunc( | + | while CompareFunc( medBuf, pb[hs] ) < 0 do begin |
dec(hs, Stride); | dec(hs, Stride); | ||
dec(hi); | dec(hi); | ||
Line 50: | Line 56: | ||
Move(pb[hs], pb[ls], Stride); | Move(pb[hs], pb[ls], Stride); | ||
Move(SwapBuf, pb[hs], Stride); | Move(SwapBuf, pb[hs], Stride); | ||
+ | // begin fix 11/11/2021: update ms if the reference point is moved | ||
+ | if li=mi then ms:=hs; | ||
+ | if hi=mi then ms:=ls; | ||
+ | // end fix | ||
inc(ls, Stride); inc(li); | inc(ls, Stride); inc(li); | ||
dec(hs, Stride); dec(hi); | dec(hs, Stride); dec(hi); | ||
end; | end; | ||
until ls>hs; | until ls>hs; | ||
− | if hi>idxL then AnyQuickSort(Arr, idxL, hi, Stride, CompareFunc, SwapBuf); | + | if hi>idxL then AnyQuickSort(Arr, idxL, hi, Stride, CompareFunc, SwapBuf, MedBuf); |
− | if li<idxH then AnyQuickSort(Arr, li, idxH, Stride, CompareFunc, SwapBuf); | + | if li<idxH then AnyQuickSort(Arr, li, idxH, Stride, CompareFunc, SwapBuf, MedBuf); |
end; | end; | ||
Line 62: | Line 72: | ||
buf: array of byte; | buf: array of byte; | ||
begin | begin | ||
− | SetLength(buf, Stride); | + | if Count <= 1 then Exit; // should be more than 1 to be sortable |
− | AnyQuickSort(Arr, 0, Count-1, Stride, compareFunc, buf[0]); | + | SetLength(buf, Stride*2); |
+ | AnyQuickSort(Arr, 0, Count-1, Stride, compareFunc, buf[0], buf[Stride]); | ||
end; | end; | ||
− | end | + | end. |
</source> | </source> | ||
+ | ===Use Sample=== | ||
Here's an example on how to use AnySort() function to sort an array of Integer | Here's an example on how to use AnySort() function to sort an array of Integer | ||
<source lang="delphi"> | <source lang="delphi"> | ||
interface | interface | ||
− | uses AnySort | + | uses AnySort; |
procedure SortArrayInteger(var arr: array of Integer; count: Integer); | procedure SortArrayInteger(var arr: array of Integer; count: Integer); | ||
Line 91: | Line 103: | ||
procedure SortArrayInteger(var arr: array of Integer; count: Integer); | procedure SortArrayInteger(var arr: array of Integer; count: Integer); | ||
begin | begin | ||
− | AnySort(arr, Count, sizeof(Integer), CompareInt); | + | anysort.AnySort(arr, Count, sizeof(Integer), @CompareInt); |
end; | end; | ||
</source> | </source> | ||
+ | ==Generics Version== | ||
+ | * FPC-only. | ||
+ | * Uses '''generic''' with static comparer. | ||
+ | * Comparer is a simple ''less'' predicate. | ||
+ | * Essentially an Introsort, a QuickSort that falls back to a simple algorithm for small enough subarrays. | ||
+ | * Bypasses managed types handling to greatly speed up sorting arrays of reference-counted types; won't work with custom Copy/AddRef operators. | ||
+ | * Protected against | ||
+ | ** O(N²) time (falls back to heap sort); | ||
+ | ** O(N) recursion depth; maximum recursion depth is ⌈bitsizeof(SizeUint) - log₂ SelectionThreshold⌉. | ||
+ | |||
+ | ===Source Code=== | ||
+ | <source lang='delphi'>{$mode objfpc} {$coperators on} | ||
+ | unit AnySort2; | ||
+ | |||
+ | interface | ||
+ | |||
+ | type | ||
+ | // Comparer should provide function Less(const a, b: Elem): boolean. | ||
+ | generic Sorter<Elem, Comparer> = class | ||
+ | type | ||
+ | pElem = ^Elem; | ||
+ | |||
+ | class procedure Sort(p: pElem; count: SizeUint); static; | ||
+ | class procedure Sort(var a: array of Elem); static; | ||
+ | |||
+ | private const | ||
+ | SelectionThreshold = 12; | ||
+ | HeapArity = 4; | ||
+ | type | ||
+ | SwapTemp = array[0 .. sizeof(Elem) - 1] of byte; | ||
+ | |||
+ | class procedure SelectionSort(p: pElem; count: SizeUint); static; | ||
+ | |||
+ | class function Median(p: pElem; count: SizeUint): pElem; static; | ||
+ | class procedure QSort(p: pElem; count, reasonable: SizeUint); static; | ||
+ | |||
+ | class procedure HeapReplacePessimistic(p: pElem; count, id: SizeUint; const x: SwapTemp); static; | ||
+ | class procedure HeapSort(p: pElem; count: SizeUint); static; | ||
+ | end; | ||
+ | |||
+ | generic ComparerLessOp<Elem> = class | ||
+ | class function Less(const a, b: Elem): boolean; static; inline; | ||
+ | end; | ||
+ | |||
+ | generic SorterLessOp<Elem> = class(specialize Sorter<Elem, specialize ComparerLessOp<Elem>>) end; | ||
+ | |||
+ | implementation | ||
+ | |||
+ | class procedure Sorter.Sort(p: pElem; count: SizeUint); | ||
+ | begin | ||
+ | QSort(p, count, count); | ||
+ | end; | ||
+ | |||
+ | class procedure Sorter.Sort(var a: array of Elem); | ||
+ | begin | ||
+ | Sort(pElem(a), length(a)); | ||
+ | end; | ||
+ | |||
+ | class procedure Sorter.SelectionSort(p: pElem; count: SizeUint); | ||
+ | var | ||
+ | i, j, imin: SizeInt; | ||
+ | t: SwapTemp; | ||
+ | begin | ||
+ | for i := 0 to SizeInt(count) - 2 do | ||
+ | begin | ||
+ | imin := i; | ||
+ | for j := i + 1 to SizeInt(count) - 1 do | ||
+ | if Comparer.Less(p[j], p[imin]) then imin := j; | ||
+ | t := SwapTemp(p[i]); SwapTemp(p[i]) := SwapTemp(p[imin]); SwapTemp(p[imin]) := t; | ||
+ | end; | ||
+ | end; | ||
+ | |||
+ | class function Sorter.Median(p: pElem; count: SizeUint): pElem; | ||
+ | var | ||
+ | a, b, c, t: pElem; | ||
+ | begin | ||
+ | a := p; | ||
+ | b := p + count div 2; | ||
+ | c := p + SizeUint(count - 1); | ||
+ | if Comparer.Less(b^, a^) then begin t := a; a := b; b := t; end; | ||
+ | if Comparer.Less(c^, b^) then begin t := c; c := b; b := t; end; | ||
+ | if Comparer.Less(b^, a^) then result := a else result := b; | ||
+ | end; | ||
+ | |||
+ | // see MSVC implementation of std::sort | ||
+ | class procedure Sorter.QSort(p: pElem; count, reasonable: SizeUint); | ||
+ | var | ||
+ | L, R: SizeInt; | ||
+ | t, pivot: SwapTemp; | ||
+ | begin | ||
+ | while (count > SelectionThreshold) and (reasonable > 0) do | ||
+ | begin | ||
+ | reasonable := reasonable div 2 + reasonable div 4; | ||
+ | |||
+ | pivot := SwapTemp(Median(p, count)^); | ||
+ | R := 0; | ||
+ | L := count - 1; | ||
+ | |||
+ | repeat | ||
+ | while Comparer.Less(p[R], Elem(pivot)) do inc(R); | ||
+ | while Comparer.Less(Elem(pivot), p[L]) do dec(L); | ||
+ | if R <= L then | ||
+ | begin | ||
+ | t := SwapTemp(p[R]); SwapTemp(p[R]) := SwapTemp(p[L]); SwapTemp(p[L]) := t; | ||
+ | inc(R); | ||
+ | dec(L); | ||
+ | end; | ||
+ | until R > L; | ||
+ | |||
+ | // [0 .. L], [R .. count - 1] | ||
+ | // possible edge cases are L = -1 or R = count ! | ||
+ | if SizeInt(count) - R <= L then | ||
+ | begin | ||
+ | QSort(p + R, SizeInt(count) - R, reasonable); // QSort calls with count = 0 or count = 1 are safe. | ||
+ | count := L + 1; | ||
+ | end else | ||
+ | begin | ||
+ | QSort(p, L + 1, reasonable); | ||
+ | p += R; | ||
+ | count -= R; | ||
+ | end; | ||
+ | end; | ||
+ | if count > SelectionThreshold then | ||
+ | HeapSort(p, count) | ||
+ | else if count >= 2 then | ||
+ | SelectionSort(p, count); | ||
+ | end; | ||
+ | |||
+ | // HeapReplacePessimistic(p, count, id, x) | ||
+ | // | ||
+ | // is equivalent to | ||
+ | // | ||
+ | // p[id] := x; | ||
+ | // id := HeapDownThoroughly(..., id); | ||
+ | // HeapUp(..., id) | ||
+ | // | ||
+ | // where HeapDownThoroughly doesn't stop at correct position, instead shifting the element all the way down, saving one compare at each level. | ||
+ | // HeapUp then bubbles the element up to the correct position. | ||
+ | // See Python's 'heapq' for justification over simple HeapDown. | ||
+ | // | ||
+ | // Careful with 'x' passed by reference and pointing into 'p'! | ||
+ | |||
+ | class procedure Sorter.HeapReplacePessimistic(p: pElem; count, id: SizeUint; const x: SwapTemp); | ||
+ | var | ||
+ | child, bestChild, lastChild, parent, start: SizeUint; | ||
+ | begin | ||
+ | start := id; | ||
+ | repeat | ||
+ | child := HeapArity * id; // childs of 'id' are p[child + 1] ... p[child + HeapArity]. | ||
+ | bestChild := child + 1; | ||
+ | if bestChild >= count then break; | ||
+ | |||
+ | lastChild := child + HeapArity; | ||
+ | if lastChild >= count then lastChild := count - 1; | ||
+ | |||
+ | for child := child + 2 to lastChild do | ||
+ | if Comparer.Less(p[bestChild], p[child]) then bestChild := child; | ||
+ | |||
+ | SwapTemp(p[id]) := SwapTemp(p[bestChild]); | ||
+ | id := bestChild; | ||
+ | until false; | ||
+ | |||
+ | while id > start do | ||
+ | begin | ||
+ | parent := (id - 1) div HeapArity; | ||
+ | if not Comparer.Less(p[parent], Elem(x)) then break; | ||
+ | SwapTemp(p[id]) := SwapTemp(p[parent]); | ||
+ | id := parent; | ||
+ | end; | ||
+ | SwapTemp(p[id]) := x; | ||
+ | end; | ||
+ | |||
+ | class procedure Sorter.HeapSort(p: pElem; count: SizeUint); | ||
+ | var | ||
+ | i: SizeInt; | ||
+ | t: SwapTemp; | ||
+ | begin | ||
+ | for i := SizeInt((count + (HeapArity - 2)) div HeapArity) - 1 downto 0 do | ||
+ | begin | ||
+ | t := SwapTemp(p[i]); | ||
+ | HeapReplacePessimistic(p, count, i, t); | ||
+ | end; | ||
+ | for i := SizeInt(count) - 1 downto 1 do | ||
+ | begin | ||
+ | t := SwapTemp(p[i]); | ||
+ | SwapTemp(p[i]) := SwapTemp(p[0]); | ||
+ | HeapReplacePessimistic(p, i, 0, t); | ||
+ | end; | ||
+ | end; | ||
+ | |||
+ | class function ComparerLessOp.Less(const a, b: Elem): boolean; | ||
+ | begin | ||
+ | result := a < b; | ||
+ | end; | ||
+ | |||
+ | end.</source> | ||
+ | ===Use Sample=== | ||
+ | Usage:<source lang='delphi'>{$mode objfpc} | ||
+ | uses AnySort2; | ||
+ | |||
+ | var | ||
+ | a: array of integer; | ||
+ | |||
+ | begin | ||
+ | a := specialize TArray<integer>.Create(111, 555, 888, 777, 333, 444, 666); | ||
+ | specialize SorterLessOp<integer>.Sort(a); | ||
+ | end.</source> | ||
[[Category:Sort]] | [[Category:Sort]] |
Latest revision as of 10:29, 23 July 2022
A typical task is to sort an array. One of the problems is that an array could be of any type. Not a simple type, but more complicated types.
Basic Version
Prior to introducing Generics into Freepascal, sorting an array of arbitrary type could be achieve via untyped variables (implicit Pointers).
- No generics involved
- works on any version of FPC (or Delphi)
- any array data type
Source Code
unit anysort;
{$ifdef fpc}{$mode delphi}{$H+}{$endif}
interface
type
TCompareFunc = function (const elem1, elem2): Integer;
procedure AnySort(var Arr; Count: Integer; Stride: Integer; CompareFunc: TCompareFunc);
implementation
type
TByteArray = array [Word] of byte;
PByteArray = ^TByteArray;
procedure AnyQuickSort(var Arr; idxL, idxH: Integer;
Stride: Integer; CompareFunc: TCompareFunc; var SwapBuf, MedBuf);
var
ls,hs : Integer;
li,hi : Integer;
mi : Integer;
ms : Integer;
pb : PByteArray;
begin
pb:=@Arr;
li:=idxL;
hi:=idxH;
mi:=(li+hi) div 2;
ls:=li*Stride;
hs:=hi*Stride;
ms:=mi*Stride;
Move(pb[ms], medBuf, Stride);
repeat
while CompareFunc( pb[ls], medBuf) < 0 do begin
inc(ls, Stride);
inc(li);
end;
while CompareFunc( medBuf, pb[hs] ) < 0 do begin
dec(hs, Stride);
dec(hi);
end;
if ls <= hs then begin
Move(pb[ls], SwapBuf, Stride);
Move(pb[hs], pb[ls], Stride);
Move(SwapBuf, pb[hs], Stride);
// begin fix 11/11/2021: update ms if the reference point is moved
if li=mi then ms:=hs;
if hi=mi then ms:=ls;
// end fix
inc(ls, Stride); inc(li);
dec(hs, Stride); dec(hi);
end;
until ls>hs;
if hi>idxL then AnyQuickSort(Arr, idxL, hi, Stride, CompareFunc, SwapBuf, MedBuf);
if li<idxH then AnyQuickSort(Arr, li, idxH, Stride, CompareFunc, SwapBuf, MedBuf);
end;
procedure AnySort(var Arr; Count: Integer; Stride: Integer; CompareFunc: TCompareFunc);
var
buf: array of byte;
begin
if Count <= 1 then Exit; // should be more than 1 to be sortable
SetLength(buf, Stride*2);
AnyQuickSort(Arr, 0, Count-1, Stride, compareFunc, buf[0], buf[Stride]);
end;
end.
Use Sample
Here's an example on how to use AnySort() function to sort an array of Integer
interface
uses AnySort;
procedure SortArrayInteger(var arr: array of Integer; count: Integer);
implementation
function CompareInt(const d1,d2): integer;
var
i1 : integer absolute d1;
i2 : integer absolute d2;
begin
if i1=i2 then Result:=0
else if i1<i2 then Result:=-1
else Result:=1;
end;
procedure SortArrayInteger(var arr: array of Integer; count: Integer);
begin
anysort.AnySort(arr, Count, sizeof(Integer), @CompareInt);
end;
Generics Version
- FPC-only.
- Uses generic with static comparer.
- Comparer is a simple less predicate.
- Essentially an Introsort, a QuickSort that falls back to a simple algorithm for small enough subarrays.
- Bypasses managed types handling to greatly speed up sorting arrays of reference-counted types; won't work with custom Copy/AddRef operators.
- Protected against
- O(N²) time (falls back to heap sort);
- O(N) recursion depth; maximum recursion depth is ⌈bitsizeof(SizeUint) - log₂ SelectionThreshold⌉.
Source Code
{$mode objfpc} {$coperators on}
unit AnySort2;
interface
type
// Comparer should provide function Less(const a, b: Elem): boolean.
generic Sorter<Elem, Comparer> = class
type
pElem = ^Elem;
class procedure Sort(p: pElem; count: SizeUint); static;
class procedure Sort(var a: array of Elem); static;
private const
SelectionThreshold = 12;
HeapArity = 4;
type
SwapTemp = array[0 .. sizeof(Elem) - 1] of byte;
class procedure SelectionSort(p: pElem; count: SizeUint); static;
class function Median(p: pElem; count: SizeUint): pElem; static;
class procedure QSort(p: pElem; count, reasonable: SizeUint); static;
class procedure HeapReplacePessimistic(p: pElem; count, id: SizeUint; const x: SwapTemp); static;
class procedure HeapSort(p: pElem; count: SizeUint); static;
end;
generic ComparerLessOp<Elem> = class
class function Less(const a, b: Elem): boolean; static; inline;
end;
generic SorterLessOp<Elem> = class(specialize Sorter<Elem, specialize ComparerLessOp<Elem>>) end;
implementation
class procedure Sorter.Sort(p: pElem; count: SizeUint);
begin
QSort(p, count, count);
end;
class procedure Sorter.Sort(var a: array of Elem);
begin
Sort(pElem(a), length(a));
end;
class procedure Sorter.SelectionSort(p: pElem; count: SizeUint);
var
i, j, imin: SizeInt;
t: SwapTemp;
begin
for i := 0 to SizeInt(count) - 2 do
begin
imin := i;
for j := i + 1 to SizeInt(count) - 1 do
if Comparer.Less(p[j], p[imin]) then imin := j;
t := SwapTemp(p[i]); SwapTemp(p[i]) := SwapTemp(p[imin]); SwapTemp(p[imin]) := t;
end;
end;
class function Sorter.Median(p: pElem; count: SizeUint): pElem;
var
a, b, c, t: pElem;
begin
a := p;
b := p + count div 2;
c := p + SizeUint(count - 1);
if Comparer.Less(b^, a^) then begin t := a; a := b; b := t; end;
if Comparer.Less(c^, b^) then begin t := c; c := b; b := t; end;
if Comparer.Less(b^, a^) then result := a else result := b;
end;
// see MSVC implementation of std::sort
class procedure Sorter.QSort(p: pElem; count, reasonable: SizeUint);
var
L, R: SizeInt;
t, pivot: SwapTemp;
begin
while (count > SelectionThreshold) and (reasonable > 0) do
begin
reasonable := reasonable div 2 + reasonable div 4;
pivot := SwapTemp(Median(p, count)^);
R := 0;
L := count - 1;
repeat
while Comparer.Less(p[R], Elem(pivot)) do inc(R);
while Comparer.Less(Elem(pivot), p[L]) do dec(L);
if R <= L then
begin
t := SwapTemp(p[R]); SwapTemp(p[R]) := SwapTemp(p[L]); SwapTemp(p[L]) := t;
inc(R);
dec(L);
end;
until R > L;
// [0 .. L], [R .. count - 1]
// possible edge cases are L = -1 or R = count !
if SizeInt(count) - R <= L then
begin
QSort(p + R, SizeInt(count) - R, reasonable); // QSort calls with count = 0 or count = 1 are safe.
count := L + 1;
end else
begin
QSort(p, L + 1, reasonable);
p += R;
count -= R;
end;
end;
if count > SelectionThreshold then
HeapSort(p, count)
else if count >= 2 then
SelectionSort(p, count);
end;
// HeapReplacePessimistic(p, count, id, x)
//
// is equivalent to
//
// p[id] := x;
// id := HeapDownThoroughly(..., id);
// HeapUp(..., id)
//
// where HeapDownThoroughly doesn't stop at correct position, instead shifting the element all the way down, saving one compare at each level.
// HeapUp then bubbles the element up to the correct position.
// See Python's 'heapq' for justification over simple HeapDown.
//
// Careful with 'x' passed by reference and pointing into 'p'!
class procedure Sorter.HeapReplacePessimistic(p: pElem; count, id: SizeUint; const x: SwapTemp);
var
child, bestChild, lastChild, parent, start: SizeUint;
begin
start := id;
repeat
child := HeapArity * id; // childs of 'id' are p[child + 1] ... p[child + HeapArity].
bestChild := child + 1;
if bestChild >= count then break;
lastChild := child + HeapArity;
if lastChild >= count then lastChild := count - 1;
for child := child + 2 to lastChild do
if Comparer.Less(p[bestChild], p[child]) then bestChild := child;
SwapTemp(p[id]) := SwapTemp(p[bestChild]);
id := bestChild;
until false;
while id > start do
begin
parent := (id - 1) div HeapArity;
if not Comparer.Less(p[parent], Elem(x)) then break;
SwapTemp(p[id]) := SwapTemp(p[parent]);
id := parent;
end;
SwapTemp(p[id]) := x;
end;
class procedure Sorter.HeapSort(p: pElem; count: SizeUint);
var
i: SizeInt;
t: SwapTemp;
begin
for i := SizeInt((count + (HeapArity - 2)) div HeapArity) - 1 downto 0 do
begin
t := SwapTemp(p[i]);
HeapReplacePessimistic(p, count, i, t);
end;
for i := SizeInt(count) - 1 downto 1 do
begin
t := SwapTemp(p[i]);
SwapTemp(p[i]) := SwapTemp(p[0]);
HeapReplacePessimistic(p, i, 0, t);
end;
end;
class function ComparerLessOp.Less(const a, b: Elem): boolean;
begin
result := a < b;
end;
end.
Use Sample
Usage:
{$mode objfpc}
uses AnySort2;
var
a: array of integer;
begin
a := specialize TArray<integer>.Create(111, 555, 888, 777, 333, 444, 666);
specialize SorterLessOp<integer>.Sort(a);
end.