aboutsummaryrefslogtreecommitdiff
path: root/core/slice
diff options
context:
space:
mode:
authorgingerBill <gingerBill@users.noreply.github.com>2025-10-07 12:38:01 +0100
committergingerBill <gingerBill@users.noreply.github.com>2025-10-07 12:38:01 +0100
commitf40fc2792f01b1fa265f62fb2aa4ef39e7529903 (patch)
treeed038f5a1049da56d7eb09e41393c8b5e8e71d6b /core/slice
parent1400952530a24f5c2926de7a7ac414bd4a947e7a (diff)
Replace normal sort procedure with a simpler unified type-erased one
Diffstat (limited to 'core/slice')
-rw-r--r--core/slice/sort.odin149
-rw-r--r--core/slice/sort_private.odin466
2 files changed, 295 insertions, 320 deletions
diff --git a/core/slice/sort.odin b/core/slice/sort.odin
index 3b4119afa..b613ff1ed 100644
--- a/core/slice/sort.odin
+++ b/core/slice/sort.odin
@@ -6,6 +6,8 @@ Ordering :: enum {
Greater = +1,
}
+Generic_Cmp :: #type proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering
+
@(require_results)
cmp :: proc(a, b: $E) -> Ordering where ORD(E) {
switch {
@@ -35,7 +37,16 @@ cmp_proc :: proc($E: typeid) -> (proc(E, E) -> Ordering) where ORD(E) {
sort :: proc(data: $T/[]$E) where ORD(E) {
when size_of(E) != 0 {
if n := len(data); n > 1 {
- _quick_sort_general(data, 0, n, _max_depth(n), struct{}{}, .Ordered)
+ raw := ([^]byte)(raw_data(data))
+ _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ x, y := (^E)(lhs)^, (^E)(rhs)^
+ if x < y {
+ return .Less
+ } else if x > y {
+ return .Greater
+ }
+ return .Equal
+ }, nil)
}
}
}
@@ -79,7 +90,18 @@ sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (ind
for _, idx in indices {
indices[idx] = idx
}
- _quick_sort_general_with_indices(data, indices, 0, n, _max_depth(n), struct{}{}, .Ordered)
+ raw := ([^]byte)(raw_data(indices))
+ _smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ data := ([^]E)(user_data)
+ xi, yi := (^int)(lhs)^, (^int)(rhs)^
+ x, y := data[xi], data[yi]
+ if x < y {
+ return .Less
+ } else if x > y {
+ return .Greater
+ }
+ return .Equal
+ }, raw_data(data))
}
return indices
}
@@ -91,7 +113,39 @@ sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (ind
sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
when size_of(E) != 0 {
if n := len(data); n > 1 {
- _quick_sort_general(data, 0, n, _max_depth(n), less, .Less)
+ raw := ([^]byte)(raw_data(data))
+ _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ x, y := (^E)(lhs)^, (^E)(rhs)^
+ less := (proc(E, E) -> bool)(user_data)
+ switch {
+ case less(x, y): return .Less
+ case less(y, x): return .Greater
+ }
+ return .Equal
+ }, rawptr(less))
+ }
+ }
+}
+
+sort_by_with_data :: proc(data: $T/[]$E, less: proc(i, j: E, user_data: rawptr) -> bool, user_data: rawptr) {
+ when size_of(E) != 0 {
+ if n := len(data); n > 1 {
+ Context :: struct {
+ less: proc(i, j: E, user_data: rawptr) -> bool,
+ user_data: rawptr,
+ }
+ ctx := &Context{less, user_data}
+
+ raw := ([^]byte)(raw_data(data))
+ _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ x, y := (^E)(lhs)^, (^E)(rhs)^
+ ctx := (^Context)(user_data)
+ switch {
+ case ctx.less(x, y, ctx.user_data): return .Less
+ case ctx.less(y, x, ctx.user_data): return .Greater
+ }
+ return .Equal
+ }, ctx)
}
}
}
@@ -105,8 +159,55 @@ sort_by_with_indices :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool, allocat
for _, idx in indices {
indices[idx] = idx
}
- _quick_sort_general_with_indices(data, indices, 0, n, _max_depth(n), less, .Less)
- return indices
+
+ Context :: struct{
+ less: proc(i, j: E) -> bool,
+ data: T,
+ }
+ ctx := &Context{less, data}
+
+ raw := ([^]byte)(raw_data(indices))
+ _smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ ctx := (^Context)(user_data)
+ xi, yi := (^int)(lhs)^, (^int)(rhs)^
+ x, y := ctx.data[xi], ctx.data[yi]
+ switch {
+ case ctx.less(x, y): return .Less
+ case ctx.less(y, x): return .Greater
+ }
+ return .Equal
+ }, ctx)
+ }
+ }
+ return indices
+}
+
+sort_by_with_indices_with_data :: proc(data: $T/[]$E, less: proc(i, j: E, user_data: rawptr) -> bool, user_data: rawptr, allocator := context.allocator) -> (indices : []int) {
+ indices = make([]int, len(data), allocator)
+ when size_of(E) != 0 {
+ if n := len(data); n > 1 {
+ for _, idx in indices {
+ indices[idx] = idx
+ }
+
+ Context :: struct{
+ less: proc(i, j: E, user_data: rawptr) -> bool,
+ data: T,
+ user_data: rawptr,
+ }
+ ctx := &Context{less, data, user_data}
+
+ raw := ([^]byte)(raw_data(indices))
+ _smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ ctx := (^Context)(user_data)
+ xi, yi := (^int)(lhs)^, (^int)(rhs)^
+ x, y := ctx.data[xi], ctx.data[yi]
+ switch {
+ case ctx.less(x, y, ctx.user_data): return .Less
+ case ctx.less(y, x, ctx.user_data): return .Greater
+ }
+ return .Equal
+ }, ctx)
}
}
return indices
@@ -115,11 +216,47 @@ sort_by_with_indices :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool, allocat
sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
when size_of(E) != 0 {
if n := len(data); n > 1 {
- _quick_sort_general(data, 0, n, _max_depth(n), cmp, .Cmp)
+ raw := ([^]byte)(raw_data(data))
+ _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ x, y := (^E)(lhs)^, (^E)(rhs)^
+ cmp := cast(proc(E, E) -> Ordering)(user_data)
+ return cmp(x, y)
+ }, rawptr(cmp))
}
}
}
+
+sort_by_cmp_with_data :: proc(data: $T/[]$E, cmp: proc(i, j: E, user_data: rawptr) -> Ordering, user_data: rawptr) {
+ when size_of(E) != 0 {
+ if n := len(data); n > 1 {
+ Context :: struct{
+ cmp: proc(i, j: E, user_data: rawptr) -> Ordering,
+ user_data: rawptr,
+ }
+ ctx := &Context{cmp, user_data}
+
+ raw := ([^]byte)(raw_data(data))
+ _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+ x, y := (^E)(lhs)^, (^E)(rhs)^
+ ctx := (^Context)(user_data)
+ return ctx.cmp(x, y, ctx.user_data)
+ }, ctx)
+ }
+ }
+}
+
+
+sort_by_generic_cmp :: proc(data: $T/[]$E, cmp: Generic_Cmp, user_data: rawptr) {
+ when size_of(E) != 0 {
+ if n := len(data); n > 1 {
+ raw := ([^]byte)(raw_data(data))
+ _smoothsort(raw, uint(len(data)), size_of(E), cmp, user_data)
+ }
+ }
+}
+
+
// stable_sort sorts a slice
stable_sort :: proc(data: $T/[]$E) where ORD(E) {
when size_of(E) != 0 {
diff --git a/core/slice/sort_private.odin b/core/slice/sort_private.odin
index 36637c4cd..efa9c596b 100644
--- a/core/slice/sort_private.odin
+++ b/core/slice/sort_private.odin
@@ -1,6 +1,7 @@
#+private
package slice
+import "base:builtin"
import "base:intrinsics"
_ :: intrinsics
@@ -12,171 +13,6 @@ Sort_Kind :: enum {
Cmp,
}
-_quick_sort_general :: proc(data: $T/[]$E, a, b, max_depth: int, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
- less :: #force_inline proc(a, b: E, call: P) -> bool {
- when KIND == .Ordered {
- return a < b
- } else when KIND == .Less {
- return call(a, b)
- } else when KIND == .Cmp {
- return call(a, b) == .Less
- } else {
- #panic("unhandled Sort_Kind")
- }
- }
-
- insertion_sort :: proc(data: $T/[]$E, a, b: int, call: P) #no_bounds_check {
- for i in a+1..<b {
- for j := i; j > a && less(data[j], data[j-1], call); j -= 1 {
- swap(data, j, j-1)
- }
- }
- }
-
- heap_sort :: proc(data: $T/[]$E, a, b: int, call: P) #no_bounds_check {
- sift_down :: proc(data: T, lo, hi, first: int, call: P) #no_bounds_check {
- root := lo
- for {
- child := 2*root + 1
- if child >= hi {
- break
- }
- if child+1 < hi && less(data[first+child], data[first+child+1], call) {
- child += 1
- }
- if !less(data[first+root], data[first+child], call) {
- return
- }
- swap(data, first+root, first+child)
- root = child
- }
- }
-
-
- first, lo, hi := a, 0, b-a
-
- for i := (hi-1)/2; i >= 0; i -= 1 {
- sift_down(data, i, hi, first, call)
- }
-
- for i := hi-1; i >= 0; i -= 1 {
- swap(data, first, first+i)
- sift_down(data, lo, i, first, call)
- }
- }
-
- median3 :: proc(data: T, m1, m0, m2: int, call: P) #no_bounds_check {
- if less(data[m1], data[m0], call) {
- swap(data, m1, m0)
- }
- if less(data[m2], data[m1], call) {
- swap(data, m2, m1)
- if less(data[m1], data[m0], call) {
- swap(data, m1, m0)
- }
- }
- }
-
- do_pivot :: proc(data: T, lo, hi: int, call: P) -> (midlo, midhi: int) #no_bounds_check {
- m := int(uint(lo+hi)>>1)
- if hi-lo > 40 {
- s := (hi-lo)/8
- median3(data, lo, lo+s, lo+s*2, call)
- median3(data, m, m-s, m+s, call)
- median3(data, hi-1, hi-1-s, hi-1-s*2, call)
- }
- median3(data, lo, m, hi-1, call)
-
- pivot := lo
- a, c := lo+1, hi-1
-
-
- for ; a < c && less(data[a], data[pivot], call); a += 1 {
- }
- b := a
-
- for {
- for ; b < c && !less(data[pivot], data[b], call); b += 1 { // data[b] <= pivot
- }
- for ; b < c && less(data[pivot], data[c-1], call); c -=1 { // data[c-1] > pivot
- }
- if b >= c {
- break
- }
-
- swap(data, b, c-1)
- b += 1
- c -= 1
- }
-
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- dups := 0
- if !less(data[pivot], data[hi-1], call) {
- swap(data, c, hi-1)
- c += 1
- dups += 1
- }
- if !less(data[b-1], data[pivot], call) {
- b -= 1
- dups += 1
- }
-
- if !less(data[m], data[pivot], call) {
- swap(data, m, b-1)
- b -= 1
- dups += 1
- }
- protect = dups > 1
- }
- if protect {
- for {
- for ; a < b && !less(data[b-1], data[pivot], call); b -= 1 {
- }
- for ; a < b && less(data[a], data[pivot], call); a += 1 {
- }
- if a >= b {
- break
- }
- swap(data, a, b-1)
- a += 1
- b -= 1
- }
- }
- swap(data, pivot, b-1)
- return b-1, c
- }
-
-
- a, b, max_depth := a, b, max_depth
-
- for b-a > 12 { // only use shell sort for lengths <= 12
- if max_depth == 0 {
- heap_sort(data, a, b, call)
- return
- }
- max_depth -= 1
- mlo, mhi := do_pivot(data, a, b, call)
- if mlo-a < b-mhi {
- _quick_sort_general(data, a, mlo, max_depth, call, KIND)
- a = mhi
- } else {
- _quick_sort_general(data, mhi, b, max_depth, call, KIND)
- b = mlo
- }
- }
- if b-a > 1 {
- // Shell short with gap 6
- for i in a+6..<b {
- if less(data[i], data[i-6], call) {
- swap(data, i, i-6)
- }
- }
- insertion_sort(data, a, b, call)
- }
-}
-
-
_stable_sort_general :: proc(data: $T/[]$E, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
less :: #force_inline proc(a, b: E, call: P) -> bool {
when KIND == .Ordered {
@@ -200,179 +36,181 @@ _stable_sort_general :: proc(data: $T/[]$E, call: $P, $KIND: Sort_Kind) where (O
}
}
-_quick_sort_general_with_indices :: proc(data: $T/[]$E, indices: []int, a, b, max_depth: int, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
- less :: #force_inline proc(a, b: E, call: P) -> bool {
- when KIND == .Ordered {
- return a < b
- } else when KIND == .Less {
- return call(a, b)
- } else when KIND == .Cmp {
- return call(a, b) == .Less
- } else {
- #panic("unhandled Sort_Kind")
+@(private)
+_smoothsort :: proc(base: [^]byte, nel: uint, width: uint, cmp: Generic_Cmp, arg: rawptr) {
+ pntz :: proc "contextless" (p: [2]uint) -> int {
+ r := intrinsics.count_trailing_zeros(p[0] - 1)
+ if r != 0 {
+ return int(r)
}
- }
-
- insertion_sort :: proc(data: $T/[]$E, indices: []int, a, b: int, call: P) #no_bounds_check {
- for i in a+1..<b {
- for j := i; j > a && less(data[j], data[j-1], call); j -= 1 {
- swap(data, j, j-1)
- swap(indices, j, j-1)
- }
+ r = (8*size_of(uint) + intrinsics.count_trailing_zeros(p[1]))
+ if r != 8*size_of(uint) {
+ return int(r)
}
+ return 0
}
- heap_sort :: proc(data: $T/[]$E, indices: []int, a, b: int, call: P) #no_bounds_check {
- sift_down :: proc(data: T, indices: []int, lo, hi, first: int, call: P) #no_bounds_check {
- root := lo
- for {
- child := 2*root + 1
- if child >= hi {
- break
- }
- if child+1 < hi && less(data[first+child], data[first+child+1], call) {
- child += 1
- }
- if !less(data[first+root], data[first+child], call) {
- return
- }
- swap(data, first+root, first+child)
- swap(indices, first+root, first+child)
- root = child
- }
- }
-
-
- first, lo, hi := a, 0, b-a
-
- for i := (hi-1)/2; i >= 0; i -= 1 {
- sift_down(data, indices, i, hi, first, call)
+ shl :: proc "contextless" (p: []uint, n: int) {
+ n := n
+ if n >= 8*size_of(uint) {
+ n -= 8*size_of(uint)
+ p[1] = p[0]
+ p[0] = 0
}
-
- for i := hi-1; i >= 0; i -= 1 {
- swap(data, first, first+i)
- swap(indices, first, first+i)
- sift_down(data, indices, lo, i, first, call)
+ p[1] <<= uint(n)
+ p[0] |= p[0] >> uint(8*size_of(uint) - n)
+ p[0] <<= uint(n)
+ }
+ shr :: proc "contextless" (p: []uint, n: int) {
+ n := n
+ if n >= 8*size_of(uint) {
+ n -= 8*size_of(uint)
+ p[0] = p[1]
+ p[1] = 0
}
+ p[0] >>= uint(n)
+ p[0] |= p[1] << uint(8*size_of(uint) - n)
+ p[1] >>= uint(n)
}
- median3 :: proc(data: T, indices: []int, m1, m0, m2: int, call: P) #no_bounds_check {
- if less(data[m1], data[m0], call) {
- swap(data, m1, m0)
- swap(indices, m1, m0)
+ cycle :: proc "contextless" (width: uint, data: [][^]byte, n: int) {
+ if len(data) < 2 {
+ return
}
- if less(data[m2], data[m1], call) {
- swap(data, m2, m1)
- swap(indices, m2, m1)
- if less(data[m1], data[m0], call) {
- swap(data, m1, m0)
- swap(indices, m1, m0)
+ buf: [256]u8 = ---
+ data[n] = raw_data(buf[:])
+ width := width
+ for width != 0 {
+ l := builtin.min(size_of(buf), int(width))
+ copy(data[n][:l], data[0][:l])
+ for i in 0..<n {
+ copy(data[i][:l], data[i+1][:l])
+ data[i] = data[i][l:]
+ }
+ width -= uint(l)
+ }
+ }
+
+ sift :: proc(head: [^]byte, width: uint, cmp: Generic_Cmp, arg: rawptr, pshift: int, lp: []uint) {
+ head := head
+ buf: [14*size_of(uint)+1][^]byte = ---
+ buf[0] = head
+ i := 1
+ pshift := pshift
+ for pshift > 1 {
+ rt := head[-width:]
+ lf := head[-width:][-lp[pshift - 2]:]
+ if cmp(buf[0], lf, arg) >= .Equal && cmp(buf[0], rt, arg) >= .Equal {
+ break
}
+ if cmp(lf, rt, arg) >= .Equal {
+ buf[i], head = lf, lf
+ pshift -= 1
+ } else {
+ buf[i], head = rt, rt
+ pshift -= 2
+ }
+ i += 1
}
+ cycle(width, buf[:], i)
}
- do_pivot :: proc(data: T, indices: []int, lo, hi: int, call: P) -> (midlo, midhi: int) #no_bounds_check {
- m := int(uint(lo+hi)>>1)
- if hi-lo > 40 {
- s := (hi-lo)/8
- median3(data, indices, lo, lo+s, lo+s*2, call)
- median3(data, indices, m, m-s, m+s, call)
- median3(data, indices, hi-1, hi-1-s, hi-1-s*2, call)
- }
- median3(data, indices, lo, m, hi-1, call)
-
- pivot := lo
- a, c := lo+1, hi-1
+ trinkle :: proc(head: [^]byte, width: uint, cmp: Generic_Cmp, arg: rawptr, pp: []uint, pshift: int, trusty: bool, lp: []uint) {
+ head := head
+ p := [2]uint{pp[0], pp[1]}
- for ; a < c && less(data[a], data[pivot], call); a += 1 {
- }
- b := a
+ buf: [14*size_of(uint)+1][^]byte = ---
+ buf[0] = head
- for {
- for ; b < c && !less(data[pivot], data[b], call); b += 1 { // data[b] <= pivot
- }
- for ; b < c && less(data[pivot], data[c-1], call); c -=1 { // data[c-1] > pivot
- }
- if b >= c {
+ i := 1
+ trail := 0
+ pshift := pshift
+ trusty := trusty
+ for p[0] != 1 || p[1] != 0 {
+ stepson := head[-lp[pshift]:]
+ if cmp(stepson, buf[0], arg) <= .Equal {
break
}
-
- swap(data, b, c-1)
- swap(indices, b, c-1)
- b += 1
- c -= 1
- }
-
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- dups := 0
- if !less(data[pivot], data[hi-1], call) {
- swap(data, c, hi-1)
- swap(indices, c, hi-1)
- c += 1
- dups += 1
- }
- if !less(data[b-1], data[pivot], call) {
- b -= 1
- dups += 1
- }
-
- if !less(data[m], data[pivot], call) {
- swap(data, m, b-1)
- swap(indices, m, b-1)
- b -= 1
- dups += 1
- }
- protect = dups > 1
- }
- if protect {
- for {
- for ; a < b && !less(data[b-1], data[pivot], call); b -= 1 {
- }
- for ; a < b && less(data[a], data[pivot], call); a += 1 {
- }
- if a >= b {
+ if !trusty && pshift > 1 {
+ rt := head[-width:]
+ lf := head[-width:][-lp[pshift-2]:]
+ if cmp(rt, stepson, arg) >= .Equal || cmp(lf, stepson, arg) >= .Equal {
break
}
- swap(data, a, b-1)
- swap(indices, a, b-1)
- a += 1
- b -= 1
}
+ buf[i] = stepson
+ head = stepson
+ trail = pntz(p)
+ shr(p[:], trail)
+ pshift += trail
+ trusty = false
+ i += 1
}
- swap(data, pivot, b-1)
- swap(indices, pivot, b-1)
- return b-1, c
- }
-
- assert(len(data) == len(indices))
-
- a, b, max_depth := a, b, max_depth
-
- for b-a > 12 { // only use shell sort for lengths <= 12
- if max_depth == 0 {
- heap_sort(data, indices, a, b, call)
+ if trusty {
return
}
- max_depth -= 1
- mlo, mhi := do_pivot(data, indices, a, b, call)
- if mlo-a < b-mhi {
- _quick_sort_general_with_indices(data, indices, a, mlo, max_depth, call, KIND)
- a = mhi
- } else {
- _quick_sort_general_with_indices(data, indices, mhi, b, max_depth, call, KIND)
- b = mlo
- }
+ cycle(width, buf[:], i)
+ sift(head, width, cmp, arg, pshift, lp)
}
- if b-a > 1 {
- // Shell short with gap 6
- for i in a+6..<b {
- if less(data[i], data[i-6], call) {
- swap(data, i, i-6)
- swap(indices, i, i-6)
- }
+
+ size := nel * width
+ if size == 0 {
+ return
+ }
+
+ lp: [12*size_of(uint)]uint = ---
+ lp[1] = width
+ lp[0] = lp[1]
+ for i := 2; true; i += 1 {
+ lp[i] = lp[i-2] + lp[i-1] + width
+ if lp[i] >= size {
+ break
}
- insertion_sort(data, indices, a, b, call)
}
-}
+
+ head := base
+ high := head[size - width:]
+ p := [2]uint{1, 0}
+ pshift := 1
+ for head < high {
+ if (p[0] & 3) == 3 {
+ sift(head, width, cmp, arg, pshift, lp[:])
+ shr(p[:], 2)
+ pshift += 2
+ } else {
+ if lp[pshift - 1] >= uint(uintptr(high) - uintptr(head)) {
+ trinkle(head, width, cmp, arg, p[:], pshift, false, lp[:])
+ } else {
+ sift(head, width, cmp, arg, pshift, lp[:])
+ }
+ if pshift == 1 {
+ shl(p[:], 1)
+ pshift = 0
+ } else {
+ shl(p[:], pshift - 1)
+ pshift = 1
+ }
+ }
+ p[0] |= 1
+ head = head[width:]
+ }
+ trinkle(head, width, cmp, arg, p[:], pshift, false, lp[:])
+ for pshift != 1 || p[0] != 1 || p[1] != 0 {
+ if pshift <= 1 {
+ trail := pntz(p)
+ shr(p[:], trail)
+ pshift += trail
+ } else {
+ shl(p[:], 2)
+ pshift -= 2
+ p[0] ~= 7
+ shr(p[:], 1)
+ trinkle(head[-width:][-lp[pshift]:], width, cmp, arg, p[:], pshift + 1, true, lp[:])
+ shl(p[:], 1)
+ p[0] |= 1
+ trinkle(head[-width:], width, cmp, arg, p[:], pshift, true, lp[:])
+ }
+ head = head[-width:]
+ }
+} \ No newline at end of file