# Animated "sleep sort": each value v schedules its own move after 100*v ms.
# position[k] = current index of the element that started at index k
# original[k] = starting index of the element currently at index k
position = [0 ... VA.length]
original = [0 ... VA.length]

exchange = (a, b) ->
  VA.swap a, b
  [original[a], original[b]] = [original[b], original[a]]
  position[original[a]] = a
  position[original[b]] = b

next = 0
for idx in [0 ... VA.length]
  value = VA.get(idx)
  do (value, idx) ->
    setTimeout ->
      # Move the element that was originally at index idx into the
      # next unfilled slot at the front of the array.
      exchange position[idx], next++
      VA.play()
    , 100 * value
# Radix-exchange (binary MSB radix) sort: partition the half-open range
# [begin, end) on the given bit (zeros left, ones right), then recurse
# on each non-trivial half with the next lower bit.
sort = (begin, end, bit) ->
  mask = 1 << bit
  lo = begin
  hi = end
  while lo < hi
    # Advance past elements already on the correct side.
    lo++ while lo < hi and !(VA.get(lo) & mask)
    hi-- while lo < hi and (VA.get(hi - 1) & mask)
    # Both pointers found misplaced elements: exchange them.
    VA.swap(lo++, --hi) if lo < hi
  sort(begin, lo, bit - 1) if bit and lo > begin
  sort(lo, end, bit - 1) if bit and lo < end

sort(0, VA.length, 30)
# In-place merge sort over the INCLUSIVE index range [lo, hi].
s = (lo, hi) ->
  # Empty or single-element range: nothing to sort.
  # (>= also guards the empty-array call s(0, -1).)
  if lo >= hi
    return
  if lo + 1 == hi
    # Exactly two elements: order them directly.
    if VA.gt(lo, hi)
      VA.swap(lo, hi)
    return
  mid = Math.floor(lo + (hi - lo) / 2)
  s(lo, mid)
  s(mid + 1, hi)
  # Merge the two sorted halves: whenever the left head exceeds the
  # right head, insert the right head before it and advance the split.
  mid++
  while lo <= mid && mid <= hi
    if VA.gt(lo, mid)
      VA.insert(mid, lo)
      mid++
    else
      lo++

# Bug fix: hi is an inclusive index, so the top-level call must pass
# VA.length - 1; the original s(0, VA.length) compared one element past
# the end of the array during the final merge.
s(0, VA.length - 1)
This is exactly what's going on. You can't get a reflow on the page without giving up control, and I didn't want to require people to write asynchronous code to give up control after every swap. So it runs your code and queues up actions, to be played back when it's done. This is also why you have to use the methods on VA instead of working directly on the array.
This might be a dumb question, but can you add numbers about the size of the stack to the right-side statistics? I.e. how many variables and calls to functions are stored in memory at a time.
I'm not sure if this is possible or relevant to record based on how JavaScript engines compile and work, but it would seem interesting to know.
Very neat, but here's a couple of additions I'd like to see:
1. Allow for the array to contain values outside of the range of [1..length]. Ideally, allow for duplicate values as well. Since you control the swap and insert operations, you can maintain two separate sets, one of the actual numbers, and one of the "normalized" values that you display as your graph.
2. Give us an operation to highlight a set of lines and have the highlights persist. I wanted to take mayoff's radix-exchange sort and tweak it to highlight the "working set" that it's currently sorting, but there's no way to do that. I'm thinking here that we just need one function VA.persistHighlight() which takes start and end indices and highlights them, and persists those highlights until a new call to VA.persistHighlight(). The "transient" highlights would be layered on top of the persistent one.
Yay. My first program in CoffeeScript. It was fun to implement an algorithm of which I only vaguely remembered how it worked. Details bit me a few times before I got it working and right.
Heapsort:
# Sift the element at index y down the max-heap occupying the first
# `size` slots, swapping with the larger child until the heap property
# holds or a leaf is reached.
fix_heap = (y, size) ->
  loop
    left = 2 * y + 1
    right = 2 * y + 2
    # No children at all: done.
    break if left >= size
    # Follow the left child when the right one is missing or smaller,
    # otherwise the right child (same comparison order as before).
    child = if right >= size || VA.gt(left, right) then left else right
    break unless VA.lt(y, child)
    VA.swap(y, child)
    y = child
# Sift the element at index y up toward the root until it reaches the
# top or its parent is at least as large (incremental heap build).
pull_up = (y) ->
  loop
    parent = (y - 1) >> 1
    break if y == 0 || VA.lt(y, parent)
    VA.swap(parent, y)
    y = parent
# Build the max-heap by pulling each element up in turn, then repeatedly
# move the maximum to the end and repair the shrunken heap.
pull_up(x) for x in [1 ... VA.length]
for x in [VA.length - 1 ... 0] by -1
  VA.swap(x, 0)
  fix_heap(0, x)
You can also build the heap from the bottom up and change fix_heap a little which altogether is faster and simpler (thanks wiki):
# Sift down within the first `size` elements: repeatedly descend into
# the larger child until the node at y dominates both children.
fix_heap = (y, size) ->
  loop
    child = 2 * y + 1
    break if child >= size
    # Prefer the right sibling when it exists and is larger.
    child++ if child + 1 < size && VA.gt(child + 1, child)
    break unless VA.lt(y, child)
    VA.swap(y, child)
    y = child
# Heapify bottom-up, starting from the last internal node, then sort by
# repeatedly extracting the maximum.
fix_heap(x, VA.length) for x in [VA.length >> 1 ... -1] by -1
for x in [VA.length - 1 ... 0] by -1
  VA.swap(x, 0)
  fix_heap(0, x)
It's fantastic. I don't know CoffeeScript so all I did was compare the pre-written ones to each other and see if they did what I expected them to do, but it's a nice visual representation. The only problem I have with it is that the neon yellow on black with a gray/white background can be pain inducing, especially when moving quickly.
Speaking of pain inducing, I found watching the bubble sort very pain inducing! I've always known it was slow compared to other options, but this makes you never want to see the thing mentioned again!
Looks like bubble sort is so slow because this implementation has a very high ratio between the costs of a compare and a swap. A swap involves redrawing large parts of the graphical area, while a compare is fast. Bubble sort makes N^2 swaps, as compared to insertion sort which makes N^2 comparisons but only N swaps.
When a swap costs the same as a compare, bubble sort is as fast as the other N^2 sorts.
Turn off "Quick Compare" to make comparisons take longer if you want a different ratio. By default, it takes about 1/15 as long to compare as swap (I thought the visualization looked better this way).
I know Bubble sort is "bad", but there's no reason to make it worse than it needs to be. The current algorithm continues comparing the items that have already "bubbled" (or in this case, "sunk") by iterating the whole length on each pass, rather than shorting the loop to only consider the unsorted portion.
A minor tweak is (changed lines marked with #):
# Bubble sort that shrinks the scanned range on every pass, since each
# sweep sinks the largest remaining element to the end of the array.
bubbleSort = ->
  VA.locals.swapped = true
  limit = VA.length
  while VA.locals.swapped
    # One more element is now in its final position.
    limit--
    VA.locals.swapped = false
    for i in [0 ... limit]
      VA.locals.x = i
      if VA.gt(i, i + 1)
        VA.swap(i, i + 1)
        VA.locals.swapped = true

bubbleSort()
I love the stats on the side. Knowing how many comparisons and swaps occur really shows the complexity of the algorithms.
And to segue into my own area of interest with sorting, it would be awesome to see cache and branch behaviour too (though of course this running in the browser, it's not exactly the real behaviour, but still).
Cache behavior simulation would be kind of a pain to implement, but I might give it a shot at some point. What is branch behavior? I'm not familiar with the term.
Yeah, I can see that. A big problem is that the set sizes where cache effects start to come into effect are much larger than the sizes you're modelling. A level 1 cache is going to have 4K, which is at least 5x the size you're modelling by default. But you can probably use play sizes, like a cache which only holds 16 or 32 ints.
By branch behaviour, I mean branch prediction. I did some research (http://paulbiggar.com/research/#sorting-tr) before that showed that branch prediction is really important for sorts. For example, insertion sort is way better than selection sort, and radix sort has really really good branch prediction properties, making it beat quicksort (well, this is LSB radixsort, and I think you implemented MSB radixsort, but no doubt it applies somewhat).
So for the branch simulation, implement a 2 bit dynamic saturating counter, and note the number of mispredictions. There are simpler and more complex predictors, but that's probably the simplest that's a reasonable approximation of real life.
I'll definitely implement a cache model to show cache behavior. I'm not sure branch prediction is realistic, though; I'm not actually parsing the code myself, so finding the branch points would be a lot of extra work. I'm considering a scheme where you have to call an extra function inside each branching statement to get useful branch prediction results. Something along the lines of:
# Hypothetical instrumented insertion sort. `foreach` and `branch` would
# be identity functions over their first argument; the string argument
# uniquely names each branch point so mispredictions can be tracked per
# site. This is illustrative pseudocode — neither helper exists yet.
for x in foreach([1...VA.length], "for1")
y = 0
while branch(VA.gt(x, y), "while1")
y++
if branch(y == x, "if1")
break
VA.insert(x, y)
Where the `branch` and `foreach` functions are identities over their first argument, and they use the second argument to track branching for a given branch point (i.e. you must uniquely name each branch and foreach line). A foreach would be a branch run arg0.length + 1 times, with arg0.length true branches followed by a single false branch. Sound at all reasonable?
# Bogosort: shuffle the array until it happens to be sorted.

# Randomize the array by swapping each index with a random valid index.
# Bug fix: the original inclusive range [0..VA.length] swapped index
# VA.length, one past the end; use an exclusive range instead.
shuffle = ->
  for x in [0 ... VA.length]
    VA.swap x, Math.floor(Math.random() * VA.length)

# True when every adjacent pair is strictly increasing.
# Bug fix: the original [0..VA.length-1] compared index VA.length-1 with
# index VA.length, reading past the end; stop one pair earlier.
checkSorted = ->
  for x in [0 ... VA.length - 1]
    if VA.gte(x, x + 1)
      return false
  return true

sort = ->
  VA.play()
  if !checkSorted()
    shuffle()
    setTimeout(sort, 10)

sort()
Implemented my own quicksort that seems to have better performance than the built-in one.
# Quicksort over the inclusive range [lo, hi]. The pivot starts at lo
# and physically walks toward its final position: whenever the pair at
# (l, r) is out of order it is swapped, which moves the pivot to the
# other end of the shrinking window.
q = (lo, hi) =>
  # Persistently highlight the subrange being partitioned.
  VA.persistHighlight([lo..hi])
  l = lo
  r = hi
  # t tracks which end holds the pivot: false => at l, true => at r.
  t = false
  while (l < r)
    if VA.gt(l, r)
      VA.swap(l, r)
      t = !t
    # Shrink the window from the side opposite the pivot.
    if t
      r--
    else
      l++
  # l == r is the pivot's final position; recurse on each non-trivial
  # side. (Bug fix: removed the unused `p = VA.get(lo)` — the pivot
  # value was never read, so the get only inflated the stats.)
  if (l > lo)
    q(lo, l - 1)
  if (hi > l + 1)
    q(l + 1, hi)

q(0, VA.length - 1)
# Cocktail (bidirectional bubble) sort: alternate a forward and a
# backward sweep until a full round completes with no swaps.
twoWayBubbleSort = ->
  VA.locals.swapped = true
  while VA.locals.swapped
    VA.locals.swapped = false
    # Forward sweep: sink the largest element toward the end.
    for i in [0 ... VA.length - 1]
      VA.locals.x = i
      if VA.gt(i, i + 1)
        VA.swap(i, i + 1)
        VA.locals.swapped = true
    # Backward sweep: float the smallest element toward the front.
    for i in [(VA.length - 1)..1]
      VA.locals.x = i
      if VA.gt(i - 1, i)
        VA.swap(i - 1, i)
        VA.locals.swapped = true

twoWayBubbleSort()
I don't like the insertion sort. Couldn't it do a binary search on the sorted part of the list, when it's looking for the place to put the next element?
I coded up the easiest version of each algorithm, rather than the most efficient. (Quicksort uses the first element as a pivot, rather than something more intelligent) Feel free to email me an algorithm if you think it should be there/is better than what I wrote. The pre-written algorithms weren't exactly my focus.
The quicksort implementation is incorrect. The code simply picks the left-most element as the pivot, and as a result it becomes pathologically slow for nearly-sorted arrays -- for example, run bubblesort then quicksort, or run quicksort twice -- not to mention a potential stack overflow problem due to O(N) recursive calls.
There are ways to make the n^2 worst case extraordinarily unlikely, and there is a way to make it impossible (though it is highly technical).
Some others have posted methods (random pivot, median of 3). The gnu libc qsort implementation is a good learning tool (it uses median of 3). I think CLRS has the guaranteed n log n version.
Why three random elements though? If you expect some structure in the array, then picking pivots based on that structure makes more sense (e.g. first, middle and last for sorted, reverse sorted for partially sorted arrays).
On the other hand, if you are sorting unsanitised input, then random is good, but it should be secure pseudo-random numbers if it's important to your security/performance whilst under attack.
I said this in another comment, but I coded up the simplest version of each algorithm - the quicksort is correct, just not optimal. The focus was on letting people write their own code, not the pre-written algorithms.