fn distributeBeads num_beads:10000 num_threads:100 = 
(
	fn p_sort p1 p2 index:1 = if p1[index] < p2[index] then 1 else if p1[index] > p2[index] then -1 else 0 
	fn q_sort tab index:1 = 
	(
		local p = tab[1]
		local val = p[index]
		
		deleteitem tab 1
		local i = 1
		local j = tab.count
			
		local k = 
		(
			if (val >= tab[i][index]) then i
			else if (val <= tab[j][index]) then j
			else
			(
				local pivot
				while (j - i > 1) do
				(
					pivot = (i + j)/2 
					if (val > tab[pivot][index]) then j = pivot
					else if (val < tab[pivot][index]) then i = pivot
					else i = j
				)
				if (val >= tab[pivot][index]) then pivot else (pivot + 1)
			)
		)
		insertitem p tab k
	)
	
	seed 0
	t = timestamp()
	h = heapfree
	local thread_data = #()
	for i=1 to num_threads do 
	(
		len = random 10.0 200.0
		append thread_data [i, len, 0, len]
	)
	qsort thread_data p_sort index:4
	
	local all_len = 0
	local bcount = amin num_threads num_beads
	for k=1 to bcount do
	(
		local v = thread_data[k]
		v[3] = 1
		v[4] -= v[2]/2
		all_len += v[4] 
		thread_data[k] = v
	)
	num_beads -= bcount
	if num_beads > 0 do 
	(
		qsort thread_data p_sort index:4
		local bcount = 0
		for i=1 to num_threads do
		(
			local v = thread_data[i]
			num = (v[4]/all_len * num_beads) as integer
			if (num > 0) do
			(
				v[3] += num
				[b]v[4] = v[2] + v[2]*0.001 - v[2]*v[3]/(v[3]+1)[/b]
				bcount += num 
			)
			thread_data[i] = v
		)
		
		k = extra = bcount - num_beads		
		if k < 0 do
		(
			qsort thread_data p_sort index:4
			do
			(
				k += 1
				local v = thread_data[1]
				v[3] += 1
				[B]v[4] = v[2] + v[2]*0.001 - v[2]*v[3]/(v[3]+1)[/B]
				thread_data[1] = v
				
				--qsort thread_data p_sort index:4
				q_sort thread_data index:4
			)
			while (k < 0) 
		)
	)
	
	t = timestamp() - t
	h = h - heapfree
	
	all_beads = 0
	for i=1 to num_threads do all_beads += thread_data[i][3] as integer
	format "% >>>>>>>> beads:% -> threads:% % = time:% heap:%
" extra all_beads num_threads thread_data.count t h
)
now it works. i’ve added a little priority to originally longer thread in case when current segments are exactly the same… see BOLD lines in the code
Still almost the same errors. See bellow for 1 to 200 beads, 10 threads.
beads:43	current:4	previous:5
beads:75	current:5	previous:6
beads:76	current:11	previous:12
beads:108	current:11	previous:12
beads:140	current:10	previous:11
beads:141	current:21	previous:22
beads:173	current:29	previous:30
it seems like combination of yours and mine works perfect:
(pure mxs)
fn distributeBeads num_beads:10000 num_threads:100 = 
(
	fn p_sort p1 p2 index:1 = if p1[index] < p2[index] then 1 else if p1[index] > p2[index] then -1 else 0 
	
	seed 0
	t = timestamp()
	h = heapfree
	local thread_data = #()
	for i=1 to num_threads do 
	(
		len = random 10.0 200.0
		append thread_data [i, len, 0]
	)
	
	t = timestamp()
	h = heapfree
	
	stab = #()
	stab.count = num_threads
	
	qsort thread_data p_sort index:2 
	bcount = (amin num_threads num_beads)
	for i = 1 to bcount do
	(
		local v = thread_data[i] 
		v[3] = 1
		stab[i] = v[2]
		thread_data[i] = v
	)
	
	num_beads -= bcount
	all_len = 0
	for i=1 to num_threads do all_len += thread_data[i][2]
		
	if num_beads > 0 do
	(
		bcount = 0
		for i=1 to num_threads do --while beads > 0 do 
		(
			local v = thread_data[i]
			num = (stab[i]/all_len * num_beads) as integer 
			if (num > 0) do
			(
				v[3] += num
				stab[i] = v[2]/v[3]
				bcount += num 
			)
			thread_data[i] = v
		)
		
		extra = (num_beads -= bcount)
		
		for i = 1 to num_beads do
		(
			x = finditem stab (amax stab)
			local v = thread_data[x]
			v[3] += 1
			stab[x] = v[2]/v[3]
			thread_data[x] = v
		)
	)
	
	t = timestamp() - t
	h = h - heapfree
	
	all_beads = 0
	for i=1 to num_threads do all_beads += thread_data[i][3] as integer
	format "% >>>>>>>> beads:% -> threads:% % = time:% heap:%
" extra all_beads num_threads thread_data.count t h
)
505 >>>>>>>> beads:1000000 -> threads:1000 1000 = time:16 heap:242888L
501 >>>>>>>> beads:100000 -> threads:1000 1000 = time:16 heap:247312L
This is the result I get:
505 >>>>>>>> beads:1000000 -> threads:1000 1000 = time:21 heap:252352L
Here is another algorithm, also pure MXS. It tends to favor by a little the shortest shape, but I am pleased with the results. It is 2X faster
(
	fn DistributePointsInShapes splines points:0 =
	(
		fn SortByLength t1 t2 = t1[3] - t2[3]
		
		dist = 0.0
		num_splines = splines.count
		result = for j in splines collect
		(
			dist += curvelength j
			#(j, 0, curvelength j)
		)
		qsort result SortByLength
		
		if points <= num_splines then
		(
			for j = num_splines to (num_splines-points+1) by -1 do result[j][2] = 1
		)else(
			points -= num_splines
			for j in result do
			(
				amount = int (j[3]/(dist/points))
				j[2] = amount + 1
				dist -= j[3]
				points -= amount
			)
		)
		return result
	)
)
beads:1000000 threads:1000 time:11 heap:338136L
I’ve tested with other configurations, for example for 10K threads 1M beads this algorithm is 5.5 times faster, it takes just 125ms.
WOW! you found it!
here is a slightly modified code to keep ‘right’ pattern:
	fn distributeBeads threads num_beads:10000 = 
	(
		fn SortByLength t1 t2 = t2[2] - t1[2]
		
		num_threads = threads.count
		all_len = 0
		all_beads = [num_beads, 0]
		thread_data = #()
                for i=1 to threads.count do
		(
			len = getsplineshapelength threads[i]
			all_len += len 
			append thread_data [i, len, 0]
		)
		qsort thread_data SortByLength
		
		if num_beads <= num_threads then
		(
			for i = 1 to num_beads do thread_data[i][3] = 1
			all_beads.y = num_beads
		)
		else
		(
			for v in thread_data while num_beads > 0 do
			(
				num = int (v[2]/all_len * (num_beads + 1))
				num = amin num num_beads
				if num > 0 do
				(
					v[3] = num
					all_len -= v[2]
					num_beads -= num
					all_beads.y += num
				)
			)
		)
		format ">> beads:%
" all_beads
	)
 
the last version gives me:
1000000 >>>> beads:1000000 -> threads:10000 = time:80 heap:7861456L
Why do you think it does not produce a correct pattern? I think it is good.
BTW, neither versions produces the exact same pattern as the algorithm in post #38 , so I am not sure what is correct for you.
check you algorithm on 10 threads… using my snippet for example.
you can see that on 12 it splits 10th spline on three instead of splitting 9th on 2.
this is not correct in my mind because at the moment of 12th beat segments of 10th spline are shorter then 9th.
the same behavior is during whole algorithm
I see what you mean. I missed it.
I’ve been testing your modified version and it is also a not reliable solution. There are many configurations where it assigns beads to the wrong thread, and many others where a bead from the shortest thread is missing.
For example:
Threads:10 – Min Radius:10 – Spacing:4
Threads:10 – Min Radius:1 – Spacing:10
Threre are many more.
If you need a reliable working solution, I would stick, for now, to post #38
It is a brute force algorithm so I don’t think it will fail, and if you will do it in C++, the times won’t bee too bad.
anyway what we did is absolutely amassing… honestly i would stop with version 2-3 which being written on c++ could give me a good enough result.
but … i would probably shame myself knowing that it was bodged.
This is why i’m skeptical about the MCG. Any optimization is delegated to … to some one.
New version!
fn DistributePointsInShapes splines points:0 =
(
	fn SortByLength a b = b[3]-a[3]
	fn SortBySegment a b = if a[4]>b[4] then -1 else if a[4]<b[4] then 1 else if a[3]>b[3] then -1 else if a[3]<b[3] then 1 else 0
	d = 0.0
	r = for j in splines collect
	(
		l = curvelength j
		d += l
		#(j, 0, l, l)
	)
	if points < r.count do qsort r SortByLength
	missed = points
	for j = 1 to amin r.count points do
	(
		r[j][2] = amax 1 (int (r[j][3]/d * points))
		r[j][4] = r[j][3] / (r[j][2]+1)
		missed -= r[j][2]
	)
	
	qsort r SortBySegment
	for j = 1 to missed do r[j][2] += 1
	
	return r
)
EDIT: 30% improvement.
beads:1000000 threads:1000 time:19 heap:365648L
beads:1000000 threads:10000 time:189 heap:2974936L
I’ve done more tests and it looks consistent. Also add a little improvement as the array does not need to be sorted twice if the number of beads is larger than the number of threads.
It produces the exact same results as the brute force algorithm from pot #38 in most cases, probably over 95-98% of the times.
In those cases where the distribution is a little different, I think 95-98% of the threads gets the same amount of beads.
The “error” is due to threads with same subdivision length, but in order to not resort the array, they are assigned not always to the longest thread. I don’t see any problem with this, as the beads are still distributed in a “fair” way.
Have you been able to test it? Does it work for you?
it still has a problems… try my template 10 threads with spacing 20, delete all splines except 1, 3, and 10.
you will see the problems
Yea, don’t even need to try it, I know what the problem is. 
Dam algorithm. It looks so simple and it is by far the hardest one I met so far.
Anyone out there that would like to contribute?
It has been impossible for me to follow you. But I think one of the problems may be assigning first one bead to each thread. This disturbs the algorithm based on threads length.