Fast way to find smallest sum of n subsequences in python
given a list L
, which is the fastest method to find subsequences of consecutive length n
and find the index of the smallest sum. Here is my (slow) version:
from random import randrange
import numpy as np
L = np.array([randrange(-5000, 5000) for _ in range(10 * 10**6)]) #random array
subarray_length = 3 #for example
sublist_sums = [sum(L[i:i+subarray_length]) for i
in range(0, len(L) - (subarray_length - 1))]
candidate_idx = 0
candidate_sum = sublist_sums[0]
for idx, val in enumerate(sublist_sums[1:]):
if val < candidate_sum:
candidate_sum, candidate_idx = val, idx + 1
print(candidate_idx)
MWE
L = [6,5,4,3,2,1]
# sublists: [[6,5,4], [5,4,3], [4,3,2], [3,2,1] ]
subarray_length = 3
sublist_sums = [sum(L[i:i+subarray_length]) for i
in range(0, len(L) - (subarray_length - 1))]
# sublist_sums = [15, 12, 9, 6]
for idx, val in enumerate(sublist_sums[1:]):
if val < candidate_sum:
candidate_sum, candidate_idx = val, idx + 1
candidate_idx = 3
# 3 is the index of the first element of the length 3 sublist
# with the smallest sum of all 3 length consecutive sublists
+3
source to share
2 answers
Here's one approach -
# Based on http://stackoverflow.com/a/14314054/3293881 by @Jaime
def moving_sum(a, n) :
ret = np.cumsum(a)
ret[n:] = ret[n:] - ret[:-n]
return ret[n - 1:]
sums = moving_sum(L, subarray_length)
min_idx = sums.argmin()
candidate_sum, candidate_idx = sums[min_idx], min_idx
Alternatively, we could use np.convolve
windowed sums to compute, for example:
sums = np.convolve(L,np.ones(subarray_length,dtype=int),'valid')
Another way to get sums
with Scipy uniform 1D filter
-
from scipy.ndimage.filters import uniform_filter1d as unif1d W = subarray_length a = np.array(L,dtype=float) hW = (W-1)//2 sums = W*unif1d(a,size=W, mode='constant')[hW:-hW]
+2
source to share
Slightly less efficient than @ Divakar's answer you can use as_strided
to collect subarrays and navigate from there:
import numpy as np
def min_sub_sum(arr,n):
shape = (arr.shape[0]-n+1,n)
strides = arr.strides
subs = np.lib.stride_tricks.as_strided(arr,shape,strides*2)
sums = np.einsum('ij->i',subs)
return np.argmin(sums)
L = np.array([x for x in range(10000,0,-1)])
n = 5
print(min_sub_sum(L,n))
# 9995
+1
source to share