Making this C-style array processing code more Pythonic (and even NumPy-based)
I'm trying to get my head around Python's (and ultimately NumPy's) awesome list processing capabilities. I am converting C code that I wrote into Python.
I have a text data file where the first line is the header and then every odd line is my input and every even line is my output. All data is space-separated. I am rather pleased that I was able to read all the data into lists using nested list comprehensions. Amazing stuff.
# Read the whole data file: one header line followed by alternating
# input / output rows.
with open('data.txt', 'r') as f:
    # get all lines as a list of strings
    lines = list(f)

# The header row carries three integers: sample count, input dimension and
# output dimension.  A list comprehension (rather than a bare ``map``) keeps
# the result indexable on Python 3 as well as Python 2, and ``split()``
# tolerates repeated spaces and the trailing newline.
header = [int(field) for field in lines[0].split()]
num_samples = header[0]
input_dim = header[1]
output_dim = header[2]
del header

# Lines 1, 3, 5, ... hold the inputs; lines 2, 4, 6, ... hold the outputs.
inputs = [[float(x) for x in l.split()] for l in lines[1::2]]
outputs = [[float(x) for x in l.split()] for l in lines[2::2]]

# Only the raw lines are released here: comprehension variables are not
# visible at this point on Python 3, so deleting them would raise NameError.
del lines
Then I want to create a new list, where each item is a function of the corresponding input-output pair. I couldn't figure out how to do this with any python based optimization. Here it is in C-style python:
# calculate position
# y coordinate shared by every computed position.
pos_y = 0
pos_list = []
# Walk the input/output rows in lockstep.  The header's sample count is
# assumed to match the number of row pairs actually read from the file.
for target, pantilt in zip(inputs, outputs):
    # Work on local copies so the rows stored in ``outputs`` are not modified.
    pan, tilt = pantilt[0], pantilt[1]
    # A pan angle outside [-90, 90] degrees is shifted by 180 degrees and
    # the tilt sign is flipped to compensate.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    # Offset along z relative to the target; the x offset follows from it.
    offset_z = tan_tilt * (target[1] - pos_y) / math.sqrt(tan_pan * tan_pan + 1)
    pos_list.append([offset_z * tan_pan + target[0], pos_y, offset_z + target[2]])
I tried to do it with a list comprehension or map, but couldn't figure out how to:
- fetch from two different lists (both for input and output) for each element of the pos_list array
- put the body of the algorithm into the comprehension. Does this have to be a separate function, or is there a funky way to use a lambda for this?
- would it be possible to do this without any loops at all, just stick it into numpy and vectorize the whole thing?
source to share
One vectorized approach using boolean indexing (masks)
-
import numpy as np
def mask_vectorized(inputs, outputs, pos_y):
    """Compute all positions at once with NumPy boolean masks.

    Parameters
    ----------
    inputs : array-like, shape (n, >=3)
        Target rows; columns 0, 1 and 2 are used.
    outputs : array-like, shape (n, >=2)
        Pan/tilt rows in degrees; column 0 is pan, column 1 is tilt.
        The caller's data is never modified.
    pos_y : float
        Value written to the middle column of every result row.

    Returns
    -------
    list of list of float
        One ``[x, pos_y, z]`` row per input row.
    """
    # Accept plain nested lists as well as arrays.
    inputs = np.asarray(inputs, dtype=float)
    outputs = np.asarray(outputs, dtype=float)

    # Create a copy of the pan/tilt columns for editing purposes
    pantilt_2d = outputs[:, :2].copy()

    # Masks corresponding to the IF / ELIF conditions of the loop version
    mask_col0_lt = pantilt_2d[:, 0] < -90
    mask_col0_gt = pantilt_2d[:, 0] > 90

    # Shift out-of-range pan angles by 180 degrees ...
    pantilt_2d[mask_col0_gt, 0] -= 180
    pantilt_2d[mask_col0_lt, 0] += 180
    # ... and flip the tilt sign on exactly those rows
    pantilt_2d[mask_col0_lt | mask_col0_gt, 1] *= -1

    # Vectorized tan_pan (column 0) and tan_tilt (column 1)
    tan_pan_tilt = np.tan(np.radians(pantilt_2d))

    # Vectorized form of: tan_tilt * (target[1] - pos_y) / sqrt(tan_pan**2 + 1)
    offset_z = (tan_pan_tilt[:, 1] * (inputs[:, 1] - pos_y)) / np.sqrt(
        (tan_pan_tilt[:, 0] ** 2) + 1
    )

    # Size the result from the data itself rather than from a global count
    pos_array_vectorized = np.empty((inputs.shape[0], 3))
    pos_array_vectorized[:, 0] = inputs[:, 0] + tan_pan_tilt[:, 0] * offset_z
    pos_array_vectorized[:, 1] = pos_y
    pos_array_vectorized[:, 2] = inputs[:, 2] + offset_z

    # Convert to list, if so desired for the final output
    # (keeping as numpy array could boost up the performance further)
    return pos_array_vectorized.tolist()
Runtime tests
In [415]: # Parameters and setup input arrays
...: num_samples = 1000
...: outputs = np.random.randint(-180,180,(num_samples,5))
...: inputs = np.random.rand(num_samples,6)
...: pos_y = 3.4
...:
In [416]: %timeit original(inputs,outputs,pos_y)
100 loops, best of 3: 2.44 ms per loop
In [417]: %timeit mask_vectorized(inputs,outputs,pos_y)
10000 loops, best of 3: 181 µs per loop
source to share
Suppose you read your file into a list like this:
# Read every line of the file; the ``with`` block closes the handle promptly.
with open('data.txt', 'r') as f:
    lines = f.readlines()
The header looks like this:
lines[0]
Even lines:
even = lines[1:][::2]
and odd lines:
odd = lines[2:][::2]
Now you can pair up these two lists using itertools.izip:
itertools.izip(even, odd)
The result is an iterator (you can loop over it, or just write list( ... ) around it to turn it into a real list) whose items each represent one input/output pair of your data.
source to share
In case anyone stumbles upon the same question, here are four options based on Ami's suggestion (functions do1, do1b, do2, do3).
And for those who are curious, here are the benchmarks (I have ~ 1000 pairs of I / O data. Perhaps with radically more data, benchmarks will change more)
- %timeit do3() - 100 loops, best of 3: 2.72 ms per loop
- %timeit do2() - 100 loops, best of 3: 2.73 ms per loop
- %timeit do1b() - 100 loops, best of 3: 2.74 ms per loop
- %timeit do1() - 100 loops, best of 3: 2.67 ms per loop
....
def load_file(filename='Sharpy_7.txt'):
    """Read *filename* and publish its contents as module-level globals.

    Sets ``file_data`` to the list of raw lines (newlines included) and
    ``num_samples``, ``input_dim`` and ``output_dim`` to the first three
    integers of the header line.

    Raises
    ------
    IndexError
        If the file is empty or the header has fewer than three fields.
    ValueError
        If a header field is not an integer.
    """
    global file_data, num_samples, input_dim, output_dim
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(filename, 'r') as f:
        # get all lines as a list of strings
        file_data = list(f)
    # Build a real list so the header can be indexed on Python 3 as well;
    # split() without an argument tolerates repeated spaces and the newline.
    header = [int(field) for field in file_data[0].split()]
    num_samples = header[0]
    input_dim = header[1]
    output_dim = header[2]
def calc_pos2(d):
    """Compute one position from a ``(target, pantilt)`` pair.

    Parameters
    ----------
    d : sequence
        ``d[0]`` is the target (indices 0, 1 and 2 are read) and ``d[1]``
        is the pan/tilt pair in degrees.  Neither is modified.

    Returns
    -------
    list
        ``[x, 0, z]`` for the given pair.
    """
    target, pantilt = d[0], d[1]
    # Work on local copies so the caller's pan/tilt sequence stays untouched.
    pan, tilt = pantilt[0], pantilt[1]
    # A pan angle outside [-90, 90] degrees is shifted by 180 degrees and
    # the tilt sign is flipped to compensate.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    pos_y = 0  # the y coordinate is fixed at zero in this variant
    offset_z = tan_tilt * (target[1] - pos_y) / math.sqrt(tan_pan * tan_pan + 1)
    return [offset_z * tan_pan + target[0], pos_y, offset_z + target[2]]
def calc_pos(target, pantilt):
    """Compute one position from a target and a pan/tilt pair.

    Parameters
    ----------
    target : sequence of float
        Indices 0, 1 and 2 are read.
    pantilt : sequence of float
        ``pantilt[0]`` is pan and ``pantilt[1]`` is tilt, both in degrees.
        The sequence is not modified.

    Returns
    -------
    list
        ``[x, 0, z]`` for the given pair.
    """
    # Work on local copies so the caller's pan/tilt sequence stays untouched.
    pan, tilt = pantilt[0], pantilt[1]
    # A pan angle outside [-90, 90] degrees is shifted by 180 degrees and
    # the tilt sign is flipped to compensate.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    pos_y = 0  # the y coordinate is fixed at zero in this variant
    offset_z = tan_tilt * (target[1] - pos_y) / math.sqrt(tan_pan * tan_pan + 1)
    return [offset_z * tan_pan + target[0], pos_y, offset_z + target[2]]
def calc_stats():
    """Summarise the global ``pos_list`` along axis 0.

    Publishes three module-level names: ``pos_array`` (``pos_list`` as a
    NumPy array), ``pos_avg`` (its mean along axis 0) and ``pos_std`` (its
    standard deviation along axis 0).
    """
    global pos_array, pos_avg, pos_std
    pos_array = np.asarray(pos_list)
    pos_avg = pos_array.mean(axis=0)
    pos_std = pos_array.std(axis=0)
# map over zipped (target, pantilt) pairs
def do3():
    """Fill the global ``pos_list`` by mapping ``calc_pos2`` over row pairs.

    Reads the module-level ``file_data``: lines 1, 3, 5, ... are parsed as
    targets and lines 2, 4, 6, ... as pan/tilt rows.
    """
    global pos_list
    # parse the alternating rows into lists of floats
    target_list = [[float(x) for x in l.split()] for l in file_data[1::2]]
    pantilt_list = [[float(x) for x in l.split()] for l in file_data[2::2]]
    # calculate position
    # The builtin zip works on Python 2 and 3 alike; wrapping map in list()
    # guarantees pos_list is a real list on Python 3, where map is lazy.
    pos_list = list(map(calc_pos2, zip(target_list, pantilt_list)))
# list comprehension over zipped (target, pantilt) pairs
def do2():
    """Fill the global ``pos_list`` with a comprehension calling ``calc_pos``.

    Reads the module-level ``file_data``: lines 1, 3, 5, ... are parsed as
    targets and lines 2, 4, 6, ... as pan/tilt rows.
    """
    global pos_list
    # parse the alternating rows into lists of floats
    target_list = [[float(x) for x in l.split()] for l in file_data[1::2]]
    pantilt_list = [[float(x) for x in l.split()] for l in file_data[2::2]]
    # calculate position
    # The builtin zip works on Python 2 and 3 alike; unpacking each pair in
    # the loop header avoids indexing into it.
    pos_list = [
        calc_pos(target, pantilt)
        for target, pantilt in zip(target_list, pantilt_list)
    ]
# for loop with function call
def do1b():
    """Fill the global ``pos_list`` with an explicit loop calling ``calc_pos``.

    Reads the module-level ``file_data``: lines 1, 3, 5, ... are parsed as
    targets and lines 2, 4, 6, ... as pan/tilt rows.
    """
    global pos_list
    # parse the alternating rows into lists of floats
    target_list = [[float(x) for x in l.split()] for l in file_data[1::2]]
    pantilt_list = [[float(x) for x in l.split()] for l in file_data[2::2]]
    # calculate position
    pos_list = []
    # Pair the rows directly instead of indexing by the header's sample
    # count, so every parsed pair is processed, as in do2 and do3.
    for target, pantilt in zip(target_list, pantilt_list):
        pos_list.append(calc_pos(target, pantilt))
# for loop with unrolled algorithm
def do1():
    """Fill the global ``pos_list`` with the position math written inline.

    Reads the module-level ``file_data``: lines 1, 3, 5, ... are parsed as
    targets and lines 2, 4, 6, ... as pan/tilt rows (degrees).  Each result
    row is ``[x, 0, z]``.
    """
    global pos_list
    # parse the alternating rows into lists of floats
    target_list = [[float(x) for x in l.split()] for l in file_data[1::2]]
    pantilt_list = [[float(x) for x in l.split()] for l in file_data[2::2]]
    # calculate position
    pos_list = []
    pos_y = 0  # the y coordinate is fixed at zero in this variant
    # Pair the rows directly instead of indexing by the header's sample
    # count, so every parsed pair is processed, as in do2 and do3.
    for target, pantilt in zip(target_list, pantilt_list):
        pan, tilt = pantilt[0], pantilt[1]
        # A pan angle outside [-90, 90] degrees is shifted by 180 degrees
        # and the tilt sign is flipped to compensate.
        if pan > 90:
            pan -= 180
            tilt *= -1
        elif pan < -90:
            pan += 180
            tilt *= -1
        tan_pan = math.tan(math.radians(pan))
        tan_tilt = math.tan(math.radians(tilt))
        offset_z = tan_tilt * (target[1] - pos_y) / math.sqrt(tan_pan * tan_pan + 1)
        pos_list.append([offset_z * tan_pan + target[0], pos_y, offset_z + target[2]])
source to share