How to create layer0 input for 3 channel input images

Hi, I am following http://deeplearning.net/tutorial/code/convolutional_mlp.py for a convolutional neural network implementation. My input images have meaningful colour channels, so I want a 3-channel feature map as the layer 0 input.

So I need something like this

layer0_input = x.reshape((batch_size, 3, 135, 240)) # 3 channels, height 135, width 240 - Theano expects (batch, channels, height, width)

instead of

layer0_input = x.reshape((batch_size, 1, 28, 28)) # 28x28 normalized MNIST greyscale images

to be used here

layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 5, 5),  # the second dimension must match the 3 input channels
    poolsize=(2, 2)
)


where this x is provided to Theano as

 train_model = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)


So my question is: how do I create (form) that train_set_x?

Currently the (greyscale intensity, i.e. one channel) train_set_x is generated as

    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=True)

where each row of data_x is a flattened numpy array of length 784 (for 28 * 28 pixels).
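For a 3-channel version, each row of data_x has to hold 3 * 135 * 240 = 97200 values, laid out channel-first so that the reshape above can recover the channel axis. A minimal sketch, assuming a hypothetical list images of per-sample arrays of shape (3, 135, 240):

import numpy
import theano

# hypothetical: images is a list of per-sample arrays of shape (3, 135, 240)
data_x = numpy.asarray([img.reshape(3 * 135 * 240) for img in images])

shared_x = theano.shared(numpy.asarray(data_x,
                                       dtype=theano.config.floatX),
                         borrow=True)
# x.reshape((batch_size, 3, 135, 240)) can then recover the channel axis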

Thanks a lot for the advice





1 answer


I was able to get it to work. I am pasting some code here that might help someone. It is not very elegant, but it works.

import os
import random
import cPickle

import numpy as np
import cv2

def shuffle_in_unison(a, b):
    # courtesy of http://stackoverflow.com/users/190280/josh-bleecher-snyder
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b
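As an aside, the same unison shuffle can be written more compactly with NumPy fancy indexing; a minimal equivalent sketch:

def shuffle_in_unison_fast(a, b):
    # one shared permutation, applied to both arrays at once
    p = np.random.permutation(len(a))
    return a[p], b[p]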

def createDataSet(imagefolder):

    os.chdir(imagefolder)

    # total number of files
    number_of_files = len([item for item in os.listdir('.') if os.path.isfile(os.path.join('.', item))])

    # get a shuffled list: I needed this because my image names were of the format n_x_<some details>.jpg
    # where n was my target and x was a number from 0 to m-1, m being the number of samples
    # of the target value n. So I needed to shuffle and iterate while putting images in the train,
    # test and validate arrays
    image_index_array = range(0, number_of_files)
    random.seed(12)
    random.shuffle(image_index_array)
    # split 80/10/10 - train/test/val
    trainsize = int(number_of_files * .8)
    testsize = int(number_of_files * .1)
    valsize = number_of_files - trainsize - testsize

    # create the random index arrays of train/test/val by slicing the total image index array
    train_index_array = image_index_array[0:trainsize]
    test_index_array = image_index_array[trainsize:trainsize + testsize]
    validate_index_array = image_index_array[trainsize + testsize:]

    # initialize the data structures
    dataset = {'train': [[], []], 'test': [[], []], 'validate': [[], []]}

    i_counter = 0
    train_X = []
    train_y = []

    test_X = []
    test_y = []

    val_X = []
    val_y = []

    for item in os.listdir('.'):
        if not os.path.isfile(os.path.join('.', item)):
            continue

        if item.endswith('.pkl'):
            continue

        print 'Processing item ' + item
        item_y = int(item.split('_')[0])  # the target is the leading n in n_x_<details>.jpg; cast it to an int label
        item_x = cv2.imread(item)

        height, width = item_x.shape[:2]

        # this was my requirement - skip it if you do not need it
        if height != 135 or width != 240:
            continue

        # get the 3 channels
        b, g, r = cv2.split(item_x)

        item_x = [b, g, r]
        item_x = np.array(item_x)
        item_x = item_x.reshape(3, 135 * 240)

        if i_counter in test_index_array:
            test_X.append(item_x)
            test_y.append(item_y)
        elif i_counter in validate_index_array:
            val_X.append(item_x)
            val_y.append(item_y)
        else:
            train_X.append(item_x)
            train_y.append(item_y)

        i_counter = i_counter + 1

    # fix the dimensions: flatten out the channel and intensity dimensions
    train_X = np.array(train_X)
    train_X = train_X.reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2])
    test_X = np.array(test_X)
    test_X = test_X.reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2])
    val_X = np.array(val_X)
    val_X = val_X.reshape(val_X.shape[0], val_X.shape[1] * val_X.shape[2])

    train_y = np.array(train_y)
    test_y = np.array(test_y)
    val_y = np.array(val_y)

    # shuffle the train and test arrays in unison
    train_X, train_y = shuffle_in_unison(train_X, train_y)
    test_X, test_y = shuffle_in_unison(test_X, test_y)

    # pickle them
    dataset['train'] = [train_X, train_y]
    dataset['test'] = [test_X, test_y]
    dataset['validate'] = [val_X, val_y]
    output = open('pcount.pkl', 'wb')
    cPickle.dump(dataset, output)
    output.close()
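To sanity-check the pickle, you can reload it and inspect the shapes; each row should have 3 * 135 * 240 = 97200 values:

import cPickle

f = open('pcount.pkl', 'rb')
dataset = cPickle.load(f)
f.close()

train_X, train_y = dataset['train']
print train_X.shape   # (n_train, 97200)
print train_y.shape   # (n_train,)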


Once you have this pickle file, you can use it in convolutional_mlp.py like this.



layer0_input = x.reshape((batch_size, 3, 135, 240))

# Construct the first convolutional pooling layer:
# filtering reduces the image size to (135-8+1 , 240-5+1) = (128, 236)
# maxpooling reduces this further to (128/2, 236/2) = (64, 118)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 64, 118)
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 8, 5),
    poolsize=(2, 2)
)
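The size arithmetic in the comments generalizes to other filter sizes; here is a small helper (not part of the tutorial) for a 'valid' convolution followed by non-overlapping max pooling:

def conv_pool_output_shape(height, width, filter_h, filter_w, pool=2):
    # 'valid' convolution shrinks each side by (filter size - 1),
    # then pooling divides each side by the pool factor
    return ((height - filter_h + 1) // pool, (width - filter_w + 1) // pool)

print conv_pool_output_shape(135, 240, 8, 5)   # (64, 118), matching the comment above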


The load_data function in logistic_sgd.py needs a small change, as shown below.

f = open(dataset, 'rb')
dump = cPickle.load(f)
train_set = dump['train']
valid_set = dump['validate']
test_set = dump['test']
f.close()
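After that, the rest of load_data can proceed as in the tutorial, wrapping each split in shared variables; roughly:

test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
train_set_x, train_set_y = shared_dataset(train_set)

rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
        (test_set_x, test_set_y)]
return rval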


Hope it helps.









