Handwritten Equation Solver

Project Definition

Project Overview

Problem Statement

Metrics

Analysis

Data Exploration and Visualization

Handwritten zeros in three different image files

Methodology — Tackling the Problem

Data Preprocessing — Preparing the Data

# Preprocess one training image: isolate the largest contour, crop it from
# the inverted grayscale image, scale it to 28 x 28 and store it with its label.
# NOTE(review): cv2.imread returns None when the file cannot be read; the
# calls below would then raise. Confirm the caller only passes readable images.
img = cv2.imread(os.path.join(folder_path, filename), cv2.IMREAD_GRAYSCALE)

# invert image pixels
invert = cv2.bitwise_not(img)
# make image binary (black or white): pixels above 127 become 255, the rest 0
ret, thresh = cv2.threshold(invert, 127, 255, cv2.THRESH_BINARY)
# find contours; the [-2:] slice accepts both the 2-value return of
# OpenCV 2.x/4.x and the 3-value return of OpenCV 3.x
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# sort contours from left to right
sorted_bounding_boxes = sort_contours(contours)

# find the bounding box with the largest area
largest_box = None
maxi = 0
for box in sorted_bounding_boxes:
    # get top left corner point as well as width and height of box
    rect = cv2.boundingRect(box)
    area = rect[2] * rect[3]
    # ">=" keeps the original tie-break: among boxes of equal area the one
    # that comes later in the sorted order wins
    if area >= maxi:
        maxi = area
        largest_box = rect

# Only store a sample when a contour was actually found. Without this guard
# an image with no contours would either raise a NameError or silently reuse
# the box of the previously processed image.
if largest_box is not None:
    x_max, y_max, w_max, h_max = largest_box
    # crop the bounding box (the contour) on the original inverted image;
    # the +10 extends the crop past the right and bottom edges only, and
    # NumPy slicing clamps it at the image border
    crop_img = invert[y_max:y_max + h_max + 10, x_max:x_max + w_max + 10]
    # resize the cropped image and scale to a 28 x 28 image
    resize_img = cv2.resize(crop_img, (28, 28))
    train_data.append(resize_img)
    label_array.append(label_dict[folder])

Implementation

# Build, compile and train the CNN classifier.
# The statements are ordered data -> architecture -> compile -> fit so the
# snippet runs top to bottom; the model and the training arrays must exist
# before compile()/fit() are called.

# add a trailing channel axis of size 1 to the stack of training images
images = np.array(train_data)
n_samples, n_rows, n_cols = images.shape[:3]
x_train = images.reshape(n_samples, n_rows, n_cols, 1)
# one-hot encode the integer labels
y_train = keras.utils.to_categorical(train_labels, num_classes)

# three convolution + max-pooling stages followed by a dense classifier head
model = Sequential([
    Conv2D(filters=16, kernel_size=2, padding='same', activation='relu',
           input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(500, activation='relu'),
    Dropout(0.4),
    # The output width must equal the width of the one-hot labels built with
    # num_classes above (the original hard-coded 12 here).
    Dense(num_classes, activation='softmax'),
])

# compile the model
model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

# train the model and save the best weights
# NOTE(review): x_validation / y_validation are not defined in this snippet;
# they are expected to be prepared elsewhere in the same way as x_train / y_train.
checkpointer = ModelCheckpoint(filepath='best.hdf5', verbose=1,
                               save_best_only=True)
model.fit(x_train, y_train, batch_size=32, epochs=100,
          validation_data=(x_validation, y_validation), verbose=2,
          callbacks=[checkpointer], shuffle=True)

Refinement

Results

Model Evaluation and Validation

Justification

Test Data Example
# Classify every image in test_data and concatenate the predicted symbols
# into result_string, in the order the images appear in test_data.
symbols = []
for data in test_data:
    # shape a single image as a batch of one with a trailing channel axis
    evaluate_data = np.array(data)
    evaluate_data = evaluate_data.reshape(
        1, evaluate_data.shape[0], evaluate_data.shape[1], 1)
    # Sequential.predict_classes() was removed in TensorFlow 2.6; taking the
    # argmax over the predicted class scores gives the same class index and
    # also works on older versions.
    scores = loaded_model.predict(evaluate_data)
    result = np.argmax(scores, axis=-1)
    # map the class index back to its symbol: val_list is searched with the
    # index as a string, and key_list holds the symbol at the same position
    value_position = val_list.index(str(result[0]))
    key = key_list[value_position]
    symbols.append(str(key))
result_string = ''.join(symbols)

Conclusion

Reflection

Improvement

Final Thoughts