该程序可以在 GitHub 上找到:https://github.com/yjl9122/object-detector-master.git
主函数如下:
#!/usr/bin/python
import os
# Link to the UIUC Car Database
# http://l2r.cs.uiuc.edu/~cogcomp/Data/Car/CarData.tar.gz
# dataset_url = "http://l2r.cs.uiuc.edu/~cogcomp/Data/Car/CarData.tar.gz"
# dataset_path = "../data/dataset/CarData.tar.gz"
# Fetch and extract the dataset
# if not os.path.exists(dataset_path):
# os.system("wget {} -O {}".format(dataset_url, dataset_path))
# os.system("tar -xvzf {} -C {}".format(dataset_path, os.path.split(dataset_path)[0]))
# Step 1: compute the feature descriptors of the positive and negative
# training images by running the extraction script.
pos_path = "../data/dataset/CarData/pos"
neg_path = "../data/dataset/CarData/neg"
extract_cmd = "python ../object-detector/extract-features.py -p {} -n {}"
os.system(extract_cmd.format(pos_path, neg_path))

# Step 2: train the classifier on the feature files written by step 1.
pos_feat_path = "../data/features/pos"
neg_feat_path = "../data/features/neg"
train_cmd = "python ../object-detector/train-classifier.py -p {} -n {}"
os.system(train_cmd.format(pos_feat_path, neg_feat_path))

# Step 3: run the detector on a single test image. The literal 2 is passed as
# the -d (downscale) argument; --visualize shows the sliding window at work.
test_im_path = "../data/dataset/CarData/TestImages/test-18.pgm"
test_cmd = "python ../object-detector/test-classifier.py -i {} -d {} --visualize"
os.system(test_cmd.format(test_im_path, 2))
配置参数(config.cfg)如下:
[hog]
min_wdw_sz: [100, 40]
step_size: [10, 10]
orientations: 9
pixels_per_cell: [5, 5]
cells_per_block: [3, 3]
visualize: False
transform_sqrt:True
visualize_test: True
[nms]
threshold: 0.3
[paths]
pos_feat_ph: ../data/features/pos
neg_feat_ph: ../data/features/neg
model_path: ../data/models/svm.model
读取配置参数(config.py):
'''
Set the config variable.
'''
import ConfigParser as cp
import json
# Parse the settings file once at import time. Every public name assigned
# below is picked up by the other scripts through `from config import *`.
config = cp.RawConfigParser()
config.read('../data/config/config.cfg')


def _json_option(section, option):
    '''Return the JSON-decoded value of `option` from `section`.'''
    raw_value = config.get(section, option)
    return json.loads(raw_value)


# [hog] section: window geometry and HOG descriptor parameters.
# The window size and step are converted from JSON arrays to tuples.
min_wdw_sz = tuple(_json_option("hog", "min_wdw_sz"))
step_size = tuple(_json_option("hog", "step_size"))
orientations = config.getint("hog", "orientations")
pixels_per_cell = _json_option("hog", "pixels_per_cell")
cells_per_block = _json_option("hog", "cells_per_block")
visualize = config.getboolean("hog", "visualize")
visualize_test = config.getboolean("hog", "visualize_test")
transform_sqrt = config.getboolean("hog", "transform_sqrt")

# [paths] section: where features and the trained model are stored.
pos_feat_ph = config.get("paths", "pos_feat_ph")
neg_feat_ph = config.get("paths", "neg_feat_ph")
model_path = config.get("paths", "model_path")

# [nms] section: overlap threshold handed to the NMS step.
threshold = config.getfloat("nms", "threshold")
HOG 特征提取(extract-features.py)。这里的实现步骤有些繁琐,自己实现时可以将 HOG 特征提取和 SVM 训练写在一起。
# Import the functions to calculate feature descriptors
from skimage.feature import local_binary_pattern
from skimage.feature import hog
from skimage.io import imread
from sklearn.externals import joblib
# To read file names
import argparse as ap
import glob
import os
from config import *
def _save_descriptors(im_dir, feat_dir, des_type):
    '''
    Compute a descriptor for every file in `im_dir` and dump it into
    `feat_dir`.

    * `im_dir` - Directory whose files are all read as grey-scale images
    * `feat_dir` - Existing directory that receives one `.feat` file per image
    * `des_type` - Descriptor name; only "HOG" is implemented

    The feature file is named after the image file name up to its first dot.
    Raises ValueError for an unknown `des_type` instead of failing later on
    an unbound descriptor variable.
    '''
    if des_type != "HOG":
        raise ValueError("Unsupported descriptor: {}".format(des_type))
    for im_path in glob.glob(os.path.join(im_dir, "*")):
        im = imread(im_path, as_grey=True)
        fd = hog(im, orientations, pixels_per_cell, cells_per_block,
                 visualise=visualize, transform_sqrt=transform_sqrt)
        if visualize:
            # With visualise enabled hog() returns (descriptor, hog_image);
            # only the descriptor must end up in the feature file.
            fd = fd[0]
        fd_name = os.path.split(im_path)[1].split(".")[0] + ".feat"
        joblib.dump(fd, os.path.join(feat_dir, fd_name))


if __name__ == "__main__":
    # Command line: directories of positive / negative training images and
    # the descriptor type (restricted to the only implemented one).
    parser = ap.ArgumentParser()
    parser.add_argument('-p', "--pospath", help="Path to positive images",
                        required=True)
    parser.add_argument('-n', "--negpath", help="Path to negative images",
                        required=True)
    parser.add_argument('-d', "--descriptor", help="Descriptor to be used -- HOG",
                        default="HOG", choices=["HOG"])
    args = vars(parser.parse_args())

    pos_im_path = args["pospath"]
    neg_im_path = args["negpath"]
    des_type = args["descriptor"]

    # If the feature directories don't exist, create them
    for feat_dir in (pos_feat_ph, neg_feat_ph):
        if not os.path.isdir(feat_dir):
            os.makedirs(feat_dir)

    print("Calculating the descriptors for the positive samples and saving them")
    _save_descriptors(pos_im_path, pos_feat_ph, des_type)
    print("Positive features saved in {}".format(pos_feat_ph))

    print("Calculating the descriptors for the negative samples and saving them")
    _save_descriptors(neg_im_path, neg_feat_ph, des_type)
    print("Negative features saved in {}".format(neg_feat_ph))

    print("Completed calculating features from training images")
SVM 训练(train-classifier.py):
# Import the required modules
from skimage.feature import local_binary_pattern
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
import argparse as ap
import glob
import os
from config import *
if __name__ == "__main__":
    # Parse the command line arguments
    parser = ap.ArgumentParser()
    parser.add_argument('-p', "--posfeat", help="Path to the positive features directory", required=True)
    parser.add_argument('-n', "--negfeat", help="Path to the negative features directory", required=True)
    parser.add_argument('-c', "--classifier", help="Classifier to be used", default="LIN_SVM")
    args = vars(parser.parse_args())

    pos_feat_path = args["posfeat"]
    neg_feat_path = args["negfeat"]

    # Classifiers supported. Compare by value (==): `is` tests object
    # identity, which is not guaranteed for a string that comes from the
    # command line, so the training step could be skipped silently.
    clf_type = args['classifier']
    if clf_type != "LIN_SVM":
        parser.error("Unsupported classifier: {}".format(clf_type))

    fds = []
    labels = []
    # Load the positive features (label 1)
    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fds.append(joblib.load(feat_path))
        labels.append(1)

    # Load the negative features (label 0)
    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fds.append(joblib.load(feat_path))
        labels.append(0)

    clf = LinearSVC()
    print("Training a Linear SVM Classifier")
    clf.fit(fds, labels)

    # If the model directory doesn't exist, create it. An empty directory
    # part means "current directory" and needs no creation.
    model_dir = os.path.split(model_path)[0]
    if model_dir and not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    joblib.dump(clf, model_path)
    print("Classifier saved to {}".format(model_path))
至此,HOG 特征提取和 SVM 训练部分就完成了,并生成了模型文件 svm.model。
HOG+SVM 测试部分(test-classifier.py):
# Import the required modules
from skimage.transform import pyramid_gaussian
from skimage.io import imread
from skimage.feature import hog
from sklearn.externals import joblib
import cv2
import argparse as ap
from nms import nms
from config import *
def sliding_window(image, window_size, step_size):
    '''
    Generate patches of the input image `image` by sliding a window over it.

    The first patch starts at the top-left co-ordinates (0, 0); the window is
    then moved in the x and y directions by the `step_size` supplied.

    So, the input parameters are -
    * `image` - Input image (indexed as image[row, column])
    * `window_size` - Size of the sliding window as (width, height)
    * `step_size` - Step of the window as (x step, y step)

    The function is a generator; each item it yields is a tuple -
    (x, y, im_window)
    where
    * x is the top-left x co-ordinate
    * y is the top-left y co-ordinate
    * im_window is the sliding window image

    Windows that would extend past the right or bottom border are yielded
    truncated, so callers that need full-size windows must check the shape
    of `im_window` themselves.
    '''
    # `range` (instead of the Python-2-only `xrange`) keeps the function
    # usable on both Python 2 and Python 3.
    for y in range(0, image.shape[0], step_size[1]):
        for x in range(0, image.shape[1], step_size[0]):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])
if __name__ == "__main__":
    # Parse the command line arguments
    parser = ap.ArgumentParser()
    parser.add_argument('-i', "--image", help="Path to the test image", required=True)
    # The ratio is fractional by default (1.25), so it has to be parsed as a
    # float; with type=int a value such as "1.25" is rejected by argparse.
    parser.add_argument('-d', '--downscale', help="Downscale ratio", default=1.25,
                        type=float)
    parser.add_argument('-v', '--visualize', help="Visualize the sliding window",
                        action="store_true")
    args = vars(parser.parse_args())

    # Read the image
    im = imread(args["image"], as_grey=False)
    downscale = args['downscale']
    visualize_det = args['visualize']

    # Load the classifier
    clf = joblib.load(model_path)

    # Detections over all scales, expressed in original-image co-ordinates as
    # (x-top-left, y-top-left, confidence, width, height)
    detections = []
    # The current scale (pyramid level) of the image
    scale = 0
    # Downscale the image and iterate
    for im_scaled in pyramid_gaussian(im, downscale=downscale):
        # Top-left corners (in co-ordinates of this pyramid level) of the
        # detections found at the current scale; only used for the live view
        cd = []
        # If the width or height of the scaled image is less than
        # the width or height of the window, then end the iterations.
        if im_scaled.shape[0] < min_wdw_sz[1] or im_scaled.shape[1] < min_wdw_sz[0]:
            break
        # Factor that maps co-ordinates of this level back to the input image
        factor = downscale ** scale
        for (x, y, im_window) in sliding_window(im_scaled, min_wdw_sz, step_size):
            # Skip the truncated windows at the right / bottom border
            if im_window.shape[0] != min_wdw_sz[1] or im_window.shape[1] != min_wdw_sz[0]:
                continue
            # Calculate the HOG features
            hog_result = hog(im_window, orientations, pixels_per_cell, cells_per_block,
                             visualise=visualize_test, transform_sqrt=transform_sqrt)
            if visualize_test:
                # With visualise enabled hog() also returns the HOG image;
                # show it and wait for a key press before continuing.
                fd, hog_image = hog_result
                cv2.imshow('hog_image', hog_image)
                cv2.waitKey(0)
            else:
                fd = hog_result
            # The classifier expects one row per sample
            fd = fd.reshape(1, -1)
            pred = clf.predict(fd)
            if pred[0] == 1:
                score = clf.decision_function(fd)
                print("Detection:: Location -> ({}, {})".format(x, y))
                print("Scale -> {} | Confidence Score {} \n".format(scale, score))
                # Position AND size are mapped back to the input image so the
                # final boxes line up with it (previously only the size was).
                detections.append((int(x * factor), int(y * factor), score,
                                   int(min_wdw_sz[0] * factor),
                                   int(min_wdw_sz[1] * factor)))
                cd.append((x, y))
            # If visualize is set to true, display the working
            # of the sliding window
            if visualize_det:
                clone = im_scaled.copy()
                for x1, y1 in cd:
                    # Draw the detections at this scale
                    cv2.rectangle(clone, (x1, y1), (x1 + im_window.shape[1], y1 +
                                  im_window.shape[0]), (0, 0, 0), thickness=2)
                # Draw the current window position
                cv2.rectangle(clone, (x, y), (x + im_window.shape[1], y +
                              im_window.shape[0]), (255, 255, 255), thickness=2)
                cv2.imshow("Sliding Window in Progress", clone)
                cv2.waitKey(30)
        # Move to the next scale
        scale += 1

    # Display the results before performing NMS. The untouched copy is kept
    # for drawing the final detections afterwards.
    clone = im.copy()
    for (x_tl, y_tl, _, w, h) in detections:
        # Draw the detections
        cv2.rectangle(im, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 0, 0), thickness=2)
    cv2.imshow("Raw Detections before NMS", im)
    cv2.waitKey()

    # Perform Non Maxima Suppression
    detections = nms(detections, threshold)

    # Display the results after performing NMS
    for (x_tl, y_tl, _, w, h) in detections:
        # Draw the detections
        cv2.rectangle(clone, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 0, 0), thickness=2)
    cv2.imshow("Final Detections after applying NMS", clone)
    cv2.waitKey()
检测出的矩形框通常会有很多且相互重叠,因此需要对其进行非极大值抑制(NMS):
def overlapping_area(detection_1, detection_2):
    '''
    Function to calculate the relative overlap of two detections.

    `detection_1` and `detection_2` are 2 detections whose overlap
    needs to be found out.
    Each detection is a list in the format ->
    [x-top-left, y-top-left, confidence-of-detections, width-of-detection, height-of-detection]

    The function returns a value between 0 and 1: the area of the
    intersection divided by the area of the union of the two rectangles.
    0 is no overlap and 1 is complete overlap. If both rectangles have
    zero area the union is empty and 0.0 is returned.

    Area calculated from ->
    http://math.stackexchange.com/questions/99565/simplest-way-to-calculate-the-intersect-area-of-two-rectangles
    '''
    # Top-left corner and size of each rectangle
    x1_tl, y1_tl = detection_1[0], detection_1[1]
    w1, h1 = detection_1[3], detection_1[4]
    x2_tl, y2_tl = detection_2[0], detection_2[1]
    w2, h2 = detection_2[3], detection_2[4]

    # Extent of the intersection along each axis (0 when disjoint)
    x_overlap = max(0, min(x1_tl + w1, x2_tl + w2) - max(x1_tl, x2_tl))
    y_overlap = max(0, min(y1_tl + h1, y2_tl + h2) - max(y1_tl, y2_tl))
    overlap_area = x_overlap * y_overlap

    # Each area must use the width and height of the SAME detection
    area_1 = w1 * h1
    area_2 = w2 * h2
    total_area = area_1 + area_2 - overlap_area
    if total_area == 0:
        # Two degenerate rectangles: nothing to divide by
        return 0.0
    return overlap_area / float(total_area)
def nms(detections, threshold=.5):
    '''
    This function performs Non-Maxima Suppression.

    `detections` consists of a list of detections.
    Each detection is in the format ->
    [x-top-left, y-top-left, confidence-of-detections, width-of-detection, height-of-detection]

    Detections are visited from the highest to the lowest confidence score.
    A detection is kept only if its overlap (as computed by
    `overlapping_area`) with every detection kept so far is not greater
    than `threshold`; otherwise it is dropped in favour of the
    higher-scoring one.

    The output is a new list of the kept detections, ordered by decreasing
    confidence. The input list is not modified.
    '''
    if len(detections) == 0:
        return []
    # Sort the detections based on confidence score, best first
    ordered = sorted(detections, key=lambda detection: detection[2],
                     reverse=True)
    # Unique detections will be appended to this list
    new_detections = []
    # Walk the sorted list without mutating it: deleting entries while
    # iterating makes the loop skip elements and lose valid detections.
    for detection in ordered:
        suppressed = any(
            overlapping_area(detection, kept) > threshold
            for kept in new_detections
        )
        if not suppressed:
            new_detections.append(detection)
    return new_detections
if __name__ == "__main__":
    # Small demonstration of the NMS module: three sample detections in the
    # [x, y, confidence, width, height] format, printed before and after
    # being passed through nms().
    detections = [[31, 31, .9, 10, 10], [31, 31, .12, 10, 10], [100, 34, .8,10, 10]]
    before_msg = "Detections before NMS = {}".format(detections)
    print(before_msg)
    after_msg = "Detections after NMS = {}".format(nms(detections))
    print(after_msg)