官网直接python接口
原理
detect.py
# video detecting is written by my self
import cv2
from google.colab.patches import cv2_imshow
import PIL
from PIL import Image
import argparse
import os
from ctypes import *
import math
import random
import time
import numpy as np
import matplotlib.pyplot as plt
# def convertBack(x, y, w, h):
# xmin = int(round(x - (w / 2)))
# xmax = int(round(x + (w / 2)))
# ymin = int(round(y - (h / 2)))
# ymax = int(round(y + (h / 2)))
# return xmin, ymin, xmax, ymax
def array_to_image(arr):
    """Convert an HxWxC uint8 image array (0-255) into a darknet IMAGE struct.

    The array is reordered to channel-first (CxHxW), scaled to [0, 1] and
    flattened into a ctypes float buffer that darknet's C API consumes.

    NOTE(review): the upstream darknet example returns the array alongside
    the IMAGE so Python does not free the underlying buffer; here only the
    IMAGE is returned — confirm the caller keeps a reference if crashes
    appear (this helper is currently unused; main uses ndarray_to_image).
    """
    chw = arr.transpose(2, 0, 1)
    channels, rows, cols = chw.shape
    flat = (chw / 255.0).flatten()
    buf = dn.c_array(dn.c_float, flat)
    return dn.IMAGE(cols, rows, channels, buf)
########################################################################### Remember to change it
import os
os.sys.path.append('/content/drive/My Drive/yolo/darknet/python')
from darknet import *
import darknet as dn
# OpenCV
if __name__ == "__main__":
    # Command-line interface: all darknet file paths are passed explicitly.
    parser = argparse.ArgumentParser(description='yolo3-tiny? for detection')
    parser.add_argument('--out', help='out,输出视频的地址',
                        default="/content/drive/My Drive/yolo/darknet/video/video_out/out.mp4")
    parser.add_argument('--path', help='path,输入需要detect地址', required=True)
    parser.add_argument('--cfg', help='such as yolov3.cfg', required=True)
    parser.add_argument('--weights', help='such as yolov3.weights', required=True)
    parser.add_argument('--meta', help='such as mask.data', required=True)
    args = parser.parse_args()

    # darknet's C interface expects byte strings, hence the explicit encoding.
    net = dn.load_net(bytes(args.cfg, encoding='utf-8'),
                      bytes(args.weights, encoding='utf-8'), 0)
    # The .data file records the class list and train/test file locations.
    meta = dn.load_meta(bytes(args.meta, encoding='utf-8'))

    # Open the input video and mirror its geometry/fps onto the output writer.
    cap = cv2.VideoCapture(args.path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'XVID'
    out = cv2.VideoWriter(args.out, fourcc, fps, (width, height))

    count = 0
    while True:
        count += 1
        print(count)
        # BUG FIX: CAP_PROP_FPS returns a float (e.g. 29.97), so the original
        # `count == 1*fps` equality could never be true for non-integer fps
        # and the intended one-second limit silently processed the whole
        # video. `>=` enforces the limit for any fps value.
        if count >= fps:  # process roughly one second of footage
            break
        ret, img = cap.read()  # ret: whether a frame was read; img: the frame
        if not ret:
            break  # end of stream
        im = dn.ndarray_to_image(img)  # cv2 ndarray -> darknet IMAGE struct
        detections = dn.detect(net, meta, im)
        for label, score, (cx, cy, bw, bh) in detections:
            # darknet reports (center_x, center_y, width, height); convert to
            # the top-left / bottom-right corners cv2 drawing calls expect.
            pt1 = (int(cx - bw / 2), int(cy - bh / 2))
            pt2 = (int(cx + bw / 2), int(cy + bh / 2))
            print(label.decode())
            cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2)
            cv2.putText(img, label.decode(), pt1,
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8,
                        color=(0, 255, 255), thickness=2)
        out.write(img)  # append the annotated frame to the output video

    cap.release()
    out.release()
    cv2.destroyAllWindows()
yolov3-tiny.cfg
[net] #[net]主要记载了,测试和训练的参数
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8 #把batch分组输入减少内存压力,colab上应该不用顾虑。
width=416 #默认大小416*416, 3通道 -> (3 416 416)
height=416
channels=3
momentum=0.9
decay=0.0005
angle=180 #-180-180 #图片随机旋转多少(-度 ~ +度),数据增强
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.05 #0.001
burn_in=1000 #在迭代次数小于burn_in时,其学习率的更新有一种方式,大于burn_in时,才采用policy的更新方式
max_batches = 205000 #500200
policy=steps
#steps=100,1000,1500,2000, 10000, 15000 # 400000-450000 在多少迭代后,进行learning rate 缩放scales因子
steps=1000,3000,7000#, 12000
scales=.1,.5,.5, #.5#, .1
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=2
num=6 # 6 box for a grid cell一个格子预测几个回归框,这一层是3,4,5这三个框
jitter=.3 # RGB转成HSV然后,三个值进行5%-10%的随机增减然后变回RGB
ignore_thresh = .6 # 0.7参与计算的IOU最低值
truth_thresh = 1
random=1
[route] #跨连接层
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=2
num=6
jitter=.3
ignore_thresh = .6
truth_thresh = 1
random=1
colab训练截图
!sudo apt-get update
!sudo apt-get install dos2unix
!dos2unix cfg/mask_det.data
!dos2unix cfg/mask_det.names
!dos2unix cfg/yolov3-tiny.cfg
!dos2unix scripts/f_train.txt
!pwd
%cd /content/drive/My Drive/yolo/darknet
!chmod u+x darknet
!./darknet detector train cfg/mask_det.data cfg/yolov3-tiny.cfg /content/mydirver/yolo/darknet/weights/yolov3-tiny.backup
测试之前将训练时的CUDNN关闭,否则会没有框
# when use detect,set cudnn=0
%cd /content/mydirver/yolo/darknet
!make clean
!make
%matplotlib inline
!python3 det.py --path '/content/drive/My Drive/yolo/darknet/video/1.mp4' --cfg '/content/drive/My Drive/yolo/darknet/cfg/yolov3-tiny.cfg' --weights '/content/drive/My Drive/yolo/darknet/weights/yolov3-tiny_60000.weights' --meta "/content/drive/My Drive/yolo/darknet/cfg/mask_det.data"
!pwd
%cd /content/drive/My Drive/yolo/darknet
!python3 detect_img.py --image '1' --path '/content/mydirver/yolo/darknet/data/mask3.jpg' --cfg '/content/drive/My Drive/yolo/darknet/cfg/yolov3-tiny.cfg' --weights '/content/drive/My Drive/yolo/darknet/weights/yolov3-tiny.backup' --meta "/content/drive/My Drive/yolo/darknet/cfg/mask_det.data"
import cv2
from google.colab.patches import cv2_imshow
img = cv2.imread('/content/drive/My Drive/yolo/darknet/prediction.jpg')
print(img.shape)
cv2_imshow(img)
!pwd
%cd /content/drive/My Drive/yolo/darknet
!python3 detect_img.py --image '1' --path '/content/mydirver/yolo/darknet/data/mask2.jpg' --cfg '/content/drive/My Drive/yolo/darknet/cfg/yolov3-tiny.cfg' --weights '/content/drive/My Drive/yolo/darknet/weights/yolov3-tiny.backup' --meta "/content/drive/My Drive/yolo/darknet/cfg/mask_det.data"
import cv2
from google.colab.patches import cv2_imshow
img = cv2.imread('/content/drive/My Drive/yolo/darknet/prediction.jpg')
print(img.shape)
cv2_imshow(img)