FT（IG）显著+分割+Surf识别笔记（一）

本文介绍了一种图像处理技术，包括FT显著性分析、MeanShift图像分割及SURF特征检测。FT显著性分析用于提取图像中的显著区域，MeanShift分割则实现了图像的高效分割，而SURF特征检测则用于目标识别。文章提供了详细的代码实现，包括从RGB到LAB颜色空间的转换、高斯平滑、显著性地图生成、MeanShift分割以及SURF特征检测。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

FT（IG）显著+分割+Surf识别笔记

FT代码注释
Mean Shift分割代码
Surf部分

代码块

//FT显著
#include "Saliency.h"
#include "cmath"
//这只是显著部分，要自己写个main函数，读取图像，还要转成灰度图，太累了不复制了
> void Saliency::RGB2LAB(
    const vector<unsigned int>&             ubuff,
    vector<double>&                 lvec,
    vector<double>&                 avec,
    vector<double>&                 bvec)
{//这部分是求RGB转到LAB的过程，要经过转XYZ的过程
    int sz = int(ubuff.size());
    lvec.resize(sz);
    avec.resize(sz);
    bvec.resize(sz);

    for( int j = 0; j < sz; j++ )
    {
        int r = (ubuff[j] >> 16) & 0xFF;
        int g = (ubuff[j] >>  8) & 0xFF;
        int b = (ubuff[j]      ) & 0xFF;

        double xval = 0.412453 * r + 0.357580 * g + 0.180423 * b;
        double yval = 0.212671 * r + 0.715160 * g + 0.072169 * b;
        double zVal = 0.019334 * r + 0.119193 * g + 0.950227 * b;

        xval /= (255.0 * 0.950456);
        yval /=  255.0;
        zVal /= (255.0 * 1.088754);

        double fX, fY, fZ;
        double lval, aval, bval;

        if (yval > 0.008856)
        {
            fY = pow(yval, 1.0 / 3.0);
            lval = 116.0 * fY - 16.0;
        }
        else
        {
            fY = 7.787 * yval + 16.0 / 116.0;
            lval = 903.3 * yval;
        }

        if (xval > 0.008856)
            fX = pow(xval, 1.0 / 3.0);
        else
            fX = 7.787 * xval + 16.0 / 116.0;

        if (zVal > 0.008856)
            fZ = pow(zVal, 1.0 / 3.0);
        else
            fZ = 7.787 * zVal + 16.0 / 116.0;

        aval = 500.0 * (fX - fY)+128.0;
        bval = 200.0 * (fY - fZ)+128.0;

        lvec[j] = lval;
        avec[j] = aval;
        bvec[j] = bval;
    }
}


/// 高斯平滑


void Saliency::GaussianSmooth(
    const vector<double>&           inputImg,
    const int&                      width,
    const int&                      height,
    const vector<double>&           kernel,
    vector<double>&                 smoothImg)
{
    int center = int(kernel.size())/2;

    int sz = width*height;
    smoothImg.clear();
    smoothImg.resize(sz);
    vector<double> tempim(sz);
    int rows = height;
    int cols = width;

   // Blur in the x direction.

    {int index(0);
    for( int r = 0; r < rows; r++ )
    {
        for( int c = 0; c < cols; c++ )
        {
            double kernelsum(0);
            double sum(0);
            for( int cc = (-center); cc <= center; cc++ )
            {
                if(((c+cc) >= 0) && ((c+cc) < cols))
                {
                    sum += inputImg[r*cols+(c+cc)] * kernel[center+cc];
                    kernelsum += kernel[center+cc];
                }
            }
            tempim[index] = sum/kernelsum;
            index++;
        }
    }}


    // Blur in the y direction.

    {int index = 0;
    for( int r = 0; r < rows; r++ )
    {
        for( int c = 0; c < cols; c++ )
        {
            double kernelsum(0);
            double sum(0);
            for( int rr = (-center); rr <= center; rr++ )
            {
                if(((r+rr) >= 0) && ((r+rr) < rows))
                {
                   sum += tempim[(r+rr)*cols+c] * kernel[center+rr];
                   kernelsum += kernel[center+rr];
                }
            }
            smoothImg[index] = sum/kernelsum;
            index++;
        }
    }}
}


/// GetSaliencyMap
///
/// Outputs a saliency map with a value assigned per pixel. The values are
/// normalized in the interval [0,255] if normflag is set true (default value).

void Saliency::GetSaliencyMap(
    const vector<unsigned int>&     inputimg,
    const int&                      width,
    const int&                      height,
    vector<double>&                 salmap,
    const bool&                     normflag) 
{
    int sz = width*height;
    salmap.clear();
    salmap.resize(sz);

    vector<double> lvec(0), avec(0), bvec(0);
    RGB2LAB(inputimg, lvec, avec, bvec);

    // Obtain Lab average values

    double avgl(0), avga(0), avgb(0);
    {for( int i = 0; i < sz; i++ )
    {
        avgl += lvec[i];
        avga += avec[i];
        avgb += bvec[i];
    }}
    avgl /= sz;
    avga /= sz;
    avgb /= sz;

    vector<double> slvec(0), savec(0), sbvec(0);


    // The kernel can be [1 2 1] or [1 4 6 4 1] as needed.
    // The code below show usage of [1 2 1] kernel.

    vector<double> kernel(0);
    //高斯滤波的核是[1,2,1]
    kernel.push_back(1.0);
    kernel.push_back(2.0);
    kernel.push_back(1.0);

    GaussianSmooth(lvec, width, height, kernel, slvec);  //高斯平滑
    GaussianSmooth(avec, width, height, kernel, savec); 
    GaussianSmooth(bvec, width, height, kernel, sbvec);

    {for( int i = 0; i < sz; i++ )  //得到的结果高斯平滑减去原始图像的均值，如果有低频信号通过图像平滑最后减去均值就没有响应了，相反高频部分，通过平滑残留了高频响应减去了均值还是有响应，这部分我们认为是显著区域，最后返回结果
    {
        salmap[i] = (slvec[i]-avgl)*(slvec[i]-avgl) +
                    (savec[i]-avga)*(savec[i]-avga) +
                    (sbvec[i]-avgb)*(sbvec[i]-avgb);
    }}
//以上就是FT求图像显著图的主要代码
    if( true == normflag )//这是归一化，就是每个图像像素值都缩小为[0-1]的函数。函数为Normalize
    {
        vector<double> normalized(0);
        Normalize(salmap, width, height, normalized);
        swap(salmap, normalized);
    }
}
 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

//分割代码

#include "stdafx.h"  
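// stdafx.h is the Visual Studio precompiled header, not a library: create it in your project, or remove this include when not using precompiled headers.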
// meanshift_segmentation.cpp : 定义控制台应用程序的入口点。  
//  

#include"opencv2/core/core.hpp" 
#include "opencv2/highgui/highgui.hpp" 
#include "opencv2/imgproc/imgproc.hpp"  
#include "iostream"  

#pragma comment(lib,"opencv_highgui2413d.lib")  
#pragma comment(lib,"opencv_core2413d.lib")  
#pragma comment(lib,"opencv_imgproc2413d.lib")  

using namespace cv;
using namespace std;


Mat src, dst;

int spatialRad, colorRad, maxPryLevel;
int main(int argc, uchar* argv[])

{
    int maxPyrLevel = 3;  //金字塔层数  
    Mat res;
    double duration = static_cast<double>(getTickCount());


    Mat img = imread("E:/Codes/CprimerPlus/chapter13/分割/31.jpg"); //读图路径
    //路径要加引号，一般程序出错最有可能是路径问题，有绝对路径和相对路径之分，绝对路径：/
    //相对路径：可直接引用程序下的图片
    int spatialRad = 4; //值越大时间会越长
    int colorRad = 30; //值越大图像会分割的区域数越小
    pyrMeanShiftFiltering(img, res, spatialRad, colorRad, maxPyrLevel); 



    imshow("res", res);
    RNG rng = theRNG();
    Mat mask(res.rows + 2, res.cols + 2, CV_8UC1, Scalar::all(0));  //掩模  
    for (int y = 0; y < res.rows; y++)
    {
        for (int x = 0; x < res.cols; x++)
        {
            if (mask.at<uchar>(y + 1, x + 1) == 0)  //非0处即为1，表示已经经过填充，不再处理  
            {
                //Scalar newVal(rng(256), rng(256), rng(256));
                //floodFill(res, mask, Point(x, y), newVal, 0, Scalar::all(5), Scalar::all(5)); //执行漫水填充  
            }
        }
    }
    imshow("meanShift图像分割", res);
    imwrite("result.jpg", res);
    duration = ((double)getTickCount() - duration) / getTickFrequency();
    cout << "运行时间" << duration << "秒" << endl;

    waitKey();
    return 0;
    ｝
 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

Surf部分没什么好写的，基本是《OpenCV3》书上的原版程序。所有程序里完全由我自己写的，只有“得到分割结果后把目标抠出来”这一部分；其余都是现成的主程序，我只修改了一些小参数。

 1

#include "stdafx.h"  
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/opencv.hpp"
#include "iostream"
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
    Mat  binary;
    Mat result = imread("2.jpg", 0);//这里0表示读取图像并转为灰度图即[0-255]
    Mat tongji;
    threshold(result, binary, 50, 255, CV_THRESH_BINARY);//阀值化，固定阀值吧，不是自适应的。
    //里面的参数分别代表;输入图像；输出图像；最低阀值，往上为1，下为0；这个图像里像素值的最大///值；二值化的表示，也有其他表示方法
    Mat element = getStructuringElement(MORPH_RECT, Size(50, 50));//膨胀操作的一个自定义核
    Mat out;
    double maxArea = 0;
    vector<cv::Point> maxContour;
    dilate(binary, out, element);//膨胀。一般写前面三个参数，后面有默认的
    vector<vector<cv::Point>> contours;//vector是结构体或者容器

    findContours(out, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);//寻找轮廓
    //里面参数：输入；输出；检测轮廓类别，这个是检测最外围轮廓（还有所有轮廓并放list中，提取所有轮廓//并组织为双层结构，最后一个是提取轮廓建立网状结构）；获取轮廓的每个像素。
    for (int i = 0; i < contours.size(); i++)
    {
        double area = contourArea(contours[i]);//轮廓面积
        if (area > maxArea)
        {
            maxArea = area;
            maxContour = contours[i];
        }
    }
    Rect maxRect = boundingRect(maxContour);//求最大轮廓面积，并返回轮廓的最大矩形

    Mat original = imread("1.jpg");//原图
    Mat gray = result;
    Mat hole(gray.size(), CV_8U, Scalar(0));//建立模版方便抠图所用的转换，与原图大小一样
    rectangle(hole, Rect(maxRect.x, maxRect.y, maxRect.width, maxRect.height), Scalar(255, 255, 255), -1, 1, 0);//刚刚矩形的位置，在模版相同的矩形位置变白色
    double a = maxRect.x;
    double b = maxRect.y;
    double c = maxRect.width;
    double d = maxRect.height;
    printf("左上点x:%f\n", a);
    printf("左上点y:%f\n", b);
    printf("宽:%f\n", c);
    printf("高:%f\n", d);//显示矩形的位置数据



    namedWindow("My hole");
    imshow("My hole", hole);
    Mat crop(original.rows, original.cols, CV_8UC3);
    original.copyTo(crop, hole);//将原图像拷贝进遮罩图层  
    namedWindow("My warpPerspective");
    imshow("My warpPerspective", crop);
    imwrite("result.jpg", crop);



    imshow("image", result);
    waitKey();

    return 0;
    ｝
 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

					<link href="https://csdnimg.cn/release/phoenix/mdeditor/markdown_views-2b43bc2447.css" rel="stylesheet">
            </div>