-
Notifications
You must be signed in to change notification settings - Fork 819
Expand file tree
/
Copy pathconvolution.c
More file actions
103 lines (90 loc) · 3.53 KB
/
convolution.c
File metadata and controls
103 lines (90 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/**
*
* Copyright 2016-2020 Netflix, Inc.
*
* Licensed under the BSD+Patent License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://opensource.org/licenses/BSDplusPatent
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "alignment.h"
#include "config.h"
#include "convolution.h"
#include "convolution_internal.h"
#include "cpu.h"
/*
 * Integer rounding helpers defined in another translation unit.
 * NOTE(review): presumably they round the first argument down / up to a
 * multiple of the second argument -- confirm against their definitions.
 */
extern int vmaf_floorn(int, int);
extern int vmaf_ceiln(int, int);
/**
 * Horizontal 1-D convolution of a float image with optional subsampling.
 *
 * For every row i, each column j that is a multiple of `step` is filtered and
 * the result is stored at dst[i * dst_stride + j / step].  Columns whose
 * filter window lies entirely inside the row are computed with a plain dot
 * product; columns close to the left/right edge are delegated to
 * convolution_edge_s().
 *
 * @param filter        filter taps (`filter_width` entries)
 * @param filter_width  number of filter taps
 * @param src           source image
 * @param dst           destination image
 * @param width         number of source columns
 * @param height        number of rows
 * @param src_stride    distance in floats between consecutive source rows
 * @param dst_stride    distance in floats between consecutive destination rows
 * @param step          horizontal sampling step; must be positive (it is used
 *                      as a divisor and as the loop increment)
 */
void convolution_x_c_s(const float *filter, int filter_width, const float *src, float *dst, int width, int height, int src_stride, int dst_stride, int step)
{
    int radius = filter_width / 2;
    /* NOTE(review): presumably rounds up / down to a multiple of step -- confirm. */
    int borders_left = vmaf_ceiln(radius, step);
    int borders_right = vmaf_floorn(width - (filter_width - radius), step);

    /*
     * Rows narrower than the filter make the raw borders cross each other:
     * borders_right may become negative (the right-edge loop would then start
     * at a negative column and write before the row) and borders_left may
     * exceed width (the left-edge loop would write past the row).  Clamp both
     * so every processed column lies in [0, width).
     */
    if (borders_left > width) {
        borders_left = width;
    }
    if (borders_right < borders_left) {
        borders_right = borders_left;
    }

    for (int i = 0; i < height; ++i) {
        const float *src_row = src + i * src_stride;
        float *dst_row = dst + i * dst_stride;

        /* Left edge: the filter window extends past column 0. */
        for (int j = 0; j < borders_left; j += step) {
            dst_row[j / step] = convolution_edge_s(true, filter, filter_width, src, width, height, src_stride, i, j);
        }

        /* Interior: the whole window is inside the row, no edge handling needed. */
        for (int j = borders_left; j < borders_right; j += step) {
            const float *src_ptr = src_row + j - radius;
            float accum = 0;
            for (int k = 0; k < filter_width; ++k) {
                accum += filter[k] * src_ptr[k];
            }
            dst_row[j / step] = accum;
        }

        /* Right edge: the filter window may extend past the last column. */
        for (int j = borders_right; j < width; j += step) {
            dst_row[j / step] = convolution_edge_s(true, filter, filter_width, src, width, height, src_stride, i, j);
        }
    }
}
/**
 * Vertical 1-D convolution of a float image with optional subsampling.
 *
 * Each row i that is a multiple of `step` is filtered along the column
 * direction and stored in destination row i / step.  Rows whose filter window
 * lies entirely inside the image are computed with a plain dot product; rows
 * close to the top/bottom edge are delegated to convolution_edge_s().
 *
 * @param filter        filter taps (`filter_width` entries)
 * @param filter_width  number of filter taps
 * @param src           source image
 * @param dst           destination image
 * @param width         number of columns
 * @param height        number of source rows
 * @param src_stride    distance in floats between consecutive source rows
 * @param dst_stride    distance in floats between consecutive destination rows
 * @param step          vertical sampling step; must be positive (it is used
 *                      as a divisor and as the loop increment)
 */
void convolution_y_c_s(const float *filter, int filter_width, const float *src, float *dst, int width, int height, int src_stride, int dst_stride, int step)
{
    int radius = filter_width / 2;
    /* NOTE(review): presumably rounds up / down to a multiple of step -- confirm. */
    int borders_top = vmaf_ceiln(radius, step);
    int borders_bottom = vmaf_floorn(height - (filter_width - radius), step);

    /*
     * Images shorter than the filter make the raw borders cross each other:
     * borders_bottom may become negative (the bottom-edge loop would then
     * start at a negative row and write before dst) and borders_top may exceed
     * height (the top-edge loop would write rows past the image).  Clamp both
     * so every processed row lies in [0, height).
     */
    if (borders_top > height) {
        borders_top = height;
    }
    if (borders_bottom < borders_top) {
        borders_bottom = borders_top;
    }

    /* Top edge: the filter window extends above row 0. */
    for (int i = 0; i < borders_top; i += step) {
        float *dst_row = dst + (i / step) * dst_stride;
        for (int j = 0; j < width; ++j) {
            dst_row[j] = convolution_edge_s(false, filter, filter_width, src, width, height, src_stride, i, j);
        }
    }

    /* Interior: the whole window is inside the image, no edge handling needed. */
    for (int i = borders_top; i < borders_bottom; i += step) {
        float *dst_row = dst + (i / step) * dst_stride;
        for (int j = 0; j < width; ++j) {
            float accum = 0;
            for (int k = 0; k < filter_width; ++k) {
                accum += filter[k] * src[(i - radius + k) * src_stride + j];
            }
            dst_row[j] = accum;
        }
    }

    /* Bottom edge: the filter window may extend below the last row. */
    for (int i = borders_bottom; i < height; i += step) {
        float *dst_row = dst + (i / step) * dst_stride;
        for (int j = 0; j < width; ++j) {
            dst_row[j] = convolution_edge_s(false, filter, filter_width, src, width, height, src_stride, i, j);
        }
    }
}
/**
 * Separable 2-D convolution of a float image: the same 1-D filter is applied
 * along y (src -> tmp) and then along x (tmp -> dst), both without
 * subsampling (step 1).
 *
 * On x86 builds, when the CPU flags report AVX2, the work is handed to
 * convolution_f32_avx_s() instead of the C fallback.
 *
 * @param filter        filter taps (`filter_width` entries)
 * @param filter_width  number of filter taps
 * @param src           source image, rows `src_stride` floats apart
 * @param dst           destination image, rows `dst_stride` floats apart
 * @param tmp           scratch buffer for the vertical-pass result; in the C
 *                      fallback its rows are `dst_stride` floats apart
 * @param width         number of columns
 * @param height        number of rows
 * @param src_stride    distance in floats between consecutive source rows
 * @param dst_stride    distance in floats between consecutive destination rows
 */
void convolution_f32_c_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride)
{
    /* Use the AVX2 implementation when the CPU supports it. */
#if ARCH_X86
    const unsigned flags = vmaf_get_cpu_flags();
    if (flags & VMAF_X86_CPU_FLAG_AVX2) {
        convolution_f32_avx_s(filter, filter_width, src, dst, tmp, width,
                              height, src_stride, dst_stride);
        return;
    }
#endif

    /* Fall back to plain C: convolve along y first, then along x. */
    convolution_y_c_s(filter, filter_width, src, tmp, width, height, src_stride, dst_stride, 1);
    /*
     * The vertical pass laid tmp out with dst_stride as its row pitch, so the
     * horizontal pass must read tmp with that same pitch (not src_stride),
     * otherwise rows are misaddressed whenever the two strides differ.
     */
    convolution_x_c_s(filter, filter_width, tmp, dst, width, height, dst_stride, dst_stride, 1);
}