FFmpeg  4.4.6
vf_yadif_cuda.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
23 #include "libavutil/cuda_check.h"
24 #include "internal.h"
25 #include "yadif.h"
26 
27 extern char vf_yadif_cuda_ptx[];
28 
29 typedef struct DeintCUDAContext {
31 
36 
37  CUcontext cu_ctx;
38  CUstream stream;
39  CUmodule cu_module;
40  CUfunction cu_func_uchar;
41  CUfunction cu_func_uchar2;
42  CUfunction cu_func_ushort;
43  CUfunction cu_func_ushort2;
45 
/* Integer division of a by b, rounded towards +infinity (for a >= 0, b > 0). */
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Round a up to the next multiple of b; b must be a power of two. */
#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1))
/* Thread-block dimensions used for every kernel launch in this file. */
#define BLOCKX 32
#define BLOCKY 16

/*
 * Wrap a CUDA driver call; expects `ctx` (logging context) and `s`
 * (DeintCUDAContext *) to be in scope at the point of use.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
52 
53 static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
54  CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next,
55  CUarray_format format, int channels,
56  int src_width, // Width is pixels per channel
57  int src_height, // Height is pixels per channel
58  int src_pitch, // Pitch is bytes
59  CUdeviceptr dst,
60  int dst_width, // Width is pixels per channel
61  int dst_height, // Height is pixels per channel
62  int dst_pitch, // Pitch is pixels per channel
63  int parity, int tff)
64 {
65  DeintCUDAContext *s = ctx->priv;
66  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
67  CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0;
68  int ret;
69  int skip_spatial_check = s->yadif.mode&2;
70 
71  void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next,
72  &dst_width, &dst_height, &dst_pitch,
73  &src_width, &src_height, &parity, &tff,
74  &skip_spatial_check };
75 
76  CUDA_TEXTURE_DESC tex_desc = {
77  .filterMode = CU_TR_FILTER_MODE_POINT,
78  .flags = CU_TRSF_READ_AS_INTEGER,
79  };
80 
81  CUDA_RESOURCE_DESC res_desc = {
82  .resType = CU_RESOURCE_TYPE_PITCH2D,
83  .res.pitch2D.format = format,
84  .res.pitch2D.numChannels = channels,
85  .res.pitch2D.width = src_width,
86  .res.pitch2D.height = src_height,
87  .res.pitch2D.pitchInBytes = src_pitch,
88  };
89 
90  res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev;
91  ret = CHECK_CU(cu->cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL));
92  if (ret < 0)
93  goto exit;
94 
95  res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur;
96  ret = CHECK_CU(cu->cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL));
97  if (ret < 0)
98  goto exit;
99 
100  res_desc.res.pitch2D.devPtr = (CUdeviceptr)next;
101  ret = CHECK_CU(cu->cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL));
102  if (ret < 0)
103  goto exit;
104 
105  ret = CHECK_CU(cu->cuLaunchKernel(func,
106  DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
107  BLOCKX, BLOCKY, 1,
108  0, s->stream, args, NULL));
109 
110 exit:
111  if (tex_prev)
112  CHECK_CU(cu->cuTexObjectDestroy(tex_prev));
113  if (tex_cur)
114  CHECK_CU(cu->cuTexObjectDestroy(tex_cur));
115  if (tex_next)
116  CHECK_CU(cu->cuTexObjectDestroy(tex_next));
117 
118  return ret;
119 }
120 
121 static void filter(AVFilterContext *ctx, AVFrame *dst,
122  int parity, int tff)
123 {
124  DeintCUDAContext *s = ctx->priv;
125  YADIFContext *y = &s->yadif;
126  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
127  CUcontext dummy;
128  int i, ret;
129 
130  ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
131  if (ret < 0)
132  return;
133 
134  for (i = 0; i < y->csp->nb_components; i++) {
135  CUfunction func;
136  CUarray_format format;
137  int pixel_size, channels;
138  const AVComponentDescriptor *comp = &y->csp->comp[i];
139 
140  if (comp->plane < i) {
141  // We process planes as a whole, so don't reprocess
142  // them for additional components
143  continue;
144  }
145 
146  pixel_size = (comp->depth + comp->shift) / 8;
147  channels = comp->step / pixel_size;
148  if (pixel_size > 2 || channels > 2) {
149  av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
150  goto exit;
151  }
152  switch (pixel_size) {
153  case 1:
154  func = channels == 1 ? s->cu_func_uchar : s->cu_func_uchar2;
155  format = CU_AD_FORMAT_UNSIGNED_INT8;
156  break;
157  case 2:
158  func = channels == 1 ? s->cu_func_ushort : s->cu_func_ushort2;
159  format = CU_AD_FORMAT_UNSIGNED_INT16;
160  break;
161  default:
162  av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
163  goto exit;
164  }
166  "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
167  comp->plane, pixel_size, channels);
169  (CUdeviceptr)y->prev->data[i],
170  (CUdeviceptr)y->cur->data[i],
171  (CUdeviceptr)y->next->data[i],
172  format, channels,
173  AV_CEIL_RSHIFT(y->cur->width, i ? y->csp->log2_chroma_w : 0),
174  AV_CEIL_RSHIFT(y->cur->height, i ? y->csp->log2_chroma_h : 0),
175  y->cur->linesize[i],
176  (CUdeviceptr)dst->data[i],
177  AV_CEIL_RSHIFT(dst->width, i ? y->csp->log2_chroma_w : 0),
178  AV_CEIL_RSHIFT(dst->height, i ? y->csp->log2_chroma_h : 0),
179  dst->linesize[i] / comp->step,
180  parity, tff);
181  }
182 
183 exit:
184  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
185  return;
186 }
187 
189 {
190  CUcontext dummy;
191  DeintCUDAContext *s = ctx->priv;
192  YADIFContext *y = &s->yadif;
193 
194  if (s->hwctx && s->cu_module) {
195  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
196  CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
197  CHECK_CU(cu->cuModuleUnload(s->cu_module));
198  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
199  }
200 
201  av_frame_free(&y->prev);
202  av_frame_free(&y->cur);
203  av_frame_free(&y->next);
204 
205  av_buffer_unref(&s->device_ref);
206  s->hwctx = NULL;
207  av_buffer_unref(&s->input_frames_ref);
208  s->input_frames = NULL;
209 }
210 
212 {
213  enum AVPixelFormat pix_fmts[] = {
215  };
216  int ret;
217 
219  &ctx->inputs[0]->outcfg.formats)) < 0)
220  return ret;
222  &ctx->outputs[0]->incfg.formats)) < 0)
223  return ret;
224 
225  return 0;
226 }
227 
228 static int config_input(AVFilterLink *inlink)
229 {
230  AVFilterContext *ctx = inlink->dst;
231  DeintCUDAContext *s = ctx->priv;
232 
233  if (!inlink->hw_frames_ctx) {
234  av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
235  "required to associate the processing device.\n");
236  return AVERROR(EINVAL);
237  }
238 
239  s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
240  if (!s->input_frames_ref) {
241  av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
242  "failed.\n");
243  return AVERROR(ENOMEM);
244  }
245  s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
246 
247  return 0;
248 }
249 
250 static int config_output(AVFilterLink *link)
251 {
252  AVHWFramesContext *output_frames;
253  AVFilterContext *ctx = link->src;
254  DeintCUDAContext *s = ctx->priv;
255  YADIFContext *y = &s->yadif;
256  CudaFunctions *cu;
257  int ret = 0;
258  CUcontext dummy;
259 
260  av_assert0(s->input_frames);
261  s->device_ref = av_buffer_ref(s->input_frames->device_ref);
262  if (!s->device_ref) {
263  av_log(ctx, AV_LOG_ERROR, "A device reference create "
264  "failed.\n");
265  return AVERROR(ENOMEM);
266  }
267  s->hwctx = ((AVHWDeviceContext*)s->device_ref->data)->hwctx;
268  s->cu_ctx = s->hwctx->cuda_ctx;
269  s->stream = s->hwctx->stream;
270  cu = s->hwctx->internal->cuda_dl;
271 
272  link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
273  if (!link->hw_frames_ctx) {
274  av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
275  "for output.\n");
276  ret = AVERROR(ENOMEM);
277  goto exit;
278  }
279 
280  output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
281 
282  output_frames->format = AV_PIX_FMT_CUDA;
283  output_frames->sw_format = s->input_frames->sw_format;
284  output_frames->width = ctx->inputs[0]->w;
285  output_frames->height = ctx->inputs[0]->h;
286 
287  output_frames->initial_pool_size = 4;
288 
289  ret = ff_filter_init_hw_frames(ctx, link, 10);
290  if (ret < 0)
291  goto exit;
292 
293  ret = av_hwframe_ctx_init(link->hw_frames_ctx);
294  if (ret < 0) {
295  av_log(ctx, AV_LOG_ERROR, "Failed to initialise CUDA frame "
296  "context for output: %d\n", ret);
297  goto exit;
298  }
299 
300  link->time_base.num = ctx->inputs[0]->time_base.num;
301  link->time_base.den = ctx->inputs[0]->time_base.den * 2;
302  link->w = ctx->inputs[0]->w;
303  link->h = ctx->inputs[0]->h;
304 
305  if(y->mode & 1)
306  link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
307  (AVRational){2, 1});
308 
309  if (link->w < 3 || link->h < 3) {
310  av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
311  ret = AVERROR(EINVAL);
312  goto exit;
313  }
314 
315  y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
316  y->filter = filter;
317 
318  ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
319  if (ret < 0)
320  goto exit;
321 
322  ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
323  if (ret < 0)
324  goto exit;
325 
326  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar"));
327  if (ret < 0)
328  goto exit;
329 
330  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2"));
331  if (ret < 0)
332  goto exit;
333 
334  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort"));
335  if (ret < 0)
336  goto exit;
337 
338  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2"));
339  if (ret < 0)
340  goto exit;
341 
342 exit:
343  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
344 
345  return ret;
346 }
347 
348 static const AVClass yadif_cuda_class = {
349  .class_name = "yadif_cuda",
350  .item_name = av_default_item_name,
351  .option = ff_yadif_options,
352  .version = LIBAVUTIL_VERSION_INT,
353  .category = AV_CLASS_CATEGORY_FILTER,
354 };
355 
356 static const AVFilterPad deint_cuda_inputs[] = {
357  {
358  .name = "default",
359  .type = AVMEDIA_TYPE_VIDEO,
360  .filter_frame = ff_yadif_filter_frame,
361  .config_props = config_input,
362  },
363  { NULL }
364 };
365 
366 static const AVFilterPad deint_cuda_outputs[] = {
367  {
368  .name = "default",
369  .type = AVMEDIA_TYPE_VIDEO,
370  .request_frame = ff_yadif_request_frame,
371  .config_props = config_output,
372  },
373  { NULL }
374 };
375 
377  .name = "yadif_cuda",
378  .description = NULL_IF_CONFIG_SMALL("Deinterlace CUDA frames"),
379  .priv_size = sizeof(DeintCUDAContext),
380  .priv_class = &yadif_cuda_class,
386  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
387 };
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:243
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static const char *const format[]
Definition: af_aiir.c:456
channels
Definition: aptx.h:33
#define av_cold
Definition: attributes.h:88
simple assert() macros that are a bit more flexible than ISO C assert().
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
int ff_filter_init_hw_frames(AVFilterContext *avctx, AVFilterLink *link, int default_pool_size)
Perform any additional setup required for hardware frames.
Definition: avfilter.c:1653
#define flags(name, subs,...)
Definition: cbs_av1.c:572
#define s(width, name)
Definition: cbs_vp9.c:257
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
#define NULL
Definition: coverity.c:32
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:85
int ff_formats_ref(AVFilterFormats *f, AVFilterFormats **ref)
Add ref as a new reference to formats.
Definition: formats.c:466
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:134
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:125
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:93
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
Definition: log.h:220
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:333
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:247
FFmpeg internal API for CUDA.
int i
Definition: input.c:407
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:339
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:309
@ AV_CLASS_CATEGORY_FILTER
Definition: log.h:37
int dummy
Definition: motion.c:64
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2573
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:235
A reference to a data buffer.
Definition: buffer.h:84
uint8_t * data
The data buffer.
Definition: buffer.h:92
This struct is allocated as AVHWDeviceContext.hwctx.
Describe the class of an AVClass context structure.
Definition: log.h:67
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
An instance of a filter.
Definition: avfilter.h:341
A filter pad used for either input or output.
Definition: internal.h:54
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:332
int width
Definition: frame.h:376
int height
Definition: frame.h:376
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:349
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:61
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:209
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
int initial_pool_size
Initial size of the frame pool.
Definition: hwcontext.h:199
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:229
const char * name
Definition: pixdesc.h:82
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:92
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:101
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
Rational number (pair of numerator and denominator).
Definition: rational.h:58
int num
Numerator.
Definition: rational.h:59
int den
Denominator.
Definition: rational.h:60
CUcontext cu_ctx
Definition: vf_yadif_cuda.c:37
AVBufferRef * device_ref
Definition: vf_yadif_cuda.c:33
AVBufferRef * input_frames_ref
Definition: vf_yadif_cuda.c:34
AVCUDADeviceContext * hwctx
Definition: vf_yadif_cuda.c:32
CUfunction cu_func_uchar2
Definition: vf_yadif_cuda.c:41
CUfunction cu_func_ushort2
Definition: vf_yadif_cuda.c:43
CUmodule cu_module
Definition: vf_yadif_cuda.c:39
AVHWFramesContext * input_frames
Definition: vf_yadif_cuda.c:35
CUfunction cu_func_ushort
Definition: vf_yadif_cuda.c:42
CUfunction cu_func_uchar
Definition: vf_yadif_cuda.c:40
YADIFContext yadif
Definition: vf_yadif_cuda.c:30
AVFrame * prev
Definition: yadif.h:61
AVFrame * cur
Definition: yadif.h:59
AVFrame * next
Definition: yadif.h:60
void(* filter)(AVFilterContext *ctx, AVFrame *dstpic, int parity, int tff)
Definition: yadif.h:64
int mode
YADIFMode.
Definition: yadif.h:53
const AVPixFmtDescriptor * csp
Definition: yadif.h:75
#define av_log(a,...)
AVFormatContext * ctx
Definition: movenc.c:48
mcdeint parity
Definition: vf_mcdeint.c:277
static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, CUarray_format format, int channels, int src_width, int src_height, int src_pitch, CUdeviceptr dst, int dst_width, int dst_height, int dst_pitch, int parity, int tff)
Definition: vf_yadif_cuda.c:53
static void filter(AVFilterContext *ctx, AVFrame *dst, int parity, int tff)
char vf_yadif_cuda_ptx[]
#define DIV_UP(a, b)
Definition: vf_yadif_cuda.c:46
static av_cold void deint_cuda_uninit(AVFilterContext *ctx)
static int config_input(AVFilterLink *inlink)
#define BLOCKX
Definition: vf_yadif_cuda.c:48
static int deint_cuda_query_formats(AVFilterContext *ctx)
AVFilter ff_vf_yadif_cuda
static const AVClass yadif_cuda_class
#define CHECK_CU(x)
Definition: vf_yadif_cuda.c:51
static const AVFilterPad deint_cuda_inputs[]
#define BLOCKY
Definition: vf_yadif_cuda.c:49
static int config_output(AVFilterLink *link)
static const AVFilterPad deint_cuda_outputs[]
int ff_yadif_filter_frame(AVFilterLink *link, AVFrame *frame)
Definition: yadif_common.c:92
int ff_yadif_request_frame(AVFilterLink *link)
Definition: yadif_common.c:159
const AVOption ff_yadif_options[]
Definition: yadif_common.c:198