FFmpeg  4.4.6
vf_sr.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Filter implementing image super-resolution using deep convolutional networks.
24  * https://arxiv.org/abs/1501.00092
25  * https://arxiv.org/abs/1609.05158
26  */
27 
28 #include "avfilter.h"
29 #include "formats.h"
30 #include "internal.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavformat/avio.h"
34 #include "libswscale/swscale.h"
35 #include "dnn_filter_common.h"
36 
37 typedef struct SRContext {
38  const AVClass *class;
44 } SRContext;
45 
46 #define OFFSET(x) offsetof(SRContext, x)
47 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
48 static const AVOption sr_options[] = {
49  { "dnn_backend", "DNN backend used for model execution", OFFSET(dnnctx.backend_type), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
50  { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
51 #if (CONFIG_LIBTENSORFLOW == 1)
52  { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
53 #endif
54  { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
55  { "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
56  { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
57  { "output", "output name of the model", OFFSET(dnnctx.model_outputname), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
58  { NULL }
59 };
60 
62 
63 static av_cold int init(AVFilterContext *context)
64 {
65  SRContext *sr_context = context->priv;
66  return ff_dnn_init(&sr_context->dnnctx, DFT_PROCESS_FRAME, context);
67 }
68 
69 static int query_formats(AVFilterContext *context)
70 {
74  AVFilterFormats *formats_list;
75 
76  formats_list = ff_make_format_list(pixel_formats);
77  if (!formats_list){
78  av_log(context, AV_LOG_ERROR, "could not create formats list\n");
79  return AVERROR(ENOMEM);
80  }
81 
82  return ff_set_common_formats(context, formats_list);
83 }
84 
85 static int config_output(AVFilterLink *outlink)
86 {
87  AVFilterContext *context = outlink->src;
88  SRContext *ctx = context->priv;
89  DNNReturnType result;
90  AVFilterLink *inlink = context->inputs[0];
91  int out_width, out_height;
92 
93  // have a try run in case that the dnn model resize the frame
94  result = ff_dnn_get_output(&ctx->dnnctx, inlink->w, inlink->h, &out_width, &out_height);
95  if (result != DNN_SUCCESS) {
96  av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
97  return AVERROR(EIO);
98  }
99 
100  if (inlink->w != out_width || inlink->h != out_height) {
101  //espcn
102  outlink->w = out_width;
103  outlink->h = out_height;
104  if (inlink->format != AV_PIX_FMT_GRAY8){
106  int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
107  int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
108  int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
109  int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
110  ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
111  sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
113  ctx->sws_uv_height = sws_src_h;
114  }
115  } else {
116  //srcnn
117  outlink->w = out_width * ctx->scale_factor;
118  outlink->h = out_height * ctx->scale_factor;
119  ctx->sws_pre_scale = sws_getContext(inlink->w, inlink->h, inlink->format,
120  outlink->w, outlink->h, outlink->format,
122  }
123 
124  return 0;
125 }
126 
127 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
128 {
129  AVFilterContext *context = inlink->dst;
130  SRContext *ctx = context->priv;
131  AVFilterLink *outlink = context->outputs[0];
132  AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
133  DNNReturnType dnn_result;
134 
135  if (!out){
136  av_log(context, AV_LOG_ERROR, "could not allocate memory for output frame\n");
137  av_frame_free(&in);
138  return AVERROR(ENOMEM);
139  }
141 
142  if (ctx->sws_pre_scale) {
143  sws_scale(ctx->sws_pre_scale,
144  (const uint8_t **)in->data, in->linesize, 0, in->height,
145  out->data, out->linesize);
146  dnn_result = ff_dnn_execute_model(&ctx->dnnctx, out, out);
147  } else {
148  dnn_result = ff_dnn_execute_model(&ctx->dnnctx, in, out);
149  }
150 
151  if (dnn_result != DNN_SUCCESS){
152  av_log(ctx, AV_LOG_ERROR, "failed to execute loaded model\n");
153  av_frame_free(&in);
154  av_frame_free(&out);
155  return AVERROR(EIO);
156  }
157 
158  if (ctx->sws_uv_scale) {
159  sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
160  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
161  sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2,
162  0, ctx->sws_uv_height, out->data + 2, out->linesize + 2);
163  }
164 
165  av_frame_free(&in);
166  return ff_filter_frame(outlink, out);
167 }
168 
169 static av_cold void uninit(AVFilterContext *context)
170 {
171  SRContext *sr_context = context->priv;
172 
173  ff_dnn_uninit(&sr_context->dnnctx);
174  sws_freeContext(sr_context->sws_uv_scale);
175  sws_freeContext(sr_context->sws_pre_scale);
176 }
177 
178 static const AVFilterPad sr_inputs[] = {
179  {
180  .name = "default",
181  .type = AVMEDIA_TYPE_VIDEO,
182  .filter_frame = filter_frame,
183  },
184  { NULL }
185 };
186 
187 static const AVFilterPad sr_outputs[] = {
188  {
189  .name = "default",
190  .config_props = config_output,
191  .type = AVMEDIA_TYPE_VIDEO,
192  },
193  { NULL }
194 };
195 
197  .name = "sr",
198  .description = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
199  .priv_size = sizeof(SRContext),
200  .init = init,
201  .uninit = uninit,
203  .inputs = sr_inputs,
204  .outputs = sr_outputs,
205  .priv_class = &sr_class,
206 };
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
#define av_cold
Definition: attributes.h:88
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
uint8_t
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1096
Main libavfilter public API header.
Buffered I/O operations.
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
#define NULL
Definition: coverity.c:32
void ff_dnn_uninit(DnnContext *ctx)
DNNReturnType ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)
DNNReturnType ff_dnn_get_output(DnnContext *ctx, int input_width, int input_height, int *output_width, int *output_height)
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
common functions for the dnn based filters
DNNReturnType
Definition: dnn_interface.h:33
@ DNN_SUCCESS
Definition: dnn_interface.h:33
@ DFT_PROCESS_FRAME
Definition: dnn_interface.h:53
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
@ AV_OPT_TYPE_INT
Definition: opt.h:225
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:658
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
#define SWS_BICUBIC
Definition: swscale.h:60
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2337
int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
Definition: swscale.c:745
struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
Definition: utils.c:1917
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
const char * desc
Definition: libsvtav1.c:79
AVOptions.
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2573
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:74
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
Describe the class of an AVClass context structure.
Definition: log.h:67
An instance of a filter.
Definition: avfilter.h:341
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:349
void * priv
private data for use by the filter
Definition: avfilter.h:356
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:353
A list of supported formats for one end of a filter link.
Definition: formats.h:65
A filter pad used for either input or output.
Definition: internal.h:54
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
AVOption.
Definition: opt.h:248
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
DnnContext dnnctx
Definition: vf_sr.c:39
int sws_uv_height
Definition: vf_sr.c:42
int scale_factor
Definition: vf_sr.c:40
struct SwsContext * sws_uv_scale
Definition: vf_sr.c:41
struct SwsContext * sws_pre_scale
Definition: vf_sr.c:43
external API header
#define av_log(a,...)
FILE * out
Definition: movenc.c:54
AVFormatContext * ctx
Definition: movenc.c:48
static const AVFilterPad sr_inputs[]
Definition: vf_sr.c:178
static av_cold void uninit(AVFilterContext *context)
Definition: vf_sr.c:169
static av_cold int init(AVFilterContext *context)
Definition: vf_sr.c:63
#define FLAGS
Definition: vf_sr.c:47
static int query_formats(AVFilterContext *context)
Definition: vf_sr.c:69
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_sr.c:127
AVFilter ff_vf_sr
Definition: vf_sr.c:196
#define OFFSET(x)
Definition: vf_sr.c:46
static int config_output(AVFilterLink *outlink)
Definition: vf_sr.c:85
static const AVOption sr_options[]
Definition: vf_sr.c:48
static const AVFilterPad sr_outputs[]
Definition: vf_sr.c:187
AVFILTER_DEFINE_CLASS(sr)
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:104