FFmpeg  4.4.6
vf_signature.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Gerion Entrup
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 /**
22  * @file
23  * MPEG-7 video signature calculation and lookup filter
24  * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
25  */
26 
#include <errno.h>
27 #include <float.h>
28 #include "libavcodec/put_bits.h"
29 #include "libavformat/avformat.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/intreadwrite.h"
33 #include "libavutil/timestamp.h"
34 #include "avfilter.h"
35 #include "internal.h"
36 #include "signature.h"
37 #include "signature_lookup.c"
38 
/* Byte offset of option field x inside the filter's private context. */
#define OFFSET(x) offsetof(SignatureContext, x)
/* Flags shared by every entry of the option table. Parenthesized so the
 * expansion is a single operand wherever it is substituted. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* 64-bit precision factor used by filter_frame when the exact (non-rounded)
 * calculation is possible. Parenthesized so the cast binds to the constant
 * regardless of the surrounding expression.
 * NOTE(review): the name suggests a least common multiple of the block
 * sizes -- confirm against signature.h. */
#define BLOCK_LCM ((int64_t) 476985600)
42 
43 static const AVOption signature_options[] = {
44  { "detectmode", "set the detectmode",
45  OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
46  { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, "mode" },
47  { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" },
48  { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" },
49  { "nb_inputs", "number of inputs",
50  OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS },
51  { "filename", "filename for output files",
52  OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS },
53  { "format", "set output format",
54  OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , "format" },
55  { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" },
56  { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, "format" },
57  { "th_d", "threshold to detect one word as similar",
58  OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS },
59  { "th_dc", "threshold to detect all words as similar",
60  OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS },
61  { "th_xh", "threshold to detect frames as similar",
62  OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS },
63  { "th_di", "minimum length of matching sequence in frames",
64  OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
65  { "th_it", "threshold for relation of good to all frames",
66  OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS },
67  { NULL }
68 };
69 
71 
73 {
74  /* all formats with a separate gray value */
75  static const enum AVPixelFormat pix_fmts[] = {
85  };
86 
88 }
89 
90 static int config_input(AVFilterLink *inlink)
91 {
92  AVFilterContext *ctx = inlink->dst;
93  SignatureContext *sic = ctx->priv;
94  StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
95 
96  sc->time_base = inlink->time_base;
97  /* test for overflow */
98  sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
99  if (sc->divide) {
100  av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
101  }
102  sc->w = inlink->w;
103  sc->h = inlink->h;
104  return 0;
105 }
106 
107 static int get_block_size(const Block *b)
108 {
109  return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
110 }
111 
112 static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
113 {
114  uint64_t sum = 0;
115 
116  int x0, y0, x1, y1;
117 
118  x0 = b->up.x;
119  y0 = b->up.y;
120  x1 = b->to.x;
121  y1 = b->to.y;
122 
123  if (x0-1 >= 0 && y0-1 >= 0) {
124  sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
125  } else if (x0-1 >= 0) {
126  sum = intpic[y1][x1] - intpic[y1][x0-1];
127  } else if (y0-1 >= 0) {
128  sum = intpic[y1][x1] - intpic[y0-1][x1];
129  } else {
130  sum = intpic[y1][x1];
131  }
132  return sum;
133 }
134 
/**
 * qsort() comparator for uint64_t values, ascending order.
 *
 * @return -1, 0 or 1 when *x is smaller than, equal to or larger than *y
 */
static int cmp(const void *x, const void *y)
{
    const uint64_t lhs = *(const uint64_t *) x;
    const uint64_t rhs = *(const uint64_t *) y;

    return (lhs > rhs) - (lhs < rhs);
}
140 
/**
 * Set the bit at position pos to 1 in data.
 * Bits are counted from the most significant bit of data[0]:
 * pos 0 is 0x80 of the first byte, pos 8 is 0x80 of the second byte.
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t byte_index = pos >> 3;
    const unsigned shift = 7u - (unsigned) (pos & 7);

    data[byte_index] |= (uint8_t) (1u << shift);
}
149 
/**
 * Compute the fine signature of one input frame and fold it into the
 * coarse signature(s) of the stream it belongs to.
 *
 * Steps, in order:
 *  - append a FineSignature to the stream's list (the first frame reuses the
 *    preallocated list head),
 *  - accumulate plane 0 of picref into a 32x32 grid and turn that grid into
 *    a summed-area table,
 *  - evaluate every element category in elements[], ternarize the results
 *    into fs->framesig and fs->words, and derive fs->confidence,
 *  - set the word bits in the current coarse signature(s),
 *  - optionally dump the signature at debug log level.
 *
 * @param inlink input link the frame arrived on; its index selects the
 *               StreamContext
 * @param picref the frame to analyse
 * @return the result of ff_filter_frame() for input 0, 1 for every other
 *         input, AVERROR(ENOMEM) if an allocation fails
 *
 * NOTE(review): picref is only handed on for input 0; no release of picref
 * is visible here for the other inputs or on the error returns -- confirm
 * who owns the frame in those cases.
 */
150 static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
151 {
152  AVFilterContext *ctx = inlink->dst;
153  SignatureContext *sic = ctx->priv;
154  StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
155  FineSignature* fs;
156 
/* place values of the five ternary digits that are packed into one byte */
157  static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
158  /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
159  s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
160  */
161  static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
162  static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
163 
164  uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
165  uint64_t intpic[32][32];
166  uint64_t rowcount;
167  uint8_t *p = picref->data[0];
168  int inti, intj;
169  int *intjlut;
170 
171  uint64_t conflist[DIFFELEM_SIZE];
/* f counts signature elements over all categories, g indexes conflist,
 * w indexes wordvec/s2usw */
172  int f = 0, g = 0, w = 0;
173  int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
174  int64_t denom;
175  int i, j, k, ternary;
176  uint64_t blocksum;
177  int blocksize;
178  int64_t th; /* threshold */
179  int64_t sum;
180 
/* with sc->divide set (see config_input) a smaller factor is used */
181  int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
182 
183  /* initialize fs */
184  if (sc->curfinesig) {
185  fs = av_mallocz(sizeof(FineSignature));
186  if (!fs)
187  return AVERROR(ENOMEM);
188  sc->curfinesig->next = fs;
189  fs->prev = sc->curfinesig;
190  sc->curfinesig = fs;
191  } else {
/* first frame of this stream: use the existing head of the list */
192  fs = sc->curfinesig = sc->finesiglist;
193  sc->curcoarsesig1->first = fs;
194  }
195 
196  fs->pts = picref->pts;
197  fs->index = sc->lastindex++;
198 
199  memset(intpic, 0, sizeof(uint64_t)*32*32);
/* lookup table: source column -> grid column (0..31) */
200  intjlut = av_malloc_array(inlink->w, sizeof(int));
201  if (!intjlut)
202  return AVERROR(ENOMEM);
203  for (i = 0; i < inlink->w; i++) {
204  intjlut[i] = (i*32)/inlink->w;
205  }
206 
/* accumulate every sample of plane 0 into its grid cell */
207  for (i = 0; i < inlink->h; i++) {
208  inti = (i*32)/inlink->h;
209  for (j = 0; j < inlink->w; j++) {
210  intj = intjlut[j];
211  intpic[inti][intj] += p[j];
212  }
213  p += picref->linesize[0];
214  }
215  av_freep(&intjlut);
216 
217  /* The following calculates a summed area table (intpic) and brings the numbers
218  * in intpic to the same denominator.
219  * So you only have to handle the numerator in the following sections.
220  */
221  dh1 = inlink->h / 32;
222  if (inlink->h % 32)
223  dh2 = dh1 + 1;
224  dw1 = inlink->w / 32;
225  if (inlink->w % 32)
226  dw2 = dw1 + 1;
227  denom = (sc->divide) ? dh1 * (int64_t)dh2 * dw1 * dw2 : 1;
228 
229  for (i = 0; i < 32; i++) {
230  rowcount = 0;
231  a = 1;
232  if (dh2 > 1) {
/* a becomes the complementary height (dh2 when this grid row holds dh1
 * source rows, dh1 otherwise) */
233  a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
234  a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
235  a = (a == dh1)? dh2 : dh1;
236  }
237  for (j = 0; j < 32; j++) {
238  b = 1;
239  if (dw2 > 1) {
/* same as above for the width of grid column j */
240  b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
241  b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
242  b = (b == dw1)? dw2 : dw1;
243  }
/* running sum along the row, plus the finished table value of the row above */
244  rowcount += intpic[i][j] * a * b * precfactor / denom;
245  if (i > 0) {
246  intpic[i][j] = intpic[i-1][j] + rowcount;
247  } else {
248  intpic[i][j] = rowcount;
249  }
250  }
251  }
252 
/* from here on denom is the factor still contained in the table values */
253  denom = (sc->divide) ? 1 : dh1 * (int64_t)dh2 * dw1 * dw2;
254 
255  for (i = 0; i < ELEMENT_COUNT; i++) {
256  const ElemCat* elemcat = elements[i];
257  int64_t* elemsignature;
258  uint64_t* sortsignature;
259 
260  elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
261  if (!elemsignature)
262  return AVERROR(ENOMEM);
263  sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
264  if (!sortsignature) {
265  av_freep(&elemsignature);
266  return AVERROR(ENOMEM);
267  }
268 
269  for (j = 0; j < elemcat->elem_count; j++) {
270  blocksum = 0;
271  blocksize = 0;
/* the first left_count blocks of the element form its first region */
272  for (k = 0; k < elemcat->left_count; k++) {
273  blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
274  blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
275  }
276  sum = blocksum / blocksize;
277  if (elemcat->av_elem) {
/* average element: offset the mean by 128 in the scaled domain */
278  sum -= 128 * precfactor * denom;
279  } else {
/* difference element: subtract the mean of the remaining blocks */
280  blocksum = 0;
281  blocksize = 0;
282  for (; k < elemcat->block_count; k++) {
283  blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
284  blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
285  }
286  sum -= blocksum / blocksize;
287  conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
288  }
289 
290  elemsignature[j] = sum;
291  sortsignature[j] = FFABS(sum);
292  }
293 
294  /* get threshold */
/* the value at roughly one third of the sorted absolute values */
295  qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), cmp);
296  th = sortsignature[(int) (elemcat->elem_count*0.333)];
297 
298  /* ternarize */
299  for (j = 0; j < elemcat->elem_count; j++) {
300  if (elemsignature[j] < -th) {
301  ternary = 0;
302  } else if (elemsignature[j] <= th) {
303  ternary = 1;
304  } else {
305  ternary = 2;
306  }
/* five ternary digits per framesig byte */
307  fs->framesig[f/5] += ternary * pot3[f%5];
308 
/* elements listed in wordvec additionally contribute a digit to a word */
309  if (f == wordvec[w]) {
310  fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
311  if (w < 24)
312  w++;
313  }
314  f++;
315  }
316  av_freep(&elemsignature);
317  av_freep(&sortsignature);
318  }
319 
320  /* confidence */
/* median of the values collected in conflist, capped at 255 */
321  qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), cmp);
322  fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
323 
324  /* coarsesignature */
/* coarsecount runs 0..89: a coarse signature is (re)started at 0 and a
 * second, overlapping one at 45 */
325  if (sc->coarsecount == 0) {
326  if (sc->curcoarsesig2) {
/* NOTE(review): no assignment to sc->curcoarsesig1 is visible before this
 * check and the listing numbering jumps from 326 to 328; a line (presumably
 * the allocation of the new coarse signature) seems to be missing from this
 * view -- confirm against the upstream file. */
328  if (!sc->curcoarsesig1)
329  return AVERROR(ENOMEM);
330  sc->curcoarsesig1->first = fs;
331  sc->curcoarsesig2->next = sc->curcoarsesig1;
332  sc->coarseend = sc->curcoarsesig1;
333  }
334  }
335  if (sc->coarsecount == 45) {
336  sc->midcoarse = 1;
/* NOTE(review): same as above, the listing skips line 337; no assignment to
 * sc->curcoarsesig2 is visible before the check -- confirm upstream. */
338  if (!sc->curcoarsesig2)
339  return AVERROR(ENOMEM);
340  sc->curcoarsesig2->first = fs;
341  sc->curcoarsesig1->next = sc->curcoarsesig2;
342  sc->coarseend = sc->curcoarsesig2;
343  }
/* mark each of the five words of this frame in the coarse signature */
344  for (i = 0; i < 5; i++) {
345  set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
346  }
347  /* assuming the actual frame is the last */
348  sc->curcoarsesig1->last = fs;
349  if (sc->midcoarse) {
350  for (i = 0; i < 5; i++) {
351  set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
352  }
353  sc->curcoarsesig2->last = fs;
354  }
355 
356  sc->coarsecount = (sc->coarsecount+1)%90;
357 
358  /* debug printing finesignature */
/* only when the log level is exactly AV_LOG_DEBUG */
359  if (av_log_get_level() == AV_LOG_DEBUG) {
360  av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
361 
362  av_log(ctx, AV_LOG_DEBUG, "words:");
363  for (i = 0; i < 5; i++) {
364  av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
365  av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
366  for (j = 1; j < 5; j++)
367  av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
368  av_log(ctx, AV_LOG_DEBUG, ";");
369  }
370  av_log(ctx, AV_LOG_DEBUG, "\n");
371 
372  av_log(ctx, AV_LOG_DEBUG, "framesignature:");
373  for (i = 0; i < SIGELEM_SIZE/5; i++) {
374  av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
375  for (j = 1; j < 5; j++)
376  av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
377  }
378  av_log(ctx, AV_LOG_DEBUG, "\n");
379  }
380 
/* only frames of the first input are passed on to the output */
381  if (FF_INLINK_IDX(inlink) == 0)
382  return ff_filter_frame(inlink->dst->outputs[0], picref);
383  return 1;
384 }
385 
386 static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
387 {
388  FineSignature* fs;
389  CoarseSignature* cs;
390  int i, j;
391  FILE* f;
392  unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
393 
394  if (!sc->coarseend->last)
395  return AVERROR(EINVAL); // No frames ?
396 
397  f = fopen(filename, "w");
398  if (!f) {
399  int err = AVERROR(EINVAL);
400  char buf[128];
401  av_strerror(err, buf, sizeof(buf));
402  av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
403  return err;
404  }
405 
406  /* header */
407  fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
408  fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
409  fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
410  fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
411  fprintf(f, " <VideoSignatureRegion>\n");
412  fprintf(f, " <VideoSignatureSpatialRegion>\n");
413  fprintf(f, " <Pixel>0 0 </Pixel>\n");
414  fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
415  fprintf(f, " </VideoSignatureSpatialRegion>\n");
416  fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
417  /* hoping num is 1, other values are vague */
418  fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
419  fprintf(f, " <MediaTimeOfSpatialRegion>\n");
420  fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
421  fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
422  fprintf(f, " </MediaTimeOfSpatialRegion>\n");
423 
424  /* coarsesignatures */
425  for (cs = sc->coarsesiglist; cs; cs = cs->next) {
426  fprintf(f, " <VSVideoSegment>\n");
427  fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
428  fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
429  fprintf(f, " <MediaTimeOfSegment>\n");
430  fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
431  fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
432  fprintf(f, " </MediaTimeOfSegment>\n");
433  for (i = 0; i < 5; i++) {
434  fprintf(f, " <BagOfWords>");
435  for (j = 0; j < 31; j++) {
436  uint8_t n = cs->data[i][j];
437  if (j < 30) {
438  fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
439  (n & 0x40) >> 6,
440  (n & 0x20) >> 5,
441  (n & 0x10) >> 4,
442  (n & 0x08) >> 3,
443  (n & 0x04) >> 2,
444  (n & 0x02) >> 1,
445  (n & 0x01));
446  } else {
447  /* print only 3 bit in last byte */
448  fprintf(f, "%d %d %d ", (n & 0x80) >> 7,
449  (n & 0x40) >> 6,
450  (n & 0x20) >> 5);
451  }
452  }
453  fprintf(f, "</BagOfWords>\n");
454  }
455  fprintf(f, " </VSVideoSegment>\n");
456  }
457 
458  /* finesignatures */
459  for (fs = sc->finesiglist; fs; fs = fs->next) {
460  fprintf(f, " <VideoFrame>\n");
461  fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
462  /* confidence */
463  fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
464  /* words */
465  fprintf(f, " <Word>");
466  for (i = 0; i < 5; i++) {
467  fprintf(f, "%d ", fs->words[i]);
468  if (i < 4) {
469  fprintf(f, " ");
470  }
471  }
472  fprintf(f, "</Word>\n");
473  /* framesignature */
474  fprintf(f, " <FrameSignature>");
475  for (i = 0; i< SIGELEM_SIZE/5; i++) {
476  if (i > 0) {
477  fprintf(f, " ");
478  }
479  fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
480  for (j = 1; j < 5; j++)
481  fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
482  }
483  fprintf(f, "</FrameSignature>\n");
484  fprintf(f, " </VideoFrame>\n");
485  }
486  fprintf(f, " </VideoSignatureRegion>\n");
487  fprintf(f, " </Descriptor>\n");
488  fprintf(f, " </DescriptionUnit>\n");
489  fprintf(f, "</Mpeg7>\n");
490 
491  fclose(f);
492  return 0;
493 }
494 
/**
 * Write the signatures of one stream to filename as a packed bit stream.
 *
 * The whole stream is first assembled in a memory buffer with the put_bits
 * writer (header fields, then every coarse signature, then every fine
 * signature) and written to the file with a single fwrite().
 *
 * @param ctx      filter context, used for logging only
 * @param sc       stream whose signature lists are written
 * @param filename path of the file to create or overwrite
 * @return 0 on success, AVERROR(ENOMEM) if the buffer is unavailable,
 *         AVERROR(EINVAL) if the file cannot be opened
 *
 * NOTE(review): the results of fwrite() and fclose() are not checked.
 */
495 static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
496 {
497  FILE* f;
498  FineSignature* fs;
499  CoarseSignature* cs;
/* one segment per 45 frames, rounded up */
500  uint32_t numofsegments = (sc->lastindex + 44)/45;
501  int i, j;
502  PutBitContext buf;
503  /* buffer + header + coarsesignatures + finesignature */
/* the sum is counted in bits and divided by 8 to get bytes */
504  int len = (512 + 6 * 32 + 3*16 + 2 +
505  numofsegments * (4*32 + 1 + 5*243) +
506  sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
/* NOTE(review): `buffer` is used below but its declaration is not visible;
 * the listing numbering jumps from 506 to 508, so the line that declares and
 * allocates it (presumably len bytes) seems to be missing from this view --
 * confirm against the upstream file. */
508  if (!buffer)
509  return AVERROR(ENOMEM);
510 
511  f = fopen(filename, "wb");
512  if (!f) {
/* the error code is fixed to EINVAL, errno is not consulted */
513  int err = AVERROR(EINVAL);
/* this local char array shadows the PutBitContext `buf` inside the block */
514  char buf[128];
515  av_strerror(err, buf, sizeof(buf));
516  av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf);
517  av_freep(&buffer);
518  return err;
519  }
520  init_put_bits(&buf, buffer, len);
521 
522  put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
523  put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
524  put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
525  put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
526  put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
527  put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
528  put_bits32(&buf, sc->lastindex); /* NumOfFrames */
529  /* hoping num is 1, other values are vague */
530  /* den/num might be greater than 16 bit, so cutting it */
531  put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
532  put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
533  put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
/* timestamps are truncated to their low 32 bits */
534  put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
535  put_bits32(&buf, numofsegments); /* NumOfSegments */
536  /* coarsesignatures */
537  for (cs = sc->coarsesiglist; cs; cs = cs->next) {
538  put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
539  put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
540  put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
541  put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
542  put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
543  for (i = 0; i < 5; i++) {
544  /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
/* written as 30 full bytes plus the top 3 bits of the 31st byte */
545  for (j = 0; j < 30; j++) {
546  put_bits(&buf, 8, cs->data[i][j]);
547  }
548  put_bits(&buf, 3, cs->data[i][30] >> 5);
549  }
550  }
551  /* finesignatures */
552  put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
553  for (fs = sc->finesiglist; fs; fs = fs->next) {
554  put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
555  put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
556  put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
557  for (i = 0; i < 5; i++) {
558  put_bits(&buf, 8, fs->words[i]); /* Words */
559  }
560  /* framesignature */
561  for (i = 0; i < SIGELEM_SIZE/5; i++) {
562  put_bits(&buf, 8, fs->framesig[i]);
563  }
564  }
565 
/* pad to a byte boundary, then write the bytes produced so far */
566  flush_put_bits(&buf);
567  fwrite(buffer, 1, put_bits_count(&buf)/8, f);
568  fclose(f);
569  av_freep(&buffer);
570  return 0;
571 }
572 
573 static int export(AVFilterContext *ctx, StreamContext *sc, int input)
574 {
575  SignatureContext* sic = ctx->priv;
576  char filename[1024];
577 
578  if (sic->nb_inputs > 1) {
579  /* error already handled */
580  av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
581  } else {
582  if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename))
583  return AVERROR(EINVAL);
584  }
585  if (sic->format == FORMAT_XML) {
586  return xml_export(ctx, sc, filename);
587  } else {
588  return binary_export(ctx, sc, filename);
589  }
590 }
591 
592 static int request_frame(AVFilterLink *outlink)
593 {
594  AVFilterContext *ctx = outlink->src;
595  SignatureContext *sic = ctx->priv;
596  StreamContext *sc, *sc2;
597  MatchingInfo match;
598  int i, j, ret;
599  int lookup = 1; /* indicates wheather EOF of all files is reached */
600 
601  /* process all inputs */
602  for (i = 0; i < sic->nb_inputs; i++){
603  sc = &(sic->streamcontexts[i]);
604 
605  ret = ff_request_frame(ctx->inputs[i]);
606 
607  /* return if unexpected error occurs in input stream */
608  if (ret < 0 && ret != AVERROR_EOF)
609  return ret;
610 
611  /* export signature at EOF */
612  if (ret == AVERROR_EOF && !sc->exported) {
613  /* export if wanted */
614  if (strlen(sic->filename) > 0) {
615  if (export(ctx, sc, i) < 0)
616  return ret;
617  }
618  sc->exported = 1;
619  }
620  lookup &= sc->exported;
621  }
622 
623  /* signature lookup */
624  if (lookup && sic->mode != MODE_OFF) {
625  /* iterate over every pair */
626  for (i = 0; i < sic->nb_inputs; i++) {
627  sc = &(sic->streamcontexts[i]);
628  for (j = i+1; j < sic->nb_inputs; j++) {
629  sc2 = &(sic->streamcontexts[j]);
630  match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
631  if (match.score != 0) {
632  av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
633  i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
634  j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
635  match.matchframes);
636  if (match.whole)
637  av_log(ctx, AV_LOG_INFO, "whole video matching\n");
638  } else {
639  av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
640  }
641  }
642  }
643  }
644 
645  return ret;
646 }
647 
649 {
650 
651  SignatureContext *sic = ctx->priv;
652  StreamContext *sc;
653  int i, ret;
654  char tmp[1024];
655 
656  sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
657  if (!sic->streamcontexts)
658  return AVERROR(ENOMEM);
659 
660  for (i = 0; i < sic->nb_inputs; i++) {
661  AVFilterPad pad = {
663  .name = av_asprintf("in%d", i),
664  .config_props = config_input,
665  .filter_frame = filter_frame,
666  };
667 
668  if (!pad.name)
669  return AVERROR(ENOMEM);
670  if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
671  av_freep(&pad.name);
672  return ret;
673  }
674 
675  sc = &(sic->streamcontexts[i]);
676 
677  sc->lastindex = 0;
678  sc->finesiglist = av_mallocz(sizeof(FineSignature));
679  if (!sc->finesiglist)
680  return AVERROR(ENOMEM);
681  sc->curfinesig = NULL;
682 
684  if (!sc->coarsesiglist)
685  return AVERROR(ENOMEM);
686  sc->curcoarsesig1 = sc->coarsesiglist;
687  sc->coarseend = sc->coarsesiglist;
688  sc->coarsecount = 0;
689  sc->midcoarse = 0;
690  }
691 
692  /* check filename */
693  if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
694  av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
695  return AVERROR(EINVAL);
696  }
697 
698  return 0;
699 }
700 
701 
702 
704 {
705  SignatureContext *sic = ctx->priv;
706  StreamContext *sc;
707  void* tmp;
708  FineSignature* finsig;
709  CoarseSignature* cousig;
710  int i;
711 
712 
713  /* free the lists */
714  if (sic->streamcontexts != NULL) {
715  for (i = 0; i < sic->nb_inputs; i++) {
716  sc = &(sic->streamcontexts[i]);
717  finsig = sc->finesiglist;
718  cousig = sc->coarsesiglist;
719 
720  while (finsig) {
721  tmp = finsig;
722  finsig = finsig->next;
723  av_freep(&tmp);
724  }
725  sc->finesiglist = NULL;
726 
727  while (cousig) {
728  tmp = cousig;
729  cousig = cousig->next;
730  av_freep(&tmp);
731  }
732  sc->coarsesiglist = NULL;
733  }
734  av_freep(&sic->streamcontexts);
735  }
736  for (unsigned i = 0; i < ctx->nb_inputs; i++)
737  av_freep(&ctx->input_pads[i].name);
738 }
739 
740 static int config_output(AVFilterLink *outlink)
741 {
742  AVFilterContext *ctx = outlink->src;
743  AVFilterLink *inlink = ctx->inputs[0];
744 
745  outlink->time_base = inlink->time_base;
746  outlink->frame_rate = inlink->frame_rate;
747  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
748  outlink->w = inlink->w;
749  outlink->h = inlink->h;
750 
751  return 0;
752 }
753 
754 static const AVFilterPad signature_outputs[] = {
755  {
756  .name = "default",
757  .type = AVMEDIA_TYPE_VIDEO,
758  .request_frame = request_frame,
759  .config_props = config_output,
760  },
761  { NULL }
762 };
763 
765  .name = "signature",
766  .description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
767  .priv_size = sizeof(SignatureContext),
768  .priv_class = &signature_class,
769  .init = init,
770  .uninit = uninit,
773  .inputs = NULL,
775 };
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static const char *const format[]
Definition: af_aiir.c:456
#define av_cold
Definition: attributes.h:88
uint8_t
int32_t
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1096
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:408
Main libavfilter public API header.
Main libavformat public API header.
char * av_asprintf(const char *fmt,...)
Definition: avstring.c:113
#define flags(name, subs,...)
Definition: cbs_av1.c:572
#define f(width, name)
Definition: cbs_vp9.c:255
#define fs(width, name, subs,...)
Definition: cbs_vp9.c:259
#define FFMIN(a, b)
Definition: common.h:105
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
#define NULL
Definition: coverity.c:32
long long int64_t
Definition: coverity.c:34
mode
Use these values in ebur128_init (or'ed).
Definition: ebur128.h:83
int
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
@ AV_OPT_TYPE_INT
Definition: opt.h:225
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
int av_get_frame_filename(char *buf, int buf_size, const char *path, int number)
Definition: utils.c:4794
#define AVFILTER_FLAG_DYNAMIC_INPUTS
The number of the filter inputs is not determined just by AVFilter.inputs.
Definition: avfilter.h:106
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
Definition: error.c:105
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define AVERROR(e)
Definition: error.h:43
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:215
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
#define AV_LOG_INFO
Standard information.
Definition: log.h:205
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
int av_log_get_level(void)
Get the current log level.
Definition: log.c:435
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
Definition: avstring.c:83
for(j=16;j >0;--j)
int i
Definition: input.c:407
static const char signature[]
Definition: ipmovie.c:615
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:218
static int ff_insert_inpad(AVFilterContext *f, unsigned index, AVFilterPad *p)
Insert a new input pad for the filter.
Definition: internal.h:240
#define FF_INLINK_IDX(link)
Find the index of a link.
Definition: internal.h:302
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:309
uint8_t w
Definition: llviddspenc.c:39
static const uint16_t mask[17]
Definition: lzw.c:38
const char data[16]
Definition: mxf.c:142
AVOptions.
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:74
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:100
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
@ AV_PIX_FMT_YUVJ411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
Definition: pixfmt.h:258
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
bitstream writer API
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:57
static void av_unused put_bits32(PutBitContext *s, uint32_t value)
Write exactly 32 bits into a bitstream.
Definition: put_bits.h:263
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:76
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:110
#define th
Definition: regdef.h:75
static char buffer[20]
Definition: seek.c:32
MPEG-7 video signature calculation and lookup filter.
@ NB_FORMATS
Definition: signature.h:51
@ FORMAT_BINARY
Definition: signature.h:49
@ FORMAT_XML
Definition: signature.h:50
static const ElemCat * elements[ELEMENT_COUNT]
Definition: signature.h:566
#define SIGELEM_SIZE
Definition: signature.h:37
#define ELEMENT_COUNT
Definition: signature.h:36
#define DIFFELEM_SIZE
Definition: signature.h:38
@ MODE_OFF
Definition: signature.h:42
@ NB_LOOKUP_MODE
Definition: signature.h:45
@ MODE_FULL
Definition: signature.h:43
@ MODE_FAST
Definition: signature.h:44
MPEG-7 video signature calculation and lookup filter.
static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
unsigned int pos
Definition: spdifenc.c:412
An instance of a filter.
Definition: avfilter.h:341
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:353
A filter pad used for either input or output.
Definition: internal.h:54
enum AVMediaType type
AVFilterPad type.
Definition: internal.h:65
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:411
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:332
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:349
AVOption.
Definition: opt.h:248
int num
Numerator.
Definition: rational.h:59
int den
Denominator.
Definition: rational.h:60
struct FineSignature * first
Definition: signature.h:84
struct CoarseSignature * next
Definition: signature.h:86
uint8_t data[5][31]
Definition: signature.h:83
struct FineSignature * last
Definition: signature.h:85
short left_count
Definition: signature.h:66
int av_elem
Definition: signature.h:65
short elem_count
Definition: signature.h:68
const Block * blocks
Definition: signature.h:69
short block_count
Definition: signature.h:67
struct FineSignature * next
Definition: signature.h:73
uint64_t pts
Definition: signature.h:75
uint32_t index
Definition: signature.h:76
struct FineSignature * second
Definition: signature.h:98
struct FineSignature * first
Definition: signature.h:97
int matchframes
Definition: signature.h:95
StreamContext * streamcontexts
Definition: signature.h:142
FineSignature * finesiglist
Definition: signature.h:111
CoarseSignature * curcoarsesig1
Definition: signature.h:117
FineSignature * curfinesig
Definition: signature.h:112
uint32_t lastindex
Definition: signature.h:122
AVRational time_base
Definition: signature.h:103
CoarseSignature * coarsesiglist
Definition: signature.h:114
CoarseSignature * curcoarsesig2
Definition: signature.h:118
CoarseSignature * coarseend
Definition: signature.h:115
#define av_malloc_array(a, b)
#define av_freep(p)
#define av_log(a,...)
static uint8_t tmp[11]
Definition: aes_ctr.c:27
AVFormatContext * ctx
Definition: movenc.c:48
timestamp utils, mostly useful for debugging/logging purposes
const char * b
Definition: vf_curves.c:118
const char * g
Definition: vf_curves.c:117
static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
Definition: vf_signature.c:386
#define BLOCK_LCM
Definition: vf_signature.c:41
static int export(AVFilterContext *ctx, StreamContext *sc, int input)
Definition: vf_signature.c:573
static int query_formats(AVFilterContext *ctx)
Definition: vf_signature.c:72
static const AVOption signature_options[]
Definition: vf_signature.c:43
static int config_input(AVFilterLink *inlink)
Definition: vf_signature.c:90
#define FLAGS
Definition: vf_signature.c:40
static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char *filename)
Definition: vf_signature.c:495
static int request_frame(AVFilterLink *outlink)
Definition: vf_signature.c:592
static void set_bit(uint8_t *data, size_t pos)
Sets the bit at position pos to 1 in data.
Definition: vf_signature.c:144
static int cmp(const void *x, const void *y)
Definition: vf_signature.c:135
static av_cold int init(AVFilterContext *ctx)
Definition: vf_signature.c:648
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_signature.c:703
static const AVFilterPad signature_outputs[]
Definition: vf_signature.c:754
static int get_block_size(const Block *b)
Definition: vf_signature.c:107
#define OFFSET(x)
Definition: vf_signature.c:39
static int config_output(AVFilterLink *outlink)
Definition: vf_signature.c:740
static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
Definition: vf_signature.c:150
AVFILTER_DEFINE_CLASS(signature)
AVFilter ff_vf_signature
Definition: vf_signature.c:764
static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
Definition: vf_signature.c:112
int lookup
int len