github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/image/webp/libwebp/src/enc/frame.c (about)

     1  // Copyright 2011 Google Inc. All Rights Reserved.
     2  //
     3  // Use of this source code is governed by a BSD-style license
     4  // that can be found in the COPYING file in the root of the source
     5  // tree. An additional intellectual property rights grant can be found
     6  // in the file PATENTS. All contributing project authors may
     7  // be found in the AUTHORS file in the root of the source tree.
     8  // -----------------------------------------------------------------------------
     9  //
    10  //   frame coding and analysis
    11  //
    12  // Author: Skal (pascal.massimino@gmail.com)
    13  
    14  #include <assert.h>
    15  #include <stdlib.h>
    16  #include <string.h>
    17  #include <math.h>
    18  
    19  #include "./vp8enci.h"
    20  #include "./cost.h"
    21  #include "../webp/format_constants.h"  // RIFF constants
    22  
// Set to 1 to overwrite the it->yuv_out_ blocks with a visualization of the
// segments and prediction modes (debug only, see StoreSideInfo()).
#define SEGMENT_VISU 0
#define DEBUG_SEARCH 0    // useful to track search convergence
    25  
// On-the-fly info about the current set of residuals. Handy to avoid
// passing zillions of params.
typedef struct {
  int first;   // index of the first coefficient to process (see InitResidual)
  int last;    // index of the last non-zero coefficient, or -1 if there's none
  const int16_t* coeffs;   // the levels, as passed to SetResidualCoeffs()

  int coeff_type;      // type of coefficients: selects the three tables below
  ProbaArray* prob;    // = enc->proba_.coeffs_[coeff_type]
  StatsArray* stats;   // = enc->proba_.stats_[coeff_type]
  CostArray*  cost;    // = enc->proba_.level_cost_[coeff_type]
} VP8Residual;
    38  
    39  //------------------------------------------------------------------------------
    40  // multi-pass convergence
    41  
// Estimate of the headers' size, added to the coded-size estimate computed in
// OneStatPass() (presumably expressed in bytes, like the target size).
#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE +  \
                              VP8_FRAME_HEADER_SIZE)
#define DQ_LIMIT 0.4  // convergence is considered reached if dq < DQ_LIMIT
// we allow 2k of extra head-room in PARTITION0 limit.
// The '<< 11' puts the limit on the same scale as the 'size_p0' value it is
// compared against in StatLoop(): OneStatPass() applies '>> 11' to such
// accumulated costs before adding HEADER_SIZE_ESTIMATE.
#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11)
    47  
typedef struct {  // struct for organizing convergence in either size or PSNR
  int is_first;   // 1 until ComputeNextQ() has been called once
  float dq;       // last step applied to 'q' (starts at 10, see InitPassStats)
  float q, last_q;   // current and previous quality factors
  double value, last_value;   // PSNR or size
  double target;   // value to reach: a size or a PSNR, depending on the mode
  int do_size_search;   // non-zero if 'target' is a size rather than a PSNR
} PassStats;
    56  
    57  static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
    58    const uint64_t target_size = (uint64_t)enc->config_->target_size;
    59    const int do_size_search = (target_size != 0);
    60    const float target_PSNR = enc->config_->target_PSNR;
    61  
    62    s->is_first = 1;
    63    s->dq = 10.f;
    64    s->q = s->last_q = enc->config_->quality;
    65    s->target = do_size_search ? (double)target_size
    66              : (target_PSNR > 0.) ? target_PSNR
    67              : 40.;   // default, just in case
    68    s->value = s->last_value = 0.;
    69    s->do_size_search = do_size_search;
    70    return do_size_search;
    71  }
    72  
    73  static float Clamp(float v, float min, float max) {
    74    return (v < min) ? min : (v > max) ? max : v;
    75  }
    76  
    77  static float ComputeNextQ(PassStats* const s) {
    78    float dq;
    79    if (s->is_first) {
    80      dq = (s->value > s->target) ? -s->dq : s->dq;
    81      s->is_first = 0;
    82    } else if (s->value != s->last_value) {
    83      const double slope = (s->target - s->value) / (s->last_value - s->value);
    84      dq = (float)(slope * (s->last_q - s->q));
    85    } else {
    86      dq = 0.;  // we're done?!
    87    }
    88    // Limit variable to avoid large swings.
    89    s->dq = Clamp(dq, -30.f, 30.f);
    90    s->last_q = s->q;
    91    s->last_value = s->value;
    92    s->q = Clamp(s->q + s->dq, 0.f, 100.f);
    93    return s->q;
    94  }
    95  
    96  //------------------------------------------------------------------------------
    97  // Tables for level coding
    98  
// Maps a coefficient index to its band. The extra 17th entry lets the coding
// loops look up VP8EncBands[n] with n == 16, right after the last coefficient.
const uint8_t VP8EncBands[16 + 1] = {
  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
  0  // sentinel
};

// Fixed probabilities used by PutCoeffs() for coding the extra bits of the
// large levels (categories 3 to 6), one entry per extra bit.
const uint8_t VP8Cat3[] = { 173, 148, 140 };
const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };
const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };
const uint8_t VP8Cat6[] =
    { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
   109  
   110  //------------------------------------------------------------------------------
   111  // Reset the statistics about: number of skips, token proba, level cost,...
   112  
   113  static void ResetStats(VP8Encoder* const enc) {
   114    VP8Proba* const proba = &enc->proba_;
   115    VP8CalculateLevelCosts(proba);
   116    proba->nb_skip_ = 0;
   117  }
   118  
   119  //------------------------------------------------------------------------------
   120  // Skip decision probability
   121  
   122  #define SKIP_PROBA_THRESHOLD 250  // value below which using skip_proba is OK.
   123  
   124  static int CalcSkipProba(uint64_t nb, uint64_t total) {
   125    return (int)(total ? (total - nb) * 255 / total : 255);
   126  }
   127  
   128  // Returns the bit-cost for coding the skip probability.
   129  static int FinalizeSkipProba(VP8Encoder* const enc) {
   130    VP8Proba* const proba = &enc->proba_;
   131    const int nb_mbs = enc->mb_w_ * enc->mb_h_;
   132    const int nb_events = proba->nb_skip_;
   133    int size;
   134    proba->skip_proba_ = CalcSkipProba(nb_events, nb_mbs);
   135    proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
   136    size = 256;   // 'use_skip_proba' bit
   137    if (proba->use_skip_proba_) {
   138      size +=  nb_events * VP8BitCost(1, proba->skip_proba_)
   139           + (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba_);
   140      size += 8 * 256;   // cost of signaling the skip_proba_ itself.
   141    }
   142    return size;
   143  }
   144  
   145  //------------------------------------------------------------------------------
   146  // Recording of token probabilities.
   147  
   148  static void ResetTokenStats(VP8Encoder* const enc) {
   149    VP8Proba* const proba = &enc->proba_;
   150    memset(proba->stats_, 0, sizeof(proba->stats_));
   151  }
   152  
   153  // Record proba context used
   154  static int Record(int bit, proba_t* const stats) {
   155    proba_t p = *stats;
   156    if (p >= 0xffff0000u) {               // an overflow is inbound.
   157      p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
   158    }
   159    // record bit count (lower 16 bits) and increment total count (upper 16 bits).
   160    p += 0x00010000u + bit;
   161    *stats = p;
   162    return bit;
   163  }
   164  
// We keep the table free variant around for reference, in case.
#define USE_LEVEL_CODE_TABLE

// Simulate block coding, but only record statistics.
// Note: no need to record the fixed probas.
// 'ctx' is the context to use for the first coefficient. The function walks
// the coefficients from res->first to res->last, recording with Record() each
// binary decision in res->stats.
// Returns 0 if the block has no non-zero coefficient (res->last < 0), and 1
// otherwise.
static int RecordCoeffs(int ctx, const VP8Residual* const res) {
  int n = res->first;
  // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
  proba_t* s = res->stats[n][ctx];
  if (res->last  < 0) {
    Record(0, s + 0);   // empty block: only the 'no coefficient' event
    return 0;
  }
  while (n <= res->last) {
    int v;
    Record(1, s + 0);  // order of record doesn't matter
    // Skip the zeros. This relies on coeffs[res->last] being non-zero, as set
    // up by SetResidualCoeffs(), to stop within the block.
    while ((v = res->coeffs[n++]) == 0) {
      Record(0, s + 1);
      s = res->stats[VP8EncBands[n]][0];
    }
    Record(1, s + 1);
    // The unsigned comparison is true only if v is neither -1, 0 nor 1.
    if (!Record(2u < (unsigned int)(v + 1), s + 2)) {  // v = -1 or 1
      s = res->stats[VP8EncBands[n]][1];
    } else {
      v = abs(v);
#if !defined(USE_LEVEL_CODE_TABLE)
      if (!Record(v > 4, s + 3)) {
        if (Record(v != 2, s + 4))
          Record(v == 4, s + 5);
      } else if (!Record(v > 10, s + 6)) {
        Record(v > 6, s + 7);
      } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
        Record((v >= 3 + (8 << 1)), s + 9);
      } else {
        Record((v >= 3 + (8 << 3)), s + 10);
      }
#else
      // Larger levels all use the entry of MAX_VARIABLE_LEVEL.
      if (v > MAX_VARIABLE_LEVEL)
        v = MAX_VARIABLE_LEVEL;

      {
        // Each bit set in 'pattern' (above its lowest one) flags a proba
        // s[3 + i] that is used for this level. The corresponding bit of
        // 'bits' is the value that gets recorded for it.
        const int bits = VP8LevelCodes[v - 1][1];
        int pattern = VP8LevelCodes[v - 1][0];
        int i;
        for (i = 0; (pattern >>= 1) != 0; ++i) {
          const int mask = 2 << i;
          if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
        }
      }
#endif
      s = res->stats[VP8EncBands[n]][2];
    }
  }
  // If the block doesn't extend to the 16th coefficient, an end-of-block
  // event follows the last non-zero coefficient.
  if (n < 16) Record(0, s + 0);
  return 1;
}
   221  
   222  // Collect statistics and deduce probabilities for next coding pass.
   223  // Return the total bit-cost for coding the probability updates.
   224  static int CalcTokenProba(int nb, int total) {
   225    assert(nb <= total);
   226    return nb ? (255 - nb * 255 / total) : 255;
   227  }
   228  
   229  // Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
   230  static int BranchCost(int nb, int total, int proba) {
   231    return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
   232  }
   233  
   234  static int FinalizeTokenProbas(VP8Proba* const proba) {
   235    int has_changed = 0;
   236    int size = 0;
   237    int t, b, c, p;
   238    for (t = 0; t < NUM_TYPES; ++t) {
   239      for (b = 0; b < NUM_BANDS; ++b) {
   240        for (c = 0; c < NUM_CTX; ++c) {
   241          for (p = 0; p < NUM_PROBAS; ++p) {
   242            const proba_t stats = proba->stats_[t][b][c][p];
   243            const int nb = (stats >> 0) & 0xffff;
   244            const int total = (stats >> 16) & 0xffff;
   245            const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
   246            const int old_p = VP8CoeffsProba0[t][b][c][p];
   247            const int new_p = CalcTokenProba(nb, total);
   248            const int old_cost = BranchCost(nb, total, old_p)
   249                               + VP8BitCost(0, update_proba);
   250            const int new_cost = BranchCost(nb, total, new_p)
   251                               + VP8BitCost(1, update_proba)
   252                               + 8 * 256;
   253            const int use_new_p = (old_cost > new_cost);
   254            size += VP8BitCost(use_new_p, update_proba);
   255            if (use_new_p) {  // only use proba that seem meaningful enough.
   256              proba->coeffs_[t][b][c][p] = new_p;
   257              has_changed |= (new_p != old_p);
   258              size += 8 * 256;
   259            } else {
   260              proba->coeffs_[t][b][c][p] = old_p;
   261            }
   262          }
   263        }
   264      }
   265    }
   266    proba->dirty_ = has_changed;
   267    return size;
   268  }
   269  
   270  //------------------------------------------------------------------------------
   271  // Finalize Segment probability based on the coding tree
   272  
   273  static int GetProba(int a, int b) {
   274    const int total = a + b;
   275    return (total == 0) ? 255     // that's the default probability.
   276                        : (255 * a + total / 2) / total;  // rounded proba
   277  }
   278  
   279  static void SetSegmentProbas(VP8Encoder* const enc) {
   280    int p[NUM_MB_SEGMENTS] = { 0 };
   281    int n;
   282  
   283    for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
   284      const VP8MBInfo* const mb = &enc->mb_info_[n];
   285      p[mb->segment_]++;
   286    }
   287    if (enc->pic_->stats != NULL) {
   288      for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
   289        enc->pic_->stats->segment_size[n] = p[n];
   290      }
   291    }
   292    if (enc->segment_hdr_.num_segments_ > 1) {
   293      uint8_t* const probas = enc->proba_.segments_;
   294      probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
   295      probas[1] = GetProba(p[0], p[1]);
   296      probas[2] = GetProba(p[2], p[3]);
   297  
   298      enc->segment_hdr_.update_map_ =
   299          (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
   300      enc->segment_hdr_.size_ =
   301          p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
   302          p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
   303          p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
   304          p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
   305    } else {
   306      enc->segment_hdr_.update_map_ = 0;
   307      enc->segment_hdr_.size_ = 0;
   308    }
   309  }
   310  
   311  //------------------------------------------------------------------------------
   312  // helper functions for residuals struct VP8Residual.
   313  
   314  static void InitResidual(int first, int coeff_type,
   315                           VP8Encoder* const enc, VP8Residual* const res) {
   316    res->coeff_type = coeff_type;
   317    res->prob  = enc->proba_.coeffs_[coeff_type];
   318    res->stats = enc->proba_.stats_[coeff_type];
   319    res->cost  = enc->proba_.level_cost_[coeff_type];
   320    res->first = first;
   321  }
   322  
   323  static void SetResidualCoeffs(const int16_t* const coeffs,
   324                                VP8Residual* const res) {
   325    int n;
   326    res->last = -1;
   327    for (n = 15; n >= res->first; --n) {
   328      if (coeffs[n]) {
   329        res->last = n;
   330        break;
   331      }
   332    }
   333    res->coeffs = coeffs;
   334  }
   335  
   336  //------------------------------------------------------------------------------
   337  // Mode costs
   338  
// Returns the cost of coding the coefficients attached to 'res', 'ctx0' being
// the context to use for the first one. The cost is evaluated from the
// probabilities (res->prob) and the level costs (res->cost) only: nothing is
// written nor recorded.
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
  int n = res->first;
  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
  int p0 = res->prob[n][ctx0][0];
  const uint16_t* t = res->cost[n][ctx0];
  int cost;

  // No non-zero coefficient: only the initial flag is to be coded, as 0.
  if (res->last < 0) {
    return VP8BitCost(0, p0);
  }
  cost = VP8BitCost(1, p0);
  // Coefficients located before the last non-zero one (zeros included).
  for (; n < res->last; ++n) {
    const int v = abs(res->coeffs[n]);
    const int b = VP8EncBands[n + 1];
    const int ctx = (v >= 2) ? 2 : v;   // context for the next coefficient
    cost += VP8LevelCost(t, v);
    t = res->cost[b][ctx];
    // the masking trick is faster than "if (v) cost += ..." with clang
    // (the bit costed here is the 1 coded with proba #0 of the next position,
    // and only applies after a non-zero coefficient)
    cost += (v ? ~0U : 0) & VP8BitCost(1, res->prob[b][ctx][0]);
  }
  // Last coefficient is always non-zero
  {
    const int v = abs(res->coeffs[n]);
    assert(v != 0);
    cost += VP8LevelCost(t, v);
    // Unless the block is full, a 0 is then coded with proba #0 of the next
    // position.
    if (n < 15) {
      const int b = VP8EncBands[n + 1];
      const int ctx = (v == 1) ? 1 : 2;
      const int last_p0 = res->prob[b][ctx][0];
      cost += VP8BitCost(0, last_p0);
    }
  }
  return cost;
}
   373  
   374  int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
   375    const int x = (it->i4_ & 3), y = (it->i4_ >> 2);
   376    VP8Residual res;
   377    VP8Encoder* const enc = it->enc_;
   378    int R = 0;
   379    int ctx;
   380  
   381    InitResidual(0, 3, enc, &res);
   382    ctx = it->top_nz_[x] + it->left_nz_[y];
   383    SetResidualCoeffs(levels, &res);
   384    R += GetResidualCost(ctx, &res);
   385    return R;
   386  }
   387  
   388  int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
   389    VP8Residual res;
   390    VP8Encoder* const enc = it->enc_;
   391    int x, y;
   392    int R = 0;
   393  
   394    VP8IteratorNzToBytes(it);   // re-import the non-zero context
   395  
   396    // DC
   397    InitResidual(0, 1, enc, &res);
   398    SetResidualCoeffs(rd->y_dc_levels, &res);
   399    R += GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
   400  
   401    // AC
   402    InitResidual(1, 0, enc, &res);
   403    for (y = 0; y < 4; ++y) {
   404      for (x = 0; x < 4; ++x) {
   405        const int ctx = it->top_nz_[x] + it->left_nz_[y];
   406        SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
   407        R += GetResidualCost(ctx, &res);
   408        it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);
   409      }
   410    }
   411    return R;
   412  }
   413  
   414  int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
   415    VP8Residual res;
   416    VP8Encoder* const enc = it->enc_;
   417    int ch, x, y;
   418    int R = 0;
   419  
   420    VP8IteratorNzToBytes(it);  // re-import the non-zero context
   421  
   422    InitResidual(0, 2, enc, &res);
   423    for (ch = 0; ch <= 2; ch += 2) {
   424      for (y = 0; y < 2; ++y) {
   425        for (x = 0; x < 2; ++x) {
   426          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
   427          SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
   428          R += GetResidualCost(ctx, &res);
   429          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
   430        }
   431      }
   432    }
   433    return R;
   434  }
   435  
   436  //------------------------------------------------------------------------------
   437  // Coefficient coding
   438  
// Writes the coefficients attached to 'res' to the bit-writer 'bw', 'ctx'
// being the context to use for the first one.
// Returns 0 if the block has no non-zero coefficient (only the initial flag
// is written then), and 1 otherwise.
static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
  int n = res->first;
  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
  const uint8_t* p = res->prob[n][ctx];
  if (!VP8PutBit(bw, res->last >= 0, p[0])) {
    return 0;
  }

  while (n < 16) {
    const int c = res->coeffs[n++];
    const int sign = c < 0;
    int v = sign ? -c : c;
    // Zero coefficient: move on to the next one, with context #0.
    if (!VP8PutBit(bw, v != 0, p[1])) {
      p = res->prob[VP8EncBands[n]][0];
      continue;
    }
    if (!VP8PutBit(bw, v > 1, p[2])) {
      p = res->prob[VP8EncBands[n]][1];   // v == 1 -> context #1
    } else {
      if (!VP8PutBit(bw, v > 4, p[3])) {
        // v is 2, 3 or 4
        if (VP8PutBit(bw, v != 2, p[4]))
          VP8PutBit(bw, v == 4, p[5]);
      } else if (!VP8PutBit(bw, v > 10, p[6])) {
        // v is in [5, 10]: the remaining bits use fixed probabilities
        if (!VP8PutBit(bw, v > 6, p[7])) {
          VP8PutBit(bw, v == 6, 159);
        } else {
          VP8PutBit(bw, v >= 9, 165);
          VP8PutBit(bw, !(v & 1), 145);
        }
      } else {
        // v > 10: two bits select the category, then the offset of 'v' from
        // the category's base is written MSB first ('mask' is the top bit),
        // using the fixed probabilities in 'tab'.
        int mask;
        const uint8_t* tab;
        if (v < 3 + (8 << 1)) {          // VP8Cat3  (3b)
          VP8PutBit(bw, 0, p[8]);
          VP8PutBit(bw, 0, p[9]);
          v -= 3 + (8 << 0);
          mask = 1 << 2;
          tab = VP8Cat3;
        } else if (v < 3 + (8 << 2)) {   // VP8Cat4  (4b)
          VP8PutBit(bw, 0, p[8]);
          VP8PutBit(bw, 1, p[9]);
          v -= 3 + (8 << 1);
          mask = 1 << 3;
          tab = VP8Cat4;
        } else if (v < 3 + (8 << 3)) {   // VP8Cat5  (5b)
          VP8PutBit(bw, 1, p[8]);
          VP8PutBit(bw, 0, p[10]);
          v -= 3 + (8 << 2);
          mask = 1 << 4;
          tab = VP8Cat5;
        } else {                         // VP8Cat6 (11b)
          VP8PutBit(bw, 1, p[8]);
          VP8PutBit(bw, 1, p[10]);
          v -= 3 + (8 << 3);
          mask = 1 << 10;
          tab = VP8Cat6;
        }
        while (mask) {
          VP8PutBit(bw, !!(v & mask), *tab++);
          mask >>= 1;
        }
      }
      p = res->prob[VP8EncBands[n]][2];   // v > 1 -> context #2
    }
    VP8PutBitUniform(bw, sign);
    // Stop after the 16th coefficient (nothing more is written then), or once
    // a 0 has been written to tell that no non-zero coefficient is left.
    if (n == 16 || !VP8PutBit(bw, n <= res->last, p[0])) {
      return 1;   // EOB
    }
  }
  return 1;
}
   510  
// Writes all the residuals of the current macroblock of 'it' to 'bw', using
// the levels in 'rd': the luma blocks (preceded by the DC block for 16x16
// macroblocks), then the chroma blocks.
// The non-zero context of 'it' is updated along the way. The differences
// of VP8BitWriterPos() measured around the luma and chroma parts are stored
// in it->luma_bits_ / it->uv_bits_, and accumulated in it->bit_count_.
static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
                          const VP8ModeScore* const rd) {
  int x, y, ch;
  VP8Residual res;
  uint64_t pos1, pos2, pos3;
  const int i16 = (it->mb_->type_ == 1);
  const int segment = it->mb_->segment_;
  VP8Encoder* const enc = it->enc_;

  VP8IteratorNzToBytes(it);

  pos1 = VP8BitWriterPos(bw);
  if (i16) {
    // The DC block comes first (type 1). The luma blocks that follow are then
    // of type 0 and start at coefficient #1.
    InitResidual(0, 1, enc, &res);
    SetResidualCoeffs(rd->y_dc_levels, &res);
    it->top_nz_[8] = it->left_nz_[8] =
      PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
    InitResidual(1, 0, enc, &res);
  } else {
    InitResidual(0, 3, enc, &res);
  }

  // luma-AC
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) {
      const int ctx = it->top_nz_[x] + it->left_nz_[y];
      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
      it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
    }
  }
  pos2 = VP8BitWriterPos(bw);

  // U/V
  InitResidual(0, 2, enc, &res);
  for (ch = 0; ch <= 2; ch += 2) {
    for (y = 0; y < 2; ++y) {
      for (x = 0; x < 2; ++x) {
        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
            PutCoeffs(bw, ctx, &res);
      }
    }
  }
  pos3 = VP8BitWriterPos(bw);
  it->luma_bits_ = pos2 - pos1;
  it->uv_bits_ = pos3 - pos2;
  // Luma is counted in slot #0 or #1 depending on the macroblock type, and
  // chroma in slot #2.
  it->bit_count_[segment][i16] += it->luma_bits_;
  it->bit_count_[segment][2] += it->uv_bits_;
  VP8IteratorBytesToNz(it);
}
   562  
   563  // Same as CodeResiduals, but doesn't actually write anything.
   564  // Instead, it just records the event distribution.
   565  static void RecordResiduals(VP8EncIterator* const it,
   566                              const VP8ModeScore* const rd) {
   567    int x, y, ch;
   568    VP8Residual res;
   569    VP8Encoder* const enc = it->enc_;
   570  
   571    VP8IteratorNzToBytes(it);
   572  
   573    if (it->mb_->type_ == 1) {   // i16x16
   574      InitResidual(0, 1, enc, &res);
   575      SetResidualCoeffs(rd->y_dc_levels, &res);
   576      it->top_nz_[8] = it->left_nz_[8] =
   577        RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
   578      InitResidual(1, 0, enc, &res);
   579    } else {
   580      InitResidual(0, 3, enc, &res);
   581    }
   582  
   583    // luma-AC
   584    for (y = 0; y < 4; ++y) {
   585      for (x = 0; x < 4; ++x) {
   586        const int ctx = it->top_nz_[x] + it->left_nz_[y];
   587        SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
   588        it->top_nz_[x] = it->left_nz_[y] = RecordCoeffs(ctx, &res);
   589      }
   590    }
   591  
   592    // U/V
   593    InitResidual(0, 2, enc, &res);
   594    for (ch = 0; ch <= 2; ch += 2) {
   595      for (y = 0; y < 2; ++y) {
   596        for (x = 0; x < 2; ++x) {
   597          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
   598          SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
   599          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
   600              RecordCoeffs(ctx, &res);
   601        }
   602      }
   603    }
   604  
   605    VP8IteratorBytesToNz(it);
   606  }
   607  
   608  //------------------------------------------------------------------------------
   609  // Token buffer
   610  
   611  #if !defined(DISABLE_TOKEN_BUFFER)
   612  
// Same walk over the macroblock's residuals as RecordResiduals(), except that
// each block is also passed to VP8RecordCoeffTokens() along with the 'tokens'
// buffer. The non-zero context of 'it' is updated from the value returned by
// VP8RecordCoeffTokens(); RecordCoeffs() is only called for the statistics.
static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
                         VP8TBuffer* const tokens) {
  int x, y, ch;
  VP8Residual res;
  VP8Encoder* const enc = it->enc_;

  VP8IteratorNzToBytes(it);
  if (it->mb_->type_ == 1) {   // i16x16
    // DC block first (type 1). The luma blocks are then of type 0 and start
    // at coefficient #1.
    const int ctx = it->top_nz_[8] + it->left_nz_[8];
    InitResidual(0, 1, enc, &res);
    SetResidualCoeffs(rd->y_dc_levels, &res);
    it->top_nz_[8] = it->left_nz_[8] =
        VP8RecordCoeffTokens(ctx, 1,
                             res.first, res.last, res.coeffs, tokens);
    RecordCoeffs(ctx, &res);
    InitResidual(1, 0, enc, &res);
  } else {
    InitResidual(0, 3, enc, &res);
  }

  // luma-AC
  for (y = 0; y < 4; ++y) {
    for (x = 0; x < 4; ++x) {
      const int ctx = it->top_nz_[x] + it->left_nz_[y];
      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
      it->top_nz_[x] = it->left_nz_[y] =
          VP8RecordCoeffTokens(ctx, res.coeff_type,
                               res.first, res.last, res.coeffs, tokens);
      RecordCoeffs(ctx, &res);
    }
  }

  // U/V
  InitResidual(0, 2, enc, &res);
  for (ch = 0; ch <= 2; ch += 2) {
    for (y = 0; y < 2; ++y) {
      for (x = 0; x < 2; ++x) {
        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
            VP8RecordCoeffTokens(ctx, 2,
                                 res.first, res.last, res.coeffs, tokens);
        RecordCoeffs(ctx, &res);
      }
    }
  }
  VP8IteratorBytesToNz(it);
}
   661  
   662  #endif    // !DISABLE_TOKEN_BUFFER
   663  
   664  //------------------------------------------------------------------------------
   665  // ExtraInfo map / Debug function
   666  
   667  #if SEGMENT_VISU
   668  static void SetBlock(uint8_t* p, int value, int size) {
   669    int y;
   670    for (y = 0; y < size; ++y) {
   671      memset(p, value, size);
   672      p += BPS;
   673    }
   674  }
   675  #endif
   676  
   677  static void ResetSSE(VP8Encoder* const enc) {
   678    enc->sse_[0] = 0;
   679    enc->sse_[1] = 0;
   680    enc->sse_[2] = 0;
   681    // Note: enc->sse_[3] is managed by alpha.c
   682    enc->sse_count_ = 0;
   683  }
   684  
   685  static void StoreSSE(const VP8EncIterator* const it) {
   686    VP8Encoder* const enc = it->enc_;
   687    const uint8_t* const in = it->yuv_in_;
   688    const uint8_t* const out = it->yuv_out_;
   689    // Note: not totally accurate at boundary. And doesn't include in-loop filter.
   690    enc->sse_[0] += VP8SSE16x16(in + Y_OFF, out + Y_OFF);
   691    enc->sse_[1] += VP8SSE8x8(in + U_OFF, out + U_OFF);
   692    enc->sse_[2] += VP8SSE8x8(in + V_OFF, out + V_OFF);
   693    enc->sse_count_ += 16 * 16;
   694  }
   695  
// Collects the optional side information about the current macroblock:
//  - if pic->stats is set: the squared errors and the block-type counters
//    are updated.
//  - if pic->extra_info is set: one byte is written at the macroblock's
//    position in this map. Its meaning depends on pic->extra_info_type.
static void StoreSideInfo(const VP8EncIterator* const it) {
  VP8Encoder* const enc = it->enc_;
  const VP8MBInfo* const mb = it->mb_;
  WebPPicture* const pic = enc->pic_;

  if (pic->stats != NULL) {
    StoreSSE(it);
    // type_ == 1 is the 16x16 mode (type_ == 0 presumably being the 4x4 one).
    enc->block_count_[0] += (mb->type_ == 0);
    enc->block_count_[1] += (mb->type_ == 1);
    enc->block_count_[2] += (mb->skip_ != 0);
  }

  if (pic->extra_info != NULL) {
    // The map holds one byte per macroblock, in raster order.
    uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
    switch (pic->extra_info_type) {
      case 1: *info = mb->type_; break;
      case 2: *info = mb->segment_; break;
      case 3: *info = enc->dqm_[mb->segment_].quant_; break;
      case 4: *info = (mb->type_ == 1) ? it->preds_[0] : 0xff; break;
      case 5: *info = mb->uv_mode_; break;
      case 6: {
        // luma + chroma size, divided by 8 (rounded up) and capped to 255
        const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
        *info = (b > 255) ? 255 : b; break;
      }
      case 7: *info = mb->alpha_; break;
      default: *info = 0; break;
    };
  }
#if SEGMENT_VISU  // visualize segments and prediction modes
  SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16);
  SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8);
  SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8);
#endif
}
   730  
   731  static double GetPSNR(uint64_t mse, uint64_t size) {
   732    return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99;
   733  }
   734  
   735  //------------------------------------------------------------------------------
   736  //  StatLoop(): only collect statistics (number of skips, token usage, ...).
   737  //  This is used for deciding optimal probabilities. It also modifies the
   738  //  quantizer value if some target (size, PSNR) was specified.
   739  
   740  static void SetLoopParams(VP8Encoder* const enc, float q) {
   741    // Make sure the quality parameter is inside valid bounds
   742    q = Clamp(q, 0.f, 100.f);
   743  
   744    VP8SetSegmentParams(enc, q);      // setup segment quantizations and filters
   745    SetSegmentProbas(enc);            // compute segment probabilities
   746  
   747    ResetStats(enc);
   748    ResetSSE(enc);
   749  }
   750  
   751  static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
   752                              int nb_mbs, int percent_delta,
   753                              PassStats* const s) {
   754    VP8EncIterator it;
   755    uint64_t size = 0;
   756    uint64_t size_p0 = 0;
   757    uint64_t distortion = 0;
   758    const uint64_t pixel_count = nb_mbs * 384;
   759  
   760    VP8IteratorInit(enc, &it);
   761    SetLoopParams(enc, s->q);
   762    do {
   763      VP8ModeScore info;
   764      VP8IteratorImport(&it, NULL);
   765      if (VP8Decimate(&it, &info, rd_opt)) {
   766        // Just record the number of skips and act like skip_proba is not used.
   767        enc->proba_.nb_skip_++;
   768      }
   769      RecordResiduals(&it, &info);
   770      size += info.R + info.H;
   771      size_p0 += info.H;
   772      distortion += info.D;
   773      if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
   774        return 0;
   775      VP8IteratorSaveBoundary(&it);
   776    } while (VP8IteratorNext(&it) && --nb_mbs > 0);
   777  
   778    size_p0 += enc->segment_hdr_.size_;
   779    if (s->do_size_search) {
   780      size += FinalizeSkipProba(enc);
   781      size += FinalizeTokenProbas(&enc->proba_);
   782      size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;
   783      s->value = (double)size;
   784    } else {
   785      s->value = GetPSNR(distortion, pixel_count);
   786    }
   787    return size_p0;
   788  }
   789  
// Runs up to config_->pass analysis passes (see OneStatPass()) in order to
// collect the statistics used for the probabilities. If enc->do_search_ is
// set, the quality factor is adjusted between passes (see ComputeNextQ()),
// until the step gets below DQ_LIMIT or the passes are exhausted.
// A pass is redone with a halved enc->max_i4_header_bits_ each time the header
// cost it returns exceeds PARTITION0_SIZE_LIMIT.
// Returns 0 if a pass was aborted (OneStatPass() returned 0), and the result
// of WebPReportProgress() otherwise.
static int StatLoop(VP8Encoder* const enc) {
  const int method = enc->method_;
  const int do_search = enc->do_search_;
  const int fast_probe = ((method == 0 || method == 3) && !do_search);
  int num_pass_left = enc->config_->pass;
  const int task_percent = 20;
  // NOTE(review): 'num_pass_left' is used as a divisor below, so config_->pass
  // is assumed to be >= 1 here (presumably validated by the caller; confirm).
  const int percent_per_pass =
      (task_percent + num_pass_left / 2) / num_pass_left;
  const int final_percent = enc->percent_ + task_percent;
  const VP8RDLevel rd_opt =
      (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
  int nb_mbs = enc->mb_w_ * enc->mb_h_;
  PassStats stats;

  InitPassStats(enc, &stats);
  ResetTokenStats(enc);

  // Fast mode: quick analysis pass over few mbs. Better than nothing.
  if (fast_probe) {
    if (method == 3) {  // we need more stats for method 3 to be reliable.
      nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
    } else {
      nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
    }
  }

  while (num_pass_left-- > 0) {
    // Evaluated before the pass is run, with the 'dq' from the previous one.
    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
                             (num_pass_left == 0) ||
                             (enc->max_i4_header_bits_ == 0);
    const uint64_t size_p0 =
        OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
    if (size_p0 == 0) return 0;
#if (DEBUG_SEARCH > 0)
    printf("#%d value:%.1lf -> %.1lf   q:%.2f -> %.2f\n",
           num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q);
#endif
    if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
      ++num_pass_left;                 // this pass is not accounted for
      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
      continue;                        // ...and start over
    }
    if (is_last_pass) {
      break;
    }
    // If no target size: just do several pass without changing 'q'
    if (do_search) {
      ComputeNextQ(&stats);
      if (fabs(stats.dq) <= DQ_LIMIT) break;
    }
  }
  if (!do_search || !stats.do_size_search) {
    // Need to finalize probas now, since it wasn't done during the search.
    FinalizeSkipProba(enc);
    FinalizeTokenProbas(&enc->proba_);
  }
  VP8CalculateLevelCosts(&enc->proba_);  // finalize costs
  return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
}
   849  
   850  //------------------------------------------------------------------------------
   851  // Main loops
   852  //
   853  
   854  static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
   855  
   856  static int PreLoopInitialize(VP8Encoder* const enc) {
   857    int p;
   858    int ok = 1;
   859    const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4];
   860    const int bytes_per_parts =
   861        enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_;
   862    // Initialize the bit-writers
   863    for (p = 0; ok && p < enc->num_parts_; ++p) {
   864      ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
   865    }
   866    if (!ok) VP8EncFreeBitWriters(enc);  // malloc error occurred
   867    return ok;
   868  }
   869  
   870  static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
   871    VP8Encoder* const enc = it->enc_;
   872    if (ok) {      // Finalize the partitions, check for extra errors.
   873      int p;
   874      for (p = 0; p < enc->num_parts_; ++p) {
   875        VP8BitWriterFinish(enc->parts_ + p);
   876        ok &= !enc->parts_[p].error_;
   877      }
   878    }
   879  
   880    if (ok) {      // All good. Finish up.
   881      if (enc->pic_->stats != NULL) {  // finalize byte counters...
   882        int i, s;
   883        for (i = 0; i <= 2; ++i) {
   884          for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
   885            enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3);
   886          }
   887        }
   888      }
   889      VP8AdjustFilterStrength(it);     // ...and store filter stats.
   890    } else {
   891      // Something bad happened -> need to do some memory cleanup.
   892      VP8EncFreeBitWriters(enc);
   893    }
   894    return ok;
   895  }
   896  
   897  //------------------------------------------------------------------------------
   898  //  VP8EncLoop(): does the final bitstream coding.
   899  
   900  static void ResetAfterSkip(VP8EncIterator* const it) {
   901    if (it->mb_->type_ == 1) {
   902      *it->nz_ = 0;  // reset all predictors
   903      it->left_nz_[8] = 0;
   904    } else {
   905      *it->nz_ &= (1 << 24);  // preserve the dc_nz bit
   906    }
   907  }
   908  
   909  int VP8EncLoop(VP8Encoder* const enc) {
   910    VP8EncIterator it;
   911    int ok = PreLoopInitialize(enc);
   912    if (!ok) return 0;
   913  
   914    StatLoop(enc);  // stats-collection loop
   915  
   916    VP8IteratorInit(enc, &it);
   917    VP8InitFilter(&it);
   918    do {
   919      VP8ModeScore info;
   920      const int dont_use_skip = !enc->proba_.use_skip_proba_;
   921      const VP8RDLevel rd_opt = enc->rd_opt_level_;
   922  
   923      VP8IteratorImport(&it, NULL);
   924      // Warning! order is important: first call VP8Decimate() and
   925      // *then* decide how to code the skip decision if there's one.
   926      if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
   927        CodeResiduals(it.bw_, &it, &info);
   928      } else {   // reset predictors after a skip
   929        ResetAfterSkip(&it);
   930      }
   931  #ifdef WEBP_EXPERIMENTAL_FEATURES
   932      if (enc->use_layer_) {
   933        VP8EncCodeLayerBlock(&it);
   934      }
   935  #endif
   936      StoreSideInfo(&it);
   937      VP8StoreFilterStats(&it);
   938      VP8IteratorExport(&it);
   939      ok = VP8IteratorProgress(&it, 20);
   940      VP8IteratorSaveBoundary(&it);
   941    } while (ok && VP8IteratorNext(&it));
   942  
   943    return PostLoopFinalize(&it, ok);
   944  }
   945  
   946  //------------------------------------------------------------------------------
   947  // Single pass using Token Buffer.
   948  
   949  #if !defined(DISABLE_TOKEN_BUFFER)
   950  
   951  #define MIN_COUNT 96  // minimum number of macroblocks before updating stats
   952  
   953  int VP8EncTokenLoop(VP8Encoder* const enc) {
   954    // Roughly refresh the proba eight times per pass
   955    int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
   956    int num_pass_left = enc->config_->pass;
   957    const int do_search = enc->do_search_;
   958    VP8EncIterator it;
   959    VP8Proba* const proba = &enc->proba_;
   960    const VP8RDLevel rd_opt = enc->rd_opt_level_;
   961    const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
   962    PassStats stats;
   963    int ok;
   964  
   965    InitPassStats(enc, &stats);
   966    ok = PreLoopInitialize(enc);
   967    if (!ok) return 0;
   968  
   969    if (max_count < MIN_COUNT) max_count = MIN_COUNT;
   970  
   971    assert(enc->num_parts_ == 1);
   972    assert(enc->use_tokens_);
   973    assert(proba->use_skip_proba_ == 0);
   974    assert(rd_opt >= RD_OPT_BASIC);   // otherwise, token-buffer won't be useful
   975    assert(num_pass_left > 0);
   976  
   977    while (ok && num_pass_left-- > 0) {
   978      const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
   979                               (num_pass_left == 0) ||
   980                               (enc->max_i4_header_bits_ == 0);
   981      uint64_t size_p0 = 0;
   982      uint64_t distortion = 0;
   983      int cnt = max_count;
   984      VP8IteratorInit(enc, &it);
   985      SetLoopParams(enc, stats.q);
   986      if (is_last_pass) {
   987        ResetTokenStats(enc);
   988        VP8InitFilter(&it);  // don't collect stats until last pass (too costly)
   989      }
   990      VP8TBufferClear(&enc->tokens_);
   991      do {
   992        VP8ModeScore info;
   993        VP8IteratorImport(&it, NULL);
   994        if (--cnt < 0) {
   995          FinalizeTokenProbas(proba);
   996          VP8CalculateLevelCosts(proba);  // refresh cost tables for rd-opt
   997          cnt = max_count;
   998        }
   999        VP8Decimate(&it, &info, rd_opt);
  1000        RecordTokens(&it, &info, &enc->tokens_);
  1001        size_p0 += info.H;
  1002        distortion += info.D;
  1003  #ifdef WEBP_EXPERIMENTAL_FEATURES
  1004        if (enc->use_layer_) {
  1005          VP8EncCodeLayerBlock(&it);
  1006        }
  1007  #endif
  1008        if (is_last_pass) {
  1009          StoreSideInfo(&it);
  1010          VP8StoreFilterStats(&it);
  1011          VP8IteratorExport(&it);
  1012          ok = VP8IteratorProgress(&it, 20);
  1013        }
  1014        VP8IteratorSaveBoundary(&it);
  1015      } while (ok && VP8IteratorNext(&it));
  1016      if (!ok) break;
  1017  
  1018      size_p0 += enc->segment_hdr_.size_;
  1019      if (stats.do_size_search) {
  1020        uint64_t size = FinalizeTokenProbas(&enc->proba_);
  1021        size += VP8EstimateTokenSize(&enc->tokens_,
  1022                                     (const uint8_t*)proba->coeffs_);
  1023        size = (size + size_p0 + 1024) >> 11;  // -> size in bytes
  1024        size += HEADER_SIZE_ESTIMATE;
  1025        stats.value = (double)size;
  1026      } else {  // compute and store PSNR
  1027        stats.value = GetPSNR(distortion, pixel_count);
  1028      }
  1029  
  1030  #if (DEBUG_SEARCH > 0)
  1031      printf("#%2d metric:%.1lf -> %.1lf   last_q=%.2lf q=%.2lf dq=%.2lf\n",
  1032             num_pass_left, stats.last_value, stats.value,
  1033             stats.last_q, stats.q, stats.dq);
  1034  #endif
  1035      if (size_p0 > PARTITION0_SIZE_LIMIT) {
  1036        ++num_pass_left;
  1037        enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
  1038        continue;                        // ...and start over
  1039      }
  1040      if (is_last_pass) {
  1041        break;   // done
  1042      }
  1043      if (do_search) {
  1044        ComputeNextQ(&stats);  // Adjust q
  1045      }
  1046    }
  1047    if (ok) {
  1048      if (!stats.do_size_search) {
  1049        FinalizeTokenProbas(&enc->proba_);
  1050      }
  1051      ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
  1052                         (const uint8_t*)proba->coeffs_, 1);
  1053    }
  1054    ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
  1055    return PostLoopFinalize(&it, ok);
  1056  }
  1057  
  1058  #else
  1059  
  1060  int VP8EncTokenLoop(VP8Encoder* const enc) {
  1061    (void)enc;
  1062    return 0;   // we shouldn't be here.
  1063  }
  1064  
  1065  #endif    // DISABLE_TOKEN_BUFFER
  1066  
  1067  //------------------------------------------------------------------------------
  1068