github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/image/jxr/jxrlib/test/jpeg/jpgd.cpp (about)

     1  // jpgd.cpp - C++ class for JPEG decompression.
     2  // Public domain, Rich Geldreich <richgel99@gmail.com>
     3  // Alex Evans: Linear memory allocator (taken from jpge.h).
     4  // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
     5  //
     6  // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
     7  //
     8  // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
     9  // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
    10  // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
    11  
    12  #include "jpgd.h"
    13  #include <string.h>
    14  
    15  #include <assert.h>
// Assertion hook for this file; currently forwards straight to the standard assert().
#define JPGD_ASSERT(x) assert(x)

#ifdef _MSC_VER
#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
#endif

// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
// This is slower, but results in higher quality on images with highly saturated colors.
#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1

// Integer boolean constants.
#define JPGD_TRUE (1)
#define JPGD_FALSE (0)

// Generic max/min. NOTE: each macro expands its winning argument twice, so do not pass
// expressions with side effects (e.g. i++).
#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
    31  
    32  namespace jpgd {
    33  
    34  static inline void *jpgd_malloc(size_t nSize) { return malloc(nSize); }
    35  static inline void jpgd_free(void *p) { free(p); }
    36  
// DCT coefficients are stored in this sequence.
// The 64 entries are a permutation of 0..63: entry k gives the position inside the 8x8 block
// (row-major index, row = value / 8, column = value % 8) of the k-th coefficient in zig-zag order.
static int g_ZAG[64] = {  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
    39  
// JPEG marker codes, i.e. the byte value that follows a 0xFF prefix in the stream.
// M_ERROR (0x100) is deliberately outside the 0..0xFF byte range, so it can never collide with a
// real marker byte. RST0 is a second name for the same value as M_RST0 (0xD0).
enum JPEG_MARKER
{
  M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
  M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
  M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
  M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
  M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0
};
    48  
// Identifiers for the chroma subsampling layouts named in the file header (Y, H1V1, H2V1, H1V2, H2V2).
// Values are consecutive, starting at JPGD_GRAYSCALE = 0.
enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
    50  
    51  #define CONST_BITS  13
    52  #define PASS1_BITS  2
    53  #define SCALEDONE ((int32)1)
    54  
    55  #define FIX_0_298631336  ((int32)2446)        /* FIX(0.298631336) */
    56  #define FIX_0_390180644  ((int32)3196)        /* FIX(0.390180644) */
    57  #define FIX_0_541196100  ((int32)4433)        /* FIX(0.541196100) */
    58  #define FIX_0_765366865  ((int32)6270)        /* FIX(0.765366865) */
    59  #define FIX_0_899976223  ((int32)7373)        /* FIX(0.899976223) */
    60  #define FIX_1_175875602  ((int32)9633)        /* FIX(1.175875602) */
    61  #define FIX_1_501321110  ((int32)12299)       /* FIX(1.501321110) */
    62  #define FIX_1_847759065  ((int32)15137)       /* FIX(1.847759065) */
    63  #define FIX_1_961570560  ((int32)16069)       /* FIX(1.961570560) */
    64  #define FIX_2_053119869  ((int32)16819)       /* FIX(2.053119869) */
    65  #define FIX_2_562915447  ((int32)20995)       /* FIX(2.562915447) */
    66  #define FIX_3_072711026  ((int32)25172)       /* FIX(3.072711026) */
    67  
    68  #define DESCALE(x,n)  (((x) + (SCALEDONE << ((n)-1))) >> (n))
    69  #define DESCALE_ZEROSHIFT(x,n)  (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
    70  
    71  #define MULTIPLY(var, cnst)  ((var) * (cnst))
    72  
    73  #define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
    74  
    75  // Compiler creates a fast path 1D IDCT for X non-zero columns
    76  template <int NONZERO_COLS>
    77  struct Row
    78  {
    79    static void idct(int* pTemp, const jpgd_block_t* pSrc)
    80    {
    81      // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
    82      #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
    83  
    84      const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
    85  
    86      const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
    87      const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
    88      const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
    89  
    90      const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
    91      const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
    92  
    93      const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
    94  
    95      const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
    96  
    97      const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
    98      const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
    99  
   100      const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
   101      const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
   102      const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
   103      const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
   104  
   105      const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
   106      const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
   107      const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
   108      const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
   109  
   110      pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
   111      pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
   112      pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
   113      pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
   114      pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
   115      pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
   116      pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
   117      pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
   118    }
   119  };
   120  
   121  template <>
   122  struct Row<0>
   123  {
   124    static void idct(int* pTemp, const jpgd_block_t* pSrc)
   125    {
   126  #ifdef _MSC_VER
   127      pTemp; pSrc;
   128  #endif
   129    }
   130  };
   131  
   132  template <>
   133  struct Row<1>
   134  {
   135    static void idct(int* pTemp, const jpgd_block_t* pSrc)
   136    {
   137      const int dcval = (pSrc[0] << PASS1_BITS);
   138  
   139      pTemp[0] = dcval;
   140      pTemp[1] = dcval;
   141      pTemp[2] = dcval;
   142      pTemp[3] = dcval;
   143      pTemp[4] = dcval;
   144      pTemp[5] = dcval;
   145      pTemp[6] = dcval;
   146      pTemp[7] = dcval;
   147    }
   148  };
   149  
   150  // Compiler creates a fast path 1D IDCT for X non-zero rows
   151  template <int NONZERO_ROWS>
   152  struct Col
   153  {
   154    static void idct(uint8* pDst_ptr, const int* pTemp)
   155    {
   156      // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
   157      #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
   158  
   159      const int z2 = ACCESS_ROW(2);
   160      const int z3 = ACCESS_ROW(6);
   161  
   162      const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
   163      const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
   164      const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
   165  
   166      const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
   167      const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
   168  
   169      const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
   170  
   171      const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
   172  
   173      const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
   174      const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
   175  
   176      const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
   177      const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
   178      const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
   179      const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
   180  
   181      const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
   182      const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
   183      const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
   184      const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
   185  
   186      int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
   187      pDst_ptr[8*0] = (uint8)CLAMP(i);
   188  
   189      i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
   190      pDst_ptr[8*7] = (uint8)CLAMP(i);
   191  
   192      i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
   193      pDst_ptr[8*1] = (uint8)CLAMP(i);
   194  
   195      i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
   196      pDst_ptr[8*6] = (uint8)CLAMP(i);
   197  
   198      i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
   199      pDst_ptr[8*2] = (uint8)CLAMP(i);
   200  
   201      i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
   202      pDst_ptr[8*5] = (uint8)CLAMP(i);
   203  
   204      i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
   205      pDst_ptr[8*3] = (uint8)CLAMP(i);
   206  
   207      i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
   208      pDst_ptr[8*4] = (uint8)CLAMP(i);
   209    }
   210  };
   211  
   212  template <>
   213  struct Col<1>
   214  {
   215    static void idct(uint8* pDst_ptr, const int* pTemp)
   216    {
   217      int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
   218      const uint8 dcval_clamped = (uint8)CLAMP(dcval);
   219      pDst_ptr[0*8] = dcval_clamped;
   220      pDst_ptr[1*8] = dcval_clamped;
   221      pDst_ptr[2*8] = dcval_clamped;
   222      pDst_ptr[3*8] = dcval_clamped;
   223      pDst_ptr[4*8] = dcval_clamped;
   224      pDst_ptr[5*8] = dcval_clamped;
   225      pDst_ptr[6*8] = dcval_clamped;
   226      pDst_ptr[7*8] = dcval_clamped;
   227    }
   228  };
   229  
// Row-pass dispatch table used by idct(): 64 groups of 8 entries. Group (block_max_zag - 1) holds, for
// each of the 8 rows of the block, the N passed to Row<N>::idct (0 = row skipped, 8 = full row).
static const uint8 s_idct_row_table[] =
{
  1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
  4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
  6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
  6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
  8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
  8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
  8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
  8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
};

// Column-pass dispatch table used by idct(): entry (block_max_zag - 1) is the N passed to Col<N>::idct,
// i.e. how many rows of the intermediate buffer the column pass reads.
static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
   243  
   244  void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
   245  {
   246    JPGD_ASSERT(block_max_zag >= 1);
   247    JPGD_ASSERT(block_max_zag <= 64);
   248  
   249    if (block_max_zag <= 1)
   250    {
   251      int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
   252      k = CLAMP(k);
   253      k = k | (k<<8);
   254      k = k | (k<<16);
   255  
   256      for (int i = 8; i > 0; i--)
   257      {
   258        *(int*)&pDst_ptr[0] = k;
   259        *(int*)&pDst_ptr[4] = k;
   260        pDst_ptr += 8;
   261      }
   262      return;
   263    }
   264  
   265    int temp[64];
   266  
   267    const jpgd_block_t* pSrc = pSrc_ptr;
   268    int* pTemp = temp;
   269  
   270    const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
   271    int i;
   272    for (i = 8; i > 0; i--, pRow_tab++)
   273    {
   274      switch (*pRow_tab)
   275      {
   276        case 0: Row<0>::idct(pTemp, pSrc); break;
   277        case 1: Row<1>::idct(pTemp, pSrc); break;
   278        case 2: Row<2>::idct(pTemp, pSrc); break;
   279        case 3: Row<3>::idct(pTemp, pSrc); break;
   280        case 4: Row<4>::idct(pTemp, pSrc); break;
   281        case 5: Row<5>::idct(pTemp, pSrc); break;
   282        case 6: Row<6>::idct(pTemp, pSrc); break;
   283        case 7: Row<7>::idct(pTemp, pSrc); break;
   284        case 8: Row<8>::idct(pTemp, pSrc); break;
   285      }
   286  
   287      pSrc += 8;
   288      pTemp += 8;
   289    }
   290  
   291    pTemp = temp;
   292  
   293    const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
   294    for (i = 8; i > 0; i--)
   295    {
   296      switch (nonzero_rows)
   297      {
   298        case 1: Col<1>::idct(pDst_ptr, pTemp); break;
   299        case 2: Col<2>::idct(pDst_ptr, pTemp); break;
   300        case 3: Col<3>::idct(pDst_ptr, pTemp); break;
   301        case 4: Col<4>::idct(pDst_ptr, pTemp); break;
   302        case 5: Col<5>::idct(pDst_ptr, pTemp); break;
   303        case 6: Col<6>::idct(pDst_ptr, pTemp); break;
   304        case 7: Col<7>::idct(pDst_ptr, pTemp); break;
   305        case 8: Col<8>::idct(pDst_ptr, pTemp); break;
   306      }
   307  
   308      pTemp++;
   309      pDst_ptr++;
   310    }
   311  }
   312  
   313  void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
   314  {
   315    int temp[64];
   316    int* pTemp = temp;
   317    const jpgd_block_t* pSrc = pSrc_ptr;
   318  
   319    for (int i = 4; i > 0; i--)
   320    {
   321      Row<4>::idct(pTemp, pSrc);
   322      pSrc += 8;
   323      pTemp += 8;
   324    }
   325  
   326    pTemp = temp;
   327    for (int i = 8; i > 0; i--)
   328    {
   329      Col<4>::idct(pDst_ptr, pTemp);
   330      pTemp++;
   331      pDst_ptr++;
   332    }
   333  }
   334  
   335  // Retrieve one character from the input stream.
   336  inline uint jpeg_decoder::get_char()
   337  {
   338    // Any bytes remaining in buffer?
   339    if (!m_in_buf_left)
   340    {
   341      // Try to get more bytes.
   342      prep_in_buffer();
   343      // Still nothing to get?
   344      if (!m_in_buf_left)
   345      {
   346        // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
   347        int t = m_tem_flag;
   348        m_tem_flag ^= 1;
   349        if (t)
   350          return 0xD9;
   351        else
   352          return 0xFF;
   353      }
   354    }
   355  
   356    uint c = *m_pIn_buf_ofs++;
   357    m_in_buf_left--;
   358  
   359    return c;
   360  }
   361  
   362  // Same as previous method, except can indicate if the character is a pad character or not.
   363  inline uint jpeg_decoder::get_char(bool *pPadding_flag)
   364  {
   365    if (!m_in_buf_left)
   366    {
   367      prep_in_buffer();
   368      if (!m_in_buf_left)
   369      {
   370        *pPadding_flag = true;
   371        int t = m_tem_flag;
   372        m_tem_flag ^= 1;
   373        if (t)
   374          return 0xD9;
   375        else
   376          return 0xFF;
   377      }
   378    }
   379  
   380    *pPadding_flag = false;
   381  
   382    uint c = *m_pIn_buf_ofs++;
   383    m_in_buf_left--;
   384  
   385    return c;
   386  }
   387  
   388  // Inserts a previously retrieved character back into the input buffer.
   389  inline void jpeg_decoder::stuff_char(uint8 q)
   390  {
   391    *(--m_pIn_buf_ofs) = q;
   392    m_in_buf_left++;
   393  }
   394  
   395  // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
   396  inline uint8 jpeg_decoder::get_octet()
   397  {
   398    bool padding_flag;
   399    int c = get_char(&padding_flag);
   400  
   401    if (c == 0xFF)
   402    {
   403      if (padding_flag)
   404        return 0xFF;
   405  
   406      c = get_char(&padding_flag);
   407      if (padding_flag)
   408      {
   409        stuff_char(0xFF);
   410        return 0xFF;
   411      }
   412  
   413      if (c == 0x00)
   414        return 0xFF;
   415      else
   416      {
   417        stuff_char(static_cast<uint8>(c));
   418        stuff_char(0xFF);
   419        return 0xFF;
   420      }
   421    }
   422  
   423    return static_cast<uint8>(c);
   424  }
   425  
// Retrieves a variable number of bits from the input stream. Does not recognize markers.
//   num_bits - number of bits to fetch; 0 returns 0 without touching any state.
// Returns the top num_bits bits of m_bit_buf, right-aligned.
// Refills are done 16 bits at a time through get_char(), so marker bytes are fed into the bit buffer
// like any other data.
// NOTE(review): the (32 - num_bits) shift assumes m_bit_buf is a 32-bit unsigned value - confirm in jpgd.h.
inline uint jpeg_decoder::get_bits(int num_bits)
{
  if (!num_bits)
    return 0;

  // Result is taken from the top of the buffer before anything is consumed.
  uint i = m_bit_buf >> (32 - num_bits);

  if ((m_bits_left -= num_bits) <= 0)
  {
    // m_bits_left is now <= 0, so num_bits + m_bits_left is the number of bits that were available
    // before this call; shift exactly those out first.
    m_bit_buf <<= (num_bits += m_bits_left);

    // Load two fresh bytes into the low 16 bits of the buffer.
    uint c1 = get_char();
    uint c2 = get_char();
    m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;

    // Consume the remainder of the request (-m_bits_left bits) from the newly loaded data.
    m_bit_buf <<= -m_bits_left;

    m_bits_left += 16;

    JPGD_ASSERT(m_bits_left >= 0);
  }
  else
    m_bit_buf <<= num_bits;

  return i;
}
   453  
// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
//   num_bits - number of bits to fetch; 0 returns 0 without touching any state.
// Returns the top num_bits bits of m_bit_buf, right-aligned.
// NOTE(review): the (32 - num_bits) shift assumes m_bit_buf is a 32-bit unsigned value - confirm in jpgd.h.
inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
{
  if (!num_bits)
    return 0;

  // Result is taken from the top of the buffer before anything is consumed.
  uint i = m_bit_buf >> (32 - num_bits);

  if ((m_bits_left -= num_bits) <= 0)
  {
    // m_bits_left is now <= 0, so num_bits + m_bits_left is the number of bits that were available
    // before this call; shift exactly those out first.
    m_bit_buf <<= (num_bits += m_bits_left);

    // Slow path when fewer than two buffered bytes remain or either of the next two bytes is 0xFF:
    // go through get_octet(), which handles stuffed bytes, markers and end-of-input padding.
    // The short-circuit evaluation guarantees the two buffer reads only happen when m_in_buf_left >= 2.
    if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
    {
      uint c1 = get_octet();
      uint c2 = get_octet();
      m_bit_buf |= (c1 << 8) | c2;
    }
    else
    {
      // Fast path: take the next two bytes directly from the input buffer.
      m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
      m_in_buf_left -= 2;
      m_pIn_buf_ofs += 2;
    }

    // Consume the remainder of the request (-m_bits_left bits) from the newly loaded data.
    m_bit_buf <<= -m_bits_left;

    m_bits_left += 16;

    JPGD_ASSERT(m_bits_left >= 0);
  }
  else
    m_bit_buf <<= num_bits;

  return i;
}
   490  
// Decodes a Huffman encoded symbol.
//   pH - Huffman table set to decode with (uses its look_up, tree and code_size members).
// Returns the decoded symbol and consumes the bits of its code from the bit buffer.
inline int jpeg_decoder::huff_decode(huff_tables *pH)
{
  int symbol;

  // Check first 8-bits: do we have a complete symbol?
  // A negative look_up entry means the code is longer than 8 bits.
  if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
  {
    // Decode more bits, use a tree traversal to find symbol.
    // Each step uses the next bit of m_bit_buf (bit 23, then 22, ...) to choose between the two tree
    // entries at index -symbol - bit, until a non-negative entry (the symbol) is reached.
    int ofs = 23;
    do
    {
      symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
      ofs--;
    } while (symbol < 0);

    // Consume the 8 lookup bits plus one bit per traversal step (23 - ofs steps were taken).
    get_bits_no_markers(8 + (23 - ofs));
  }
  else
    get_bits_no_markers(pH->code_size[symbol]);

  return symbol;
}
   514  
// Decodes a Huffman encoded symbol.
//   pH         - Huffman table set to decode with (uses its look_up2, tree and code_size members).
//   extra_bits - receives the value of the extra bits that follow the code; their count is the low
//                4 bits of the decoded symbol.
// Returns the decoded symbol (0..255 on the fast path) and consumes the code and the extra bits.
//
// Layout of a non-negative look_up2 entry, as read by the code below:
//   bits 0-7  : the symbol (its low 4 bits are the number of extra bits)
//   bits 8-12 : a bit count - code size plus extra bits when bit 15 is set, otherwise the code size only
//   bit 15    : set when the extra bits value is already stored in the entry
//   bits 16+  : the extra bits value (only meaningful when bit 15 is set)
inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
{
  int symbol;

  // Check first 8-bits: do we have a complete symbol?
  // A negative look_up2 entry means the code is longer than 8 bits.
  if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
  {
    // Use a tree traversal to find symbol.
    // Each step uses the next bit of m_bit_buf (bit 23, then 22, ...) to choose between the two tree
    // entries at index -symbol - bit, until a non-negative entry (the symbol) is reached.
    int ofs = 23;
    do
    {
      symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
      ofs--;
    } while (symbol < 0);

    // Consume the 8 lookup bits plus one bit per traversal step (23 - ofs steps were taken).
    get_bits_no_markers(8 + (23 - ofs));

    extra_bits = get_bits_no_markers(symbol & 0xF);
  }
  else
  {
    // Sanity check: the packed bit count must match the table's code size, plus the extra bits
    // count when the entry carries the extra bits itself.
    JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));

    if (symbol & 0x8000)
    {
      // Code and extra bits both fit in the 8 lookup bits: skip them in one go and take the
      // extra bits value straight from the table entry.
      get_bits_no_markers((symbol >> 8) & 31);
      extra_bits = symbol >> 16;
    }
    else
    {
      int code_size = (symbol >> 8) & 31;
      int num_extra_bits = symbol & 0xF;
      int bits = code_size + num_extra_bits;
      // Fetch code and extra bits with a single call when the total is at most m_bits_left + 16,
      // keeping only the low num_extra_bits bits; otherwise fetch them separately.
      if (bits <= (m_bits_left + 16))
        extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
      else
      {
        get_bits_no_markers(code_size);
        extra_bits = get_bits_no_markers(num_extra_bits);
      }
    }

    // Strip the packed fields, leaving only the symbol.
    symbol &= 0xFF;
  }

  return symbol;
}
   563  
   564  // Tables and macro used to fully decode the DPCM differences.
   565  static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
   566  static const int s_extend_offset[16] = { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
   567  static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
   568  // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
   569  #define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
   570  
   571  // Clamps a value between 0-255.
   572  inline uint8 jpeg_decoder::clamp(int i)
   573  {
   574    if (static_cast<uint>(i) > 255)
   575      i = (((~i) >> 31) & 0xFF);
   576  
   577    return static_cast<uint8>(i);
   578  }
   579  
   580  namespace DCT_Upsample
   581  {
   582    struct Matrix44
   583    {
   584      typedef int Element_Type;
   585      enum { NUM_ROWS = 4, NUM_COLS = 4 };
   586  
   587      Element_Type v[NUM_ROWS][NUM_COLS];
   588  
   589      inline int rows() const { return NUM_ROWS; }
   590      inline int cols() const { return NUM_COLS; }
   591  
   592      inline const Element_Type & at(int r, int c) const { return v[r][c]; }
   593      inline       Element_Type & at(int r, int c)       { return v[r][c]; }
   594  
   595      inline Matrix44() { }
   596  
   597      inline Matrix44& operator += (const Matrix44& a)
   598      {
   599        for (int r = 0; r < NUM_ROWS; r++)
   600        {
   601          at(r, 0) += a.at(r, 0);
   602          at(r, 1) += a.at(r, 1);
   603          at(r, 2) += a.at(r, 2);
   604          at(r, 3) += a.at(r, 3);
   605        }
   606        return *this;
   607      }
   608  
   609      inline Matrix44& operator -= (const Matrix44& a)
   610      {
   611        for (int r = 0; r < NUM_ROWS; r++)
   612        {
   613          at(r, 0) -= a.at(r, 0);
   614          at(r, 1) -= a.at(r, 1);
   615          at(r, 2) -= a.at(r, 2);
   616          at(r, 3) -= a.at(r, 3);
   617        }
   618        return *this;
   619      }
   620  
   621      friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
   622      {
   623        Matrix44 ret;
   624        for (int r = 0; r < NUM_ROWS; r++)
   625        {
   626          ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
   627          ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
   628          ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
   629          ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
   630        }
   631        return ret;
   632      }
   633  
   634      friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
   635      {
   636        Matrix44 ret;
   637        for (int r = 0; r < NUM_ROWS; r++)
   638        {
   639          ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
   640          ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
   641          ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
   642          ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
   643        }
   644        return ret;
   645      }
   646  
   647      static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
   648      {
   649        for (int r = 0; r < 4; r++)
   650        {
   651          pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
   652          pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
   653          pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
   654          pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
   655        }
   656      }
   657  
   658      static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
   659      {
   660        for (int r = 0; r < 4; r++)
   661        {
   662          pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
   663          pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
   664          pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
   665          pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
   666        }
   667      }
   668    };
   669  
  // Fixed-point format used by the upsampling math: FRACT_BITS fractional bits, SCALE == 2^FRACT_BITS.
  const int FRACT_BITS = 10;
  const int SCALE = 1 << FRACT_BITS;

  typedef int Temp_Type;
  // D(i): convert a fixed-point value back to an integer, adding half of SCALE before the shift so it rounds.
  #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
  // F(i): convert a floating-point constant to fixed point. NOTE: the (int) cast truncates toward zero,
  // so adding .5f rounds positive constants to nearest but does not do so for negative constants.
  #define F(i) ((int)((i) * SCALE + .5f))

  // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
  // AT(c, r) reads pSrc[c + r*8], or yields 0 when c >= NUM_COLS or r >= NUM_ROWS. It relies on pSrc,
  // NUM_COLS and NUM_ROWS being in scope at the point of use.
  #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
   679  
  // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
  //
  // P_Q<NUM_ROWS, NUM_COLS>::calc() fills the two 4x4 matrices P and Q from the 8x8 coefficient block
  // pSrc. Coefficients are read through AT(), so anything at or beyond NUM_COLS / NUM_ROWS is replaced by
  // a compile-time 0. All weights are fixed-point constants built with F() and every weighted sum is
  // brought back to integer scale with D().
  template<int NUM_ROWS, int NUM_COLS>
  struct P_Q
  {
    static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
    {
      // 4x8 = 4x8 times 8x8, matrix 0 is constant
      // X00r / X02r are plain copies of AT(0, r) / AT(4, r); X01r / X03r are weighted sums of
      // AT(1, r), AT(3, r), AT(5, r) and AT(7, r).
      const Temp_Type X000 = AT(0, 0);
      const Temp_Type X001 = AT(0, 1);
      const Temp_Type X002 = AT(0, 2);
      const Temp_Type X003 = AT(0, 3);
      const Temp_Type X004 = AT(0, 4);
      const Temp_Type X005 = AT(0, 5);
      const Temp_Type X006 = AT(0, 6);
      const Temp_Type X007 = AT(0, 7);
      const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
      const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
      const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
      const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
      const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
      const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
      const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
      const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
      const Temp_Type X020 = AT(4, 0);
      const Temp_Type X021 = AT(4, 1);
      const Temp_Type X022 = AT(4, 2);
      const Temp_Type X023 = AT(4, 3);
      const Temp_Type X024 = AT(4, 4);
      const Temp_Type X025 = AT(4, 5);
      const Temp_Type X026 = AT(4, 6);
      const Temp_Type X027 = AT(4, 7);
      const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
      const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
      const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
      const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
      const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
      const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
      const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
      const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));

      // 4x4 = 4x8 times 8x4, matrix 1 is constant
      // Row n of P is built from X0n0..X0n7: columns 0 and 2 are copies of entries 0 and 4, columns 1 and 3
      // are weighted sums of the odd entries 1, 3, 5 and 7.
      P.at(0, 0) = X000;
      P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
      P.at(0, 2) = X004;
      P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
      P.at(1, 0) = X010;
      P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
      P.at(1, 2) = X014;
      P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
      P.at(2, 0) = X020;
      P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
      P.at(2, 2) = X024;
      P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
      P.at(3, 0) = X030;
      P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
      P.at(3, 2) = X034;
      P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
      // 40 muls 24 adds

      // 4x4 = 4x8 times 8x4, matrix 1 is constant
      // Row n of Q is built from X0n0..X0n7: columns 1 and 3 are copies of entries 2 and 6, columns 0 and 2
      // are weighted sums of the odd entries 1, 3, 5 and 7.
      Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
      Q.at(0, 1) = X002;
      Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
      Q.at(0, 3) = X006;
      Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
      Q.at(1, 1) = X012;
      Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
      Q.at(1, 3) = X016;
      Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
      Q.at(2, 1) = X022;
      Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
      Q.at(2, 3) = X026;
      Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
      Q.at(3, 1) = X032;
      Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
      Q.at(3, 3) = X036;
      // 40 muls 24 adds
    }
  };
   759  
  // Computes the two 4x4 matrices R and S from an 8x8 coefficient block (pSrc).
  // Source coefficients are read through AT(row, col). NUM_ROWS/NUM_COLS are not referenced
  // directly in this body - presumably the AT macro uses them to treat coefficients outside
  // the populated region as zero (TODO confirm against the macro definition).
  // F() and D() wrap every constant and every weighted sum; their definitions are outside this block.
  template<int NUM_ROWS, int NUM_COLS>
  struct R_S
  {
    // R, S:  outputs, fully overwritten (all 16 entries of each are assigned).
    // pSrc:  source block, accessed only via AT().
    static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
    {
      // 4x8 = 4x8 times 8x8, matrix 0 is constant
      // X1rc: intermediate row r (0..3), column c (0..7).
      // Row 0 is a weighted sum of source rows 1, 3, 5, 7.
      const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
      const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
      const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
      const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
      const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
      const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
      const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
      const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
      // Row 1 is source row 2, copied unchanged.
      const Temp_Type X110 = AT(2, 0);
      const Temp_Type X111 = AT(2, 1);
      const Temp_Type X112 = AT(2, 2);
      const Temp_Type X113 = AT(2, 3);
      const Temp_Type X114 = AT(2, 4);
      const Temp_Type X115 = AT(2, 5);
      const Temp_Type X116 = AT(2, 6);
      const Temp_Type X117 = AT(2, 7);
      // Row 2 is a second weighted sum of source rows 1, 3, 5, 7.
      const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
      const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
      const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
      const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
      const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
      const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
      const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
      const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
      // Row 3 is source row 6, copied unchanged.
      const Temp_Type X130 = AT(6, 0);
      const Temp_Type X131 = AT(6, 1);
      const Temp_Type X132 = AT(6, 2);
      const Temp_Type X133 = AT(6, 3);
      const Temp_Type X134 = AT(6, 4);
      const Temp_Type X135 = AT(6, 5);
      const Temp_Type X136 = AT(6, 6);
      const Temp_Type X137 = AT(6, 7);
      // 80 muls 48 adds

      // 4x4 = 4x8 times 8x4, matrix 1 is constant
      // R: columns 0 and 2 take intermediate columns 0 and 4 directly; columns 1 and 3 are
      // weighted sums of the odd intermediate columns (1, 3, 5, 7).
      R.at(0, 0) = X100;
      R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
      R.at(0, 2) = X104;
      R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
      R.at(1, 0) = X110;
      R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
      R.at(1, 2) = X114;
      R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
      R.at(2, 0) = X120;
      R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
      R.at(2, 2) = X124;
      R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
      R.at(3, 0) = X130;
      R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
      R.at(3, 2) = X134;
      R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
      // 40 muls 24 adds
      // 4x4 = 4x8 times 8x4, matrix 1 is constant
      // S: columns 1 and 3 take intermediate columns 2 and 6 directly; columns 0 and 2 are
      // weighted sums of the odd intermediate columns (1, 3, 5, 7).
      S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
      S.at(0, 1) = X102;
      S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
      S.at(0, 3) = X106;
      S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
      S.at(1, 1) = X112;
      S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
      S.at(1, 3) = X116;
      S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
      S.at(2, 1) = X122;
      S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
      S.at(2, 3) = X126;
      S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
      S.at(3, 1) = X132;
      S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
      S.at(3, 3) = X136;
      // 40 muls 24 adds
    }
  };
   838  } // end namespace DCT_Upsample
   839  
   840  // Unconditionally frees all allocated m_blocks.
   841  void jpeg_decoder::free_all_blocks()
   842  {
   843    m_pStream = NULL;
   844    for (mem_block *b = m_pMem_blocks; b; )
   845    {
   846      mem_block *n = b->m_pNext;
   847      jpgd_free(b);
   848      b = n;
   849    }
   850    m_pMem_blocks = NULL;
   851  }
   852  
// This method handles all errors. It will never return: it records the status code,
// releases every allocator block, and then longjmp()s to m_jmp_state, passing the
// status as the jump value.
// It could easily be changed to use C++ exceptions.
JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
{
  m_error_code = status;
  // Must happen before the jump; nothing after longjmp() in this function is executed.
  free_all_blocks();
  longjmp(m_jmp_state, status);
}
   861  
   862  void *jpeg_decoder::alloc(size_t nSize, bool zero)
   863  {
   864    nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
   865    char *rv = NULL;
   866    for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
   867    {
   868      if ((b->m_used_count + nSize) <= b->m_size)
   869      {
   870        rv = b->m_data + b->m_used_count;
   871        b->m_used_count += nSize;
   872        break;
   873      }
   874    }
   875    if (!rv)
   876    {
   877      int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
   878      mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
   879      if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
   880      b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
   881      b->m_used_count = nSize;
   882      b->m_size = capacity;
   883      rv = b->m_data;
   884    }
   885    if (zero) memset(rv, 0, nSize);
   886    return rv;
   887  }
   888  
   889  void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
   890  {
   891    uint8 *pD = (uint8*)p;
   892    const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
   893    while (n)
   894    {
   895      pD[0] = l; pD[1] = h; pD += 2;
   896      n--;
   897    }
   898  }
   899  
   900  // Refill the input buffer.
   901  // This method will sit in a loop until (A) the buffer is full or (B)
   902  // the stream's read() method reports and end of file condition.
   903  void jpeg_decoder::prep_in_buffer()
   904  {
   905    m_in_buf_left = 0;
   906    m_pIn_buf_ofs = m_in_buf;
   907  
   908    if (m_eof_flag)
   909      return;
   910  
   911    do
   912    {
   913      int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
   914      if (bytes_read == -1)
   915        stop_decoding(JPGD_STREAM_READ);
   916  
   917      m_in_buf_left += bytes_read;
   918    } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
   919  
   920    m_total_bytes_read += m_in_buf_left;
   921  
   922    // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
   923    // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
   924    word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
   925  }
   926  
   927  // Read a Huffman code table.
   928  void jpeg_decoder::read_dht_marker()
   929  {
   930    int i, index, count;
   931    uint8 huff_num[17];
   932    uint8 huff_val[256];
   933  
   934    uint num_left = get_bits(16);
   935  
   936    if (num_left < 2)
   937      stop_decoding(JPGD_BAD_DHT_MARKER);
   938  
   939    num_left -= 2;
   940  
   941    while (num_left)
   942    {
   943      index = get_bits(8);
   944  
   945      huff_num[0] = 0;
   946  
   947      count = 0;
   948  
   949      for (i = 1; i <= 16; i++)
   950      {
   951        huff_num[i] = static_cast<uint8>(get_bits(8));
   952        count += huff_num[i];
   953      }
   954  
   955      if (count > 255)
   956        stop_decoding(JPGD_BAD_DHT_COUNTS);
   957  
   958      for (i = 0; i < count; i++)
   959        huff_val[i] = static_cast<uint8>(get_bits(8));
   960  
   961      i = 1 + 16 + count;
   962  
   963      if (num_left < (uint)i)
   964        stop_decoding(JPGD_BAD_DHT_MARKER);
   965  
   966      num_left -= i;
   967  
   968      if ((index & 0x10) > 0x10)
   969        stop_decoding(JPGD_BAD_DHT_INDEX);
   970  
   971      index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
   972  
   973      if (index >= JPGD_MAX_HUFF_TABLES)
   974        stop_decoding(JPGD_BAD_DHT_INDEX);
   975  
   976      if (!m_huff_num[index])
   977        m_huff_num[index] = (uint8 *)alloc(17);
   978  
   979      if (!m_huff_val[index])
   980        m_huff_val[index] = (uint8 *)alloc(256);
   981  
   982      m_huff_ac[index] = (index & 0x10) != 0;
   983      memcpy(m_huff_num[index], huff_num, 17);
   984      memcpy(m_huff_val[index], huff_val, 256);
   985    }
   986  }
   987  
   988  // Read a quantization table.
   989  void jpeg_decoder::read_dqt_marker()
   990  {
   991    int n, i, prec;
   992    uint num_left;
   993    uint temp;
   994  
   995    num_left = get_bits(16);
   996  
   997    if (num_left < 2)
   998      stop_decoding(JPGD_BAD_DQT_MARKER);
   999  
  1000    num_left -= 2;
  1001  
  1002    while (num_left)
  1003    {
  1004      n = get_bits(8);
  1005      prec = n >> 4;
  1006      n &= 0x0F;
  1007  
  1008      if (n >= JPGD_MAX_QUANT_TABLES)
  1009        stop_decoding(JPGD_BAD_DQT_TABLE);
  1010  
  1011      if (!m_quant[n])
  1012        m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
  1013  
  1014      // read quantization entries, in zag order
  1015      for (i = 0; i < 64; i++)
  1016      {
  1017        temp = get_bits(8);
  1018  
  1019        if (prec)
  1020          temp = (temp << 8) + get_bits(8);
  1021  
  1022  			m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
  1023      }
  1024  
  1025      i = 64 + 1;
  1026  
  1027      if (prec)
  1028        i += 64;
  1029  
  1030      if (num_left < (uint)i)
  1031        stop_decoding(JPGD_BAD_DQT_LENGTH);
  1032  
  1033      num_left -= i;
  1034    }
  1035  }
  1036  
  1037  // Read the start of frame (SOF) marker.
  1038  void jpeg_decoder::read_sof_marker()
  1039  {
  1040    int i;
  1041    uint num_left;
  1042  
  1043    num_left = get_bits(16);
  1044  
  1045    if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
  1046      stop_decoding(JPGD_BAD_PRECISION);
  1047  
  1048    m_image_y_size = get_bits(16);
  1049  
  1050    if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
  1051      stop_decoding(JPGD_BAD_HEIGHT);
  1052  
  1053    m_image_x_size = get_bits(16);
  1054  
  1055    if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
  1056      stop_decoding(JPGD_BAD_WIDTH);
  1057  
  1058    m_comps_in_frame = get_bits(8);
  1059  
  1060    if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
  1061      stop_decoding(JPGD_TOO_MANY_COMPONENTS);
  1062  
  1063    if (num_left != (uint)(m_comps_in_frame * 3 + 8))
  1064      stop_decoding(JPGD_BAD_SOF_LENGTH);
  1065  
  1066    for (i = 0; i < m_comps_in_frame; i++)
  1067    {
  1068      m_comp_ident[i]  = get_bits(8);
  1069      m_comp_h_samp[i] = get_bits(4);
  1070      m_comp_v_samp[i] = get_bits(4);
  1071      m_comp_quant[i]  = get_bits(8);
  1072    }
  1073  }
  1074  
  1075  // Used to skip unrecognized markers.
  1076  void jpeg_decoder::skip_variable_marker()
  1077  {
  1078    uint num_left;
  1079  
  1080    num_left = get_bits(16);
  1081  
  1082    if (num_left < 2)
  1083      stop_decoding(JPGD_BAD_VARIABLE_MARKER);
  1084  
  1085    num_left -= 2;
  1086  
  1087    while (num_left)
  1088    {
  1089      get_bits(8);
  1090      num_left--;
  1091    }
  1092  }
  1093  
  1094  // Read a define restart interval (DRI) marker.
  1095  void jpeg_decoder::read_dri_marker()
  1096  {
  1097    if (get_bits(16) != 4)
  1098      stop_decoding(JPGD_BAD_DRI_LENGTH);
  1099  
  1100    m_restart_interval = get_bits(16);
  1101  }
  1102  
  1103  // Read a start of scan (SOS) marker.
  1104  void jpeg_decoder::read_sos_marker()
  1105  {
  1106    uint num_left;
  1107    int i, ci, n, c, cc;
  1108  
  1109    num_left = get_bits(16);
  1110  
  1111    n = get_bits(8);
  1112  
  1113    m_comps_in_scan = n;
  1114  
  1115    num_left -= 3;
  1116  
  1117    if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
  1118      stop_decoding(JPGD_BAD_SOS_LENGTH);
  1119  
  1120    for (i = 0; i < n; i++)
  1121    {
  1122      cc = get_bits(8);
  1123      c = get_bits(8);
  1124      num_left -= 2;
  1125  
  1126      for (ci = 0; ci < m_comps_in_frame; ci++)
  1127        if (cc == m_comp_ident[ci])
  1128          break;
  1129  
  1130      if (ci >= m_comps_in_frame)
  1131        stop_decoding(JPGD_BAD_SOS_COMP_ID);
  1132  
  1133      m_comp_list[i]    = ci;
  1134      m_comp_dc_tab[ci] = (c >> 4) & 15;
  1135      m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
  1136    }
  1137  
  1138    m_spectral_start  = get_bits(8);
  1139    m_spectral_end    = get_bits(8);
  1140    m_successive_high = get_bits(4);
  1141    m_successive_low  = get_bits(4);
  1142  
  1143    if (!m_progressive_flag)
  1144    {
  1145      m_spectral_start = 0;
  1146      m_spectral_end = 63;
  1147    }
  1148  
  1149    num_left -= 3;
  1150  
  1151    while (num_left)                  /* read past whatever is num_left */
  1152    {
  1153      get_bits(8);
  1154      num_left--;
  1155    }
  1156  }
  1157  
  1158  // Finds the next marker.
  1159  int jpeg_decoder::next_marker()
  1160  {
  1161    uint c, bytes;
  1162  
  1163    bytes = 0;
  1164  
  1165    do
  1166    {
  1167      do
  1168      {
  1169        bytes++;
  1170        c = get_bits(8);
  1171      } while (c != 0xFF);
  1172  
  1173      do
  1174      {
  1175        c = get_bits(8);
  1176      } while (c == 0xFF);
  1177  
  1178    } while (c == 0);
  1179  
  1180    // If bytes > 0 here, there where extra bytes before the marker (not good).
  1181  
  1182    return c;
  1183  }
  1184  
  1185  // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
  1186  // encountered.
  1187  int jpeg_decoder::process_markers()
  1188  {
  1189    int c;
  1190  
  1191    for ( ; ; )
  1192    {
  1193      c = next_marker();
  1194  
  1195      switch (c)
  1196      {
  1197        case M_SOF0:
  1198        case M_SOF1:
  1199        case M_SOF2:
  1200        case M_SOF3:
  1201        case M_SOF5:
  1202        case M_SOF6:
  1203        case M_SOF7:
  1204  //      case M_JPG:
  1205        case M_SOF9:
  1206        case M_SOF10:
  1207        case M_SOF11:
  1208        case M_SOF13:
  1209        case M_SOF14:
  1210        case M_SOF15:
  1211        case M_SOI:
  1212        case M_EOI:
  1213        case M_SOS:
  1214        {
  1215          return c;
  1216        }
  1217        case M_DHT:
  1218        {
  1219          read_dht_marker();
  1220          break;
  1221        }
  1222        // No arithmitic support - dumb patents!
  1223        case M_DAC:
  1224        {
  1225          stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
  1226          break;
  1227        }
  1228        case M_DQT:
  1229        {
  1230          read_dqt_marker();
  1231          break;
  1232        }
  1233        case M_DRI:
  1234        {
  1235          read_dri_marker();
  1236          break;
  1237        }
  1238        //case M_APP0:  /* no need to read the JFIF marker */
  1239  
  1240        case M_JPG:
  1241        case M_RST0:    /* no parameters */
  1242        case M_RST1:
  1243        case M_RST2:
  1244        case M_RST3:
  1245        case M_RST4:
  1246        case M_RST5:
  1247        case M_RST6:
  1248        case M_RST7:
  1249        case M_TEM:
  1250        {
  1251          stop_decoding(JPGD_UNEXPECTED_MARKER);
  1252          break;
  1253        }
  1254        default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
  1255        {
  1256          skip_variable_marker();
  1257          break;
  1258        }
  1259      }
  1260    }
  1261  }
  1262  
  1263  // Finds the start of image (SOI) marker.
  1264  // This code is rather defensive: it only checks the first 512 bytes to avoid
  1265  // false positives.
  1266  void jpeg_decoder::locate_soi_marker()
  1267  {
  1268    uint lastchar, thischar;
  1269    uint bytesleft;
  1270  
  1271    lastchar = get_bits(8);
  1272  
  1273    thischar = get_bits(8);
  1274  
  1275    /* ok if it's a normal JPEG file without a special header */
  1276  
  1277    if ((lastchar == 0xFF) && (thischar == M_SOI))
  1278      return;
  1279  
  1280    bytesleft = 4096; //512;
  1281  
  1282    for ( ; ; )
  1283    {
  1284      if (--bytesleft == 0)
  1285        stop_decoding(JPGD_NOT_JPEG);
  1286  
  1287      lastchar = thischar;
  1288  
  1289      thischar = get_bits(8);
  1290  
  1291      if (lastchar == 0xFF)
  1292      {
  1293        if (thischar == M_SOI)
  1294          break;
  1295        else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
  1296          stop_decoding(JPGD_NOT_JPEG);
  1297      }
  1298    }
  1299  
  1300    // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
  1301    thischar = (m_bit_buf >> 24) & 0xFF;
  1302  
  1303    if (thischar != 0xFF)
  1304      stop_decoding(JPGD_NOT_JPEG);
  1305  }
  1306  
  1307  // Find a start of frame (SOF) marker.
  1308  void jpeg_decoder::locate_sof_marker()
  1309  {
  1310    locate_soi_marker();
  1311  
  1312    int c = process_markers();
  1313  
  1314    switch (c)
  1315    {
  1316      case M_SOF2:
  1317        m_progressive_flag = JPGD_TRUE;
  1318      case M_SOF0:  /* baseline DCT */
  1319      case M_SOF1:  /* extended sequential DCT */
  1320      {
  1321        read_sof_marker();
  1322        break;
  1323      }
  1324      case M_SOF9:  /* Arithmitic coding */
  1325      {
  1326        stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
  1327        break;
  1328      }
  1329      default:
  1330      {
  1331        stop_decoding(JPGD_UNSUPPORTED_MARKER);
  1332        break;
  1333      }
  1334    }
  1335  }
  1336  
  1337  // Find a start of scan (SOS) marker.
  1338  int jpeg_decoder::locate_sos_marker()
  1339  {
  1340    int c;
  1341  
  1342    c = process_markers();
  1343  
  1344    if (c == M_EOI)
  1345      return JPGD_FALSE;
  1346    else if (c != M_SOS)
  1347      stop_decoding(JPGD_UNEXPECTED_MARKER);
  1348  
  1349    read_sos_marker();
  1350  
  1351    return JPGD_TRUE;
  1352  }
  1353  
  1354  // Reset everything to default/uninitialized state.
  1355  void jpeg_decoder::init(jpeg_decoder_stream *pStream)
  1356  {
  1357    m_pMem_blocks = NULL;
  1358    m_error_code = JPGD_SUCCESS;
  1359    m_ready_flag = false;
  1360    m_image_x_size = m_image_y_size = 0;
  1361    m_pStream = pStream;
  1362    m_progressive_flag = JPGD_FALSE;
  1363  
  1364    memset(m_huff_ac, 0, sizeof(m_huff_ac));
  1365    memset(m_huff_num, 0, sizeof(m_huff_num));
  1366    memset(m_huff_val, 0, sizeof(m_huff_val));
  1367    memset(m_quant, 0, sizeof(m_quant));
  1368  
  1369    m_scan_type = 0;
  1370    m_comps_in_frame = 0;
  1371  
  1372    memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
  1373    memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
  1374    memset(m_comp_quant, 0, sizeof(m_comp_quant));
  1375    memset(m_comp_ident, 0, sizeof(m_comp_ident));
  1376    memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
  1377    memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
  1378  
  1379    m_comps_in_scan = 0;
  1380    memset(m_comp_list, 0, sizeof(m_comp_list));
  1381    memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
  1382    memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
  1383  
  1384    m_spectral_start = 0;
  1385    m_spectral_end = 0;
  1386    m_successive_low = 0;
  1387    m_successive_high = 0;
  1388    m_max_mcu_x_size = 0;
  1389    m_max_mcu_y_size = 0;
  1390    m_blocks_per_mcu = 0;
  1391    m_max_blocks_per_row = 0;
  1392    m_mcus_per_row = 0;
  1393    m_mcus_per_col = 0;
  1394    m_expanded_blocks_per_component = 0;
  1395    m_expanded_blocks_per_mcu = 0;
  1396    m_expanded_blocks_per_row = 0;
  1397    m_freq_domain_chroma_upsample = false;
  1398  
  1399    memset(m_mcu_org, 0, sizeof(m_mcu_org));
  1400  
  1401    m_total_lines_left = 0;
  1402    m_mcu_lines_left = 0;
  1403    m_real_dest_bytes_per_scan_line = 0;
  1404    m_dest_bytes_per_scan_line = 0;
  1405    m_dest_bytes_per_pixel = 0;
  1406  
  1407    memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
  1408  
  1409    memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
  1410    memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
  1411    memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
  1412  
  1413    m_eob_run = 0;
  1414  
  1415    memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
  1416  
  1417    m_pIn_buf_ofs = m_in_buf;
  1418    m_in_buf_left = 0;
  1419    m_eof_flag = false;
  1420    m_tem_flag = 0;
  1421  
  1422    memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
  1423    memset(m_in_buf, 0, sizeof(m_in_buf));
  1424    memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
  1425  
  1426    m_restart_interval = 0;
  1427    m_restarts_left    = 0;
  1428    m_next_restart_num = 0;
  1429  
  1430    m_max_mcus_per_row = 0;
  1431    m_max_blocks_per_mcu = 0;
  1432    m_max_mcus_per_col = 0;
  1433  
  1434    memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
  1435    m_pMCU_coefficients = NULL;
  1436    m_pSample_buf = NULL;
  1437  
  1438    m_total_bytes_read = 0;
  1439  
  1440    m_pScan_line_0 = NULL;
  1441    m_pScan_line_1 = NULL;
  1442  
  1443    // Ready the input buffer.
  1444    prep_in_buffer();
  1445  
  1446    // Prime the bit buffer.
  1447    m_bits_left = 16;
  1448    m_bit_buf = 0;
  1449  
  1450    get_bits(16);
  1451    get_bits(16);
  1452  
  1453    for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
  1454      m_mcu_block_max_zag[i] = 64;
  1455  }
  1456  
// Fixed-point helpers: values are scaled by 2^SCALEBITS.
#define SCALEBITS 16
// One half in that fixed-point scale (1 << (SCALEBITS-1)).
#define ONE_HALF  ((int) 1 << (SCALEBITS-1))
// Scales x by 2^SCALEBITS, adds 0.5 and truncates to int.
#define FIX(x)    ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
  1460  
  1461  // Create a few tables that allow us to quickly convert YCbCr to RGB.
  1462  void jpeg_decoder::create_look_ups()
  1463  {
  1464    for (int i = 0; i <= 255; i++)
  1465    {
  1466      int k = i - 128;
  1467      m_crr[i] = ( FIX(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
  1468      m_cbb[i] = ( FIX(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
  1469      m_crg[i] = (-FIX(0.71414f)) * k;
  1470      m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
  1471    }
  1472  }
  1473  
// This method throws back into the stream any bytes that were read
// into the bit buffer during initial marker scanning, then re-primes the bit buffer
// using get_bits_no_markers().
void jpeg_decoder::fix_in_buffer()
{
  // In case any 0xFF's were pulled into the buffer during marker scanning.
  // The bit buffer must hold a whole number of bytes at this point.
  JPGD_ASSERT((m_bits_left & 7) == 0);

  // Hand the buffered bytes to stuff_char() starting with the lowest byte of m_bit_buf.
  // The two low bytes are only given back when m_bits_left says they are present.
  // NOTE(review): presumably stuff_char() pushes a byte back in front of the input
  // pointer, so the call order here determines the restored byte order - confirm.
  if (m_bits_left == 16)
    stuff_char( (uint8)(m_bit_buf & 0xFF));

  if (m_bits_left >= 8)
    stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));

  stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
  stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));

  // Refill the bit buffer with two 16-bit fetches.
  m_bits_left = 16;
  get_bits_no_markers(16);
  get_bits_no_markers(16);
}
  1494  
  1495  void jpeg_decoder::transform_mcu(int mcu_row)
  1496  {
  1497    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
  1498    uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
  1499  
  1500    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
  1501    {
  1502      idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
  1503      pSrc_ptr += 64;
  1504      pDst_ptr += 64;
  1505    }
  1506  }
  1507  
// Lookup used by transform_mcu_expand(): indexed by (block max zag value - 1), each entry packs
// two numbers as first*16 + second, matching that function's case labels (e.g. 3*16+4 selects
// the <3, 4> template instantiations of DCT_Upsample::P_Q / R_S).
static const uint8 s_max_rc[64] =
{
  17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
  102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
  136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
  136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
};
  1515  
// Inverse-transforms one MCU, expanding each of the two trailing (chroma) coefficient blocks
// into four output blocks.
//   mcu_row: index of the MCU within the row; selects the output position in m_pSample_buf
//            (m_expanded_blocks_per_mcu * 64 bytes per MCU).
// The first m_expanded_blocks_per_component blocks go through the regular idct(). Each of the
// next two source blocks is converted to matrices P, Q, R, S by DCT_Upsample, combined, and
// emitted as four blocks through idct_4x4().
void jpeg_decoder::transform_mcu_expand(int mcu_row)
{
  jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
  uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;

  // Y IDCT
	int mcu_block;
  for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
  {
    idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
    pSrc_ptr += 64;
    pDst_ptr += 64;
  }

  // Chroma IDCT, with upsampling
	jpgd_block_t temp_block[64];

  // mcu_block carries on from the loop above, so the two iterations handle the two blocks
  // that follow the Y blocks.
  for (int i = 0; i < 2; i++)
  {
    DCT_Upsample::Matrix44 P, Q, R, S;

    JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
    JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);

    int max_zag = m_mcu_block_max_zag[mcu_block++] - 1; 
    if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
    // s_max_rc maps the block's max zag index to a packed pair; pick the P_Q/R_S template
    // instantiation with the matching <first, second> arguments.
    switch (s_max_rc[max_zag])
    {
    case 1*16+1:
      DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
      break;
    case 1*16+2:
      DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
      break;
    case 2*16+2:
      DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
      break;
    case 3*16+2:
      DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
      break;
    case 3*16+3:
      DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
      break;
    case 3*16+4:
      DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
      break;
    case 4*16+4:
      DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
      break;
    case 5*16+4:
      DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
      break;
    case 5*16+5:
      DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
      break;
    case 5*16+6:
      DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
      break;
    case 6*16+6:
      DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
      break;
    case 7*16+6:
      DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
      break;
    case 7*16+7:
      DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
      break;
    case 7*16+8:
      DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
      break;
    case 8*16+8:
      DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
      DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
      break;
    default:
      // Every value stored in s_max_rc has a case above, so this is not expected to be reached.
      JPGD_ASSERT(false);
    }

    // a = P + Q, b = P - Q, c = R + S, d = R - S. b and d reuse P's and R's storage.
    DCT_Upsample::Matrix44 a(P + Q); P -= Q;
    DCT_Upsample::Matrix44& b = P;
    DCT_Upsample::Matrix44 c(R + S); R -= S;
    DCT_Upsample::Matrix44& d = R;

    // Emit the four output blocks in order: a+c, a-c, b+d, b-d.
    DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
    idct_4x4(temp_block, pDst_ptr);
    pDst_ptr += 64;

    DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
    idct_4x4(temp_block, pDst_ptr);
    pDst_ptr += 64;

    DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
    idct_4x4(temp_block, pDst_ptr);
    pDst_ptr += 64;

    DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
    idct_4x4(temp_block, pDst_ptr);
    pDst_ptr += 64;

    pSrc_ptr += 64;
  }
}
  1632  
  1633  // Loads and dequantizes the next row of (already decoded) coefficients.
  1634  // Progressive images only.
  1635  void jpeg_decoder::load_next_row()
  1636  {
  1637    int i;
  1638    jpgd_block_t *p;
  1639    jpgd_quant_t *q;
  1640    int mcu_row, mcu_block, row_block = 0;
  1641    int component_num, component_id;
  1642    int block_x_mcu[JPGD_MAX_COMPONENTS];
  1643  
  1644    memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
  1645  
  1646    for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
  1647    {
  1648      int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
  1649  
  1650      for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
  1651      {
  1652        component_id = m_mcu_org[mcu_block];
  1653        q = m_quant[m_comp_quant[component_id]];
  1654  
  1655        p = m_pMCU_coefficients + 64 * mcu_block;
  1656  
  1657        jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
  1658        jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
  1659        p[0] = pDC[0];
  1660        memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
  1661  
  1662        for (i = 63; i > 0; i--)
  1663          if (p[g_ZAG[i]])
  1664            break;
  1665  
  1666        m_mcu_block_max_zag[mcu_block] = i + 1;
  1667  
  1668        for ( ; i >= 0; i--)
  1669  				if (p[g_ZAG[i]])
  1670  					p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
  1671  
  1672        row_block++;
  1673  
  1674        if (m_comps_in_scan == 1)
  1675          block_x_mcu[component_id]++;
  1676        else
  1677        {
  1678          if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
  1679          {
  1680            block_x_mcu_ofs = 0;
  1681  
  1682            if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
  1683            {
  1684              block_y_mcu_ofs = 0;
  1685  
  1686              block_x_mcu[component_id] += m_comp_h_samp[component_id];
  1687            }
  1688          }
  1689        }
  1690      }
  1691  
  1692      if (m_freq_domain_chroma_upsample)
  1693        transform_mcu_expand(mcu_row);
  1694      else
  1695        transform_mcu(mcu_row);
  1696    }
  1697  
  1698    if (m_comps_in_scan == 1)
  1699      m_block_y_mcu[m_comp_list[0]]++;
  1700    else
  1701    {
  1702      for (component_num = 0; component_num < m_comps_in_scan; component_num++)
  1703      {
  1704        component_id = m_comp_list[component_num];
  1705  
  1706        m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
  1707      }
  1708    }
  1709  }
  1710  
  1711  // Restart interval processing.
  1712  void jpeg_decoder::process_restart()
  1713  {
  1714    int i;
  1715    int c = 0;
  1716  
  1717    // Align to a byte boundry
  1718    // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
  1719    //get_bits_no_markers(m_bits_left & 7);
  1720  
  1721    // Let's scan a little bit to find the marker, but not _too_ far.
  1722    // 1536 is a "fudge factor" that determines how much to scan.
  1723    for (i = 1536; i > 0; i--)
  1724      if (get_char() == 0xFF)
  1725        break;
  1726  
  1727    if (i == 0)
  1728      stop_decoding(JPGD_BAD_RESTART_MARKER);
  1729  
  1730    for ( ; i > 0; i--)
  1731      if ((c = get_char()) != 0xFF)
  1732        break;
  1733  
  1734    if (i == 0)
  1735      stop_decoding(JPGD_BAD_RESTART_MARKER);
  1736  
  1737    // Is it the expected marker? If not, something bad happened.
  1738    if (c != (m_next_restart_num + M_RST0))
  1739      stop_decoding(JPGD_BAD_RESTART_MARKER);
  1740  
  1741    // Reset each component's DC prediction values.
  1742    memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
  1743  
  1744    m_eob_run = 0;
  1745  
  1746    m_restarts_left = m_restart_interval;
  1747  
  1748    m_next_restart_num = (m_next_restart_num + 1) & 7;
  1749  
  1750    // Get the bit buffer going again...
  1751  
  1752    m_bits_left = 16;
  1753    get_bits_no_markers(16);
  1754    get_bits_no_markers(16);
  1755  }
  1756  
  1757  static inline int dequantize_ac(int c, int q) {	c *= q;	return c; }
  1758  
// Decodes and dequantizes the next row of coefficients.
//
// For each MCU in the row this Huffman-decodes every block directly into
// m_pMCU_coefficients, multiplying each value by its quantizer as it is stored,
// and then passes the MCU to transform_mcu() or transform_mcu_expand().
// The coefficient buffer is reused from MCU to MCU without being cleared:
// m_mcu_block_max_zag[] remembers how far (in zigzag order) the previous MCU
// wrote into each block, and only that stale range is zeroed where needed.
// Malformed run lengths abort through stop_decoding(JPGD_DECODE_ERROR).
void jpeg_decoder::decode_next_row()
{
  // NOTE(review): row_block is incremented below but never read in this function.
  int row_block = 0;

  for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
  {
    // A restart marker is due once the restart counter has run down to zero.
    if ((m_restart_interval) && (m_restarts_left == 0))
      process_restart();

    jpgd_block_t* p = m_pMCU_coefficients;
    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
    {
      int component_id = m_mcu_org[mcu_block];
      jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];

      // DC coefficient: decode the difference, add the component's running
      // predictor, store the new predictor and the dequantized value.
      int r, s;
      s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
      s = JPGD_HUFF_EXTEND(r, s);

      m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);

      p[0] = static_cast<jpgd_block_t>(s * q[0]);

      // Zigzag extent written into this block slot by the previous MCU.
      int prev_num_set = m_mcu_block_max_zag[mcu_block];

      huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];

      // AC coefficients; k is the zigzag index of the coefficient being decoded.
      int k;
      for (k = 1; k < 64; k++)
      {
        int extra_bits;
        s = huff_decode(pH, extra_bits);

        // High nibble: number of coefficients to skip. Low nibble: size passed to JPGD_HUFF_EXTEND.
        r = s >> 4;
        s &= 15;

        if (s)
        {
          if (r)
          {
            if ((k + r) > 63)
              stop_decoding(JPGD_DECODE_ERROR);

            // Zero the skipped positions, but only those the previous MCU may have set.
            if (k < prev_num_set)
            {
              int n = JPGD_MIN(r, prev_num_set - k);
              int kt = k;
              while (n--)
                p[g_ZAG[kt++]] = 0;
            }

            k += r;
          }

          s = JPGD_HUFF_EXTEND(extra_bits, s);

          JPGD_ASSERT(k < 64);

          p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
        }
        else
        {
          if (r == 15)
          {
            // Size 0 with run 15: skip 16 coefficients.
            if ((k + 16) > 64)
              stop_decoding(JPGD_DECODE_ERROR);

            if (k < prev_num_set)
            {
              int n = JPGD_MIN(16, prev_num_set - k);
              int kt = k;
              while (n--)
              {
                JPGD_ASSERT(kt <= 63);
                p[g_ZAG[kt++]] = 0;
              }
            }

            k += 16 - 1; // - 1 because the loop counter is k
            JPGD_ASSERT(p[g_ZAG[k]] == 0);
          }
          else
            // Size 0 with any other run ends the block.
            break;
        }
      }

      // Clear whatever the previous MCU left beyond the point this block reached.
      if (k < prev_num_set)
      {
        int kt = k;
        while (kt < prev_num_set)
          p[g_ZAG[kt++]] = 0;
      }

      // Remember this block's extent for the next MCU.
      m_mcu_block_max_zag[mcu_block] = k;

      row_block++;
    }

    if (m_freq_domain_chroma_upsample)
      transform_mcu_expand(mcu_row);
    else
      transform_mcu(mcu_row);

    m_restarts_left--;
  }
}
  1866  
  1867  // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
  1868  void jpeg_decoder::H1V1Convert()
  1869  {
  1870    int row = m_max_mcu_y_size - m_mcu_lines_left;
  1871    uint8 *d = m_pScan_line_0;
  1872    uint8 *s = m_pSample_buf + row * 8;
  1873  
  1874    for (int i = m_max_mcus_per_row; i > 0; i--)
  1875    {
  1876      for (int j = 0; j < 8; j++)
  1877      {
  1878        int y = s[j];
  1879        int cb = s[64+j];
  1880        int cr = s[128+j];
  1881  
  1882        d[0] = clamp(y + m_crr[cr]);
  1883        d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
  1884        d[2] = clamp(y + m_cbb[cb]);
  1885        d[3] = 255;
  1886  
  1887        d += 4;
  1888      }
  1889  
  1890      s += 64*3;
  1891    }
  1892  }
  1893  
  1894  // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
  1895  void jpeg_decoder::H2V1Convert()
  1896  {
  1897    int row = m_max_mcu_y_size - m_mcu_lines_left;
  1898    uint8 *d0 = m_pScan_line_0;
  1899    uint8 *y = m_pSample_buf + row * 8;
  1900    uint8 *c = m_pSample_buf + 2*64 + row * 8;
  1901  
  1902    for (int i = m_max_mcus_per_row; i > 0; i--)
  1903    {
  1904      for (int l = 0; l < 2; l++)
  1905      {
  1906        for (int j = 0; j < 4; j++)
  1907        {
  1908          int cb = c[0];
  1909          int cr = c[64];
  1910  
  1911          int rc = m_crr[cr];
  1912          int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
  1913          int bc = m_cbb[cb];
  1914  
  1915          int yy = y[j<<1];
  1916          d0[0] = clamp(yy+rc);
  1917          d0[1] = clamp(yy+gc);
  1918          d0[2] = clamp(yy+bc);
  1919          d0[3] = 255;
  1920  
  1921          yy = y[(j<<1)+1];
  1922          d0[4] = clamp(yy+rc);
  1923          d0[5] = clamp(yy+gc);
  1924          d0[6] = clamp(yy+bc);
  1925          d0[7] = 255;
  1926  
  1927          d0 += 8;
  1928  
  1929          c++;
  1930        }
  1931        y += 64;
  1932      }
  1933  
  1934      y += 64*4 - 64*2;
  1935      c += 64*4 - 8;
  1936    }
  1937  }
  1938  
  1939  // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
  1940  void jpeg_decoder::H1V2Convert()
  1941  {
  1942    int row = m_max_mcu_y_size - m_mcu_lines_left;
  1943    uint8 *d0 = m_pScan_line_0;
  1944    uint8 *d1 = m_pScan_line_1;
  1945    uint8 *y;
  1946    uint8 *c;
  1947  
  1948    if (row < 8)
  1949      y = m_pSample_buf + row * 8;
  1950    else
  1951      y = m_pSample_buf + 64*1 + (row & 7) * 8;
  1952  
  1953    c = m_pSample_buf + 64*2 + (row >> 1) * 8;
  1954  
  1955    for (int i = m_max_mcus_per_row; i > 0; i--)
  1956    {
  1957      for (int j = 0; j < 8; j++)
  1958      {
  1959        int cb = c[0+j];
  1960        int cr = c[64+j];
  1961  
  1962        int rc = m_crr[cr];
  1963        int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
  1964        int bc = m_cbb[cb];
  1965  
  1966        int yy = y[j];
  1967        d0[0] = clamp(yy+rc);
  1968        d0[1] = clamp(yy+gc);
  1969        d0[2] = clamp(yy+bc);
  1970        d0[3] = 255;
  1971  
  1972        yy = y[8+j];
  1973        d1[0] = clamp(yy+rc);
  1974        d1[1] = clamp(yy+gc);
  1975        d1[2] = clamp(yy+bc);
  1976        d1[3] = 255;
  1977  
  1978        d0 += 4;
  1979        d1 += 4;
  1980      }
  1981  
  1982      y += 64*4;
  1983      c += 64*4;
  1984    }
  1985  }
  1986  
  1987  // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
  1988  void jpeg_decoder::H2V2Convert()
  1989  {
  1990  	int row = m_max_mcu_y_size - m_mcu_lines_left;
  1991  	uint8 *d0 = m_pScan_line_0;
  1992  	uint8 *d1 = m_pScan_line_1;
  1993  	uint8 *y;
  1994  	uint8 *c;
  1995  
  1996  	if (row < 8)
  1997  		y = m_pSample_buf + row * 8;
  1998  	else
  1999  		y = m_pSample_buf + 64*2 + (row & 7) * 8;
  2000  
  2001  	c = m_pSample_buf + 64*4 + (row >> 1) * 8;
  2002  
  2003  	for (int i = m_max_mcus_per_row; i > 0; i--)
  2004  	{
  2005  		for (int l = 0; l < 2; l++)
  2006  		{
  2007  			for (int j = 0; j < 8; j += 2)
  2008  			{
  2009  				int cb = c[0];
  2010  				int cr = c[64];
  2011  
  2012  				int rc = m_crr[cr];
  2013  				int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
  2014  				int bc = m_cbb[cb];
  2015  
  2016  				int yy = y[j];
  2017  				d0[0] = clamp(yy+rc);
  2018  				d0[1] = clamp(yy+gc);
  2019  				d0[2] = clamp(yy+bc);
  2020  				d0[3] = 255;
  2021  
  2022  				yy = y[j+1];
  2023  				d0[4] = clamp(yy+rc);
  2024  				d0[5] = clamp(yy+gc);
  2025  				d0[6] = clamp(yy+bc);
  2026  				d0[7] = 255;
  2027  
  2028  				yy = y[j+8];
  2029  				d1[0] = clamp(yy+rc);
  2030  				d1[1] = clamp(yy+gc);
  2031  				d1[2] = clamp(yy+bc);
  2032  				d1[3] = 255;
  2033  
  2034  				yy = y[j+8+1];
  2035  				d1[4] = clamp(yy+rc);
  2036  				d1[5] = clamp(yy+gc);
  2037  				d1[6] = clamp(yy+bc);
  2038  				d1[7] = 255;
  2039  
  2040  				d0 += 8;
  2041  				d1 += 8;
  2042  
  2043  				c++;
  2044  			}
  2045  			y += 64;
  2046  		}
  2047  
  2048  		y += 64*6 - 64*2;
  2049  		c += 64*6 - 8;
  2050  	}
  2051  }
  2052  
  2053  // Y (1 block per MCU) to 8-bit grayscale
  2054  void jpeg_decoder::gray_convert()
  2055  {
  2056    int row = m_max_mcu_y_size - m_mcu_lines_left;
  2057    uint8 *d = m_pScan_line_0;
  2058    uint8 *s = m_pSample_buf + row * 8;
  2059  
  2060    for (int i = m_max_mcus_per_row; i > 0; i--)
  2061    {
  2062      *(uint *)d = *(uint *)s;
  2063      *(uint *)(&d[4]) = *(uint *)(&s[4]);
  2064  
  2065      s += 64;
  2066      d += 8;
  2067    }
  2068  }
  2069  
  2070  void jpeg_decoder::expanded_convert()
  2071  {
  2072    int row = m_max_mcu_y_size - m_mcu_lines_left;
  2073  
  2074    uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
  2075  
  2076    uint8* d = m_pScan_line_0;
  2077  
  2078    for (int i = m_max_mcus_per_row; i > 0; i--)
  2079    {
  2080      for (int k = 0; k < m_max_mcu_x_size; k += 8)
  2081      {
  2082        const int Y_ofs = k * 8;
  2083        const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
  2084        const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
  2085        for (int j = 0; j < 8; j++)
  2086        {
  2087          int y = Py[Y_ofs + j];
  2088          int cb = Py[Cb_ofs + j];
  2089          int cr = Py[Cr_ofs + j];
  2090  
  2091          d[0] = clamp(y + m_crr[cr]);
  2092          d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
  2093          d[2] = clamp(y + m_cbb[cb]);
  2094          d[3] = 255;
  2095  
  2096          d += 4;
  2097        }
  2098      }
  2099  
  2100      Py += 64 * m_expanded_blocks_per_mcu;
  2101    }
  2102  }
  2103  
  2104  // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
  2105  void jpeg_decoder::find_eoi()
  2106  {
  2107    if (!m_progressive_flag)
  2108    {
  2109      // Attempt to read the EOI marker.
  2110      //get_bits_no_markers(m_bits_left & 7);
  2111  
  2112      // Prime the bit buffer
  2113      m_bits_left = 16;
  2114      get_bits(16);
  2115      get_bits(16);
  2116  
  2117      // The next marker _should_ be EOI
  2118      process_markers();
  2119    }
  2120  
  2121    m_total_bytes_read -= m_in_buf_left;
  2122  }
  2123  
// Produces the next output scan line.
// On success stores a pointer to the line in *pScan_line and its length in bytes
// (m_real_dest_bytes_per_scan_line) in *pScan_line_len and returns JPGD_SUCCESS.
// Returns JPGD_DONE when no lines are left and JPGD_FAILED if an error code is
// already set, the decoder is not ready, or decoding the next MCU row fails.
int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
{
  if ((m_error_code) || (!m_ready_flag))
    return JPGD_FAILED;

  if (m_total_lines_left == 0)
    return JPGD_DONE;

  // All lines of the current MCU row have been handed out: decode the next row.
  if (m_mcu_lines_left == 0)
  {
    // Error landing site: a non-zero setjmp result means control came back here
    // via m_jmp_state (presumably a longjmp from stop_decoding() - confirm).
    if (setjmp(m_jmp_state))
      return JPGD_FAILED;

    if (m_progressive_flag)
      load_next_row();
    else
      decode_next_row();

    // Find the EOI marker if that was the last row.
    if (m_total_lines_left <= m_max_mcu_y_size)
      find_eoi();

    m_mcu_lines_left = m_max_mcu_y_size;
  }

  if (m_freq_domain_chroma_upsample)
  {
    expanded_convert();
    *pScan_line = m_pScan_line_0;
  }
  else
  {
    // NOTE(review): there is no default case; *pScan_line is left untouched for
    // any scan type not listed here.
    switch (m_scan_type)
    {
      case JPGD_YH2V2:
      {
        // The converter fills two lines per call: convert when an even number of
        // lines remains, then return the second line on the following call.
        if ((m_mcu_lines_left & 1) == 0)
        {
          H2V2Convert();
          *pScan_line = m_pScan_line_0;
        }
        else
          *pScan_line = m_pScan_line_1;

        break;
      }
      case JPGD_YH2V1:
      {
        H2V1Convert();
        *pScan_line = m_pScan_line_0;
        break;
      }
      case JPGD_YH1V2:
      {
        // Same two-lines-per-call scheme as JPGD_YH2V2.
        if ((m_mcu_lines_left & 1) == 0)
        {
          H1V2Convert();
          *pScan_line = m_pScan_line_0;
        }
        else
          *pScan_line = m_pScan_line_1;

        break;
      }
      case JPGD_YH1V1:
      {
        H1V1Convert();
        *pScan_line = m_pScan_line_0;
        break;
      }
      case JPGD_GRAYSCALE:
      {
        gray_convert();
        *pScan_line = m_pScan_line_0;

        break;
      }
    }
  }

  *pScan_line_len = m_real_dest_bytes_per_scan_line;

  m_mcu_lines_left--;
  m_total_lines_left--;

  return JPGD_SUCCESS;
}
  2211  
// Creates the tables needed for efficient Huffman decoding.
//
// index selects the source table (m_huff_num[index] holds the number of codes of
// each length 1..16, m_huff_val[index] the symbols in code order); pH receives
// the generated decoding tables:
//   look_up   - indexed by 8 bits of input: the symbol for codes of up to
//               8 bits, or a negative tree entry for longer codes
//   look_up2  - like look_up, but also packs the number of bits to consume and,
//               where they fit in the same 8 bits, the symbol's extra bits
//   tree      - entries used to resolve codes longer than 8 bits one bit at a time
//   code_size - code length of each symbol
void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
{
  int p, i, l, si;
  uint8 huffsize[257];
  uint huffcode[257];
  uint code;
  uint subtree;
  int code_size;
  int lastp;
  int nextfreeentry;
  int currententry;

  pH->ac_table = m_huff_ac[index] != 0;

  p = 0;

  // Expand the per-length counts into a list holding one code length per symbol.
  // NOTE(review): huffsize/huffcode hold 257 entries; this relies on the counts
  // having been validated elsewhere - confirm against the DHT reader.
  for (l = 1; l <= 16; l++)
  {
    for (i = 1; i <= m_huff_num[index][l]; i++)
      huffsize[p++] = static_cast<uint8>(l);
  }

  // A zero length terminates the list.
  huffsize[p] = 0;

  lastp = p;

  // Assign code values: consecutive within one length, shifted left by one bit
  // each time the length increases.
  code = 0;
  si = huffsize[0];
  p = 0;

  while (huffsize[p])
  {
    while (huffsize[p] == si)
    {
      huffcode[p++] = code;
      code++;
    }

    code <<= 1;
    si++;
  }

  memset(pH->look_up, 0, sizeof(pH->look_up));
  memset(pH->look_up2, 0, sizeof(pH->look_up2));
  memset(pH->tree, 0, sizeof(pH->tree));
  memset(pH->code_size, 0, sizeof(pH->code_size));

  // Tree entries are referred to by negative numbers, handed out two at a time
  // (entry e maps to tree[-e - 1]).
  nextfreeentry = -1;

  p = 0;

  while (p < lastp)
  {
    i = m_huff_val[index][p];
    code = huffcode[p];
    code_size = huffsize[p];

    pH->code_size[i] = static_cast<uint8>(code_size);

    if (code_size <= 8)
    {
      // Short code: fill every 8-bit index that starts with this code.
      code <<= (8 - code_size);

      for (l = 1 << (8 - code_size); l > 0; l--)
      {
        JPGD_ASSERT(i < 256);

        pH->look_up[code] = i;

        bool has_extrabits = false;
        int extra_bits = 0;
        // The low nibble of the symbol is the number of extra bits that follow the code.
        int num_extra_bits = i & 15;

        int bits_to_fetch = code_size;
        if (num_extra_bits)
        {
          int total_codesize = code_size + num_extra_bits;
          if (total_codesize <= 8)
          {
            // Code and extra bits both fit in the 8-bit index, so the extra bits
            // can be taken from the index itself and stored in the entry.
            has_extrabits = true;
            extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
            JPGD_ASSERT(extra_bits <= 0x7FFF);
            bits_to_fetch += num_extra_bits;
          }
        }

        // Entry layout: symbol in bits 0..7, bit count from bit 8 up; when the
        // extra bits are included, flag 0x8000 is set and they start at bit 16.
        if (!has_extrabits)
          pH->look_up2[code] = i | (bits_to_fetch << 8);
        else
          pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);

        code++;
      }
    }
    else
    {
      // Long code: the first 8 bits select a tree entry, the remaining bits walk the tree.
      subtree = (code >> (code_size - 8)) & 0xFF;

      currententry = pH->look_up[subtree];

      if (currententry == 0)
      {
        // First long code with this prefix: allocate a new pair of tree entries.
        pH->look_up[subtree] = currententry = nextfreeentry;
        pH->look_up2[subtree] = currententry = nextfreeentry;

        nextfreeentry -= 2;
      }

      // Move the first bit after the 8-bit prefix up to bit 15, where it is tested below.
      code <<= (16 - (code_size - 8));

      for (l = code_size; l > 9; l--)
      {
        // A 0 bit selects the second entry of the pair.
        if ((code & 0x8000) == 0)
          currententry--;

        if (pH->tree[-currententry - 1] == 0)
        {
          // Nothing linked here yet: allocate a new pair and link to it.
          pH->tree[-currententry - 1] = nextfreeentry;

          currententry = nextfreeentry;

          nextfreeentry -= 2;
        }
        else
          currententry = pH->tree[-currententry - 1];

        code <<= 1;
      }

      // The last bit of the code selects the entry that stores the symbol.
      if ((code & 0x8000) == 0)
        currententry--;

      pH->tree[-currententry - 1] = i;
    }

    p++;
  }
}
  2351  
  2352  // Verifies the quantization tables needed for this scan are available.
  2353  void jpeg_decoder::check_quant_tables()
  2354  {
  2355    for (int i = 0; i < m_comps_in_scan; i++)
  2356      if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL)
  2357        stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
  2358  }
  2359  
  2360  // Verifies that all the Huffman tables needed for this scan are available.
  2361  void jpeg_decoder::check_huff_tables()
  2362  {
  2363    for (int i = 0; i < m_comps_in_scan; i++)
  2364    {
  2365      if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL))
  2366        stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
  2367  
  2368      if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL))
  2369        stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
  2370    }
  2371  
  2372    for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
  2373      if (m_huff_num[i])
  2374      {
  2375        if (!m_pHuff_tabs[i])
  2376          m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
  2377  
  2378        make_huff_table(i, m_pHuff_tabs[i]);
  2379      }
  2380  }
  2381  
  2382  // Determines the component order inside each MCU.
  2383  // Also calcs how many MCU's are on each row, etc.
  2384  void jpeg_decoder::calc_mcu_block_order()
  2385  {
  2386    int component_num, component_id;
  2387    int max_h_samp = 0, max_v_samp = 0;
  2388  
  2389    for (component_id = 0; component_id < m_comps_in_frame; component_id++)
  2390    {
  2391      if (m_comp_h_samp[component_id] > max_h_samp)
  2392        max_h_samp = m_comp_h_samp[component_id];
  2393  
  2394      if (m_comp_v_samp[component_id] > max_v_samp)
  2395        max_v_samp = m_comp_v_samp[component_id];
  2396    }
  2397  
  2398    for (component_id = 0; component_id < m_comps_in_frame; component_id++)
  2399    {
  2400      m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
  2401      m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
  2402    }
  2403  
  2404    if (m_comps_in_scan == 1)
  2405    {
  2406      m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
  2407      m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
  2408    }
  2409    else
  2410    {
  2411      m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
  2412      m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
  2413    }
  2414  
  2415    if (m_comps_in_scan == 1)
  2416    {
  2417      m_mcu_org[0] = m_comp_list[0];
  2418  
  2419      m_blocks_per_mcu = 1;
  2420    }
  2421    else
  2422    {
  2423      m_blocks_per_mcu = 0;
  2424  
  2425      for (component_num = 0; component_num < m_comps_in_scan; component_num++)
  2426      {
  2427        int num_blocks;
  2428  
  2429        component_id = m_comp_list[component_num];
  2430  
  2431        num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
  2432  
  2433        while (num_blocks--)
  2434          m_mcu_org[m_blocks_per_mcu++] = component_id;
  2435      }
  2436    }
  2437  }
  2438  
  2439  // Starts a new scan.
  2440  int jpeg_decoder::init_scan()
  2441  {
  2442    if (!locate_sos_marker())
  2443      return JPGD_FALSE;
  2444  
  2445    calc_mcu_block_order();
  2446  
  2447    check_huff_tables();
  2448  
  2449    check_quant_tables();
  2450  
  2451    memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
  2452  
  2453    m_eob_run = 0;
  2454  
  2455    if (m_restart_interval)
  2456    {
  2457      m_restarts_left = m_restart_interval;
  2458      m_next_restart_num = 0;
  2459    }
  2460  
  2461    fix_in_buffer();
  2462  
  2463    return JPGD_TRUE;
  2464  }
  2465  
  2466  // Starts a frame. Determines if the number of components or sampling factors
  2467  // are supported.
  2468  void jpeg_decoder::init_frame()
  2469  {
  2470    int i;
  2471  
  2472    if (m_comps_in_frame == 1)
  2473    {
  2474      if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
  2475        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
  2476  
  2477      m_scan_type = JPGD_GRAYSCALE;
  2478      m_max_blocks_per_mcu = 1;
  2479      m_max_mcu_x_size = 8;
  2480      m_max_mcu_y_size = 8;
  2481    }
  2482    else if (m_comps_in_frame == 3)
  2483    {
  2484      if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
  2485           ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
  2486        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
  2487  
  2488      if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
  2489      {
  2490        m_scan_type = JPGD_YH1V1;
  2491  
  2492        m_max_blocks_per_mcu = 3;
  2493        m_max_mcu_x_size = 8;
  2494        m_max_mcu_y_size = 8;
  2495      }
  2496      else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
  2497      {
  2498        m_scan_type = JPGD_YH2V1;
  2499        m_max_blocks_per_mcu = 4;
  2500        m_max_mcu_x_size = 16;
  2501        m_max_mcu_y_size = 8;
  2502      }
  2503      else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
  2504      {
  2505        m_scan_type = JPGD_YH1V2;
  2506        m_max_blocks_per_mcu = 4;
  2507        m_max_mcu_x_size = 8;
  2508        m_max_mcu_y_size = 16;
  2509      }
  2510      else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
  2511      {
  2512        m_scan_type = JPGD_YH2V2;
  2513        m_max_blocks_per_mcu = 6;
  2514        m_max_mcu_x_size = 16;
  2515        m_max_mcu_y_size = 16;
  2516      }
  2517      else
  2518        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
  2519    }
  2520    else
  2521      stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
  2522  
  2523    m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
  2524    m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
  2525  
  2526    // These values are for the *destination* pixels: after conversion.
  2527    if (m_scan_type == JPGD_GRAYSCALE)
  2528      m_dest_bytes_per_pixel = 1;
  2529    else
  2530      m_dest_bytes_per_pixel = 4;
  2531  
  2532    m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
  2533  
  2534    m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
  2535  
  2536    // Initialize two scan line buffers.
  2537    m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
  2538    if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
  2539      m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
  2540  
  2541    m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
  2542  
  2543    // Should never happen
  2544    if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
  2545      stop_decoding(JPGD_ASSERTION_ERROR);
  2546  
  2547    // Allocate the coefficient buffer, enough for one MCU
  2548    m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
  2549  
  2550    for (i = 0; i < m_max_blocks_per_mcu; i++)
  2551      m_mcu_block_max_zag[i] = 64;
  2552  
  2553    m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
  2554    m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
  2555    m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
  2556  	// Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
  2557    m_freq_domain_chroma_upsample = false;
  2558  #if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
  2559    m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
  2560  #endif
  2561  
  2562    if (m_freq_domain_chroma_upsample)
  2563      m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
  2564    else
  2565      m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
  2566  
  2567    m_total_lines_left = m_image_y_size;
  2568  
  2569    m_mcu_lines_left = 0;
  2570  
  2571    create_look_ups();
  2572  }
  2573  
  2574  // The coeff_buf series of methods originally stored the coefficients
  2575  // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
  2576  // was used to make this process more efficient. Now, we can store the entire
  2577  // thing in RAM.
  2578  jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
  2579  {
  2580    coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
  2581  
  2582    cb->block_num_x = block_num_x;
  2583    cb->block_num_y = block_num_y;
  2584    cb->block_len_x = block_len_x;
  2585    cb->block_len_y = block_len_y;
  2586    cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
  2587    cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
  2588    return cb;
  2589  }
  2590  
  2591  inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
  2592  {
  2593    JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
  2594    return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
  2595  }
  2596  
// The following methods decode the various types of blocks encountered
// in progressively encoded images.
  2599  void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
  2600  {
  2601    int s, r;
  2602    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
  2603  
  2604    if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
  2605    {
  2606      r = pD->get_bits_no_markers(s);
  2607      s = JPGD_HUFF_EXTEND(r, s);
  2608    }
  2609  
  2610    pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
  2611  
  2612    p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
  2613  }
  2614  
  2615  void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
  2616  {
  2617    if (pD->get_bits_no_markers(1))
  2618    {
  2619      jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
  2620  
  2621      p[0] |= (1 << pD->m_successive_low);
  2622    }
  2623  }
  2624  
  2625  void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
  2626  {
  2627    int k, s, r;
  2628  
  2629    if (pD->m_eob_run)
  2630    {
  2631      pD->m_eob_run--;
  2632      return;
  2633    }
  2634  
  2635    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
  2636  
  2637    for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
  2638    {
  2639      s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
  2640  
  2641      r = s >> 4;
  2642      s &= 15;
  2643  
  2644      if (s)
  2645      {
  2646        if ((k += r) > 63)
  2647          pD->stop_decoding(JPGD_DECODE_ERROR);
  2648  
  2649        r = pD->get_bits_no_markers(s);
  2650        s = JPGD_HUFF_EXTEND(r, s);
  2651  
  2652        p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
  2653      }
  2654      else
  2655      {
  2656        if (r == 15)
  2657        {
  2658          if ((k += 15) > 63)
  2659            pD->stop_decoding(JPGD_DECODE_ERROR);
  2660        }
  2661        else
  2662        {
  2663          pD->m_eob_run = 1 << r;
  2664  
  2665          if (r)
  2666            pD->m_eob_run += pD->get_bits_no_markers(r);
  2667  
  2668          pD->m_eob_run--;
  2669  
  2670          break;
  2671        }
  2672      }
  2673    }
  2674  }
  2675  
  2676  void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
  2677  {
  2678    int s, k, r;
  2679    int p1 = 1 << pD->m_successive_low;
  2680    int m1 = (-1) << pD->m_successive_low;
  2681    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
  2682    
  2683    JPGD_ASSERT(pD->m_spectral_end <= 63);
  2684    
  2685    k = pD->m_spectral_start;
  2686    
  2687    if (pD->m_eob_run == 0)
  2688    {
  2689      for ( ; k <= pD->m_spectral_end; k++)
  2690      {
  2691        s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
  2692  
  2693        r = s >> 4;
  2694        s &= 15;
  2695  
  2696        if (s)
  2697        {
  2698          if (s != 1)
  2699            pD->stop_decoding(JPGD_DECODE_ERROR);
  2700  
  2701          if (pD->get_bits_no_markers(1))
  2702            s = p1;
  2703          else
  2704            s = m1;
  2705        }
  2706        else
  2707        {
  2708          if (r != 15)
  2709          {
  2710            pD->m_eob_run = 1 << r;
  2711  
  2712            if (r)
  2713              pD->m_eob_run += pD->get_bits_no_markers(r);
  2714  
  2715            break;
  2716          }
  2717        }
  2718  
  2719        do
  2720        {
  2721          jpgd_block_t *this_coef = p + g_ZAG[k & 63];
  2722  
  2723          if (*this_coef != 0)
  2724          {
  2725            if (pD->get_bits_no_markers(1))
  2726            {
  2727              if ((*this_coef & p1) == 0)
  2728              {
  2729                if (*this_coef >= 0)
  2730                  *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
  2731                else
  2732                  *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
  2733              }
  2734            }
  2735          }
  2736          else
  2737          {
  2738            if (--r < 0)
  2739              break;
  2740          }
  2741  
  2742          k++;
  2743  
  2744        } while (k <= pD->m_spectral_end);
  2745  
  2746        if ((s) && (k < 64))
  2747        {
  2748          p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
  2749        }
  2750      }
  2751    }
  2752  
  2753    if (pD->m_eob_run > 0)
  2754    {
  2755      for ( ; k <= pD->m_spectral_end; k++)
  2756      {
  2757        jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
  2758  
  2759        if (*this_coef != 0)
  2760        {
  2761          if (pD->get_bits_no_markers(1))
  2762          {
  2763            if ((*this_coef & p1) == 0)
  2764            {
  2765              if (*this_coef >= 0)
  2766                *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
  2767              else
  2768                *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
  2769            }
  2770          }
  2771        }
  2772      }
  2773  
  2774      pD->m_eob_run--;
  2775    }
  2776  }
  2777  
  2778  // Decode a scan in a progressively encoded image.
  2779  void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
  2780  {
  2781    int mcu_row, mcu_col, mcu_block;
  2782    int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
  2783  
  2784    memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
  2785  
  2786    for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
  2787    {
  2788      int component_num, component_id;
  2789  
  2790      memset(block_x_mcu, 0, sizeof(block_x_mcu));
  2791  
  2792      for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
  2793      {
  2794        int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
  2795  
  2796        if ((m_restart_interval) && (m_restarts_left == 0))
  2797          process_restart();
  2798  
  2799        for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
  2800        {
  2801          component_id = m_mcu_org[mcu_block];
  2802  
  2803          decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
  2804  
  2805          if (m_comps_in_scan == 1)
  2806            block_x_mcu[component_id]++;
  2807          else
  2808          {
  2809            if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
  2810            {
  2811              block_x_mcu_ofs = 0;
  2812  
  2813              if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
  2814              {
  2815                block_y_mcu_ofs = 0;
  2816                block_x_mcu[component_id] += m_comp_h_samp[component_id];
  2817              }
  2818            }
  2819          }
  2820        }
  2821  
  2822        m_restarts_left--;
  2823      }
  2824  
  2825      if (m_comps_in_scan == 1)
  2826        m_block_y_mcu[m_comp_list[0]]++;
  2827      else
  2828      {
  2829        for (component_num = 0; component_num < m_comps_in_scan; component_num++)
  2830        {
  2831          component_id = m_comp_list[component_num];
  2832          m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
  2833        }
  2834      }
  2835    }
  2836  }
  2837  
  2838  // Decode a progressively encoded image.
  2839  void jpeg_decoder::init_progressive()
  2840  {
  2841    int i;
  2842  
  2843    if (m_comps_in_frame == 4)
  2844      stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
  2845  
  2846    // Allocate the coefficient buffers.
  2847    for (i = 0; i < m_comps_in_frame; i++)
  2848    {
  2849      m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
  2850      m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
  2851    }
  2852  
  2853    for ( ; ; )
  2854    {
  2855      int dc_only_scan, refinement_scan;
  2856      pDecode_block_func decode_block_func;
  2857  
  2858      if (!init_scan())
  2859        break;
  2860  
  2861      dc_only_scan = (m_spectral_start == 0);
  2862      refinement_scan = (m_successive_high != 0);
  2863  
  2864      if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
  2865        stop_decoding(JPGD_BAD_SOS_SPECTRAL);
  2866  
  2867      if (dc_only_scan)
  2868      {
  2869        if (m_spectral_end)
  2870          stop_decoding(JPGD_BAD_SOS_SPECTRAL);
  2871      }
  2872      else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
  2873        stop_decoding(JPGD_BAD_SOS_SPECTRAL);
  2874  
  2875      if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
  2876        stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
  2877  
  2878      if (dc_only_scan)
  2879      {
  2880        if (refinement_scan)
  2881          decode_block_func = decode_block_dc_refine;
  2882        else
  2883          decode_block_func = decode_block_dc_first;
  2884      }
  2885      else
  2886      {
  2887        if (refinement_scan)
  2888          decode_block_func = decode_block_ac_refine;
  2889        else
  2890          decode_block_func = decode_block_ac_first;
  2891      }
  2892  
  2893      decode_scan(decode_block_func);
  2894  
  2895      m_bits_left = 16;
  2896      get_bits(16);
  2897      get_bits(16);
  2898    }
  2899  
  2900    m_comps_in_scan = m_comps_in_frame;
  2901  
  2902    for (i = 0; i < m_comps_in_frame; i++)
  2903      m_comp_list[i] = i;
  2904  
  2905    calc_mcu_block_order();
  2906  }
  2907  
  2908  void jpeg_decoder::init_sequential()
  2909  {
  2910    if (!init_scan())
  2911      stop_decoding(JPGD_UNEXPECTED_MARKER);
  2912  }
  2913  
  2914  void jpeg_decoder::decode_start()
  2915  {
  2916    init_frame();
  2917  
  2918    if (m_progressive_flag)
  2919      init_progressive();
  2920    else
  2921      init_sequential();
  2922  }
  2923  
  2924  void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
  2925  {
  2926    init(pStream);
  2927    locate_sof_marker();
  2928  }
  2929  
  2930  jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
  2931  {
  2932    if (setjmp(m_jmp_state))
  2933      return;
  2934    decode_init(pStream);
  2935  }
  2936  
  2937  int jpeg_decoder::begin_decoding()
  2938  {
  2939    if (m_ready_flag)
  2940      return JPGD_SUCCESS;
  2941  
  2942    if (m_error_code)
  2943      return JPGD_FAILED;
  2944  
  2945    if (setjmp(m_jmp_state))
  2946      return JPGD_FAILED;
  2947  
  2948    decode_start();
  2949  
  2950    m_ready_flag = true;
  2951  
  2952    return JPGD_SUCCESS;
  2953  }
  2954  
  2955  jpeg_decoder::~jpeg_decoder()
  2956  {
  2957    free_all_blocks();
  2958  }
  2959  
  2960  jpeg_decoder_file_stream::jpeg_decoder_file_stream()
  2961  {
  2962    m_pFile = NULL;
  2963    m_eof_flag = false;
  2964    m_error_flag = false;
  2965  }
  2966  
  2967  void jpeg_decoder_file_stream::close()
  2968  {
  2969    if (m_pFile)
  2970    {
  2971      fclose(m_pFile);
  2972      m_pFile = NULL;
  2973    }
  2974  
  2975    m_eof_flag = false;
  2976    m_error_flag = false;
  2977  }
  2978  
  2979  jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
  2980  {
  2981    close();
  2982  }
  2983  
  2984  bool jpeg_decoder_file_stream::open(const char *Pfilename)
  2985  {
  2986    close();
  2987  
  2988    m_eof_flag = false;
  2989    m_error_flag = false;
  2990  
  2991  #if defined(_MSC_VER)
  2992    m_pFile = NULL;
  2993    fopen_s(&m_pFile, Pfilename, "rb");
  2994  #else
  2995    m_pFile = fopen(Pfilename, "rb");
  2996  #endif
  2997    return m_pFile != NULL;
  2998  }
  2999  
  3000  int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
  3001  {
  3002    if (!m_pFile)
  3003      return -1;
  3004  
  3005    if (m_eof_flag)
  3006    {
  3007      *pEOF_flag = true;
  3008      return 0;
  3009    }
  3010  
  3011    if (m_error_flag)
  3012      return -1;
  3013  
  3014    int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
  3015    if (bytes_read < max_bytes_to_read)
  3016    {
  3017      if (ferror(m_pFile))
  3018      {
  3019        m_error_flag = true;
  3020        return -1;
  3021      }
  3022  
  3023      m_eof_flag = true;
  3024      *pEOF_flag = true;
  3025    }
  3026  
  3027    return bytes_read;
  3028  }
  3029  
  3030  bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size)
  3031  {
  3032    close();
  3033    m_pSrc_data = pSrc_data;
  3034    m_ofs = 0;
  3035    m_size = size;
  3036    return true;
  3037  }
  3038  
  3039  int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
  3040  {
  3041    *pEOF_flag = false;
  3042  
  3043    if (!m_pSrc_data)
  3044      return -1;
  3045  
  3046    uint bytes_remaining = m_size - m_ofs;
  3047    if ((uint)max_bytes_to_read > bytes_remaining)
  3048    {
  3049      max_bytes_to_read = bytes_remaining;
  3050      *pEOF_flag = true;
  3051    }
  3052  
  3053    memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
  3054    m_ofs += max_bytes_to_read;
  3055  
  3056    return max_bytes_to_read;
  3057  }
  3058  
  3059  unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps)
  3060  {
  3061    if (!actual_comps)
  3062      return NULL;
  3063    *actual_comps = 0;
  3064  
  3065    if ((!pStream) || (!width) || (!height) || (!req_comps))
  3066      return NULL;
  3067  
  3068    if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
  3069      return NULL;
  3070  
  3071    jpeg_decoder decoder(pStream);
  3072    if (decoder.get_error_code() != JPGD_SUCCESS)
  3073      return NULL;
  3074  
  3075    const int image_width = decoder.get_width(), image_height = decoder.get_height();
  3076    *width = image_width;
  3077    *height = image_height;
  3078    *actual_comps = decoder.get_num_components();
  3079  
  3080    if (decoder.begin_decoding() != JPGD_SUCCESS)
  3081      return NULL;
  3082  
  3083    const int dst_bpl = image_width * req_comps;
  3084  
  3085    uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
  3086    if (!pImage_data)
  3087      return NULL;
  3088  
  3089    for (int y = 0; y < image_height; y++)
  3090    {
  3091      const uint8* pScan_line;
  3092      uint scan_line_len;
  3093      if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
  3094      {
  3095        jpgd_free(pImage_data);
  3096        return NULL;
  3097      }
  3098  
  3099      uint8 *pDst = pImage_data + y * dst_bpl;
  3100  
  3101      if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3)))
  3102        memcpy(pDst, pScan_line, dst_bpl);
  3103      else if (decoder.get_num_components() == 1)
  3104      {
  3105        if (req_comps == 3)
  3106        {
  3107          for (int x = 0; x < image_width; x++)
  3108          {
  3109            uint8 luma = pScan_line[x];
  3110            pDst[0] = luma;
  3111            pDst[1] = luma;
  3112            pDst[2] = luma;
  3113            pDst += 3;
  3114          }
  3115        }
  3116        else
  3117        {
  3118          for (int x = 0; x < image_width; x++)
  3119          {
  3120            uint8 luma = pScan_line[x];
  3121            pDst[0] = luma;
  3122            pDst[1] = luma;
  3123            pDst[2] = luma;
  3124            pDst[3] = 255;
  3125            pDst += 4;
  3126          }
  3127        }
  3128      }
  3129      else if (decoder.get_num_components() == 3)
  3130      {
  3131        if (req_comps == 1)
  3132        {
  3133          const int YR = 19595, YG = 38470, YB = 7471;
  3134          for (int x = 0; x < image_width; x++)
  3135          {
  3136            int r = pScan_line[x*4+0];
  3137            int g = pScan_line[x*4+1];
  3138            int b = pScan_line[x*4+2];
  3139            *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
  3140          }
  3141        }
  3142        else
  3143        {
  3144          for (int x = 0; x < image_width; x++)
  3145          {
  3146            pDst[0] = pScan_line[x*4+0];
  3147            pDst[1] = pScan_line[x*4+1];
  3148            pDst[2] = pScan_line[x*4+2];
  3149            pDst += 3;
  3150          }
  3151        }
  3152      }
  3153    }
  3154  
  3155    return pImage_data;
  3156  }
  3157  
  3158  unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps)
  3159  {
  3160    jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
  3161    return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps);
  3162  }
  3163  
  3164  unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps)
  3165  {
  3166    jpgd::jpeg_decoder_file_stream file_stream;
  3167    if (!file_stream.open(pSrc_filename))
  3168      return NULL;
  3169    return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps);
  3170  }
  3171  
  3172  } // namespace jpgd