// -*- C++ -*-
//
// CDR_Base.inl,v 4.4 2003/02/25 18:05:06 ossama Exp
//
// The ACE_CDR::swap_X and ACE_CDR::swap_X_array routines are split
// into 4 cases for optimization:
//
// * x86 Pentium CPU + gnu g++
//   (ACE_HAS_PENTIUM && __GNUG__)
//   => gcc x86 inline assembly.
//
// * x86 Pentium CPU + (MSVC++ or Borland C++)
//   (ACE_HAS_PENTIUM && (_MSC_VER || __BORLANDC__))
//   => MSC x86 inline assembly.
//
// * 64 bit architecture
//   (ACE_SIZEOF_LONG == 8)
//   => shift/masks using 64bit words.
//
// * default
//   (none of the above)
//   => shift/masks using 32bit words.
//
// Some things you may find useful to know if you intend to mess
// with these optimizations for swaps:
//
// * MSVC++ doesn't assume register values are preserved across
//   statements, so you can clobber any register you want,
//   whenever you want (well, not *any* really; see the manual).
//   The MSVC++ optimizer will try to pick different registers
//   for the C++ statements surrounding your asm block, and if
//   that's not possible it will use the stack.
//
// * If you clobber registers with asm statements in gcc, you had
//   better do it in an asm-only function, or save/restore them on
//   the stack before/after. If not, surrounding C statements could
//   end up using the same registers and big-badda-bum (been there,
//   done that...). The big-badda-bum can happen *even if you
//   specify the clobbered register in your asm's*. Even better,
//   use gcc asm operand syntax to name the register assigned to a
//   certain variable so you don't have to clobber any register
//   directly.

ACE_INLINE void
ACE_CDR::swap_2 (const char *orig, char* target)
{
#if defined(ACE_HAS_PENTIUM)
# if defined(__GNUG__)
  unsigned short a =
    *ACE_reinterpret_cast(const unsigned short*, orig);
  // rolw by 8 exchanges the two bytes of a 16-bit value.
  asm( "rolw $8, %0" : "=r" (a) : "0" (a) );
  *ACE_reinterpret_cast(unsigned short*, target) = a;
# elif (defined(_MSC_VER) || defined(__BORLANDC__)) \
    && !defined(ACE_LACKS_INLINE_ASSEMBLY)
  __asm mov ebx, orig;
  __asm mov ecx, target;
  __asm mov ax, [ebx];
  __asm rol ax, 8;
  __asm mov [ecx], ax;
# else
  // For CISC platforms this is faster than shift/masks.
  target[1] = orig[0];
  target[0] = orig[1];
# endif
#else
  register ACE_UINT16 usrc =
    *ACE_reinterpret_cast(const ACE_UINT16*, orig);
  register ACE_UINT16* udst =
    ACE_reinterpret_cast(ACE_UINT16*, target);
  *udst = (usrc << 8) | (usrc >> 8);
#endif /* ACE_HAS_PENTIUM */
}

ACE_INLINE void
ACE_CDR::swap_4 (const char* orig, char* target)
{
#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
  // We have ACE_HAS_PENTIUM, so we know the sizeof's.
  register unsigned int j =
    *ACE_reinterpret_cast(const unsigned int*, orig);
  asm ("bswap %1" : "=r" (j) : "0" (j));
  *ACE_reinterpret_cast(unsigned int*, target) = j;
#elif defined(ACE_HAS_PENTIUM) \
      && (defined(_MSC_VER) || defined(__BORLANDC__)) \
      && !defined(ACE_LACKS_INLINE_ASSEMBLY)
  __asm mov ebx, orig;
  __asm mov ecx, target;
  __asm mov eax, [ebx];
  __asm bswap eax;
  __asm mov [ecx], eax;
#else
  register ACE_UINT32 x =
    *ACE_reinterpret_cast(const ACE_UINT32*, orig);
  x = (x << 24) | ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | (x >> 24);
  *ACE_reinterpret_cast(ACE_UINT32*, target) = x;
#endif
}

ACE_INLINE void
ACE_CDR::swap_8 (const char* orig, char* target)
{
#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
  register unsigned int i =
    *ACE_reinterpret_cast(const unsigned int*, orig);
  register unsigned int j =
    *ACE_reinterpret_cast(const unsigned int*, orig + 4);
  asm ("bswap %1" : "=r" (i) : "0" (i));
  asm ("bswap %1" : "=r" (j) : "0" (j));
  // The two bswapped words also trade places, reversing all 8 bytes.
  *ACE_reinterpret_cast(unsigned int*, target + 4) = i;
  *ACE_reinterpret_cast(unsigned int*, target) = j;
#elif defined(ACE_HAS_PENTIUM) \
      && (defined(_MSC_VER) || defined(__BORLANDC__)) \
      && !defined(ACE_LACKS_INLINE_ASSEMBLY)
  __asm mov ecx, orig;
  __asm mov edx, target;
  __asm mov eax, [ecx];
  __asm mov ebx, 4[ecx];
  __asm bswap eax;
  __asm bswap ebx;
  __asm mov 4[edx], eax;
  __asm mov [edx], ebx;
#elif ACE_SIZEOF_LONG == 8
  // 64 bit architecture.
  register unsigned long x =
    *ACE_reinterpret_cast(const unsigned long*, orig);
  register unsigned long x84 = (x & 0x000000ff000000ffUL) << 24;
  register unsigned long x73 = (x & 0x0000ff000000ff00UL) << 8;
  register unsigned long x62 = (x & 0x00ff000000ff0000UL) >> 8;
  register unsigned long x51 = (x & 0xff000000ff000000UL) >> 24;
  x = (x84 | x73 | x62 | x51);
  x = (x << 32) | (x >> 32);
  *ACE_reinterpret_cast(unsigned long*, target) = x;
#else
  register ACE_UINT32 x =
    *ACE_reinterpret_cast(const ACE_UINT32*, orig);
  register ACE_UINT32 y =
    *ACE_reinterpret_cast(const ACE_UINT32*, orig + 4);
  x = (x << 24) | ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | (x >> 24);
  y = (y << 24) | ((y & 0xff00) << 8) | ((y & 0xff0000) >> 8) | (y >> 24);
  *ACE_reinterpret_cast(ACE_UINT32*, target) = y;
  *ACE_reinterpret_cast(ACE_UINT32*, target + 4) = x;
#endif
}

ACE_INLINE void
ACE_CDR::swap_16 (const char* orig, char* target)
{
  // Swap each 8-byte half and exchange the halves.
  swap_8 (orig + 8, target);
  swap_8 (orig, target + 8);
}

ACE_INLINE size_t
ACE_CDR::first_size (size_t minsize)
{
  if (minsize == 0)
    return ACE_CDR::DEFAULT_BUFSIZE;

  size_t newsize = ACE_CDR::DEFAULT_BUFSIZE;
  while (newsize < minsize)
    {
      if (newsize < ACE_CDR::EXP_GROWTH_MAX)
        {
          // We grow exponentially at the beginning; this is fast and
          // reduces the number of allocations.
          newsize *= 2;
        }
      else
        {
          // But continuing with exponential growth can result in
          // over-allocation and easily yield an allocation failure,
          // so we grow linearly once the buffer is big.
          newsize += ACE_CDR::LINEAR_GROWTH_CHUNK;
        }
    }
  return newsize;
}

ACE_INLINE size_t
ACE_CDR::next_size (size_t minsize)
{
  size_t newsize = ACE_CDR::first_size (minsize);

  if (newsize == minsize)
    {
      // If necessary, take one more growth step.
      if (newsize < ACE_CDR::EXP_GROWTH_MAX)
        newsize *= 2;
      else
        newsize += ACE_CDR::LINEAR_GROWTH_CHUNK;
    }

  return newsize;
}

// ****************************************************************
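// A minimal sketch of how the swap routines can be exercised, kept
// under an #if 0 guard so it never builds as part of this file; build
// it as a separate program if you want to check a port. The expected
// output assumes a correct build of whichever #if branch above was
// selected.
#if 0
#include <cstdio>
#include "ace/CDR_Base.h"

int main (void)
{
  const char src[4] = { 0x12, 0x34, 0x56, 0x78 };
  char dst[4];

  // swap_4 must reverse the byte order regardless of which branch
  // was compiled in: expect "78 56 34 12".
  ACE_CDR::swap_4 (src, dst);
  std::printf ("%02x %02x %02x %02x\n",
               (unsigned char) dst[0], (unsigned char) dst[1],
               (unsigned char) dst[2], (unsigned char) dst[3]);
  return 0;
}
#endif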
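// A worked example of the first_size/next_size growth policy, assuming
// the usual ACE defaults DEFAULT_BUFSIZE = 512, EXP_GROWTH_MAX = 65536,
// and LINEAR_GROWTH_CHUNK = 65536 (all three are configurable, so the
// numbers below are an assumption, not a guarantee):
//
//   first_size (0)     -> 512     (empty request gets the default)
//   first_size (1000)  -> 1024    (one doubling: 512 -> 1024)
//   first_size (70000) -> 131072  (doublings up to 65536, then one
//                                  linear step of 65536)
//   next_size (1024)   -> 2048    (first_size hits minsize exactly,
//                                  so one extra growth step is taken)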