From 8eaa9292e57675aa7ff1d7b890c63a4b506dc06d Mon Sep 17 00:00:00 2001
From: Frank Bossen <fbossen@gmail.com>
Date: Fri, 8 Jul 2022 23:17:32 -0400
Subject: [PATCH] Clean up filter management for MC interpolation

- Enumerate filter types
- Define a 6-tap vertical affine filter to reduce the number of operations
---
 source/Lib/CommonLib/CommonDef.h              |  10 +-
 source/Lib/CommonLib/InterPrediction.cpp      |  96 ++++-----
 source/Lib/CommonLib/InterpolationFilter.cpp  | 189 +++++++-----------
 source/Lib/CommonLib/InterpolationFilter.h    |  22 +-
 .../CommonLib/x86/InterpolationFilterX86.h    |  48 +++--
 source/Lib/EncoderLib/InterSearch.cpp         |  12 +-
 6 files changed, 171 insertions(+), 206 deletions(-)

diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 039a162f4d..10e2d56e9f 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -408,10 +408,12 @@ static constexpr int MAX_TESTED_QPs =   ( 1 + 1 + ( MAX_DELTA_QP << 1 ) );
 
 static constexpr int COM16_C806_TRANS_PREC =                            0;
 
-static constexpr int NTAPS_LUMA      = 8;   // Number of taps for luma
-static constexpr int NTAPS_CHROMA    = 4;   // Number of taps for chroma
-static constexpr int NTAPS_BILINEAR  = 2;   // Number of taps for bilinear filter
-static constexpr int MAX_FILTER_SIZE = NTAPS_LUMA > NTAPS_CHROMA ? NTAPS_LUMA : NTAPS_CHROMA;
+static constexpr int NTAPS_LUMA          = 8;   // Number of taps for luma
+static constexpr int NTAPS_LUMA_AFFINE   = 6;   // Number of taps for luma affine (vertical pass)
+static constexpr int NTAPS_CHROMA        = 4;   // Number of taps for chroma
+static constexpr int NTAPS_CHROMA_AFFINE = 4;   // Number of taps for chroma affine
+static constexpr int NTAPS_BILINEAR      = 2;   // Number of taps for bilinear filter
+static constexpr int MAX_FILTER_SIZE     = NTAPS_LUMA > NTAPS_CHROMA ? NTAPS_LUMA : NTAPS_CHROMA;   // largest tap count
 
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
 static constexpr int MAX_LADF_INTERVALS       =                         5; /// max number of luma adaptive deblocking filter qp offset intervals
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 21721a8bec..3385f49b04 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -760,15 +760,17 @@ void InterPrediction::xPredInterBlk(const ComponentID &compID, const PredictionU
       dstBuf.buf    = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2;
     }
 
+    const int filterIdx = bilinearMC ? InterpolationFilter::FILTER_DMVR : InterpolationFilter::FILTER_DEFAULT;
+
     if (yFrac == 0)
     {
       m_if.filterHor(compID, (Pel *) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight,
-                     xFrac, rndRes, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
+                     xFrac, rndRes, clpRng, filterIdx, useAltHpelIf);
     }
     else if (xFrac == 0)
     {
       m_if.filterVer(compID, (Pel *) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight,
-                     yFrac, true, rndRes, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
+                     yFrac, true, rndRes, clpRng, filterIdx, useAltHpelIf);
     }
     else
     {
@@ -785,12 +787,11 @@ void InterPrediction::xPredInterBlk(const ComponentID &compID, const PredictionU
         vFilterSize = NTAPS_BILINEAR;
       }
       m_if.filterHor(compID, (Pel *) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf,
-                     tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, clpRng, bilinearMC,
-                     bilinearMC, useAltHpelIf);
+                     tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, clpRng, filterIdx,
+                     useAltHpelIf);
       JVET_J0090_SET_CACHE_ENABLE(false);
       m_if.filterVer(compID, (Pel *) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf,
-                     dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, clpRng, bilinearMC, bilinearMC,
-                     useAltHpelIf);
+                     dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, clpRng, filterIdx, useAltHpelIf);
     }
     JVET_J0090_SET_CACHE_ENABLE(
       (srcPadStride == 0)
@@ -1100,12 +1101,14 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
       }
 #endif
 
+      const int filterIdx = InterpolationFilter::FILTER_AFFINE;
+
       if( isRefScaled )
       {
         CHECK(enableProf, "PROF should be disabled with RPR");
         xPredInterBlkRPR(scalingRatio, pps,
                          CompArea(compID, chFmt, pu.blocks[compID].offset(w, h), Size(sbWidth, sbHeight)), refPic,
-                         curMv, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, 2);
+                         curMv, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, filterIdx);
       }
       else
       {
@@ -1139,21 +1142,23 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
 
         if (yFrac == 0)
         {
-          m_if.filterHor(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, xFrac, isLast, clpRng);
+          m_if.filterHor(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, xFrac, isLast, clpRng, filterIdx);
         }
         else if (xFrac == 0)
         {
-          m_if.filterVer(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, yFrac, true, isLast, clpRng);
+          m_if.filterVer(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, yFrac, true, isLast, clpRng,
+                         filterIdx);
         }
         else
         {
-          const int filterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
+          const int filterSize = isLuma(compID) ? NTAPS_LUMA_AFFINE : NTAPS_CHROMA_AFFINE;
+          const int rowsAbove  = (filterSize - 1) >> 1;
 
-          m_if.filterHor(compID, ref - ((filterSize >> 1) - 1) * refStride, refStride, tmpBuf.buf, tmpBuf.stride,
-                         sbWidth, sbHeight + filterSize - 1, xFrac, false, clpRng);
+          m_if.filterHor(compID, ref - rowsAbove * refStride, refStride, tmpBuf.buf, tmpBuf.stride, sbWidth,
+                         sbHeight + filterSize - 1, xFrac, false, clpRng, filterIdx);
           JVET_J0090_SET_CACHE_ENABLE(false);
-          m_if.filterVer(compID, tmpBuf.buf + ((filterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dst, dstStride,
-                         sbWidth, sbHeight, yFrac, false, isLast, clpRng);
+          m_if.filterVer(compID, tmpBuf.buf + rowsAbove * tmpBuf.stride, tmpBuf.stride, dst, dstStride, sbWidth,
+                         sbHeight, yFrac, false, isLast, clpRng, filterIdx);
           JVET_J0090_SET_CACHE_ENABLE(true);
         }
 
@@ -2300,67 +2305,44 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
     int yFilter = filterIndex;
     const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4;
     const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4;
-    if( filterIndex == 0 )
+    if (filterIndex == InterpolationFilter::FILTER_DEFAULT || !isLuma(compID))
     {
       if( scalingRatio.first > rprThreshold2 )
       {
-        xFilter = 4;
+        xFilter = InterpolationFilter::FILTER_RPR2;
       }
       else if( scalingRatio.first > rprThreshold1 )
       {
-        xFilter = 3;
+        xFilter = InterpolationFilter::FILTER_RPR1;
       }
 
       if( scalingRatio.second > rprThreshold2 )
       {
-        yFilter = 4;
+        yFilter = InterpolationFilter::FILTER_RPR2;
       }
       else if( scalingRatio.second > rprThreshold1 )
       {
-        yFilter = 3;
+        yFilter = InterpolationFilter::FILTER_RPR1;
       }
     }
-    if (filterIndex == 2)
+    else if (filterIndex == InterpolationFilter::FILTER_AFFINE)
     {
-      if (isLuma(compID))
+      if (scalingRatio.first > rprThreshold2)
       {
-        if (scalingRatio.first > rprThreshold2)
-        {
-          xFilter = 6;
-        }
-        else if (scalingRatio.first > rprThreshold1)
-        {
-          xFilter = 5;
-        }
-
-        if (scalingRatio.second > rprThreshold2)
-        {
-          yFilter = 6;
-        }
-        else if (scalingRatio.second > rprThreshold1)
-        {
-          yFilter = 5;
-        }
+        xFilter = InterpolationFilter::FILTER_AFFINE_RPR2;
       }
-      else
+      else if (scalingRatio.first > rprThreshold1)
       {
-        if (scalingRatio.first > rprThreshold2)
-        {
-          xFilter = 4;
-        }
-        else if (scalingRatio.first > rprThreshold1)
-        {
-          xFilter = 3;
-        }
+        xFilter = InterpolationFilter::FILTER_AFFINE_RPR1;
+      }
 
-        if (scalingRatio.second > rprThreshold2)
-        {
-          yFilter = 4;
-        }
-        else if (scalingRatio.second > rprThreshold1)
-        {
-          yFilter = 3;
-        }
+      if (scalingRatio.second > rprThreshold2)
+      {
+        yFilter = InterpolationFilter::FILTER_AFFINE_RPR2;
+      }
+      else if (scalingRatio.second > rprThreshold1)
+      {
+        yFilter = InterpolationFilter::FILTER_AFFINE_RPR1;
       }
     }
 
@@ -2433,7 +2415,7 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
       Pel *const tempBuf = m_filteredBlockTmpRPR + col;
 
       m_if.filterHor(compID, (Pel *) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tempBuf,
-                     tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, clpRng, xFilter, false,
+                     tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, clpRng, xFilter,
                      useAltHpelIf && scalingRatio.first == 1 << SCALE_RATIO_BITS);
     }
 
@@ -2450,7 +2432,7 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
 
       JVET_J0090_SET_CACHE_ENABLE( false );
       m_if.filterVer(compID, tempBuf + ((vFilterSize >> 1) - 1) * tmpStride, tmpStride, dst + row * dstStride,
-                     dstStride, width, 1, yFrac, false, rndRes, clpRng, yFilter, false,
+                     dstStride, width, 1, yFrac, false, rndRes, clpRng, yFilter,
                      useAltHpelIf && scalingRatio.second == 1 << SCALE_RATIO_BITS);
       JVET_J0090_SET_CACHE_ENABLE( true );
     }
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index 88304e6919..431123e6b6 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -54,6 +54,7 @@ CacheModel* InterpolationFilter::m_cacheModel;
 // ====================================================================================================================
 // Tables
 // ====================================================================================================================
+// TODO: implement 6-tap horizontal filtering in SIMD code such that m_affineLumaFilter can be used instead of m_lumaFilter4x4
 const TFilterCoeff InterpolationFilter::m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] =
 {
   {  0, 0,   0, 64,  0,   0,  0,  0 },
@@ -74,6 +75,31 @@ const TFilterCoeff InterpolationFilter::m_lumaFilter4x4[LUMA_INTERPOLATION_FILTE
   {  0, 1,  -2,  4, 63,  -3,  1,  0 }
 };
 
+// clang-format off
+const TFilterCoeff InterpolationFilter::m_affineLumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] =
+{   // each row: 6 taps, then two zeros padding the row to NTAPS_LUMA entries
+  { 0,   0,  64,   0,   0, 0, 0, 0 },
+  { 1,  -3,  63,   4,  -2, 1, 0, 0 },
+  { 1,  -5,  62,   8,  -3, 1, 0, 0 },
+  { 2,  -8,  60,  13,  -4, 1, 0, 0 },
+
+  { 3, -10,  58,  17,  -5, 1, 0, 0 },
+  { 3, -11,  52,  26,  -8, 2, 0, 0 },
+  { 2,  -9,  47,  31, -10, 3, 0, 0 },
+  { 3, -11,  45,  34, -10, 3, 0, 0 },
+
+  { 3, -11,  40,  40, -11, 3, 0, 0 },
+  { 3, -10,  34,  45, -11, 3, 0, 0 },
+  { 3, -10,  31,  47,  -9, 2, 0, 0 },
+  { 2,  -8,  26,  52, -11, 3, 0, 0 },
+
+  { 1,  -5,  17,  58, -10, 3, 0, 0 },
+  { 1,  -4,  13,  60,  -8, 2, 0, 0 },
+  { 1,  -3,   8,  62,  -5, 1, 0, 0 },
+  { 1,  -2,   4,  63,  -3, 1, 0, 0 }
+};   // NOTE(review): rows look like m_lumaFilter4x4 minus its leading zero tap - confirm against the full table
+// clang-format on
+
 const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] =
 {
   {  0, 0,   0, 64,  0,   0,  0,  0 },
@@ -344,6 +370,11 @@ InterpolationFilter::InterpolationFilter()
   m_filterVer[2][1][0] = filter<2, true, true, false>;
   m_filterVer[2][1][1] = filter<2, true, true, true>;
 
+  m_filterVer[3][0][0] = filter<6, true, false, false>;
+  m_filterVer[3][0][1] = filter<6, true, false, true>;
+  m_filterVer[3][1][0] = filter<6, true, true, false>;
+  m_filterVer[3][1][1] = filter<6, true, true, true>;
+
   m_filterCopy[0][0]   = filterCopy<false, false>;
   m_filterCopy[0][1]   = filterCopy<false, true>;
   m_filterCopy[1][0]   = filterCopy<true, false>;
@@ -625,128 +656,65 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt
   }
 }
 
-/**
- * \brief Filter a block of samples (horizontal)
- *
- * \tparam N          Number of taps
- * \param  bitDepth   Bit depth of samples
- * \param  src        Pointer to source samples
- * \param  srcStride  Stride of source samples
- * \param  dst        Pointer to destination samples
- * \param  dstStride  Stride of destination samples
- * \param  width      Width of block
- * \param  height     Height of block
- * \param  isLast     Flag indicating whether it is the last filtering operation
- * \param  coeff      Pointer to filter taps
- */
+static constexpr int tapToIdx(const int N)   // tap count -> first index of m_filterHor / m_filterVer
+{
+  return N == 8 ? 0 : (N == 4 ? 1 : (N == 2 ? 2 : (N == 6 ? 3 : 4)));   // 4 marks an unsupported tap count
+}
+
 template<int N>
 void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR)
 {
-//#if ENABLE_SIMD_OPT_MCIF
-  if( N == 8 )
-  {
-    m_filterHor[0][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
-  }
-  else if( N == 4 )
-  {
-    m_filterHor[1][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
-  }
-  else if( N == 2 )
-  {
-    m_filterHor[2][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
-  }
-  else
-  {
-    THROW( "Invalid tap number" );
-  }
+  constexpr int IDX = tapToIdx(N);   // table slot chosen at compile time from the tap count N
+  static_assert(IDX < 3, "Unsupported tap count");   // m_filterHor only has slots for 8, 4 and 2 taps
+  m_filterHor[IDX][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
 }
 
-/**
- * \brief Filter a block of samples (vertical)
- *
- * \tparam N          Number of taps
- * \param  bitDepth   Bit depth
- * \param  src        Pointer to source samples
- * \param  srcStride  Stride of source samples
- * \param  dst        Pointer to destination samples
- * \param  dstStride  Stride of destination samples
- * \param  width      Width of block
- * \param  height     Height of block
- * \param  isFirst    Flag indicating whether it is the first filtering operation
- * \param  isLast     Flag indicating whether it is the last filtering operation
- * \param  coeff      Pointer to filter taps
- */
 template<int N>
 void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR)
 {
-//#if ENABLE_SIMD_OPT_MCIF
-  if( N == 8 )
-  {
-    m_filterVer[0][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
-  }
-  else if( N == 4 )
-  {
-    m_filterVer[1][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
-  }
-  else if( N == 2 )
-  {
-    m_filterVer[2][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
-  }
-  else{
-    THROW( "Invalid tap number" );
-  }
+  constexpr int IDX = tapToIdx(N);   // table slot chosen at compile time from the tap count N
+  static_assert(IDX < 4, "Unsupported tap count");   // m_filterVer has slots for 8, 4, 2 and 6 taps
+  m_filterVer[IDX][isFirst][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR);
 }
 
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
 
-/**
- * \brief Filter a block of Luma/Chroma samples (horizontal)
- *
- * \param  compID     Chroma component ID
- * \param  src        Pointer to source samples
- * \param  srcStride  Stride of source samples
- * \param  dst        Pointer to destination samples
- * \param  dstStride  Stride of destination samples
- * \param  width      Width of block
- * \param  height     Height of block
- * \param  frac       Fractional sample offset
- * \param  isLast     Flag indicating whether it is the last filtering operation
- * \param  bitDepth   Bit depth
- */
 void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride,
                                     int width, int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx,
-                                    bool biMCForDMVR, bool useAltHpelIf)
+                                    bool useAltHpelIf)
 {
-  if( frac == 0 && nFilterIdx < 2 )
+  const bool biMCForDMVR = nFilterIdx == FILTER_DMVR;
+
+  if (frac == 0 && nFilterIdx <= FILTER_AFFINE)
   {
     m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR );
   }
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
-    if( nFilterIdx == 1 )
+    if (nFilterIdx == FILTER_DMVR)
     {
       filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR );
     }
-    else if( nFilterIdx == 2 )
+    else if (nFilterIdx == FILTER_AFFINE)
     {
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
     }
-    else if( nFilterIdx == 3 )
+    else if (nFilterIdx == FILTER_RPR1)
     {
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR1[frac], biMCForDMVR );
     }
-    else if( nFilterIdx == 4 )
+    else if (nFilterIdx == FILTER_RPR2)
     {
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR2[frac], biMCForDMVR );
     }
-    else if (nFilterIdx == 5)
+    else if (nFilterIdx == FILTER_AFFINE_RPR1)
     {
       filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_affineLumaFilterRPR1[frac], biMCForDMVR);
     }
-    else if (nFilterIdx == 6)
+    else if (nFilterIdx == FILTER_AFFINE_RPR2)
     {
       filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_affineLumaFilterRPR2[frac], biMCForDMVR);
     }
@@ -754,10 +722,6 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in
     {
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaAltHpelIFilter, biMCForDMVR );
     }
-    else if( ( width == 4 && height == 4 ) || ( width == 4 && height == ( 4 + NTAPS_LUMA - 1 ) ) )
-    {
-      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
-    }
     else
     {
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR );
@@ -766,12 +730,12 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in
   else
   {
     CHECK(frac < 0 || frac >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction");
-    if( nFilterIdx == 3 )
+    if (nFilterIdx == FILTER_RPR1)
     {
       filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac],
                               biMCForDMVR);
     }
-    else if( nFilterIdx == 4 )
+    else if (nFilterIdx == FILTER_RPR2)
     {
       filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac],
                               biMCForDMVR);
@@ -784,54 +748,41 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in
   }
 }
 
-
-/**
- * \brief Filter a block of Luma/Chroma samples (vertical)
- *
- * \param  compID     Colour component ID
- * \param  src        Pointer to source samples
- * \param  srcStride  Stride of source samples
- * \param  dst        Pointer to destination samples
- * \param  dstStride  Stride of destination samples
- * \param  width      Width of block
- * \param  height     Height of block
- * \param  frac       Fractional sample offset
- * \param  isFirst    Flag indicating whether it is the first filtering operation
- * \param  isLast     Flag indicating whether it is the last filtering operation
- * \param  bitDepth   Bit depth
- */
 void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride,
                                     int width, int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng,
-                                    int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf)
+                                    int nFilterIdx, bool useAltHpelIf)
 {
-  if( frac == 0 && nFilterIdx < 2 )
+  const bool biMCForDMVR = nFilterIdx == FILTER_DMVR;
+
+  if (frac == 0 && nFilterIdx <= FILTER_AFFINE)
   {
     m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR );
   }
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
-    if( nFilterIdx == 1 )
+    if (nFilterIdx == FILTER_DMVR)
     {
       filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR );
     }
-    else if( nFilterIdx == 2 )
+    else if (nFilterIdx == FILTER_AFFINE)
     {
-      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
+      filterVer<NTAPS_LUMA_AFFINE>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast,
+                                   m_affineLumaFilter[frac], biMCForDMVR);
     }
-    else if( nFilterIdx == 3 )
+    else if (nFilterIdx == FILTER_RPR1)
     {
       filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR1[frac], biMCForDMVR );
     }
-    else if( nFilterIdx == 4 )
+    else if (nFilterIdx == FILTER_RPR2)
     {
       filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR2[frac], biMCForDMVR );
     }
-    else if (nFilterIdx == 5)
+    else if (nFilterIdx == FILTER_AFFINE_RPR1)
     {
       filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_affineLumaFilterRPR1[frac], biMCForDMVR);
     }
-    else if (nFilterIdx == 6)
+    else if (nFilterIdx == FILTER_AFFINE_RPR2)
     {
       filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_affineLumaFilterRPR2[frac], biMCForDMVR);
     }
@@ -839,10 +790,6 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in
     {
       filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaAltHpelIFilter, biMCForDMVR );
     }
-    else if( width == 4 && height == 4 )
-    {
-      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
-    }
     else
     {
       filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR );
@@ -851,12 +798,12 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in
   else
   {
     CHECK(frac < 0 || frac >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction");
-    if( nFilterIdx == 3 )
+    if (nFilterIdx == FILTER_RPR1)
     {
       filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast,
                               m_chromaFilterRPR1[frac], biMCForDMVR);
     }
-    else if( nFilterIdx == 4 )
+    else if (nFilterIdx == FILTER_RPR2)
     {
       filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast,
                               m_chromaFilterRPR2[frac], biMCForDMVR);
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index 2a99646f22..abdbc51ab7 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -57,6 +57,8 @@ static inline int IF_INTERNAL_FRAC_BITS(const int bd) { return std::max(2, IF_IN
 class InterpolationFilter
 {
   static const TFilterCoeff m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA];
+  static const TFilterCoeff m_affineLumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA];
+
 public:
   static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps
   static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps
@@ -88,10 +90,22 @@ protected:
   static CacheModel* m_cacheModel;
 #endif
 public:
+  enum   // values accepted by the nFilterIdx argument of filterHor() / filterVer()
+  {
+    FILTER_DEFAULT = 0,   // default value of nFilterIdx
+    FILTER_DMVR,          // luma: m_bilinearFilterPrec4; also turns on the biMCForDMVR flag
+    FILTER_AFFINE,        // luma: m_lumaFilter4x4 horizontally, 6-tap m_affineLumaFilter vertically
+    FILTER_RPR1,          // m_lumaFilterRPR1 / m_chromaFilterRPR1
+    FILTER_RPR2,          // m_lumaFilterRPR2 / m_chromaFilterRPR2
+    FILTER_AFFINE_RPR1,   // luma: m_affineLumaFilterRPR1
+    FILTER_AFFINE_RPR2,   // luma: m_affineLumaFilterRPR2
+  };
+
   InterpolationFilter();
   ~InterpolationFilter() {}
   void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR);
-  void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR);
+  void (*m_filterVer[4][2][2])(const ClpRng &clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width,
+                               int height, TFilterCoeff const *coeff, bool biMCForDMVR);
   void( *m_filterCopy[2][2] )  ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR);
   void( *m_weightedGeoBlk )(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1);
 
@@ -102,11 +116,11 @@ public:
   void _initInterpolationFilterX86();
 #endif
   void filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width,
-                 int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx = 0, bool biMCForDMVR = false,
+                 int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx = FILTER_DEFAULT,
                  bool useAltHpelIf = false);
   void filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width,
-                 int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng, int nFilterIdx = 0,
-                 bool biMCForDMVR = false, bool useAltHpelIf = false);
+                 int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng, int nFilterIdx = FILTER_DEFAULT,
+                 bool useAltHpelIf = false);
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
 #endif
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index d61d1415cb..f721ed9064 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -2131,27 +2131,31 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
     }
   }
   {
-    if( N == 8 && !( width & 0x07 ) )
+    if ((N == 8 || N == 6) && !(width & 0x07))
     {
       if( !isVertical )
       {
 #if RExt__HIGH_BIT_DEPTH_SUPPORT
         if (vext >= AVX2)
         {
-          simdInterpolateHorM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+          simdInterpolateHorM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset,
+                                                         clpRng, c);
         }
         else
         {
-          simdInterpolateHorM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+          simdInterpolateHorM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset,
+                                                    clpRng, c);
         }
 #else
         if( vext>= AVX2 )
         {
-          simdInterpolateHorM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+          simdInterpolateHorM8_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset,
+                                                     clpRng, c);
         }
         else
         {
-          simdInterpolateHorM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+          simdInterpolateHorM8<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng,
+                                                c);
         }
 #endif
       }
@@ -2160,41 +2164,47 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
 #if RExt__HIGH_BIT_DEPTH_SUPPORT
         if (vext >= AVX2)
         {
-          simdInterpolateVerM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+          simdInterpolateVerM8_HBD_AVX2<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset,
+                                                         clpRng, c);
         }
         else
         {
-          simdInterpolateVerM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+          simdInterpolateVerM8_HBD<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset,
+                                                    clpRng, c);
         }
 #else
         if( vext>= AVX2 )
         {
-          simdInterpolateVerM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+          simdInterpolateVerM8_AVX2<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset,
+                                                     clpRng, c);
         }
         else
         {
-          simdInterpolateVerM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+          simdInterpolateVerM8<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng,
+                                                c);
         }
 #endif
       }
       return;
     }
-    else if( N == 8 && !( width & 0x03 ) )
+    else if ((N == 8 || N == 6) && !(width & 0x03))
     {
       if( !isVertical )
       {
 #if RExt__HIGH_BIT_DEPTH_SUPPORT
-        simdInterpolateHorM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+        simdInterpolateHorM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng,
+                                                  c);
 #else
-        simdInterpolateHorM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        simdInterpolateHorM4<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
 #endif
       }
       else
       {
 #if RExt__HIGH_BIT_DEPTH_SUPPORT
-        simdInterpolateVerM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+        simdInterpolateVerM4_HBD<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng,
+                                                  c);
 #else
-        simdInterpolateVerM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        simdInterpolateVerM4<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
 #endif
       }
       return;
@@ -2585,6 +2595,11 @@ void InterpolationFilter::_initInterpolationFilterX86()
   m_filterVer[2][1][0] = simdFilter<vext, 2, true, true, false>;
   m_filterVer[2][1][1] = simdFilter<vext, 2, true, true, true>;
 
+  m_filterVer[3][0][0] = simdFilter<vext, 6, true, false, false>;
+  m_filterVer[3][0][1] = simdFilter<vext, 6, true, false, true>;
+  m_filterVer[3][1][0] = simdFilter<vext, 6, true, true, false>;
+  m_filterVer[3][1][1] = simdFilter<vext, 6, true, true, true>;
+
   m_filterCopy[0][0] = simdFilterCopy_HBD<vext, false, false>;
   m_filterCopy[0][1] = simdFilterCopy_HBD<vext, false, true>;
   m_filterCopy[1][0] = simdFilterCopy_HBD<vext, true, false>;
@@ -2623,6 +2638,11 @@ void InterpolationFilter::_initInterpolationFilterX86()
   m_filterVer[2][1][0] = simdFilter<vext, 2, true, true, false>;
   m_filterVer[2][1][1] = simdFilter<vext, 2, true, true, true>;
 
+  m_filterVer[3][0][0] = simdFilter<vext, 6, true, false, false>;
+  m_filterVer[3][0][1] = simdFilter<vext, 6, true, false, true>;
+  m_filterVer[3][1][0] = simdFilter<vext, 6, true, true, false>;
+  m_filterVer[3][1][1] = simdFilter<vext, 6, true, true, true>;
+
   m_filterCopy[0][0]   = simdFilterCopy<vext, false, false>;
   m_filterCopy[0][1]   = simdFilterCopy<vext, false, true>;
   m_filterCopy[1][0]   = simdFilterCopy<vext, true, false>;
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index 258d33a7b3..e21c55060b 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -9030,17 +9030,17 @@ void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
   const Pel *srcPtr = pattern->buf - halfFilterSize*srcStride - 1;
 
   m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize,
-                 0 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, 0, false, useAltHpelIf);
+                 0 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf);
   if (!m_skipFracME)
   {
     m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize,
-                   2 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, 0, false, useAltHpelIf);
+                   2 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf);
   }
 
   intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1;
   dstPtr = m_filteredBlock[0][0][0];
   m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF,
-                 false, true, clpRng, 0, false, useAltHpelIf);
+                 false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf);
   if (m_skipFracME)
   {
     return;
@@ -9049,17 +9049,17 @@ void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
   intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
   dstPtr = m_filteredBlock[2][0][0];
   m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF,
-                 false, true, clpRng, 0, false, useAltHpelIf);
+                 false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf);
 
   intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride;
   dstPtr = m_filteredBlock[0][2][0];
   m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF,
-                 false, true, clpRng, 0, false, useAltHpelIf);
+                 false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf);
 
   intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
   dstPtr = m_filteredBlock[2][2][0];
   m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF,
-                 false, true, clpRng, 0, false, useAltHpelIf);
+                 false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf);
 }
 
 
-- 
GitLab