From 8eaa9292e57675aa7ff1d7b890c63a4b506dc06d Mon Sep 17 00:00:00 2001 From: Frank Bossen <fbossen@gmail.com> Date: Fri, 8 Jul 2022 23:17:32 -0400 Subject: [PATCH] Clean up filter management for MC interpolation - Enumerate filter types - Define 6-tap vertical affine filter to reduce number of operations --- source/Lib/CommonLib/CommonDef.h | 10 +- source/Lib/CommonLib/InterPrediction.cpp | 96 ++++----- source/Lib/CommonLib/InterpolationFilter.cpp | 189 +++++++----------- source/Lib/CommonLib/InterpolationFilter.h | 22 +- .../CommonLib/x86/InterpolationFilterX86.h | 48 +++-- source/Lib/EncoderLib/InterSearch.cpp | 12 +- 6 files changed, 171 insertions(+), 206 deletions(-) diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 039a162f4d..10e2d56e9f 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -408,10 +408,12 @@ static constexpr int MAX_TESTED_QPs = ( 1 + 1 + ( MAX_DELTA_QP << 1 ) ); static constexpr int COM16_C806_TRANS_PREC = 0; -static constexpr int NTAPS_LUMA = 8; // Number of taps for luma -static constexpr int NTAPS_CHROMA = 4; // Number of taps for chroma -static constexpr int NTAPS_BILINEAR = 2; // Number of taps for bilinear filter -static constexpr int MAX_FILTER_SIZE = NTAPS_LUMA > NTAPS_CHROMA ? NTAPS_LUMA : NTAPS_CHROMA; +static constexpr int NTAPS_LUMA = 8; // Number of taps for luma +static constexpr int NTAPS_LUMA_AFFINE = 6; // Number of taps for luma affine +static constexpr int NTAPS_CHROMA = 4; // Number of taps for chroma +static constexpr int NTAPS_CHROMA_AFFINE = 4; // Number of taps for chroma affine +static constexpr int NTAPS_BILINEAR = 2; // Number of taps for bilinear filter +static constexpr int MAX_FILTER_SIZE = NTAPS_LUMA > NTAPS_CHROMA ? NTAPS_LUMA : NTAPS_CHROMA; #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET static constexpr int MAX_LADF_INTERVALS = 5; /// max number of luma adaptive deblocking filter qp offset intervals diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 21721a8bec..3385f49b04 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -760,15 +760,17 @@ void InterPrediction::xPredInterBlk(const ComponentID &compID, const PredictionU dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2; } + const int filterIdx = bilinearMC ? InterpolationFilter::FILTER_DMVR : InterpolationFilter::FILTER_DEFAULT; + if (yFrac == 0) { m_if.filterHor(compID, (Pel *) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, - xFrac, rndRes, clpRng, bilinearMC, bilinearMC, useAltHpelIf); + xFrac, rndRes, clpRng, filterIdx, useAltHpelIf); } else if (xFrac == 0) { m_if.filterVer(compID, (Pel *) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, - yFrac, true, rndRes, clpRng, bilinearMC, bilinearMC, useAltHpelIf); + yFrac, true, rndRes, clpRng, filterIdx, useAltHpelIf); } else { @@ -785,12 +787,11 @@ void InterPrediction::xPredInterBlk(const ComponentID &compID, const PredictionU vFilterSize = NTAPS_BILINEAR; } m_if.filterHor(compID, (Pel *) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, - tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, clpRng, bilinearMC, - bilinearMC, useAltHpelIf); + tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, clpRng, filterIdx, + useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE(false); m_if.filterVer(compID, (Pel *) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, - dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, clpRng, bilinearMC, bilinearMC, - useAltHpelIf); + dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, clpRng, filterIdx, useAltHpelIf); } JVET_J0090_SET_CACHE_ENABLE( (srcPadStride == 0) @@ -1100,12 +1101,14 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction } #endif + const int filterIdx = InterpolationFilter::FILTER_AFFINE; + if( isRefScaled ) { CHECK(enableProf, "PROF should be disabled with RPR"); xPredInterBlkRPR(scalingRatio, pps, CompArea(compID, chFmt, pu.blocks[compID].offset(w, h), Size(sbWidth, sbHeight)), refPic, - curMv, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, 2); + curMv, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, filterIdx); } else { @@ -1139,21 +1142,23 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction if (yFrac == 0) { - m_if.filterHor(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, xFrac, isLast, clpRng); + m_if.filterHor(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, xFrac, isLast, clpRng, filterIdx); } else if (xFrac == 0) { - m_if.filterVer(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, yFrac, true, isLast, clpRng); + m_if.filterVer(compID, ref, refStride, dst, dstStride, sbWidth, sbHeight, yFrac, true, isLast, clpRng, + filterIdx); } else { - const int filterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA; + const int filterSize = isLuma(compID) ? NTAPS_LUMA_AFFINE : NTAPS_CHROMA_AFFINE; + const int rowsAbove = (filterSize - 1) >> 1; - m_if.filterHor(compID, ref - ((filterSize >> 1) - 1) * refStride, refStride, tmpBuf.buf, tmpBuf.stride, - sbWidth, sbHeight + filterSize - 1, xFrac, false, clpRng); + m_if.filterHor(compID, ref - rowsAbove * refStride, refStride, tmpBuf.buf, tmpBuf.stride, sbWidth, + sbHeight + filterSize - 1, xFrac, false, clpRng, filterIdx); JVET_J0090_SET_CACHE_ENABLE(false); - m_if.filterVer(compID, tmpBuf.buf + ((filterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dst, dstStride, - sbWidth, sbHeight, yFrac, false, isLast, clpRng); + m_if.filterVer(compID, tmpBuf.buf + rowsAbove * tmpBuf.stride, tmpBuf.stride, dst, dstStride, sbWidth, + sbHeight, yFrac, false, isLast, clpRng, filterIdx); JVET_J0090_SET_CACHE_ENABLE(true); } @@ -2300,67 +2305,44 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, int yFilter = filterIndex; const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4; - if( filterIndex == 0 ) + if (filterIndex == InterpolationFilter::FILTER_DEFAULT || !isLuma(compID)) { if( scalingRatio.first > rprThreshold2 ) { - xFilter = 4; + xFilter = InterpolationFilter::FILTER_RPR2; } else if( scalingRatio.first > rprThreshold1 ) { - xFilter = 3; + xFilter = InterpolationFilter::FILTER_RPR1; } if( scalingRatio.second > rprThreshold2 ) { - yFilter = 4; + yFilter = InterpolationFilter::FILTER_RPR2; } else if( scalingRatio.second > rprThreshold1 ) { - yFilter = 3; + yFilter = InterpolationFilter::FILTER_RPR1; } } - if (filterIndex == 2) + else if (filterIndex == InterpolationFilter::FILTER_AFFINE) { - if (isLuma(compID)) + if (scalingRatio.first > rprThreshold2) { - if (scalingRatio.first > rprThreshold2) - { - xFilter = 6; - } - else if (scalingRatio.first > rprThreshold1) - { - xFilter = 5; - } - - if (scalingRatio.second > rprThreshold2) - { - yFilter = 6; - } - else if (scalingRatio.second > rprThreshold1) - { - yFilter = 5; - } + xFilter = InterpolationFilter::FILTER_AFFINE_RPR2; } - else + else if (scalingRatio.first > rprThreshold1) { - if (scalingRatio.first > rprThreshold2) - { - xFilter = 4; - } - else if (scalingRatio.first > rprThreshold1) - { - xFilter = 3; - } + xFilter = InterpolationFilter::FILTER_AFFINE_RPR1; + } - if (scalingRatio.second > rprThreshold2) - { - yFilter = 4; - } - else if (scalingRatio.second > rprThreshold1) - { - yFilter = 3; - } + if (scalingRatio.second > rprThreshold2) + { + yFilter = InterpolationFilter::FILTER_AFFINE_RPR2; + } + else if (scalingRatio.second > rprThreshold1) + { + yFilter = InterpolationFilter::FILTER_AFFINE_RPR1; } } @@ -2433,7 +2415,7 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, Pel *const tempBuf = m_filteredBlockTmpRPR + col; m_if.filterHor(compID, (Pel *) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tempBuf, - tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, clpRng, xFilter, false, + tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, clpRng, xFilter, useAltHpelIf && scalingRatio.first == 1 << SCALE_RATIO_BITS); } @@ -2450,7 +2432,7 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, JVET_J0090_SET_CACHE_ENABLE( false ); m_if.filterVer(compID, tempBuf + ((vFilterSize >> 1) - 1) * tmpStride, tmpStride, dst + row * dstStride, - dstStride, width, 1, yFrac, false, rndRes, clpRng, yFilter, false, + dstStride, width, 1, yFrac, false, rndRes, clpRng, yFilter, useAltHpelIf && scalingRatio.second == 1 << SCALE_RATIO_BITS); JVET_J0090_SET_CACHE_ENABLE( true ); } diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp index 88304e6919..431123e6b6 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -54,6 +54,7 @@ CacheModel* InterpolationFilter::m_cacheModel; // ==================================================================================================================== // Tables // ==================================================================================================================== +// TODO: implement 6-tap horizontal filtering in SIMD code such that m_affineLumaFilter can be used instead of m_lumaFilter4x4 const TFilterCoeff InterpolationFilter::m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 }, @@ -74,6 +75,31 @@ const TFilterCoeff InterpolationFilter::m_lumaFilter4x4[LUMA_INTERPOLATION_FILTE { 0, 1, -2, 4, 63, -3, 1, 0 } }; +// clang-format off +const TFilterCoeff InterpolationFilter::m_affineLumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = +{ + { 0, 0, 64, 0, 0, 0, 0, 0 }, + { 1, -3, 63, 4, -2, 1, 0, 0 }, + { 1, -5, 62, 8, -3, 1, 0, 0 }, + { 2, -8, 60, 13, -4, 1, 0, 0 }, + + { 3, -10, 58, 17, -5, 1, 0, 0 }, + { 3, -11, 52, 26, -8, 2, 0, 0 }, + { 2, -9, 47, 31, -10, 3, 0, 0 }, + { 3, -11, 45, 34, -10, 3, 0, 0 }, + + { 3, -11, 40, 40, -11, 3, 0, 0 }, + { 3, -10, 34, 45, -11, 3, 0, 0 }, + { 3, -10, 31, 47, -9, 2, 0, 0 }, + { 2, -8, 26, 52, -11, 3, 0, 0 }, + + { 1, -5, 17, 58, -10, 3, 0, 0 }, + { 1, -4, 13, 60, -8, 2, 0, 0 }, + { 1, -3, 8, 62, -5, 1, 0, 0 }, + { 1, -2, 4, 63, -3, 1, 0, 0 } +}; +// clang-format on + const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 }, @@ -344,6 +370,11 @@ InterpolationFilter::InterpolationFilter() m_filterVer[2][1][0] = filter<2, true, true, false>; m_filterVer[2][1][1] = filter<2, true, true, true>; + m_filterVer[3][0][0] = filter<6, true, false, false>; + m_filterVer[3][0][1] = filter<6, true, false, true>; + m_filterVer[3][1][0] = filter<6, true, true, false>; + m_filterVer[3][1][1] = filter<6, true, true, true>; + m_filterCopy[0][0] = filterCopy<false, false>; m_filterCopy[0][1] = filterCopy<false, true>; m_filterCopy[1][0] = filterCopy<true, false>; @@ -625,128 +656,65 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt } } -/** - * \brief Filter a block of samples (horizontal) - * - * \tparam N Number of taps - * \param bitDepth Bit depth of samples - * \param src Pointer to source samples - * \param srcStride Stride of source samples - * \param dst Pointer to destination samples - * \param dstStride Stride of destination samples - * \param width Width of block - * \param height Height of block - * \param isLast Flag indicating whether it is the last filtering operation - * \param coeff Pointer to filter taps - */ +static constexpr int tapToIdx(const int N) +{ + return N == 8 ? 0 : (N == 4 ? 1 : (N == 2 ? 2 : (N == 6 ? 3 : 4))); +} + template<int N> void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR) { -//#if ENABLE_SIMD_OPT_MCIF - if( N == 8 ) - { - m_filterHor[0][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); - } - else if( N == 4 ) - { - m_filterHor[1][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); - } - else if( N == 2 ) - { - m_filterHor[2][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); - } - else - { - THROW( "Invalid tap number" ); - } + constexpr int IDX = tapToIdx(N); + static_assert(IDX < 3, "Unsupported tap count"); + m_filterHor[IDX][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); } -/** - * \brief Filter a block of samples (vertical) - * - * \tparam N Number of taps - * \param bitDepth Bit depth - * \param src Pointer to source samples - * \param srcStride Stride of source samples - * \param dst Pointer to destination samples - * \param dstStride Stride of destination samples - * \param width Width of block - * \param height Height of block - * \param isFirst Flag indicating whether it is the first filtering operation - * \param isLast Flag indicating whether it is the last filtering operation - * \param coeff Pointer to filter taps - */ template<int N> void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR) { -//#if ENABLE_SIMD_OPT_MCIF - if( N == 8 ) - { - m_filterVer[0][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); - } - else if( N == 4 ) - { - m_filterVer[1][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); - } - else if( N == 2 ) - { - m_filterVer[2][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); - } - else{ - THROW( "Invalid tap number" ); - } + constexpr int IDX = tapToIdx(N); + static_assert(IDX < 4, "Unsupported tap count"); + m_filterVer[IDX][isFirst][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMCForDMVR); } // ==================================================================================================================== // Public member functions // ==================================================================================================================== -/** - * \brief Filter a block of Luma/Chroma samples (horizontal) - * - * \param compID Chroma component ID - * \param src Pointer to source samples - * \param srcStride Stride of source samples - * \param dst Pointer to destination samples - * \param dstStride Stride of destination samples - * \param width Width of block - * \param height Height of block - * \param frac Fractional sample offset - * \param isLast Flag indicating whether it is the last filtering operation - * \param bitDepth Bit depth - */ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx, - bool biMCForDMVR, bool useAltHpelIf) + bool useAltHpelIf) { - if( frac == 0 && nFilterIdx < 2 ) + const bool biMCForDMVR = nFilterIdx == FILTER_DMVR; + + if (frac == 0 && nFilterIdx <= FILTER_AFFINE) { m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR ); } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - if( nFilterIdx == 1 ) + if (nFilterIdx == FILTER_DMVR) { filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); } - else if( nFilterIdx == 2 ) + else if (nFilterIdx == FILTER_AFFINE) { filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); } - else if( nFilterIdx == 3 ) + else if (nFilterIdx == FILTER_RPR1) { filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR1[frac], biMCForDMVR ); } - else if( nFilterIdx == 4 ) + else if (nFilterIdx == FILTER_RPR2) { filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR2[frac], biMCForDMVR ); } - else if (nFilterIdx == 5) + else if (nFilterIdx == FILTER_AFFINE_RPR1) { filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_affineLumaFilterRPR1[frac], biMCForDMVR); } - else if (nFilterIdx == 6) + else if (nFilterIdx == FILTER_AFFINE_RPR2) { filterHor<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_affineLumaFilterRPR2[frac], biMCForDMVR); } @@ -754,10 +722,6 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in { filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaAltHpelIFilter, biMCForDMVR ); } - else if( ( width == 4 && height == 4 ) || ( width == 4 && height == ( 4 + NTAPS_LUMA - 1 ) ) ) - { - filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); - } else { filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR ); @@ -766,12 +730,12 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in else { CHECK(frac < 0 || frac >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction"); - if( nFilterIdx == 3 ) + if (nFilterIdx == FILTER_RPR1) { filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac], biMCForDMVR); } - else if( nFilterIdx == 4 ) + else if (nFilterIdx == FILTER_RPR2) { filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac], biMCForDMVR); @@ -784,54 +748,41 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in } } - -/** - * \brief Filter a block of Luma/Chroma samples (vertical) - * - * \param compID Colour component ID - * \param src Pointer to source samples - * \param srcStride Stride of source samples - * \param dst Pointer to destination samples - * \param dstStride Stride of destination samples - * \param width Width of block - * \param height Height of block - * \param frac Fractional sample offset - * \param isFirst Flag indicating whether it is the first filtering operation - * \param isLast Flag indicating whether it is the last filtering operation - * \param bitDepth Bit depth - */ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng, - int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf) + int nFilterIdx, bool useAltHpelIf) { - if( frac == 0 && nFilterIdx < 2 ) + const bool biMCForDMVR = nFilterIdx == FILTER_DMVR; + + if (frac == 0 && nFilterIdx <= FILTER_AFFINE) { m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR ); } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - if( nFilterIdx == 1 ) + if (nFilterIdx == FILTER_DMVR) { filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); } - else if( nFilterIdx == 2 ) + else if (nFilterIdx == FILTER_AFFINE) { - filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); + filterVer<NTAPS_LUMA_AFFINE>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, + m_affineLumaFilter[frac], biMCForDMVR); } - else if( nFilterIdx == 3 ) + else if (nFilterIdx == FILTER_RPR1) { filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR1[frac], biMCForDMVR ); } - else if( nFilterIdx == 4 ) + else if (nFilterIdx == FILTER_RPR2) { filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR2[frac], biMCForDMVR ); } - else if (nFilterIdx == 5) + else if (nFilterIdx == FILTER_AFFINE_RPR1) { filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_affineLumaFilterRPR1[frac], biMCForDMVR); } - else if (nFilterIdx == 6) + else if (nFilterIdx == FILTER_AFFINE_RPR2) { filterVer<NTAPS_LUMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_affineLumaFilterRPR2[frac], biMCForDMVR); } @@ -839,10 +790,6 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in { filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaAltHpelIFilter, biMCForDMVR ); } - else if( width == 4 && height == 4 ) - { - filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); - } else { filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR ); @@ -851,12 +798,12 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in else { CHECK(frac < 0 || frac >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction"); - if( nFilterIdx == 3 ) + if (nFilterIdx == FILTER_RPR1) { filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR1[frac], biMCForDMVR); } - else if( nFilterIdx == 4 ) + else if (nFilterIdx == FILTER_RPR2) { filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR2[frac], biMCForDMVR); diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h index 2a99646f22..abdbc51ab7 100644 --- a/source/Lib/CommonLib/InterpolationFilter.h +++ b/source/Lib/CommonLib/InterpolationFilter.h @@ -57,6 +57,8 @@ static inline int IF_INTERNAL_FRAC_BITS(const int bd) { return std::max(2, IF_IN class InterpolationFilter { static const TFilterCoeff m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; + static const TFilterCoeff m_affineLumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; + public: static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps @@ -88,10 +90,22 @@ protected: static CacheModel* m_cacheModel; #endif public: + enum + { + FILTER_DEFAULT = 0, + FILTER_DMVR, + FILTER_AFFINE, + FILTER_RPR1, + FILTER_RPR2, + FILTER_AFFINE_RPR1, + FILTER_AFFINE_RPR2, + }; + InterpolationFilter(); ~InterpolationFilter() {} void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); - void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); + void (*m_filterVer[4][2][2])(const ClpRng &clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, + int height, TFilterCoeff const *coeff, bool biMCForDMVR); void( *m_filterCopy[2][2] ) ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR); void( *m_weightedGeoBlk )(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); @@ -102,11 +116,11 @@ public: void _initInterpolationFilterX86(); #endif void filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, - int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, + int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx = FILTER_DEFAULT, bool useAltHpelIf = false); void filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, - int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng, int nFilterIdx = 0, - bool biMCForDMVR = false, bool useAltHpelIf = false); + int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng, int nFilterIdx = FILTER_DEFAULT, + bool useAltHpelIf = false); #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; } #endif diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h index d61d1415cb..f721ed9064 100644 --- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h +++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h @@ -2131,27 +2131,31 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel } } { - if( N == 8 && !( width & 0x07 ) ) + if ((N == 8 || N == 6) && !(width & 0x07)) { if( !isVertical ) { #if RExt__HIGH_BIT_DEPTH_SUPPORT if (vext >= AVX2) { - simdInterpolateHorM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); + simdInterpolateHorM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, + clpRng, c); } else { - simdInterpolateHorM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); + simdInterpolateHorM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, + clpRng, c); } #else if( vext>= AVX2 ) { - simdInterpolateHorM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + simdInterpolateHorM8_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, + clpRng, c); } else { - simdInterpolateHorM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + simdInterpolateHorM8<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, + c); } #endif } @@ -2160,41 +2164,47 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel #if RExt__HIGH_BIT_DEPTH_SUPPORT if (vext >= AVX2) { - simdInterpolateVerM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); + simdInterpolateVerM8_HBD_AVX2<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, + clpRng, c); } else { - simdInterpolateVerM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); + simdInterpolateVerM8_HBD<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, + clpRng, c); } #else if( vext>= AVX2 ) { - simdInterpolateVerM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + simdInterpolateVerM8_AVX2<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, + clpRng, c); } else { - simdInterpolateVerM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + simdInterpolateVerM8<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, + c); } #endif } return; } - else if( N == 8 && !( width & 0x03 ) ) + else if ((N == 8 || N == 6) && !(width & 0x03)) { if( !isVertical ) { #if RExt__HIGH_BIT_DEPTH_SUPPORT - simdInterpolateHorM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); + simdInterpolateHorM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, + c); #else - simdInterpolateHorM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + simdInterpolateHorM4<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); #endif } else { #if RExt__HIGH_BIT_DEPTH_SUPPORT - simdInterpolateVerM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); + simdInterpolateVerM4_HBD<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, + c); #else - simdInterpolateVerM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + simdInterpolateVerM4<vext, N, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c); #endif } return; @@ -2585,6 +2595,11 @@ void InterpolationFilter::_initInterpolationFilterX86() m_filterVer[2][1][0] = simdFilter<vext, 2, true, true, false>; m_filterVer[2][1][1] = simdFilter<vext, 2, true, true, true>; + m_filterVer[3][0][0] = simdFilter<vext, 6, true, false, false>; + m_filterVer[3][0][1] = simdFilter<vext, 6, true, false, true>; + m_filterVer[3][1][0] = simdFilter<vext, 6, true, true, false>; + m_filterVer[3][1][1] = simdFilter<vext, 6, true, true, true>; + m_filterCopy[0][0] = simdFilterCopy_HBD<vext, false, false>; m_filterCopy[0][1] = simdFilterCopy_HBD<vext, false, true>; m_filterCopy[1][0] = simdFilterCopy_HBD<vext, true, false>; @@ -2623,6 +2638,11 @@ void InterpolationFilter::_initInterpolationFilterX86() m_filterVer[2][1][0] = simdFilter<vext, 2, true, true, false>; m_filterVer[2][1][1] = simdFilter<vext, 2, true, true, true>; + m_filterVer[3][0][0] = simdFilter<vext, 6, true, false, false>; + m_filterVer[3][0][1] = simdFilter<vext, 6, true, false, true>; + m_filterVer[3][1][0] = simdFilter<vext, 6, true, true, false>; + m_filterVer[3][1][1] = simdFilter<vext, 6, true, true, true>; + m_filterCopy[0][0] = simdFilterCopy<vext, false, false>; m_filterCopy[0][1] = simdFilterCopy<vext, false, true>; m_filterCopy[1][0] = simdFilterCopy<vext, true, false>; diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 258d33a7b3..e21c55060b 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -9030,17 +9030,17 @@ void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf) const Pel *srcPtr = pattern->buf - halfFilterSize*srcStride - 1; m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize, - 0 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, 0, false, useAltHpelIf); + 0 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf); if (!m_skipFracME) { m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, - 2 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, 0, false, useAltHpelIf); + 2 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf); } intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1; dstPtr = m_filteredBlock[0][0][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, - false, true, clpRng, 0, false, useAltHpelIf); + false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf); if (m_skipFracME) { return; @@ -9049,17 +9049,17 @@ void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf) intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1; dstPtr = m_filteredBlock[2][0][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, - false, true, clpRng, 0, false, useAltHpelIf); + false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf); intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][2][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, - false, true, clpRng, 0, false, useAltHpelIf); + false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf); intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[2][2][0]; m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, - false, true, clpRng, 0, false, useAltHpelIf); + false, true, clpRng, InterpolationFilter::FILTER_DEFAULT, useAltHpelIf); } -- GitLab