|
- /**************************************************************************************
- * Copyright (c) 2018-2020 ["Peking University Shenzhen Graduate School",
- * "Peng Cheng Laboratory", and "Guangdong Bohua UHD Innovation Corporation"]
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes the software uAVS3d developed by
- * Peking University Shenzhen Graduate School, Peng Cheng Laboratory
- * and Guangdong Bohua UHD Innovation Corporation.
- * 4. Neither the name of the organizations (Peking University Shenzhen Graduate School,
- * Peng Cheng Laboratory and Guangdong Bohua UHD Innovation Corporation) nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * For more information, contact us at rgwang@pkusz.edu.cn.
- **************************************************************************************/
-
- #include "com_modules.h"
- #include <assert.h>
-
- void com_mc_blk_luma(com_pic_t *pic, pel *dst, int dst_stride, int x_pos, int y_pos, int width, int height, int widx, int max_posx, int max_posy, int max_val, int hp_flag)
- {
- const s8(*coeff)[8];
- int dx, dy;
- int i_src = pic->stride_luma;
-
- if (hp_flag) {
- dx = x_pos & 15;
- dy = y_pos & 15;
- x_pos >>= 4;
- y_pos >>= 4;
- coeff = com_tbl_mc_l_coeff_hp;
- } else {
- dx = x_pos & 3;
- dy = y_pos & 3;
- x_pos >>= 2;
- y_pos >>= 2;
- coeff = com_tbl_mc_l_coeff;
- }
-
- x_pos = COM_CLIP3(-MAX_CU_SIZE - 4, max_posx, x_pos);
- y_pos = COM_CLIP3(-MAX_CU_SIZE - 4, max_posy, y_pos);
-
- if (hp_flag) {
- if ((dx == 0 || dx == 8) && (dy == 0 && dy == 8)) {
- dx = dx >> 2;
- dy = dy >> 2;
- pel *src = (pel*)pic->subpel->imgs[dy][dx]->planes[0] + y_pos * i_src + x_pos;
- uavs3e_funs_handle.ipcpy[widx](src, i_src, dst, dst_stride, width, height);
- } else {
- pel *src = pic->y + y_pos * i_src + x_pos;
-
- if (dx == 0 && dy == 0) {
- uavs3e_funs_handle.ipcpy[widx](src, i_src, dst, dst_stride, width, height);
- } else if (dy == 0) {
- uavs3e_funs_handle.ipflt[IPFILTER_H_8][widx](src, i_src, dst, dst_stride, width, height, coeff[dx], max_val);
- } else if (dx == 0) {
- uavs3e_funs_handle.ipflt[IPFILTER_V_8][widx](src, i_src, dst, dst_stride, width, height, coeff[dy], max_val);
- } else {
- uavs3e_funs_handle.ipflt_ext[IPFILTER_EXT_8][widx](src, i_src, dst, dst_stride, width, height, coeff[dx], coeff[dy], max_val);
- }
- }
- } else {
- pel *src = (pel*)pic->subpel->imgs[dy][dx]->planes[0] + y_pos * i_src + x_pos;
- uavs3e_funs_handle.ipcpy[widx](src, i_src, dst, dst_stride, width, height);
- }
- }
-
- pel* com_mc_blk_luma_pointer(com_pic_t *pic, int x_pos, int y_pos, int max_posx, int max_posy)
- {
- int dx = x_pos & 3;
- int dy = y_pos & 3;
-
- x_pos >>= 2;
- y_pos >>= 2;
-
- x_pos = COM_CLIP3(-MAX_CU_SIZE - 4, max_posx, x_pos);
- y_pos = COM_CLIP3(-MAX_CU_SIZE - 4, max_posy, y_pos);
-
- return (pel*)pic->subpel->imgs[dy][dx]->planes[0] + y_pos * pic->stride_luma + x_pos;
- }
-
- static void com_mc_blk_chroma(com_pic_t *pic, int uv_flag, pel *dst, int dst_stride, int x_pos, int y_pos, int width, int height, int widx, int max_posx, int max_posy, int max_val, int hp_flag)
- {
- int dx, dy;
- const s8(*coeff)[4];
- pel *src = uv_flag ? pic->v : pic->u;
- int i_src = pic->stride_chroma;
-
- if (hp_flag) {
- dx = x_pos & 31;
- dy = y_pos & 31;
- x_pos >>= 5;
- y_pos >>= 5;
- coeff = com_tbl_mc_c_coeff_hp;
- } else {
- dx = x_pos & 7;
- dy = y_pos & 7;
- x_pos >>= 3;
- y_pos >>= 3;
- coeff = com_tbl_mc_c_coeff;
- }
-
- x_pos = COM_CLIP3(-(MAX_CU_SIZE >> 1) - 2, max_posx, x_pos);
- y_pos = COM_CLIP3(-(MAX_CU_SIZE >> 1) - 2, max_posy, y_pos);
-
- src += y_pos * i_src + x_pos;
-
- if (dx == 0 && dy == 0) {
- uavs3e_funs_handle.ipcpy[widx](src, i_src, dst, dst_stride, width, height);
- } else if (dy == 0) {
- uavs3e_funs_handle.ipflt[IPFILTER_H_4][widx](src, i_src, dst, dst_stride, width, height, coeff[dx], max_val);
- } else if (dx == 0) {
- uavs3e_funs_handle.ipflt[IPFILTER_V_4][widx](src, i_src, dst, dst_stride, width, height, coeff[dy], max_val);
- } else {
- uavs3e_funs_handle.ipflt_ext[IPFILTER_EXT_4][widx](src, i_src, dst, dst_stride, width, height, coeff[dx], coeff[dy], max_val);
- }
- }
-
- void com_mc_cu(int x, int y, int pic_w, int pic_h, int w, int h, s8 refi[REFP_NUM], s16 mv[REFP_NUM][MV_D], com_ref_pic_t(*refp)[REFP_NUM], pel pred_buf[N_C][MAX_CU_DIM], int pred_stride, channel_type_t channel, int bit_depth)
- {
- ALIGNED_32(pel pred_snd[N_C][MAX_CU_DIM]);
- pel(*pred)[MAX_CU_DIM] = pred_buf;
- int max_posx = pic_w + 4;
- int max_posy = pic_h + 4;
- int widx = CONV_LOG2(w) - MIN_CU_LOG2;
- int bidx = 0;
-
- if (REFI_IS_VALID(refi[REFP_0])) {
- /* forward */
- com_pic_t *ref_pic = refp[refi[REFP_0]][REFP_0].pic;
- int qpel_gmv_x = (x << 2) + mv[REFP_0][MV_X];
- int qpel_gmv_y = (y << 2) + mv[REFP_0][MV_Y];
-
- if (channel != CHANNEL_C) {
- com_mc_blk_luma(ref_pic, pred[Y_C], pred_stride, qpel_gmv_x, qpel_gmv_y, w, h, widx, max_posx, max_posy, (1 << bit_depth) - 1, 0);
- }
- if (channel != CHANNEL_L) {
- com_mc_blk_chroma(ref_pic, 0, pred[U_C], pred_stride >> 1, qpel_gmv_x, qpel_gmv_y, w >> 1, h >> 1, widx - 1, max_posx >> 1, max_posy >> 1, (1 << bit_depth) - 1, 0);
- com_mc_blk_chroma(ref_pic, 1, pred[V_C], pred_stride >> 1, qpel_gmv_x, qpel_gmv_y, w >> 1, h >> 1, widx - 1, max_posx >> 1, max_posy >> 1, (1 << bit_depth) - 1, 0);
- }
- bidx++;
- }
- /* check identical motion */
- if (REFI_IS_VALID(refi[REFP_0]) && REFI_IS_VALID(refi[REFP_1])) {
- if (refp[refi[REFP_0]][REFP_0].pic->ptr == refp[refi[REFP_1]][REFP_1].pic->ptr && mv[REFP_0][MV_X] == mv[REFP_1][MV_X] && mv[REFP_0][MV_Y] == mv[REFP_1][MV_Y]) {
- return;
- }
- }
- if (REFI_IS_VALID(refi[REFP_1])) {
- /* backward */
- if (bidx) {
- pred = pred_snd;
- }
- com_pic_t *ref_pic = refp[refi[REFP_1]][REFP_1].pic;
- int qpel_gmv_x = (x << 2) + mv[REFP_1][MV_X];
- int qpel_gmv_y = (y << 2) + mv[REFP_1][MV_Y];
-
- if (channel != CHANNEL_C) {
- com_mc_blk_luma(ref_pic, pred[Y_C], pred_stride, qpel_gmv_x, qpel_gmv_y, w, h, widx, max_posx, max_posy, (1 << bit_depth) - 1, 0);
- }
- if (channel != CHANNEL_L) {
- com_mc_blk_chroma(ref_pic, 0, pred[U_C], pred_stride >> 1, qpel_gmv_x, qpel_gmv_y, w >> 1, h >> 1, widx - 1, max_posx >> 1, max_posy >> 1, (1 << bit_depth) - 1, 0);
- com_mc_blk_chroma(ref_pic, 1, pred[V_C], pred_stride >> 1, qpel_gmv_x, qpel_gmv_y, w >> 1, h >> 1, widx - 1, max_posx >> 1, max_posy >> 1, (1 << bit_depth) - 1, 0);
- }
- bidx++;
- }
- if (bidx == 2) {
- if (channel != CHANNEL_C) {
- uavs3e_funs_handle.pel_avrg[widx](pred_buf[Y_C], pred_stride, pred_buf[Y_C], pred_snd[Y_C], h);
- }
- if (channel != CHANNEL_L) {
- w >>= 1;
- h >>= 1;
- pred_stride >>= 1;
- uavs3e_funs_handle.pel_avrg[widx - 1](pred_buf[U_C], pred_stride, pred_buf[U_C], pred_snd[U_C], h);
- uavs3e_funs_handle.pel_avrg[widx - 1](pred_buf[V_C], pred_stride, pred_buf[V_C], pred_snd[V_C], h);
- }
- }
- }
-
- void com_mc_blk_affine_luma(int x, int y, int pic_w, int pic_h, int cu_width, int cu_height, CPMV ac_mv[VER_NUM][MV_D], com_pic_t *ref_pic, pel pred[MAX_CU_DIM], int sub_w, int sub_h, int bit_depth)
- {
- assert(com_tbl_log2[cu_width ] >= 4);
- assert(com_tbl_log2[cu_height] >= 4);
-
- int half_w = sub_w >> 1;
- int half_h = sub_h >> 1;
- s32 mv_scale_hor = (s32)ac_mv[0][MV_X] << 7;
- s32 mv_scale_ver = (s32)ac_mv[0][MV_Y] << 7;
- s32 mv_scale_tmp_hor, mv_scale_tmp_ver;
-
- // convert to 2^(storeBit + bit) precision
- s32 dmv_hor_x = (((s32)ac_mv[1][MV_X] - (s32)ac_mv[0][MV_X]) << 7) >> com_tbl_log2[cu_width]; // deltaMvHor
- s32 dmv_hor_y = (((s32)ac_mv[1][MV_Y] - (s32)ac_mv[0][MV_Y]) << 7) >> com_tbl_log2[cu_width];
- s32 dmv_ver_x = -dmv_hor_y; // deltaMvVer
- s32 dmv_ver_y = dmv_hor_x;
-
- int widx = CONV_LOG2(sub_w) - MIN_CU_LOG2;
- int max_posx = pic_w + 4;
- int max_posy = pic_h + 4;
-
- // get prediction block by block
- for (int h = 0; h < cu_height; h += sub_h) {
- for (int w = 0; w < cu_width; w += sub_w) {
- int pos_x = w + half_w;
- int pos_y = h + half_h;
-
- if (w == 0 && h == 0) {
- pos_x = 0;
- pos_y = 0;
- } else if (w + sub_w == cu_width && h == 0) {
- pos_x = cu_width;
- pos_y = 0;
- }
-
- mv_scale_tmp_hor = mv_scale_hor + dmv_hor_x * pos_x + dmv_ver_x * pos_y;
- mv_scale_tmp_ver = mv_scale_ver + dmv_hor_y * pos_x + dmv_ver_y * pos_y;
-
- // 1/16 precision, 18 bits for mc
- com_mv_rounding_s32(mv_scale_tmp_hor, mv_scale_tmp_ver, &mv_scale_tmp_hor, &mv_scale_tmp_ver, 7, 0);
- mv_scale_tmp_hor = COM_CLIP3(COM_INT18_MIN, COM_INT18_MAX, mv_scale_tmp_hor);
- mv_scale_tmp_ver = COM_CLIP3(COM_INT18_MIN, COM_INT18_MAX, mv_scale_tmp_ver);
-
- int qpel_gmv_x = ((x + w) << 4) + mv_scale_tmp_hor;
- int qpel_gmv_y = ((y + h) << 4) + mv_scale_tmp_ver;
-
- com_mc_blk_luma(ref_pic, pred + w, cu_width, qpel_gmv_x, qpel_gmv_y, sub_w, sub_h, widx, max_posx, max_posy, (1 << bit_depth) - 1, 1);
- }
- pred += (cu_width * sub_h);
- }
- }
-
- static void com_mc_blk_affine(int x, int y, int pic_w, int pic_h, int cu_width, int cu_height, CPMV ac_mv[VER_NUM][MV_D], com_pic_t *ref_pic, pel pred[N_C][MAX_CU_DIM], int cp_num, int sub_w, int sub_h, int lidx, int bit_depth)
- {
- assert(com_tbl_log2[cu_width] >= 4);
- assert(com_tbl_log2[cu_height] >= 4);
-
- int qpel_gmv_x, qpel_gmv_y;
- pel *pred_y = pred[Y_C], *pred_u = pred[U_C], *pred_v = pred[V_C];
- int w, h;
- int half_w, half_h;
-
- int dmv_hor_x, dmv_ver_x, dmv_hor_y, dmv_ver_y;
- s32 mv_scale_hor = (s32)ac_mv[0][MV_X] << 7;
- s32 mv_scale_ver = (s32)ac_mv[0][MV_Y] << 7;
- s32 mv_scale_tmp_hor, mv_scale_tmp_ver;
- s32 mv_save[MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2][MV_D];
-
- half_w = sub_w >> 1;
- half_h = sub_h >> 1;
-
- dmv_hor_x = (((s32)ac_mv[1][MV_X] - (s32)ac_mv[0][MV_X]) << 7) >> com_tbl_log2[cu_width]; // deltaMvHor
- dmv_hor_y = (((s32)ac_mv[1][MV_Y] - (s32)ac_mv[0][MV_Y]) << 7) >> com_tbl_log2[cu_width];
-
- if (cp_num == 3) {
- dmv_ver_x = (((s32)ac_mv[2][MV_X] - (s32)ac_mv[0][MV_X]) << 7) >> com_tbl_log2[cu_height]; // deltaMvVer
- dmv_ver_y = (((s32)ac_mv[2][MV_Y] - (s32)ac_mv[0][MV_Y]) << 7) >> com_tbl_log2[cu_height];
- } else {
- dmv_ver_x = -dmv_hor_y; // deltaMvVer
- dmv_ver_y = dmv_hor_x;
- }
-
- int widx = CONV_LOG2(sub_w) - MIN_CU_LOG2;
- int max_posx = pic_w + 4;
- int max_posy = pic_h + 4;
-
- for (h = 0; h < cu_height; h += sub_h) {
- for (w = 0; w < cu_width; w += sub_w) {
- int pos_x = w + half_w;
- int pos_y = h + half_h;
-
- if (w == 0 && h == 0) {
- pos_x = 0;
- pos_y = 0;
- } else if (w + sub_w == cu_width && h == 0) {
- pos_x = cu_width;
- pos_y = 0;
- } else if (w == 0 && h + sub_h == cu_height && cp_num == 3) {
- pos_x = 0;
- pos_y = cu_height;
- }
-
- mv_scale_tmp_hor = mv_scale_hor + dmv_hor_x * pos_x + dmv_ver_x * pos_y;
- mv_scale_tmp_ver = mv_scale_ver + dmv_hor_y * pos_x + dmv_ver_y * pos_y;
-
- // 1/16 precision, 18 bits for mc
- com_mv_rounding_s32(mv_scale_tmp_hor, mv_scale_tmp_ver, &mv_scale_tmp_hor, &mv_scale_tmp_ver, 7, 0);
- mv_scale_tmp_hor = COM_CLIP3(COM_INT18_MIN, COM_INT18_MAX, mv_scale_tmp_hor);
- mv_scale_tmp_ver = COM_CLIP3(COM_INT18_MIN, COM_INT18_MAX, mv_scale_tmp_ver);
-
- // save MVF for chroma interpolation
- int w_scu = PEL2SCU(w);
- int h_scu = PEL2SCU(h);
- mv_save[w_scu][h_scu][MV_X] = mv_scale_tmp_hor;
- mv_save[w_scu][h_scu][MV_Y] = mv_scale_tmp_ver;
-
- if (sub_w == 8 && sub_h == 8) {
- mv_save[w_scu + 1][h_scu][MV_X] = mv_scale_tmp_hor;
- mv_save[w_scu + 1][h_scu][MV_Y] = mv_scale_tmp_ver;
- mv_save[w_scu][h_scu + 1][MV_X] = mv_scale_tmp_hor;
- mv_save[w_scu][h_scu + 1][MV_Y] = mv_scale_tmp_ver;
- mv_save[w_scu + 1][h_scu + 1][MV_X] = mv_scale_tmp_hor;
- mv_save[w_scu + 1][h_scu + 1][MV_Y] = mv_scale_tmp_ver;
- }
- qpel_gmv_x = ((x + w) << 4) + mv_scale_tmp_hor;
- qpel_gmv_y = ((y + h) << 4) + mv_scale_tmp_ver;
- com_mc_blk_luma(ref_pic, pred_y + w, cu_width, qpel_gmv_x, qpel_gmv_y, sub_w, sub_h, widx, max_posx, max_posy, (1 << bit_depth) - 1, 1);
- }
- pred_y += (cu_width * sub_h);
- }
-
- // get prediction block by block (chroma)
- sub_w = 8;
- sub_h = 8;
- widx = CONV_LOG2(sub_w) - MIN_CU_LOG2;
- max_posx >>= 1;
- max_posy >>= 1;
-
- for (h = 0; h < cu_height; h += sub_h) {
- for (w = 0; w < cu_width; w += sub_w) {
- int w_scu = PEL2SCU(w);
- int h_scu = PEL2SCU(h);
-
- mv_scale_tmp_hor = (mv_save[w_scu][h_scu][MV_X] + mv_save[w_scu + 1][h_scu][MV_X] + mv_save[w_scu][h_scu + 1][MV_X] + mv_save[w_scu + 1][h_scu + 1][MV_X] + 2) >> 2;
- mv_scale_tmp_ver = (mv_save[w_scu][h_scu][MV_Y] + mv_save[w_scu + 1][h_scu][MV_Y] + mv_save[w_scu][h_scu + 1][MV_Y] + mv_save[w_scu + 1][h_scu + 1][MV_Y] + 2) >> 2;
-
- qpel_gmv_x = ((x + w) << 4) + mv_scale_tmp_hor;
- qpel_gmv_y = ((y + h) << 4) + mv_scale_tmp_ver;
-
- com_mc_blk_chroma(ref_pic, 0, pred_u + (w >> 1), cu_width >> 1, qpel_gmv_x, qpel_gmv_y, sub_w >> 1, sub_h >> 1, widx - 1, max_posx, max_posy, (1 << bit_depth) - 1, 1);
- com_mc_blk_chroma(ref_pic, 1, pred_v + (w >> 1), cu_width >> 1, qpel_gmv_x, qpel_gmv_y, sub_w >> 1, sub_h >> 1, widx - 1, max_posx, max_posy, (1 << bit_depth) - 1, 1);
- }
- pred_u += (cu_width * sub_h) >> 2;
- pred_v += (cu_width * sub_h) >> 2;
- }
- }
-
- void com_mc_cu_affine(int x, int y, int pic_w, int pic_h, int w, int h, s8 refi[REFP_NUM], CPMV mv[REFP_NUM][VER_NUM][MV_D], com_ref_pic_t(*refp)[REFP_NUM], pel pred_buf[N_C][MAX_CU_DIM], int vertex_num, com_pic_header_t *sh, int bit_depth)
- {
- com_pic_t *ref_pic;
- ALIGNED_32(pel pred_snd[N_C][MAX_CU_DIM]);
- pel(*pred)[MAX_CU_DIM] = pred_buf;
- pel *p0, *p1, *p2, *p3;
- int i, j, bidx = 0;
-
- int sub_w = 4;
- int sub_h = 4;
- if (sh->affine_subblk_size_idx == 1) {
- sub_w = 8;
- sub_h = 8;
- }
- if (REFI_IS_VALID(refi[REFP_0]) && REFI_IS_VALID(refi[REFP_1])) {
- sub_w = 8;
- sub_h = 8;
- }
-
- if (REFI_IS_VALID(refi[REFP_0])) {
- /* forward */
- ref_pic = refp[refi[REFP_0]][REFP_0].pic;
- com_mc_blk_affine(x, y, pic_w, pic_h, w, h, mv[REFP_0], ref_pic, pred, vertex_num, sub_w, sub_h, 0, bit_depth);
- bidx++;
- }
-
- if (REFI_IS_VALID(refi[REFP_1])) {
- /* backward */
- if (bidx) {
- pred = pred_snd;
- }
- ref_pic = refp[refi[REFP_1]][REFP_1].pic;
- com_mc_blk_affine(x, y, pic_w, pic_h, w, h, mv[REFP_1], ref_pic, pred, vertex_num, sub_w, sub_h, 1, bit_depth);
- bidx++;
- }
-
- if (bidx == 2) {
- p0 = pred_buf[Y_C];
- p1 = pred_snd[Y_C];
- for (j = 0; j < h; j++) {
- for (i = 0; i < w; i++) {
- p0[i] = (p0[i] + p1[i] + 1) >> 1;
- }
- p0 += w;
- p1 += w;
- }
- p0 = pred_buf[U_C];
- p1 = pred_snd[U_C];
- p2 = pred_buf[V_C];
- p3 = pred_snd[V_C];
- w >>= 1;
- h >>= 1;
- for (j = 0; j < h; j++) {
- for (i = 0; i < w; i++) {
- p0[i] = (p0[i] + p1[i] + 1) >> 1;
- p2[i] = (p2[i] + p3[i] + 1) >> 1;
- }
- p0 += w;
- p1 += w;
- p2 += w;
- p3 += w;
- }
- }
- }
-
/*
 * FIR tap helpers shared by all interpolation routines below.
 *
 * Every macro parameter is parenthesized so that expression arguments
 * (e.g. an index of "a << 1" or a stride of "w + pad") expand correctly.
 * Note that `src`, `i`, `i_src` and `coef` are still evaluated several
 * times, so arguments must not have side effects.
 */

/* 8-tap filter over src[i - 3] .. src[i + 4] */
#define FLT_8TAP_HOR(src, i, coef) ( \
    (src)[(i) - 3] * (coef)[0] + \
    (src)[(i) - 2] * (coef)[1] + \
    (src)[(i) - 1] * (coef)[2] + \
    (src)[(i)    ] * (coef)[3] + \
    (src)[(i) + 1] * (coef)[4] + \
    (src)[(i) + 2] * (coef)[5] + \
    (src)[(i) + 3] * (coef)[6] + \
    (src)[(i) + 4] * (coef)[7])

/* 8-tap filter over the column src[i - 3 * i_src] .. src[i + 4 * i_src] */
#define FLT_8TAP_VER(src, i, i_src, coef) ( \
    (src)[(i) - 3 * (i_src)] * (coef)[0] + \
    (src)[(i) - 2 * (i_src)] * (coef)[1] + \
    (src)[(i) - 1 * (i_src)] * (coef)[2] + \
    (src)[(i)              ] * (coef)[3] + \
    (src)[(i) + 1 * (i_src)] * (coef)[4] + \
    (src)[(i) + 2 * (i_src)] * (coef)[5] + \
    (src)[(i) + 3 * (i_src)] * (coef)[6] + \
    (src)[(i) + 4 * (i_src)] * (coef)[7])

/* 4-tap filter over src[i - 1] .. src[i + 2] */
#define FLT_4TAP_HOR(src, i, coef) ( \
    (src)[(i) - 1] * (coef)[0] + \
    (src)[(i)    ] * (coef)[1] + \
    (src)[(i) + 1] * (coef)[2] + \
    (src)[(i) + 2] * (coef)[3])

/* 4-tap filter over the column src[i - i_src] .. src[i + 2 * i_src] */
#define FLT_4TAP_VER(src, i, i_src, coef) ( \
    (src)[(i) - 1 * (i_src)] * (coef)[0] + \
    (src)[(i)              ] * (coef)[1] + \
    (src)[(i) + 1 * (i_src)] * (coef)[2] + \
    (src)[(i) + 2 * (i_src)] * (coef)[3])
-
-
- /****************************************************************************
- * interpolation for luma
- ****************************************************************************/
-
- static void mc_if_hor_luma(const pel *src, int i_src, pel *dst, int i_dst, int width, int height, s8 const *coeff, int max_val)
- {
- int row, col;
- int sum, val;
-
- for (row = 0; row < height; row++) {
- for (col = 0; col < width; col++) {
- sum = FLT_8TAP_HOR(src, col, coeff);
- val = (sum + 32) >> 6;
- dst[col] = COM_CLIP3(0, max_val, val);
- }
- src += i_src;
- dst += i_dst;
- }
- }
-
- static void mc_if_ver_luma(const pel *src, int i_src, pel *dst, int i_dst, int width, int height, s8 const *coeff, int max_val)
- {
- int row, col;
- int sum, val;
-
- for (row = 0; row < height; row++) {
- for (col = 0; col < width; col++) {
- sum = FLT_8TAP_VER(src, col, i_src, coeff);
- val = (sum + 32) >> 6;
- dst[col] = COM_CLIP3(0, max_val, val);
- }
- src += i_src;
- dst += i_dst;
- }
- }
-
- static void mc_if_hor_ver_luma(const pel *src, int i_src, pel *dst, int i_dst, int width, int height, const s8 *coeff_h, const s8 *coeff_v, int max_val)
- {
- int row, col;
- int sum, val;
- int add1, shift1;
- int add2, shift2;
-
- ALIGNED_16(s16 tmp_res[(128 + 7) * 128]);
- s16 *tmp;
-
- if (max_val == 255) { // 8 bit_depth
- shift1 = 0;
- shift2 = 12;
- } else { // 10 bit_depth
- shift1 = 2;
- shift2 = 10;
- }
-
- add1 = (1 << (shift1)) >> 1;
- add2 = 1 << (shift2 - 1);
-
- src += -3 * i_src;
- tmp = tmp_res;
-
- if (shift1) {
- for (row = -3; row < height + 4; row++) {
- for (col = 0; col < width; col++) {
- sum = FLT_8TAP_HOR(src, col, coeff_h);
- tmp[col] = (sum + add1) >> shift1;
- }
- src += i_src;
- tmp += width;
- }
- } else {
- for (row = -3; row < height + 4; row++) {
- for (col = 0; col < width; col++) {
- tmp[col] = FLT_8TAP_HOR(src, col, coeff_h);
- }
- src += i_src;
- tmp += width;
- }
- }
-
- tmp = tmp_res + 3 * width;
-
- for (row = 0;
- row < height;
- row++) {
- for (col = 0; col < width; col++) {
- sum = FLT_8TAP_VER(tmp, col, width, coeff_v);
- val = (sum + add2) >> shift2;
- dst[col] = COM_CLIP3(0, max_val, val);
- }
- dst += i_dst;
- tmp += width;
- }
- }
-
- /****************************************************************************
- * interpolation for chroma
- ****************************************************************************/
-
- static void mc_if_hor_chroma(const pel *src, int i_src, pel *dst, int i_dst, int width, int height, s8 const *coeff, int max_val)
- {
- int row, col;
- int sumu, valu;
-
- for (row = 0; row < height; row++) {
- for (col = 0; col < width; col++) {
- sumu = FLT_4TAP_HOR(src, col, coeff);
- valu = (sumu + 32) >> 6;
- dst[col] = COM_CLIP3(0, max_val, valu);
- }
- src += i_src;
- dst += i_dst;
- }
- }
-
- static void mc_if_ver_chroma(const pel *src, int i_src, pel *dst, int i_dst, int width, int height, s8 const *coeff, int max_val)
- {
- int row, col;
- int sumu, valu;
-
- for (row = 0; row < height; row++) {
- for (col = 0; col < width; col++) {
- sumu = FLT_4TAP_VER(src, col, i_src, coeff);
- valu = (sumu + 32) >> 6;
- dst[col] = COM_CLIP3(0, max_val, valu);
- }
- src += i_src;
- dst += i_dst;
- }
- }
-
- static void mc_if_hor_ver_chroma(const pel *src, int i_src, pel *dst, int i_dst, int width, int height, const s8 *coeff_h, const s8 *coeff_v, int max_val)
- {
- int row, col;
- int sumu, valu;
- int add1, shift1;
- int add2, shift2;
-
- ALIGNED_16(s16 tmp_res[(64 + 3) * 64]); // UV interlaced
- s16 *tmp;
-
- if (max_val == 255) { // 8 bit_depth
- shift1 = 0;
- shift2 = 12;
- } else { // 10 bit_depth
- shift1 = 2;
- shift2 = 10;
- }
-
- add1 = (1 << (shift1)) >> 1;
- add2 = 1 << (shift2 - 1);
-
- src += -1 * i_src;
- tmp = tmp_res;
-
- if (shift1) {
- for (row = -1; row < height + 2; row++) {
- for (col = 0; col < width; col++) {
- sumu = FLT_4TAP_HOR(src, col, coeff_h);
- tmp[col] = (sumu + add1) >> shift1;
- }
- src += i_src;
- tmp += width;
- }
- } else {
- for (row = -1; row < height + 2; row++) {
- for (col = 0; col < width; col++) {
- tmp[col] = FLT_4TAP_HOR(src, col, coeff_h);
- }
- src += i_src;
- tmp += width;
- }
- }
-
- tmp = tmp_res + 1 * width;
-
- for (row = 0;
- row < height;
- row++) {
- for (col = 0; col < width; col++) {
- sumu = FLT_4TAP_VER(tmp, col, width, coeff_v);
- valu = (sumu + add2) >> shift2;
- dst[col] = COM_CLIP3(0, max_val, valu);
- }
- dst += i_dst;
- tmp += width;
- }
- }
-
- /****************************************************************************
- * copy
- ****************************************************************************/
-
- static void mc_if_cpy(const pel *src, int i_src, pel *dst, int i_dst, int width, int height)
- {
- int row;
- for (row = 0; row < height; row++) {
- memcpy(dst, src, sizeof(pel)* width);
- src += i_src;
- dst += i_dst;
- }
- }
-
- /****************************************************************************
- * interpolation for luma of frame
- ****************************************************************************/
-
- static avs3_always_inline pel pixel_clip(int x, int max_pixel_val)
- {
- return (pel)((x < 0) ? 0 : ((x > max_pixel_val) ? max_pixel_val : x));
- }
-
- static void com_if_filter_hor_8(const pel *src, int i_src, pel *dst[3], int i_dst, s16 *dst_tmp[3], int i_dst_tmp, int width, int height, s8(*coeff)[8], int bit_depth)
- {
- int i, j;
- pel *d0 = dst[0];
- pel *d1 = dst[1];
- pel *d2 = dst[2];
- s16 *dt0 = dst_tmp[0];
- s16 *dt1 = dst_tmp[1];
- s16 *dt2 = dst_tmp[2];
- int shift_tmp = bit_depth - 8;
- int add_tmp = (1 << (shift_tmp)) >> 1;
- int t1, t2, t3;
- int max_pixel = (1 << bit_depth) - 1;
-
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
- t1 = FLT_8TAP_HOR(src, i, coeff[0]);
- dt0[i] = (t1 + add_tmp) >> shift_tmp;
- d0[i] = pixel_clip((t1 + 32) >> 6, max_pixel);
- t2 = FLT_8TAP_HOR(src, i, coeff[1]);
- dt1[i] = (t2 + add_tmp) >> shift_tmp;
- d1[i] = pixel_clip((t2 + 32) >> 6, max_pixel);
- t3 = FLT_8TAP_HOR(src, i, coeff[2]);
- dt2[i] = (t3 + add_tmp) >> shift_tmp;
- d2[i] = pixel_clip((t3 + 32) >> 6, max_pixel);
- }
- d0 += i_dst;
- d1 += i_dst;
- d2 += i_dst;
- dt0 += i_dst_tmp;
- dt1 += i_dst_tmp;
- dt2 += i_dst_tmp;
- src += i_src;
- }
- }
-
- static void com_if_filter_ver_8(const pel *src, int i_src, pel *dst[3], int i_dst, int width, int height, s8(*coeff)[8], int bit_depth)
- {
- int i, j, val;
- pel *d0 = dst[0];
- pel *d1 = dst[1];
- pel *d2 = dst[2];
- int max_pixel = (1 << bit_depth) - 1;
-
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
- val = (FLT_8TAP_VER(src, i, i_src, coeff[0]) + 32) >> 6;
- d0[i] = pixel_clip(val, max_pixel);
-
- val = (FLT_8TAP_VER(src, i, i_src, coeff[1]) + 32) >> 6;
- d1[i] = pixel_clip(val, max_pixel);
-
- val = (FLT_8TAP_VER(src, i, i_src, coeff[2]) + 32) >> 6;
- d2[i] = pixel_clip(val, max_pixel);
- }
-
- d0 += i_dst;
- d1 += i_dst;
- d2 += i_dst;
- src += i_src;
- }
- }
-
- static void uavs3e_if_ver_luma_frame_ext(const s16 *src, int i_src, pel *dst[3], int i_dst, int width, int height, s8(*coeff)[8], int bit_depth)
- {
- int i, j, val;
- pel *d0 = dst[0];
- pel *d1 = dst[1];
- pel *d2 = dst[2];
- int max_pixel = (1 << bit_depth) - 1;
- int shift1 = 20 - bit_depth;
- int add1 = 1 << (shift1 - 1);
-
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
- val = (FLT_8TAP_VER(src, i, i_src, coeff[0]) + add1) >> shift1;
- d0[i] = pixel_clip(val, max_pixel);
-
- val = (FLT_8TAP_VER(src, i, i_src, coeff[1]) + add1) >> shift1;
- d1[i] = pixel_clip(val, max_pixel);
-
- val = (FLT_8TAP_VER(src, i, i_src, coeff[2]) + add1) >> shift1;
- d2[i] = pixel_clip(val, max_pixel);
- }
-
- d0 += i_dst;
- d1 += i_dst;
- d2 += i_dst;
- src += i_src;
- }
- }
-
- void com_if_luma_frame(com_img_t *img_list[4][4], s16 *tmp_buf[3], int bit_depth)
- {
- int dx, dy;
- com_img_t *img = img_list[0][0];
- int i_stride = img->stride[0] / sizeof(pel);
- int tmp_stride = img->width[0] + 16;
- pel *dst_tmp[3];
- s8(*coefs)[8] = (s8(*)[8])com_tbl_mc_l_coeff[1];
-
- int ip_width = img->width [0] + 8;
- int ip_height = img->height[0] + 8;
-
- //horizontal positions: a,1,b
- dst_tmp[0] = (pel*)img_list[0][1]->planes[0] - 8 * i_stride - 8;
- dst_tmp[1] = (pel*)img_list[0][2]->planes[0] - 8 * i_stride - 8;
- dst_tmp[2] = (pel*)img_list[0][3]->planes[0] - 8 * i_stride - 8;
- uavs3e_funs_handle.ip_flt_y_hor((pel*)img_list[0][0]->planes[0] - 8 * i_stride - 8, i_stride, dst_tmp, i_stride, tmp_buf, tmp_stride, img->width[0] + 16, img->height[0] + 16, coefs, bit_depth);
-
- //vertical positions: c,2,j
- dst_tmp[0] = (pel*)img_list[1][0]->planes[0] - 4 * i_stride - 4;
- dst_tmp[1] = (pel*)img_list[2][0]->planes[0] - 4 * i_stride - 4;
- dst_tmp[2] = (pel*)img_list[3][0]->planes[0] - 4 * i_stride - 4;
- uavs3e_funs_handle.ip_flt_y_ver((pel*)img_list[0][0]->planes[0] - 4 * i_stride - 4, i_stride, dst_tmp, i_stride, ip_width, ip_height, coefs, bit_depth);
-
- //vertical positions: d,h,k; e,3,1; f,i,m
- for (dx = 1; dx < 4; dx++) {
- dst_tmp[0] = (pel*)img_list[1][dx]->planes[0] - 4 * i_stride - 4;
- dst_tmp[1] = (pel*)img_list[2][dx]->planes[0] - 4 * i_stride - 4;
- dst_tmp[2] = (pel*)img_list[3][dx]->planes[0] - 4 * i_stride - 4;
- uavs3e_funs_handle.ip_flt_y_ver_ext(tmp_buf[dx - 1] + 4 * tmp_stride + 4, tmp_stride, dst_tmp, i_stride, ip_width, ip_height, coefs, bit_depth);
- }
-
- for (dx = 0; dx < 4; dx++) {
- for (dy = 0; dy < 4; dy++) {
- if (dx == 0 && dy == 0) {
- continue;
- }
- com_img_padding(img_list[dx][dy], 1, 4);
- }
- }
- }
-
- void uavs3e_funs_init_mc_c()
- {
- int i;
-
- uavs3e_funs_handle.ip_flt_y_hor = com_if_filter_hor_8;
- uavs3e_funs_handle.ip_flt_y_ver = com_if_filter_ver_8;
- uavs3e_funs_handle.ip_flt_y_ver_ext = uavs3e_if_ver_luma_frame_ext;
-
- for (i = 0; i < CU_SIZE_NUM; i++) {
- uavs3e_funs_handle.ipcpy[i] = mc_if_cpy;
- uavs3e_funs_handle.ipflt[IPFILTER_H_8][i] = mc_if_hor_luma;
- uavs3e_funs_handle.ipflt[IPFILTER_H_4][i] = mc_if_hor_chroma;
- uavs3e_funs_handle.ipflt[IPFILTER_V_8][i] = mc_if_ver_luma;
- uavs3e_funs_handle.ipflt[IPFILTER_V_4][i] = mc_if_ver_chroma;
- uavs3e_funs_handle.ipflt_ext[IPFILTER_EXT_8][i] = mc_if_hor_ver_luma;
- uavs3e_funs_handle.ipflt_ext[IPFILTER_EXT_4][i] = mc_if_hor_ver_chroma;
- }
- }
|