gzq
/
v03_nic

 
			
							import argparse
import math
import os
import struct
import sys
import time
import glob

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from Util.metrics import evaluate

# import Util.AE as AE
import AE
import Model.model as model
from Model.context_model import Weighted_Gaussian
from Util import torch_msssim
from Util.block_metric import check_RD_GEO # blcok based metric
from Util.config import dict
from Util.generate_substitute import SubstituteGenerator

# Avoid a memory leak during CPU inference for PyTorch < 1.5.
# More details: https://github.com/pytorch/pytorch/issues/27971
os.environ['LRU_CACHE_CAPACITY'] = '1'

# Run the networks on CUDA when truthy.
GPU = dict['GPU']

# ``model_index`` selects an entry of ``models``:
#   variable-rate (VR) models -> index in [0, 3], with a per-model lambda cap
#   fixed-rate models         -> index in [0, 15]
USE_VR_MODEL = dict['USE_VR_MODEL']
if USE_VR_MODEL:
    models = ["mse_VR_low", "mse_VR_high", "msssim_VR_low", "msssim_VR_high"]
    max_lambdas = [64, 256, 1.28, 6.40]
else:
    models = ["mse200", "mse400", "mse800", "mse1600", "mse3200", "mse6400", "mse12800", "mse25600",
              "msssim4", "msssim8", "msssim16", "msssim32", "msssim64", "msssim128", "msssim320", "msssim640"]

# ``num_steps`` is only defined (and only needed) when preprocessing is enabled.
USE_PREPROCESSING = dict['USE_PREPROCESSING']
if USE_PREPROCESSING:
    num_steps = dict['num_steps']

USE_MULTI_HYPER = dict['USE_MULTI_HYPER']

# The multi-hyper network and the variable-rate network are mutually exclusive.
# ``not (...)`` is used instead of ``(...) is False`` because the latter is an
# identity test: with a falsy non-bool flag (0, None) the ``and`` expression
# yields that object, which is not the ``False`` singleton, and the check would
# fail for a perfectly valid configuration.
assert not (USE_MULTI_HYPER and USE_VR_MODEL), \
    "USE_MULTI_HYPER and USE_VR_MODEL cannot both be enabled"

def _enc_on_device(tensor):
    """Return ``tensor`` moved to CUDA when the module-level ``GPU`` flag is set."""
    return tensor.cuda() if GPU else tensor


def _enc_integerize(latent):
    """Round-trip ``latent`` through an integer NumPy array back to a float tensor."""
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the dtype it aliased.
    return _enc_on_device(torch.Tensor(latent.cpu().numpy().astype(int)))


def _enc_positive_scale(scale):
    """Return ``|scale|`` with every entry below 1e-6 raised to 1e-6."""
    scale = torch.abs(scale)
    scale[scale < 1e-6] = 1e-6
    return scale


def _enc_mixture_cdf(params_prob):
    """Return the CDF callable of the 3-component Gaussian mixture in ``params_prob``.

    ``params_prob`` is split into nine equal chunks along dim 1, ordered
    (weight, mean, scale) for each component; the weights are softmax-normalised
    so they sum to one.
    """
    prob0, mean0, scale0, prob1, mean1, scale1, prob2, mean2, scale2 = [
        part.squeeze(1) for part in torch.chunk(params_prob, 9, dim=1)]
    weights = F.softmax(torch.stack([prob0, prob1, prob2], dim=-1), dim=-1)
    normal = torch.distributions.normal.Normal
    comp0 = normal(mean0, _enc_positive_scale(scale0))
    comp1 = normal(mean1, _enc_positive_scale(scale1))
    comp2 = normal(mean2, _enc_positive_scale(scale2))

    def mixture_cdf(points):
        return (weights[..., 0] * comp0.cdf(points)
                + weights[..., 1] * comp1.cdf(points)
                + weights[..., 2] * comp2.cdf(points))

    return mixture_cdf


def _enc_gaussian_cdf(mean, scale):
    """Return the CDF callable of ``Normal(mean, max(|scale|, 1e-6))``."""
    return torch.distributions.normal.Normal(mean, _enc_positive_scale(scale)).cdf


def _enc_integer_cdf(lower, axis, min_v, max_v, precise=16):
    """Quantise the float CDF ``lower`` (last dim indexed by ``axis``) to integers.

    The CDF is scaled by ``2**precise - (max_v - min_v + 1)`` and the symbol
    offset ``axis - min_v`` is added, so consecutive entries along the last
    dimension differ by at least one.
    """
    scaled = lower.detach().cpu().numpy() * ((1 << precise) - (max_v - min_v + 1))
    return scaled.astype(np.int32) + (axis - min_v).astype(np.int32)


def _enc_write_stream(symbols, cdf_rows, min_v, bin_path):
    """Arithmetic-code ``symbols`` into ``bin_path`` and return its size in bytes.

    ``cdf_rows[k]`` is the integer CDF table of ``symbols[k]``: entry
    ``s - min_v`` is the lower bound of symbol ``s`` and the next entry its
    upper bound.
    """
    cdf_lower = [int(row[s - min_v]) for s, row in zip(symbols, cdf_rows)]
    cdf_upper = [int(row[s - min_v + 1]) for s, row in zip(symbols, cdf_rows)]
    AE.encode_cdf(cdf_lower, cdf_upper, bin_path)
    size = os.path.getsize(bin_path)
    print("%s: %d bytes" % (bin_path, size))
    return size


def _enc_code_latent(latent_q, cdf_fn, bin_path, label, precise=16):
    """Code a quantised 4-D latent with a per-element CDF; return (min, max, size).

    ``cdf_fn`` maps a ``[1, c, h, w]`` tensor of evaluation points to the CDF
    value of every latent element at that point.
    """
    symbols = torch.reshape(latent_q, [-1]).cpu().numpy().astype(int).tolist()
    min_v, max_v = min(symbols), max(symbols)
    _, c, h, w = latent_q.shape
    print(label, c)

    # CDF is sampled at v - 0.5 for v in [min_v, max_v + 1].
    axis = np.arange(min_v, max_v + 2)
    device = 'cuda' if GPU else 'cpu'
    lower = torch.zeros(1, c, h, w, axis.size)
    # One evaluation point per iteration keeps the peak memory at a single
    # [1, c, h, w] tensor instead of a tiled [1, c, h, w, K] grid.
    for k, value in enumerate(axis):
        points = torch.full((1, c, h, w), float(value) - 0.5,
                            dtype=torch.float32, device=device)
        lower[..., k] = cdf_fn(points)

    table = _enc_integer_cdf(lower, axis, min_v, max_v, precise)
    rows = np.reshape(table, [len(symbols), -1])
    size = _enc_write_stream(symbols, rows, min_v, bin_path)
    return min_v, max_v, size


def _enc_code_factorized(image_comp, latent_q, bin_path, precise=16):
    """Code a quantised 4-D latent with the per-channel factorized prior; return (min, max, size)."""
    min_v = int(torch.min(latent_q).item())
    max_v = int(torch.max(latent_q).item())
    _, c, h, w = latent_q.shape
    per_channel = torch.reshape(latent_q, [c, -1]).cpu().numpy().astype(int).tolist()

    # CDF is sampled at v - 0.5 for v in [min_v, max_v + 1], once per channel.
    axis = np.arange(min_v, max_v + 2)
    grid = _enc_on_device(torch.FloatTensor(np.tile(axis, [c, 1, 1])))
    lower = torch.sigmoid(image_comp.factorized_entropy_func._logits_cumulative(
        grid - 0.5, stop_gradient=False))
    # One CDF row per channel; every symbol of channel i shares row i.
    table = np.reshape(_enc_integer_cdf(lower, axis, min_v, max_v, precise), [c, -1])

    symbols = [s for channel in per_channel for s in channel]
    rows = [table[i] for i, channel in enumerate(per_channel) for _ in channel]
    size = _enc_write_stream(symbols, rows, min_v, bin_path)
    return min_v, max_v, size


# Not decorated with @torch.no_grad(): the network forward passes below use
# explicit ``torch.no_grad()`` blocks instead.
# NOTE(review): presumably so SubstituteGenerator.perturb can use autograd -- confirm.
def encode(im_dir, out_dir, model_dir, model_index, lambda_rd_ori):
    """Compress the image at ``im_dir`` into the bitstream file ``out_dir``.

    The image is split into CTUs of ``dict['CTU_size']`` pixels, each CTU is
    zero-padded to a multiple of 64 and coded independently. Output layout:

    * global header ``'2HB3?H'``: H, W, model_index, USE_GEO, USE_VR_MODEL,
      USE_MULTI_HYPER, CTU size;
    * in variable-rate mode, one extra ``'H'``: the 16-bit scaled lambda;
    * per CTU: a block header (min/max symbol values and stream sizes, plus the
      geometric index when USE_GEO is set) followed by the arithmetic-coded
      streams (main, then hyper / hyper_1 + hyper_2).

    Args:
        im_dir: path of the input image.
        out_dir: path of the output bitstream file (overwritten).
        model_dir: directory holding ``<model>.pkl`` and ``<model>p.pkl`` weights.
        model_index: index into the module-level ``models`` list.
        lambda_rd_ori: requested rate-distortion lambda; only used by the
            variable-rate model, where it is clipped to 1.2x the model maximum.

    Side effects: writes ``main.bin`` and ``hyper*.bin`` scratch files in the
    current working directory.
    """
    ############################## Load Encoding Configuration Parameters ########################
    USE_GEO = dict['USE_GEO']
    block_width = dict['CTU_size']
    block_height = dict['CTU_size']

    M, N2 = 192, 128
    lambda_rd = None
    if USE_VR_MODEL:
        lambda_rd_max = max_lambdas[model_index]
        if lambda_rd_ori > 1.2 * lambda_rd_max:
            lambda_rd_ori = 1.2 * lambda_rd_max
        lambda_rd_nom = lambda_rd_ori / lambda_rd_max
        # The scaled lambda is stored as an unsigned 16-bit field; a clipped
        # lambda maps to exactly 2**16, which struct.pack('H') rejects, so cap it.
        lambda_rd_nom_scaled = min(int(lambda_rd_nom / 1.2 * pow(2, 16)), 0xFFFF)
        # Use the de-quantised value so the decoder sees the same lambda.
        lambda_rd_nom_used = lambda_rd_nom_scaled / pow(2, 16) * 1.2
        lambda_rd = torch.Tensor(np.full((1, 1), lambda_rd_nom_used, np.float32))
        if model_index in (1, 3):
            M, N2 = 256, 192
        image_comp = model.Image_coding(3, M, N2, M, M // 2)
    else:
        if model_index in (6, 7, 14, 15):
            M, N2 = 256, 192
        if USE_MULTI_HYPER:
            image_comp = model.Image_coding_multi_hyper(3, M, N2, M, M // 2)
        else:
            image_comp = model.Image_coding(3, M, N2, M, M // 2)
    context = Weighted_Gaussian(M)

    if USE_PREPROCESSING:
        lmbda_list = [200, 400, 800, 1600, 3200, 6400, 12800, 25600, 4, 8, 16, 32, 64, 128, 320, 640]
        stepsize_list = [150, 75, 30, 10, 10, 5, 3, 1, 100, 10, 7, 5, 1, 1, 1, 0.3]

        if USE_VR_MODEL:
            lmbda = lambda_rd_ori * 100
            # NOTE(review): raises ValueError unless lambda * 100 is exactly one
            # of the entries in lmbda_list -- confirm this is intended.
            step_size = stepsize_list[lmbda_list.index(lmbda)]
            reconstruction_metric = 'mse' if model_index <= 1 else 'msssim'
        else:
            step_size = stepsize_list[model_index]
            lmbda = lmbda_list[model_index]
            reconstruction_metric = 'mse' if model_index <= 7 else 'msssim'

        substitute_generator = SubstituteGenerator(model=image_comp, context_model=context, llambda=lmbda,
                                                   num_steps=num_steps, step_size=step_size,
                                                   reconstruct_metric=reconstruction_metric,
                                                   )

    ######################### Load Model #########################
    weights_stem = os.path.join(model_dir, models[model_index])
    image_comp.load_state_dict(torch.load(weights_stem + r'.pkl', map_location='cpu'))
    context.load_state_dict(torch.load(weights_stem + r'p.pkl', map_location='cpu'))
    if GPU:
        image_comp = image_comp.cuda()
        context = context.cuda()
        if USE_VR_MODEL:
            lambda_rd = lambda_rd.cuda()

    ######################### Read Image #########################
    img = np.array(Image.open(im_dir)) / 255.0
    H, W, _ = img.shape
    C = 3

    ######################### Splitting Image #########################
    img_block_list = [
        img[top:min(top + block_height, H), left:min(left + block_width, W), ...]
        for top in range(0, H, block_height)
        for left in range(0, W, block_width)
    ]

    # ``with`` guarantees the bitstream file is closed even if coding fails.
    with open(out_dir, 'wb') as file_object:
        file_object.write(struct.pack('2HB3?H', H, W, model_index, USE_GEO, USE_VR_MODEL,
                                      USE_MULTI_HYPER, block_width))
        if USE_VR_MODEL:
            file_object.write(struct.pack('H', lambda_rd_nom_scaled))

        for Block_Idx, img_block in enumerate(img_block_list):  # Traverse CTUs
            ######################### Padding Block #########################
            block_H, block_W = img_block.shape[0], img_block.shape[1]
            tile = 64.
            block_H_PAD = int(tile * np.ceil(block_H / tile))
            block_W_PAD = int(tile * np.ceil(block_W / tile))
            padded = np.zeros([block_H_PAD, block_W_PAD, 3], dtype='float32')
            padded[:block_H, :block_W, :] = img_block[:, :, :3]
            im_block = torch.FloatTensor(padded).permute(2, 0, 1).contiguous()
            im_block = _enc_on_device(im_block.view(1, C, block_H_PAD, block_W_PAD))
            print('====> Encoding Image:', im_dir, "%dx%d" % (block_H, block_W), 'to', out_dir,
                  " Block Idx: %d" % (Block_Idx))

            ############################ Geometric Flip and rotate ########################
            if USE_GEO:
                _, _, geo_index, _ = check_RD_GEO(im_block, lambda_rd, image_comp, context, model_index)
                i_rot = int(geo_index % 4)
                if geo_index < 4:
                    im_block = torch.rot90(im_block, k=i_rot, dims=[2, 3])
                else:
                    im_block = torch.rot90(torch.flip(im_block, dims=[2]), k=i_rot, dims=[2, 3])

            ############################ Preprocessing to find a substitute ########################
            if USE_PREPROCESSING:
                if USE_VR_MODEL:
                    im_block = substitute_generator.perturb(orig_image=im_block, lambda_rd=lambda_rd)
                else:
                    im_block = substitute_generator.perturb(orig_image=im_block)

            if USE_MULTI_HYPER:
                with torch.no_grad():
                    y_main, y_hyper, y_hyper_2 = image_comp.encoder(im_block, lambda_rd)
                    y_main_q = _enc_integerize(torch.round(y_main))

                    # The hyper-1 decoder is fed the rounded tensor directly,
                    # before the integer round trip.
                    y_hyper_q = torch.round(y_hyper)
                    hyper_dec = image_comp.p(image_comp.hyper_1_dec(y_hyper_q))
                    y_hyper_q = _enc_integerize(y_hyper_q)

                    y_hyper_2_q, _ = image_comp.factorized_entropy_func(y_hyper_2, 2)
                    hyper_2_dec = image_comp.p_2(image_comp.hyper_2_dec(y_hyper_2_q))
                    y_hyper_2_q = _enc_integerize(y_hyper_2_q)

                    _, params_prob = context(y_main_q, hyper_dec)

                # Main Arith Encode
                Min_Main, Max_Main, FileSizeMain = _enc_code_latent(
                    y_main_q, _enc_mixture_cdf(params_prob), "main.bin", "Main Channel:")

                # Hyper 1 Arith Encode: first half of hyper_2_dec channels is
                # the mean, second half the scale.
                c_hyper_1 = y_hyper_q.shape[1]
                Min_HYPER_1, Max_HYPER_1, FileSizeHyper1 = _enc_code_latent(
                    y_hyper_q,
                    _enc_gaussian_cdf(hyper_2_dec[:, :c_hyper_1, :, :], hyper_2_dec[:, c_hyper_1:, :, :]),
                    "hyper_1.bin", "Hyper 1 Channel:")

                # Hyper 2 Arith Encode
                Min_HYPER_2, Max_HYPER_2, FileSizeHyper2 = _enc_code_factorized(
                    image_comp, y_hyper_2_q, "hyper_2.bin")

                head_format = '6h3I'
                head_values = [Min_Main, Max_Main, Min_HYPER_1, Max_HYPER_1, Min_HYPER_2, Max_HYPER_2,
                               FileSizeMain, FileSizeHyper1, FileSizeHyper2]
                stream_files = ("main.bin", "hyper_1.bin", "hyper_2.bin")

            else:  # Single Hyper Model
                with torch.no_grad():
                    y_main, y_hyper = image_comp.encoder(im_block, lambda_rd)
                    y_main_q = _enc_integerize(torch.round(y_main))

                    y_hyper_q, _ = image_comp.factorized_entropy_func(y_hyper, 2)
                    y_hyper_q = _enc_integerize(y_hyper_q)

                    hyper_dec = image_comp.p(image_comp.hyper_dec(y_hyper_q))
                    _, params_prob = context(y_main_q, hyper_dec)

                # Main Arith Encode
                Min_Main, Max_Main, FileSizeMain = _enc_code_latent(
                    y_main_q, _enc_mixture_cdf(params_prob), "main.bin", "Main Channel:")

                # Hyper Arith Encode
                Min_V_HYPER, Max_V_HYPER, FileSizeHyper = _enc_code_factorized(
                    image_comp, y_hyper_q, "hyper.bin")

                head_format = '4h2I'
                head_values = [Min_Main, Max_Main, Min_V_HYPER, Max_V_HYPER, FileSizeMain, FileSizeHyper]
                stream_files = ("main.bin", "hyper.bin")

            if USE_GEO:
                # The geometric index is appended as one unsigned byte.
                head_format += 'B'
                head_values.append(geo_index)

            # CU information, then the coded streams concatenated in order.
            file_object.write(struct.pack(head_format, *head_values))
            for stream_file in stream_files:
                with open(stream_file, 'rb') as f:
                    file_object.write(f.read())


@torch.no_grad()
def decode(bin_dir, rec_dir, model_dir):
    ############### retreive head info ###############
    T = time.time()
    file_object = open(bin_dir, 'rb')

    head_len = struct.calcsize('2HB3?H')
    bits = file_object.read(head_len)
    [H, W, model_index, USE_GEO, USE_VR_MODEL,USE_MULTI_HYPER, CTU_size] = struct.unpack('2HB3?H', bits)
    if USE_VR_MODEL:
        head_lambda_len = struct.calcsize('H')
        bits = file_object.read(head_lambda_len)
        [lambda_rd_nom_scaled] = struct.unpack('H', bits)
    # print("File Info:",Head)
    # Split Main & Hyper bins
    block_width = CTU_size
    block_height = CTU_size
    C = 3
    out_img = np.zeros([H, W, C])
    H_offset = 0
    W_offset = 0
    Block_Num_in_Width = int(np.ceil(W / block_width))
    Block_Num_in_Height = int(np.ceil(H / block_height))

    if USE_VR_MODEL:
        lambda_rd_nom_used = lambda_rd_nom_scaled / pow(2, 16) * 1.2
        lambda_rd_numpy = np.zeros((1, 1), np.float32)
        lambda_rd_numpy[0, 0] = lambda_rd_nom_used
        lambda_rd = torch.Tensor(lambda_rd_numpy)
        M, N2 = 192, 128
        if (model_index == 1) or (model_index == 3):
            M, N2 = 256, 192
        image_comp = model.Image_coding(3, M, N2, M, M // 2)
        context = Weighted_Gaussian(M)
    else:
        M, N2 = 192, 128
        if (model_index == 6) or (model_index == 7) or (model_index == 14) or (model_index == 15):
            M, N2 = 256, 192
        if USE_MULTI_HYPER:
            image_comp = model.Image_coding_multi_hyper(3, M, N2, M, M // 2)
        else:
            image_comp = model.Image_coding(3, M, N2, M, M // 2)
        context = Weighted_Gaussian(M)
        lambda_rd = None

    c_main = M
    if USE_MULTI_HYPER:
        c_hyper = 256
        c_hyper_2 = 128
    else:
        c_hyper = N2

    ######################### Load Model #########################
    image_comp.load_state_dict(torch.load(
        os.path.join(model_dir, models[model_index] + r'.pkl'), map_location='cpu'))
    context.load_state_dict(torch.load(
        os.path.join(model_dir, models[model_index] + r'p.pkl'), map_location='cpu'))
    if GPU:
        image_comp = image_comp.cuda()
        context = context.cuda()

    for i_block in range(Block_Num_in_Height):
        for j_block in range(Block_Num_in_Width):
            # [block_H, block_W] indicates real shape of the current block
            block_H = block_height
            block_W = block_width
            if i_block == Block_Num_in_Height - 1:
                block_H = H - (Block_Num_in_Height - 1) * block_height
            if j_block == Block_Num_in_Width - 1:
                block_W = W - (Block_Num_in_Width - 1) * block_width
            print('==================> Decoding Block:', "(%d, %d)" % (i_block, j_block),
                  "[%d, %d]" % (block_H, block_W))
            precise = 16
            tile = 64.

            block_H_PAD = int(tile * np.ceil(block_H / tile))
            block_W_PAD = int(tile * np.ceil(block_W / tile))
            block_loc_list = []
            block_loc_list.append([0, 0, block_H_PAD, block_W_PAD])

            for block_loc in block_loc_list:
                # block_loc -> [vertical_location, horizontal_location, block_height, block_width]
                print('==================> Decoding sub_block:',
                      "(%d, %d, %d, %d)" % (block_loc[0], block_loc[1], block_loc[2], block_loc[3]))
                
                if USE_MULTI_HYPER:
                    if USE_GEO:
                        Block_head_len = struct.calcsize('6h3IB')
                        bits = file_object.read(Block_head_len)
                        [ Min_Main, Max_Main, Min_HYPER_1, Max_HYPER_1, Min_HYPER_2,Max_HYPER_2, FileSizeMain, FileSizeHyper1, FileSizeHyper2, geo_index] = struct.unpack('6h3IB', bits)
                        if geo_index % 2 == 0:
                            # [enc_height, enc_width] indicates shape of encoding block (after geometrical operation)
                            enc_height = block_loc[2]
                            enc_width = block_loc[3]
                        else:
                            enc_height = block_loc[3]
                            enc_width = block_loc[2]
                    else:
                        # BUG FIXED HERE
                        enc_height = block_H_PAD
                        enc_width = block_W_PAD
                        Block_head_len = struct.calcsize('6h3I')
                        bits = file_object.read(Block_head_len)
                        [ Min_Main, Max_Main, Min_HYPER_1, Max_HYPER_1, Min_HYPER_2,Max_HYPER_2, FileSizeMain, FileSizeHyper1, FileSizeHyper2] = struct.unpack('6h3I', bits)
                    
                    with open("main.bin", 'wb') as f:
                        bits = file_object.read(FileSizeMain)
                        f.write(bits)
                    with open("hyper_1.bin", 'wb') as f:
                        bits = file_object.read(FileSizeHyper1)
                        f.write(bits)
                    with open("hyper_2.bin", 'wb') as f:
                        bits = file_object.read(FileSizeHyper2)
                        f.write(bits)
                
                else: # Single Hyper Model
                    if USE_GEO:
                        Block_head_len = struct.calcsize('4h2IB')
                        bits = file_object.read(Block_head_len)
                        [Min_Main, Max_Main, Min_V_HYPER, Max_V_HYPER, FileSizeMain, FileSizeHyper,
                        geo_index] = struct.unpack('4h2IB', bits)
                        if geo_index % 2 == 0:
                            # [enc_height, enc_width] indicates shape of encoding block (after geometrical operation)
                            enc_height = block_loc[2]
                            enc_width = block_loc[3]
                        else:
                            enc_height = block_loc[3]
                            enc_width = block_loc[2]
                    else:
                        Block_head_len = struct.calcsize('4h2I')
                        bits = file_object.read(Block_head_len)
                        [Min_Main, Max_Main, Min_V_HYPER, Max_V_HYPER, FileSizeMain, FileSizeHyper] = struct.unpack('4h2I',bits)
                    with open("main.bin", 'wb') as f:
                        bits = file_object.read(FileSizeMain)
                        f.write(bits)
                    with open("hyper.bin", 'wb') as f:
                        bits = file_object.read(FileSizeHyper)
                        f.write(bits)
                    
                    print("check")
                if USE_MULTI_HYPER:
                    ############### Hyper 2 Decoder ###############
                    # [Min_V - 0.5 , Max_V + 0.5]
                    sample = np.arange(Min_HYPER_2, Max_HYPER_2+1+1)
                    sample = np.tile(sample, [c_hyper_2, 1, 1])
                    # Here goes HYY
                    lower = torch.sigmoid(image_comp.factorized_entropy_func._logits_cumulative(
                        torch.FloatTensor(sample).cuda() - 0.5, stop_gradient=False))
                    cdf_h = lower.data.cpu().numpy()*((1 << precise) - (Max_HYPER_2 -
                                                                        Min_HYPER_2 + 1))  # [N1, 1, Max - Min]
                    cdf_h = cdf_h.astype(np.int) + sample.astype(np.int) - Min_HYPER_2
                    T2 = time.time()
                
                    AE.init_decoder("hyper_2.bin", Min_HYPER_2, Max_HYPER_2)
                    
                    Recons = []
                    for ii in range(c_hyper_2):
                        for jj in range(int(block_H_PAD * block_W_PAD / 64 / 64)):
                            #print(cdf_h[i,0,:])
                            Recons.append(AE.decode_cdf(cdf_h[ii, 0, :].tolist()))
                        
                    # reshape Recons to y_hyper_q   [1, c_hyper, H_PAD/64, W_PAD/64]
                    y_hyper_2_q = torch.reshape(torch.Tensor(
                        Recons), [1, c_hyper_2, int(block_H_PAD / 64), int(block_W_PAD / 64)])
                    

                    #IPython.embed()
                    ############### Hyper 1 Decoder ###############
                    # hyper_dec = image_comp.p(image_comp.hyper_dec(y_hyper_q))
                    hyper_2_dec = image_comp.p_2(image_comp.hyper_2_dec(y_hyper_2_q.cuda()))
                    # print("hyper_2_dec",hyper_2_dec.mean())
                    _, c, h, w = hyper_2_dec.shape
                    c //= 2
                    mean = hyper_2_dec[:, :c, :, :]
                    scale = hyper_2_dec[:, c:, :, :]
                    scale = torch.abs(scale)
                    scale[scale < 1e-6] = 1e-6
                    #import IPython
                    #IPython.embed()
                    m = torch.distributions.normal.Normal(mean, scale)

                    sample = np.arange(Min_HYPER_1, Max_HYPER_1+1+1)  # [Min_V - 0.5 , Max_V + 0.5]
                    sample = torch.FloatTensor(np.tile(sample, [1, c, h, w, 1])).cuda()

                    lower = torch.zeros(1, c, h, w, Max_HYPER_1-Min_HYPER_1+2).cuda()
                    for cc in range(sample.shape[-1]):
                        lower[...,cc] = m.cdf(sample[...,cc] - 0.5)
                    # lower = m.cdf(sample-0.5)
                    precise = 16

                    cdf_m = lower.data.cpu().numpy()*((1 << precise) - (Max_HYPER_1 - Min_HYPER_1 + 1))
                    cdf_m = cdf_m.astype(np.int32) + sample.cpu().numpy().astype(np.int32) - Min_HYPER_1

                    AE.init_decoder("hyper_1.bin", Min_HYPER_1, Max_HYPER_1)
                    Recons = []
                    for ii in range(c):
                        for jj in range(int(h)):
                            for kk in range(int(w)):
                                #import IPython
                                #IPython.embed()
                                #print(ii,jj,kk)
                                Recons.append(AE.decode_cdf(cdf_m[0, ii, jj, kk, :].tolist()))

                    y_hyper_q = torch.reshape(torch.Tensor(Recons), [1, c, h, w]).cuda()

                else: # Single Hyper Model
                    ############### Hyper Decoder ###############
                    # [Min_V - 0.5 , Max_V + 0.5]
                    sample = np.arange(Min_V_HYPER, Max_V_HYPER + 1 + 1)
                    print("check2")
                    sample = np.tile(sample, [c_hyper, 1, 1])
                    sample_tensor = torch.FloatTensor(sample)
                    if GPU:
                        sample_tensor = sample_tensor.cuda()
                    lower = torch.sigmoid(image_comp.factorized_entropy_func._logits_cumulative(
                        sample_tensor - 0.5, stop_gradient=False))
                    print("check2")
                    cdf_h = lower.data.cpu().numpy() * ((1 << precise) - (Max_V_HYPER -
                                                                        Min_V_HYPER + 1))  # [N1, 1, Max - Min]
                    cdf_h = cdf_h.astype(np.int) + sample.astype(np.int) - Min_V_HYPER
                    T2 = time.time()
                    print("check2")
                    AE.init_decoder("hyper.bin", Min_V_HYPER, Max_V_HYPER)
                    print("check2")
                    Recons = []
                    for i in range(c_hyper):
                        for j in range(int(enc_height * enc_width / 64 / 64)):
                            # print(cdf_h[i,0,:])
                            Recons.append(AE.decode_cdf(cdf_h[i, 0, :].tolist()))
                    # reshape Recons to y_hyper_q   [1, c_hyper, H_PAD/64, W_PAD/64]
                    print("check2")
                    y_hyper_q = torch.reshape(torch.Tensor(
                        Recons), [1, c_hyper, int(enc_height / 64), int(enc_width / 64)])
                    print("check2")

                ############### Main Decoder ###############
                if GPU:
                    y_hyper_q = y_hyper_q.cuda()
                if USE_MULTI_HYPER:
                    hyper_dec = image_comp.p(image_comp.hyper_1_dec(y_hyper_q))
                else:
                    hyper_dec = image_comp.p(image_comp.hyper_dec(y_hyper_q))
                    print("check3")
                h, w = int(enc_height / 16), int(enc_width / 16)
                sample = np.arange(Min_Main, Max_Main + 1 + 1)  # [Min_V - 0.5 , Max_V + 0.5]

                sample = torch.FloatTensor(sample)
                if GPU:
                    sample = sample.cuda()

                p3d = (5, 5, 5, 5, 5, 5)
                y_main_q = torch.zeros(1, 1, c_main + 10, h + 10, w + 10)  # 8000x4000 -> 500*250
                if GPU:
                    y_main_q = y_main_q.cuda()
                    if USE_VR_MODEL:
                        lambda_rd = lambda_rd.cuda()
                AE.init_decoder("main.bin", Min_Main, Max_Main)
                hyper = torch.unsqueeze(context.conv3(hyper_dec), dim=1)
                print("check4")
                #
                context.conv1.weight.data *= context.conv1.mask

                for i in range(c_main):
                    T = time.time()
                    for j in range(int(enc_height / 16)):
                        for k in range(int(enc_width / 16)):

                            x1 = F.conv3d(y_main_q[:, :, i:i + 12, j:j + 12, k:k + 12],
                                          weight=context.conv1.weight, bias=context.conv1.bias)  # [1,24,1,1,1]
                            params_prob = context.conv2(
                                torch.cat((x1, hyper[:, :, i:i + 2, j:j + 2, k:k + 2]), dim=1))

                            # 3 gaussian
                            prob0, mean0, scale0, prob1, mean1, scale1, prob2, mean2, scale2 = params_prob[
                                                                                               0, :, 0, 0, 0]
                            # keep the weight  summation of prob == 1
                            probs = torch.stack([prob0, prob1, prob2], dim=-1)
                            probs = F.softmax(probs, dim=-1)

                            # process the scale value to positive non-zero
                            scale0 = torch.abs(scale0)
                            scale1 = torch.abs(scale1)
                            scale2 = torch.abs(scale2)
                            scale0[scale0 < 1e-6] = 1e-6
                            scale1[scale1 < 1e-6] = 1e-6
                            scale2[scale2 < 1e-6] = 1e-6
                            # 3 gaussian distributions
                            m0 = torch.distributions.normal.Normal(mean0.view(1, 1).repeat(
                                1, Max_Main - Min_Main + 2), scale0.view(1, 1).repeat(1, Max_Main - Min_Main + 2))
                            m1 = torch.distributions.normal.Normal(mean1.view(1, 1).repeat(
                                1, Max_Main - Min_Main + 2), scale1.view(1, 1).repeat(1, Max_Main - Min_Main + 2))
                            m2 = torch.distributions.normal.Normal(mean2.view(1, 1).repeat(
                                1, Max_Main - Min_Main + 2), scale2.view(1, 1).repeat(1, Max_Main - Min_Main + 2))
                            lower0 = m0.cdf(sample - 0.5)
                            lower1 = m1.cdf(sample - 0.5)
                            lower2 = m2.cdf(sample - 0.5)  # [1,c,h,w,Max-Min+2]
                            if GPU:
                                lower0 = lower0.cuda()
                                lower1 = lower1.cuda()
                                lower2 = lower2.cuda()

                            lower = probs[0:1] * lower0 + probs[1:2] * lower1 + probs[2:3] * lower2
                            cdf_m = lower.data.cpu().numpy() * ((1 << precise) - (Max_Main -
                                                                                  Min_Main + 1))  # [1, c, h, w ,Max-Min+1]
                            cdf_m = cdf_m.astype(np.int) + \
                                    sample.cpu().numpy().astype(np.int) - Min_Main

                            pixs = AE.decode_cdf(cdf_m[0, :].tolist())
                            y_main_q[0, 0, i + 5, j + 5, k + 5] = pixs
                    print("Decoding Channel (%d/192), Time (s): %0.4f" % (i, time.time() - T))
                del hyper, hyper_dec
                y_main_q = y_main_q[0, :, 5:-5, 5:-5, 5:-5]
                rec = image_comp.decoder(y_main_q, lambda_rd)

                ############################ Reverse Geometric Flip and Rotate ########################
                if USE_GEO:
                    i_rot = int(geo_index % 4)
                    if geo_index < 4:
                        rec = torch.rot90(rec, k=4 - i_rot, dims=[2, 3])
                    else:
                        rec = torch.flip(torch.rot90(rec, k=4 - i_rot, dims=[2, 3]), dims=[2])

                output_ = torch.clamp(rec, min=0., max=1.0)
                out = output_.data[0].cpu().numpy()
                out = out.transpose(1, 2, 0)
                out_img[H_offset: H_offset + block_H, W_offset: W_offset + block_W, :] = out[:block_H, :block_W, :]
            del block_loc_list
            W_offset += block_W
            if W_offset >= W:
                W_offset = 0
                H_offset += block_H
    print('Decoding success!')
    out_img = np.round(out_img * 255.0)
    out_img = out_img.astype('uint8')
    img = Image.fromarray(out_img[:H, :W, :])
    img.save(rec_dir)


# -i /output/str.bin -o /output/1dec.png -m_dir /model/ljp105/NIC_v02_VR_models --decode
# -i /data/ljp105/NIC_Dataset/test/ClassD_Kodak/1.png -o /output/str.bin -m_dir /model/ljp105/NIC_v02_VR_models -m 0 --lambda_rd 2 --encode
if __name__ == '__main__':
    # Round-trip smoke test: encode one image, decode the bitstream that was
    # just written, and print the RGB PSNR between source and reconstruction.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=str, required=True, help="Input Image")
    parser.add_argument("-o", "--output", type=str, required=True, help="Output Bin(encode)/Image(decode)")
    parser.add_argument("-m_dir", "--model_dir", type=str, required=True, help="Directory containing trained models")
    parser.add_argument("-m", "--model", type=int, default=0, help="Model Index [0-5]")
    parser.add_argument("--lambda_rd", type=float, default=1, help="Input lambda for variable-rate models")
    parser.add_argument('--encode', dest='coder_flag', action='store_true')
    parser.add_argument('--decode', dest='coder_flag', action='store_false')
    # parser.add_argument("--block_width", type=int, default=2048, help="coding block width")
    # parser.add_argument("--block_height", type=int, default=1024, help="coding block height")
    args = parser.parse_args()

    test_set = ['ClassA_6K', 'ClassB_4K', 'ClassC_2K', 'ClassD_Kodak']
    test_root = test_set[3]

    # Collect candidate images: either every PNG inside the selected test-set
    # sub-directory of --input, or --input itself when it is not a directory.
    test_images = []
    if os.path.isdir(args.input):
        for entry in os.listdir(args.input):
            if entry != test_root:
                continue
            path = os.path.join(args.input, entry)
            if os.path.isdir(path):
                test_images += glob.glob(path + '/*.png')
            if os.path.isfile(path):
                test_images.append(path)
    else:
        test_images.append(args.input)

    if not test_images:
        parser.error("no test image found for input %r (looked for %r)" % (args.input, test_root))

    # The second collected image is used when there is one (previous behavior);
    # with a single image (e.g. --input is a file) fall back to the first
    # instead of failing with an IndexError.
    im_path = test_images[1] if len(test_images) > 1 else test_images[0]

    rec_path = 'output_test/dec.png'
    # Make sure both output locations exist before the codec tries to write.
    for target in (args.output, rec_path):
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)

    with Image.open(im_path) as img:
        source_img = np.array(img)

    encode(im_path, args.output, args.model_dir, args.model, args.lambda_rd)

    # Decode the bitstream produced above rather than a hard-coded path.
    decode(args.output, rec_path, args.model_dir)

    with Image.open(rec_path) as img:
        rec_img = np.array(img)

    [rgb_psnr, rgb_msssim, yuv_psnr, y_msssim] = evaluate(source_img, rec_img)
    print(rgb_psnr)


# Disabled CLI dispatch, kept as a bare module-level string literal so it is
# never executed: it would call encode() or decode() depending on
# args.coder_flag and then print the elapsed time.
'''
    T = time.time()
    if args.coder_flag:
        encode(args.input, args.output, args.model_dir, args.model, args.lambda_rd)
    else:
        decode(args.input, args.output, args.model_dir)
    print("Time (s):", time.time() - T)
'''