# ninetailskim / EXQ_IndonesiaVer

 
			
import argparse
import copy
import functools
import os
import random

import paddlehub as hub
import cv2
from PIL import Image, ImageDraw, ImageFont
import moviepy.editor as mpe
from moviepy.editor import VideoFileClip
import numpy as np
from tqdm import tqdm

# Expose only GPU 0 to CUDA; the PaddleHub calls in this file run with
# use_gpu=True.
# NOTE(review): this is assigned after `import paddlehub` — confirm the
# setting still takes effect at that point.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

class segUtils():
    """Wrapper around the PaddleHub ``deeplabv3p_xception65_humanseg`` module."""

    def __init__(self):
        super().__init__()
        # The module is created once per instance and reused by do_seg().
        self.module = hub.Module(name="deeplabv3p_xception65_humanseg")

    def do_seg(self, frame):
        """Run segmentation on one image and return its ``'data'`` entry.

        ``frame`` is submitted as a one-element batch with ``use_gpu=True``;
        only the first result of the batch is looked at.
        """
        results = self.module.segmentation(images=[frame], use_gpu=True)
        first = results[0]
        return first['data']

class detUtils():
    """Wrapper around the PaddleHub ``yolov3_resnet50_vd_coco2017`` module."""

    def __init__(self):
        super().__init__()
        # The module is created once per instance and reused by do_det().
        self.module = hub.Module(name="yolov3_resnet50_vd_coco2017")

    def do_det(self, frame):
        """Return the first detection labelled ``'person'`` in *frame*.

        ``frame`` is submitted as a one-element batch with ``use_gpu=True``.
        Detections are scanned in the order the module reports them; the
        result is ``None`` when none of them is labelled ``'person'``.
        """
        detections = self.module.object_detection(images=[frame], use_gpu=True)
        people = (det for det in detections[0]['data'] if det['label'] == 'person')
        return next(people, None)

@functools.lru_cache(maxsize=8)
def _load_font(path, size):
    """Load the TrueType font at *path* for *size*, memoised per (path, size)."""
    return ImageFont.truetype(path, size, encoding="utf-8")


def cv2ImgAddText(img, text, left, top, textColor=(255, 0, 0), textSize=50,
                  fontPath="font/simsun.ttc"):
    """Draw *text* with a 1-pixel black drop shadow and return a BGR array.

    Args:
        img: Either an OpenCV image (``numpy.ndarray``, converted from BGR to
            RGB before drawing) or an object ``ImageDraw.Draw`` accepts
            directly. In the second case the text is drawn onto ``img``
            itself.
        text: String to render.
        left, top: Coordinates handed to ``ImageDraw.text`` as ``(left, top)``.
        textColor: Fill colour of the text. It is applied on the RGB image,
            so the tuple is in RGB order.
        textSize: Font size passed to ``ImageFont.truetype``.
        fontPath: TrueType font file to use. Defaults to the path that was
            previously hard-coded.

    Returns:
        ``numpy.ndarray`` holding the drawn image, converted from RGB to BGR.
    """
    # OpenCV images arrive as BGR arrays; Pillow needs an RGB Image to draw on.
    if isinstance(img, np.ndarray):
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    draw = ImageDraw.Draw(img)

    # This function is called once per video frame; reuse the parsed font
    # instead of re-reading the font file every time.
    fontStyle = _load_font(fontPath, textSize)

    # Shadow first (offset by one pixel), then the coloured text on top of it.
    draw.text((left + 1, top + 1), text, (0, 0, 0), font=fontStyle)
    draw.text((left, top), text, textColor, font=fontStyle)

    return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)

# Shared model wrappers, instantiated when this module is loaded:
# `su` performs human segmentation and `du` performs object detection.
# compose() reads both of them as globals.
su = segUtils()
du = detUtils()


class Position():
    """Anchor point that yields a randomly jittering position on each call.

    The position returned by :meth:`getPos` is the fixed anchor ``(x, y)``
    plus the current speed, kept within ``th = min(x, y) / 2`` of the anchor
    on each axis.
    """

    def __init__(self, x, y, ranrange):
        super().__init__()
        self.x, self.y = x, y
        # Maximum distance from the anchor allowed on either axis.
        self.th = min(x, y) / 2
        # Number of getPos() calls made so far.
        self.index = 0
        self.speedx = 0
        self.speedy = 0
        # Bound (inclusive) for each randomly drawn speed component.
        self.ranrange = ranrange

    def getdirection(self):
        """Draw a random integer ``(speed_x, speed_y)`` in ``[-ranrange, ranrange]``."""
        limit = self.ranrange
        return random.randint(-limit, limit), random.randint(-limit, limit)

    def _bounce(self, center, speed):
        """Return ``(coordinate, speed)`` for one axis after applying the limits.

        When ``center + speed`` falls outside ``[center - th, center + th]``
        the coordinate is pinned to the violated limit and the speed is
        negated; otherwise both are returned unchanged.
        """
        lower, upper = center - self.th, center + self.th
        target = center + speed
        if target < lower:
            return lower, -speed
        if target > upper:
            return upper, -speed
        return target, speed

    def getPos(self):
        """Advance one step and return the new ``(x, y)`` position.

        A fresh random speed is drawn on every 7th call (starting with the
        first). Once more than four calls have been made, the speed is
        multiplied by 1.1 on every call. The x coordinate is never returned
        below 0.
        """
        step = self.index
        if step % 7 == 0:
            self.speedx, self.speedy = self.getdirection()
        if step > 4:
            self.speedx *= 1.1
            self.speedy *= 1.1
        self.index = step + 1

        pos_x, self.speedx = self._bounce(self.x, self.speedx)
        pos_y, self.speedy = self._bounce(self.y, self.speedy)

        if not pos_x > 0:
            pos_x = 0
        return pos_x, pos_y


def crop(frame, bbox, margin):
    """Return the part of *frame* covered by *bbox*, grown by *margin* pixels.

    Args:
        frame: Image array indexed as ``frame[row, column, channel]``.
        bbox: Mapping with ``'left'``, ``'right'``, ``'top'`` and ``'bottom'``
            entries; each value is truncated with ``int()``.
        margin: Number of pixels added on every side of the box.

    Returns:
        ``frame[top:bottom, left:right, :]`` with the padded box clipped to
        the frame.
    """
    h, w = frame.shape[:2]

    left = max(int(bbox['left']) - margin, 0)
    top = max(int(bbox['top']) - margin, 0)
    # Slice ends are exclusive, so the far edges clamp to w / h. Clamping to
    # w - 1 / h - 1 would drop the last column / row at the border.
    right = min(int(bbox['right']) + margin, w)
    bottom = min(int(bbox['bottom']) + margin, h)

    return frame[top:bottom, left:right, :]

def compose(humanimg, backimg, left):
    """Paste the person found in *humanimg* onto the image in *backimg*.

    The first detected person is cropped with a 20-pixel margin, resized to
    3/5 of the background height (keeping the crop's aspect ratio) and
    blended in through the segmentation mask. The result sits on the bottom
    edge, flush with the left edge when *left* is true and with the right
    edge otherwise.

    Args:
        humanimg: Path of the image containing the person.
        backimg: Path of the background image.
        left: Truthy to place the person at the left, falsy for the right.

    Returns:
        The composited background as a ``uint8`` array.

    Raises:
        FileNotFoundError: If either image cannot be read.
        ValueError: If no person is detected, or the resized person is wider
            than the background.
    """
    person = cv2.imread(humanimg)
    if person is None:
        raise FileNotFoundError("cannot read human image: {}".format(humanimg))
    canvas = cv2.imread(backimg)
    if canvas is None:
        raise FileNotFoundError("cannot read background image: {}".format(backimg))

    bbox = du.do_det(person)
    if bbox is None:
        raise ValueError("no person detected in {}".format(humanimg))
    person = crop(person, bbox, 20)

    height, width = canvas.shape[:2]
    h, w = person.shape[:2]
    newheight = int(height * 3 / 5)
    newwidth = int(newheight * w / h)
    if newwidth > width:
        raise ValueError(
            "resized person ({}px wide) does not fit on background {} ({}px wide)"
            .format(newwidth, backimg, width))

    person = cv2.resize(person, (newwidth, newheight))

    # Scale the mask by 1/255 and round it, then repeat it on a new third
    # axis so it can weight all three colour channels.
    # NOTE(review): assumes do_seg returns a 2-D mask with values in 0..255
    # and the same height/width as its input — confirm against the module.
    mask = np.around(su.do_seg(person) / 255)
    mask3 = np.repeat(mask[:, :, np.newaxis], 3, axis=2)

    rows = slice(height - newheight, height)
    cols = slice(0, newwidth) if left else slice(width - newwidth, width)
    canvas[rows, cols] = canvas[rows, cols] * (1 - mask3) + mask3 * person

    return canvas.astype(np.uint8)

def puttext(words, pos, out, fps, img):
    """Write frames of *img* captioned with *words* to the writer *out*.

    Text shorter than 8 characters is shown for 2 seconds, anything longer
    for 4 seconds, at *fps* frames per second. Every frame asks *pos* for a
    new position, so the caption moves from frame to frame. *img* itself is
    left untouched because each frame is drawn on a deep copy.
    """
    seconds = 2 if len(words) < 8 else 4
    for _ in range(seconds * fps):
        x, y = pos.getPos()
        frame = cv2ImgAddText(copy.deepcopy(img), words, x, y)
        out.write(frame)

def main(args):
    """Build both composite images and render the script file to a video.

    Args:
        args: Namespace with ``lh``/``lb`` (person and background paths for
            the left composite), ``rh``/``rb`` (the same for the right
            composite), ``txt`` (UTF-8 script file) and ``output`` (video
            path).

    Each script line is split into its first character (the speaker) and the
    remainder (the text). Lines for speaker ``'A'`` are drawn on the right
    composite, lines for speaker ``'B'`` on the left one, and any other line
    is skipped. The text keeps the line's trailing newline, which therefore
    counts towards the length threshold used by ``puttext``.

    Raises:
        ValueError: If the two composites differ in size.
        OSError: If the video writer cannot be opened.
    """
    leftall = compose(args.lh, args.lb, True)
    rightall = compose(args.rh, args.rb, False)

    h, w = leftall.shape[:2]
    # The writer is sized from the left composite only, so frames from the
    # right composite must have the same size.
    if rightall.shape[:2] != (h, w):
        raise ValueError(
            "left and right composites differ in size: {} vs {}"
            .format(leftall.shape[:2], rightall.shape[:2]))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = 30
    out = cv2.VideoWriter(args.output, fourcc, fps, (w, h))
    try:
        if not out.isOpened():
            raise OSError("cannot open video writer for {}".format(args.output))

        leftPos = Position(int(w / 4), int(h / 3), 25)
        rightPos = Position(int(w / 9), int(h / 3), 25)

        with open(args.txt, "r", encoding="utf-8") as fp:
            lines = fp.readlines()

        for line in tqdm(lines):
            speaker, words = line[0], line[1:]
            if speaker == 'A':
                puttext(words, rightPos, out, fps, rightall)
            elif speaker == 'B':
                puttext(words, leftPos, out, fps, leftall)
    finally:
        # Release the writer even when rendering fails part-way through.
        out.release()

# Command-line entry point: parse the input/output paths and run main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Composite two people onto two backgrounds and render "
                    "the lines of a script file as captioned video frames.")
    parser.add_argument("--lh", type=str, required=True,
                        help="image of the person for the left composite")
    parser.add_argument("--rh", type=str, required=True,
                        help="image of the person for the right composite")
    parser.add_argument("--lb", type=str, default="leftback.png",
                        help="background image for the left composite "
                             "(default: %(default)s)")
    parser.add_argument("--rb", type=str, default="rightback.png",
                        help="background image for the right composite "
                             "(default: %(default)s)")
    parser.add_argument("--txt", type=str, required=True,
                        help="UTF-8 script file; each line starts with the "
                             "speaker character 'A' or 'B' followed by the "
                             "text to draw")
    parser.add_argument("--output", type=str, default="res.mp4",
                        help="path of the video to write "
                             "(default: %(default)s)")
    args = parser.parse_args()
    main(args)