mirror of
https://github.com/maciej3031/comixify.git
synced 2026-03-11 08:54:35 +00:00
* Add keyframe model * Add segmentation utils * Add keyframes extraction pipeline * Add keyframe tests * Update dockerfile to include caffe * Add summe pretrained model * Add video for testing * Update keyframe pipeline, tests * Update settings to use in memory db for tests * Set keyframe number to 10, fix bugs * Fix keyframe order * Make requested changes * Fix Dockerfile * Make requested changes * Make requested changes * Add blank lines * Change dockerfile base cuda image to devel version * Add modified Cuda.cmake for Dockerfile * Add pyyaml dependency to dockerfile * Update dockerfile * Update dockerfile * Fix markdown version error * Fix markdown version error * Change caffe installation to make * Update dockerfile * Update dockerfile * Fix boost import * Fix boost not found bug * Add feature normalisation * Fix dateutil, fix caffe root slash * Fix slash bug * Add batching to feature extraction * Add model caching to keyframes extraction * Fix output images to be in proper range * Add time logging * Change feature batch to 128 * Change dockerfile * Fix dockerfile * Change feature batch to 10 * Add set mode gpu * Change feature batch to 64 * Change feature batch to 32 * Add I-frame frame sampling * Cleanup * Delete Cuda.cmake * Remove comments from Makefile.config * Cleanup * Fix color scheme switching * Remove cudnn.hpp, change caffe to 1.0 * Remove cudnn.hpp copy in dockerfile * Remove redundant run's in dockerfile * Change pretrained model
159 lines
6.1 KiB
Python
159 lines
6.1 KiB
Python
import os
|
|
import uuid
|
|
import numpy as np
|
|
import torch
|
|
import torch.nn as nn
|
|
os.environ['GLOG_minloglevel'] = '2' # Prevent caffe shell loging
|
|
import caffe
|
|
from datetime import datetime
|
|
from subprocess import call
|
|
from math import ceil
|
|
from sklearn.preprocessing import normalize
|
|
from django.conf import settings
|
|
from django.core.cache import cache
|
|
from skimage import img_as_ubyte
|
|
import logging
|
|
|
|
from utils import jj
|
|
from keyframes_rl.models import DSN
|
|
from keyframes.kts import cpd_auto
|
|
from keyframes.utils import batch
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class KeyFramesExtractor:
    """Select representative keyframes from a video.

    The pipeline implemented by ``get_keyframes`` is:

    1. sample frames from the video with ffmpeg (``_get_all_frames``),
    2. load the sampled frames into memory (``_get_frames``),
    3. extract one 1024-dimensional GoogLeNet feature per frame with Caffe
       (``_get_features``),
    4. split the feature sequence into segments with ``cpd_auto``
       (``_get_segments``),
    5. score every frame with the ``DSN`` model (``_get_probs``),
    6. return the best-scoring frame of the top-scoring segments
       (``_get_chosen_frames``).
    """

    @classmethod
    def get_keyframes(cls, video, gpu=settings.GPU, features_batch_size=settings.FEATURE_BATCH_SIZE):
        """Run the whole keyframe extraction pipeline for ``video``.

        Args:
            video: object exposing the video location as ``video.file.path``.
            gpu: run Caffe and PyTorch on the GPU when true, on the CPU otherwise.
            features_batch_size: number of frames pushed through the Caffe
                network per forward pass.

        Returns:
            List of chosen frames as produced by ``_get_chosen_frames``,
            ordered by their position in the video.
        """
        # Local import: keeps this block self-contained without touching the
        # module-level import section.
        import shutil

        frames_paths, all_frames_tmp_dir = cls._get_all_frames(video)
        try:
            frames = cls._get_frames(frames_paths)
        finally:
            # The frames are fully decoded into memory at this point, so the
            # extracted JPEG files are no longer needed. Without this cleanup
            # every call leaves a directory full of images behind.
            shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"), ignore_errors=True)

        features = cls._get_features(frames, gpu, features_batch_size)
        change_points, frames_per_segment = cls._get_segments(features)
        probs = cls._get_probs(features, gpu)
        chosen_frames = cls._get_chosen_frames(frames, probs, change_points, frames_per_segment)
        return chosen_frames

    @staticmethod
    def _get_all_frames(video):
        """Dump sampled frames of ``video`` as JPEG files into a fresh directory.

        ffmpeg is asked to keep only the frames whose index ``n`` satisfies
        ``mod(n, 15) == 0`` and to write them as ``%06d.jpeg`` files into a
        new directory named after a random UUID below ``settings.TMP_DIR``.

        Args:
            video: object exposing the video location as ``video.file.path``.

        Returns:
            Tuple ``(frames_paths, all_frames_tmp_dir)``: the sorted list of
            extracted file paths and the UUID naming the directory (relative
            to ``settings.TMP_DIR``) that holds them.
        """
        all_frames_tmp_dir = uuid.uuid4()
        frames_dir = jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")
        # makedirs (instead of mkdir) also creates TMP_DIR itself if it is
        # missing; the UUID guarantees the leaf directory is new.
        os.makedirs(frames_dir)

        return_code = call(["ffmpeg", "-i", f"{video.file.path}", "-vf", "select=not(mod(n\\,15))",
                            "-vsync", "vfr", "-q:v", "2", jj(frames_dir, "%06d.jpeg")])
        if return_code != 0:
            # Do not fail here: ffmpeg may still have written usable frames.
            logger.error("ffmpeg exited with code %s while sampling frames from %s",
                         return_code, video.file.path)

        frames_paths = [
            jj(dirname, filename)
            for dirname, _, filenames in os.walk(frames_dir)
            for filename in filenames
        ]
        # Zero-padded file names make the lexicographic order chronological.
        return sorted(frames_paths), all_frames_tmp_dir

    @staticmethod
    def _get_frames(frames_paths):
        """Load every image in ``frames_paths`` with ``caffe.io.load_image``.

        Returns:
            List of loaded frames, in the same order as ``frames_paths``.
        """
        return [caffe.io.load_image(frame_path) for frame_path in frames_paths]

    @staticmethod
    def _get_features(frames, gpu=True, batch_size=1):
        """Extract one L2-normalised GoogLeNet feature vector per frame.

        The BVLC GoogLeNet model is loaded from ``$CAFFE_ROOT/models`` and the
        ``pool5/7x7_s1`` blob is read out as a 1024-dimensional feature.

        Args:
            frames: list of images as returned by ``_get_frames``.
            gpu: select Caffe GPU mode when true, CPU mode otherwise.
            batch_size: number of frames per forward pass.

        Returns:
            ``float32`` array of shape ``(len(frames), 1024)`` whose rows are
            normalised by ``sklearn.preprocessing.normalize``.

        Raises:
            ValueError: if ``frames`` is empty.
            RuntimeError: if the ``CAFFE_ROOT`` environment variable is unset.
            FileNotFoundError: if the network definition or weights are missing.
        """
        if not frames:
            # Fail before paying for the network load; normalising an empty
            # matrix would raise a far less helpful ValueError later on.
            raise ValueError("No frames to extract features from.")

        caffe_root = os.environ.get("CAFFE_ROOT")
        if not caffe_root:
            raise RuntimeError("Caffe root path not found.")

        if gpu:
            caffe.set_mode_gpu()
        else:
            caffe.set_mode_cpu()

        model_file = os.path.join(caffe_root, "models", "bvlc_googlenet", "deploy.prototxt")
        pretrained = os.path.join(caffe_root, "models", "bvlc_googlenet", "bvlc_googlenet.caffemodel")
        if not os.path.isfile(model_file):
            raise FileNotFoundError(f"Model definition not found: {model_file}")
        if not os.path.isfile(pretrained):
            raise FileNotFoundError(f"PRETRAINED Model not found: {pretrained}")

        net = caffe.Net(model_file, pretrained, caffe.TEST)
        net.blobs["data"].reshape(batch_size, 3, 224, 224)

        # Per-channel mean pixel, averaged over the spatial axes of the mean image.
        mu = np.load(os.path.join(caffe_root, "python", "caffe", "imagenet", "ilsvrc_2012_mean.npy"))
        mu = mu.mean(1).mean(1)

        transformer = caffe.io.Transformer({"data": net.blobs["data"].data.shape})
        transformer.set_transpose("data", (2, 0, 1))
        transformer.set_mean("data", mu)
        transformer.set_raw_scale("data", 255)
        transformer.set_channel_swap("data", (2, 1, 0))

        features = np.zeros(shape=(len(frames), 1024))
        feature_dim = features.shape[1]
        # NOTE(review): `batch` is assumed to yield (count, items) pairs, as
        # the unpacking below requires - confirm against keyframes.utils.
        for idx_batch, (n_batch, frames_batch) in enumerate(batch(frames, batch_size)):
            for i in range(n_batch):
                net.blobs['data'].data[i, ...] = transformer.preprocess("data", frames_batch[i])
            net.forward()
            start = idx_batch * batch_size
            # Only the first n_batch rows are valid for a partially filled
            # last batch. An explicit reshape keeps the result 2-D even when
            # n_batch == 1 (squeeze() would collapse it to 1-D).
            pooled = net.blobs["pool5/7x7_s1"].data[0:n_batch]
            features[start:start + n_batch] = pooled.reshape(n_batch, feature_dim)

        normalize(features, copy=False)
        return features.astype(np.float32)

    @staticmethod
    def _get_probs(features, gpu=True):
        """Score every frame with the pretrained ``DSN`` model.

        The model is built once, stored in the Django cache under
        ``"keyframes_rl_model_cache"`` without expiry and reused afterwards.

        Args:
            features: ``float32`` feature matrix, one row per frame.
            gpu: run the model on the GPU when true, on the CPU otherwise.

        Returns:
            1-D numpy array with the model output for the sequence
            (presumably one importance score per frame - confirm against
            ``DSN``).
        """
        model_cache_key = "keyframes_rl_model_cache"
        model = cache.get(model_cache_key)  # get model from cache

        if model is None:
            model_path = "keyframes_rl/pretrained_model/model_epoch100.pth.tar"
            model = DSN(in_dim=1024, hid_dim=256, num_layers=1, cell="lstm")
            # Without a GPU the checkpoint tensors must be remapped to the CPU.
            checkpoint = torch.load(model_path, map_location=None if gpu else 'cpu')
            model.load_state_dict(checkpoint)
            if gpu:
                model = nn.DataParallel(model).cuda()
            model.eval()
            cache.set(model_cache_key, model, None)

        seq = torch.from_numpy(features).unsqueeze(0)
        if gpu:
            seq = seq.cuda()

        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            probs = model(seq)
        probs = probs.detach().cpu().squeeze().numpy()
        # squeeze() turns a single-frame result into a 0-d array, which cannot
        # be sliced by _get_chosen_frames; keep it 1-D.
        return np.atleast_1d(probs)

    @staticmethod
    def _get_chosen_frames(frames, probs, change_points, frames_per_segment, min_keyframes=10):
        """Pick the best frame from each of the highest-scoring segments.

        Every segment is scored with the mean of its frame scores. The
        ``min_keyframes`` best segments are kept and the frame with the
        highest score inside each of them is selected.

        Args:
            frames: list of frames as returned by ``_get_frames``.
            probs: 1-D array with one score per frame.
            change_points: list of ``[first, last]`` frame indices per
                segment, both bounds inclusive (see ``_get_segments``).
            frames_per_segment: number of frames in each segment.
            min_keyframes: number of segments to keep; despite its name this
                is an upper bound, fewer frames are returned when there are
                fewer segments.

        Returns:
            List of chosen frames sorted by frame index, each converted with
            ``img_as_ubyte`` and with its last axis reversed (presumably
            RGB -> BGR channel order - confirm against the consumer).
        """
        segment_scores = []
        start = 0
        for length in frames_per_segment:
            segment_scores.append(float(np.mean(probs[start:start + length])))
            start += length

        # Indices of the best segments, highest mean score first.
        picks = np.argsort(np.array(segment_scores))[::-1][:min_keyframes]

        chosen = []
        for pick in picks:
            low, high = (int(bound) for bound in change_points[pick])
            # `high` is inclusive, so the slice has to end at high + 1;
            # slicing with [low:high] never considered the last frame of a
            # segment. This also covers one-frame segments (low == high).
            best = low + int(np.argmax(probs[low:high + 1]))
            chosen.append((best, frames[best]))

        # Restore chronological order.
        chosen.sort(key=lambda item: item[0])
        return [img_as_ubyte(frame)[..., ::-1] for _, frame in chosen]

    @staticmethod
    def _get_segments(features):
        """Split the frame sequence into contiguous segments.

        The Gram matrix of the features is passed to ``cpd_auto`` and the
        returned change points are turned into segment boundaries.

        Args:
            features: feature matrix, one row per frame.

        Returns:
            Tuple ``(change_points, frames_per_segment)``: ``change_points``
            is a list of ``[first, last]`` frame indices (inclusive) and
            ``frames_per_segment`` the matching segment lengths.
        """
        K = np.dot(features, features.T)
        n_frames = int(K.shape[0])

        if n_frames < 2:
            # Nothing to split; also avoids calling cpd_auto with a
            # non-positive change-point budget.
            if n_frames == 0:
                return [], []
            return [[0, 0]], [1]

        # Change-point budget handed to cpd_auto: one per ten frames, at least
        # ten, but never more than n_frames - 1.
        # NOTE(review): assumes the second argument of cpd_auto is the maximum
        # number of change points - confirm against keyframes.kts.
        min_segments = max(10, int(ceil(n_frames / 10)))
        min_segments = min(n_frames - 1, min_segments)
        cps, _ = cpd_auto(K, min_segments, 1)

        # Each change point starts a new segment. Bracketing them with 0 and
        # the frame count yields every segment, and also works when cpd_auto
        # reports no change point at all (cps[0] used to raise IndexError).
        boundaries = [0] + [int(cp) for cp in cps] + [len(features)]
        change_points = []
        frames_per_segment = []
        for first, next_first in zip(boundaries[:-1], boundaries[1:]):
            change_points.append([first, next_first - 1])
            frames_per_segment.append(next_first - first)
        return change_points, frames_per_segment
|