Keyframes (#6)

* Add keyframe model

* Add segmentation utils

* Add keyframes extraction pipeline

* Add keyframe tests

* Update dockerfile to include caffe

* Add summe pretrained model

* Add video for testing

* Update keyframe pipeline, tests

* Update settings to use in memory db for tests

* Set keyframe number to 10, fix bugs

* Fix keyframe order

* Make requested changes

* Fix Dockerfile

* Make requested changes

* Make requested changes

* Add blank lines

* Change dockerfile base cuda image to devel version

* Add modified Cuda.cmake for Dockerfile

* Add pyyaml dependency to dockerfile

* Update dockerfile

* Update dockerfile

* Fix markdown version error

* Fix markdown version error

* Change caffe installation to make

* Update dockerfile

* Update dockerfile

* Fix boost import

* Fix boost not found bug

* Add feature normalisation

* Fix dateutil, fix caffe root slash

* Fix slash bug

* Add batching to feature extraction

* Add model caching to keyframes extraction

* Fix output images to be in proper range

* Add time logging

* Change feature batch to 128

* Change dockerfile

* Fix dockerfile

* Change feature batch to 10

* Add set mode gpu

* Change feature batch to 64

* Change feature batch to 32

* Add I-frame frame sampling

* Cleanup

* Delete Cuda.cmake

* Remove comments from Makefile.config

* Cleanup

* Fix color scheme switching

* Remove cudnn.hpp, change caffe to 1.0

* Remove cudnn.hpp copy in dockerfile

* Remove redundant run's in dockerfile

* Change pretrained model
This commit is contained in:
Maciej Pęśko 2018-10-01 22:27:06 +02:00 committed by GitHub
parent 43bb8134fd
commit b5dd5cffc0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 409 additions and 24 deletions

21
Makefile.config Normal file
View file

@ -0,0 +1,21 @@
# Caffe build configuration. The Dockerfile copies this file into the Caffe
# checkout as ./Makefile.config before running `make`.

# Build with cuDNN support enabled.
USE_CUDNN := 1
# Location of the CUDA toolkit.
CUDA_DIR := /usr/local/cuda
# nvcc code-generation targets: one -gencode pair per GPU architecture from
# sm_30 up to sm_61; the last entry uses code=compute_61 (PTX rather than a
# fixed sm_XX binary).
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=sm_50 \
-gencode arch=compute_52,code=sm_52 \
-gencode arch=compute_60,code=sm_60 \
-gencode arch=compute_61,code=sm_61 \
-gencode arch=compute_61,code=compute_61
# BLAS implementation to link against.
BLAS := atlas
# Python 3.6 bindings. `boost_python3` is resolved through the
# libboost_python3.so symlink that the Dockerfile creates.
PYTHON_LIBRARIES := boost_python3 python3.6m
PYTHON_INCLUDE := /usr/include/python3.6 \
/usr/local/lib/python3.6/dist-packages/numpy/core/include
PYTHON_LIB := /usr/lib
# Header and library search paths, including the "serial" HDF5 locations.
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial
# Output directories for the build and for `make distribute`.
BUILD_DIR := build
DISTRIBUTE_DIR := distribute
# GPU id used by the test targets.
TEST_GPUID := 0
# Defaults to `@` unless overridden; presumably used by Caffe's Makefile as a
# recipe prefix to silence command echoing -- confirm against that Makefile.
Q ?= @

View file

@ -1,16 +1,47 @@
FROM nvidia/cuda:9.0-cudnn7-runtime
FROM nvidia/cuda:9.0-cudnn7-devel
RUN apt-get update && apt-get install -y apt-utils software-properties-common && \
add-apt-repository ppa:jonathonf/python-3.6 && \
apt-get update && apt-get -y install python3 python3-pip python3.6 python3.6-dev python3.6-venv vim ffmpeg \
build-essential cmake git libgtk2.0-dev pkg-config libavcodec-dev \
wget libatlas-base-dev libboost-all-dev libgflags-dev \
libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev \
liblmdb-dev libopencv-dev libprotobuf-dev \
libsnappy-dev protobuf-compiler \
python-numpy python-setuptools python-scipy \
libavformat-dev libswscale-dev && \
python3.6 -m pip install --upgrade pip && \
python3.6 -m pip install jupyter ipywidgets jupyterlab && \
python3.6 -m pip install tensorflow-gpu h5py keras && \
python3.6 -m pip install scikit-image opencv-contrib-python
python3.6 -m pip install scikit-image opencv-contrib-python pyyaml
RUN mkdir /comixify
COPY ./Makefile.config /comixify/Makefile.config
ENV CAFFE_ROOT=/opt/caffe
WORKDIR $CAFFE_ROOT
ENV CLONE_TAG=1.0
# Clone Caffe at ${CLONE_TAG}, install its Python requirements, patch the build
# files and compile it with the project's Makefile.config.
# The final command must NOT end with "&& \": a trailing continuation would pull
# the following ENV instruction into this shell command and break the build.
RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \
    cp /comixify/Makefile.config ./Makefile.config && \
    cd python && for req in $(cat requirements.txt) pydot; do python3.6 -m pip install $req; done && cd .. && \
    sed -i '415s/.*/NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)/' Makefile && \
    echo "# ---[ Includes" >> CMakeLists.txt && \
    echo "set(${CMAKE_CXX_FLAGS} "-D_FORCE_INLINES ${CMAKE_CXX_FLAGS}")" >> CMakeLists.txt && \
    ls -la /usr/lib/x86_64-linux-gnu && \
    ln -s /usr/lib/x86_64-linux-gnu/libboost_python-py35.so /usr/lib/x86_64-linux-gnu/libboost_python3.so && \
    make all -j"$(nproc)" && \
    make distribute
ENV PYCAFFE_ROOT $CAFFE_ROOT/python
ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH
ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH
RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig && \
python3.6 $CAFFE_ROOT/scripts/download_model_binary.py $CAFFE_ROOT/models/bvlc_googlenet && \
python3.6 -m pip install markdown=="2.6.11" && \
python3.6 -m pip install python-dateutil --upgrade
WORKDIR /comixify
COPY . /comixify
RUN python3.6 -m pip install -r requirements.txt

View file

@ -1,31 +1,44 @@
import os
import shutil
import uuid
import numpy as np
import torch
import torch.nn as nn
os.environ['GLOG_minloglevel'] = '2' # Prevent caffe shell loging
import caffe
from datetime import datetime
from subprocess import call
import cv2
from math import ceil
from sklearn.preprocessing import normalize
from django.conf import settings
from django.core.cache import cache
from skimage import img_as_ubyte
import logging
from utils import jj
from keyframes_rl.models import DSN
from keyframes.kts import cpd_auto
from keyframes.utils import batch
logger = logging.getLogger(__name__)
class KeyFramesExtractor():
class KeyFramesExtractor:
@classmethod
def get_keyframes(cls, video, gpu=settings.GPU, features_batch_size=settings.FEATURE_BATCH_SIZE):
    """Run the whole keyframe pipeline for ``video`` and return the chosen frames.

    Steps: dump sampled frames to a temporary directory, load them, extract
    features, split the sequence into segments, score every frame and pick
    the best frame of the best-scoring segments.

    :param video: object whose ``file.path`` points at the video file.
    :param gpu: run feature extraction and scoring on the GPU when true.
        The default is read from ``settings.GPU`` once, at import time.
    :param features_batch_size: number of frames per feature-extraction batch.
    :return: list of frames as produced by ``_get_chosen_frames``.
    """
    frames_paths, all_frames_tmp_dir = cls._get_all_frames(video)
    try:
        frames = cls._get_frames(frames_paths)
    finally:
        # The frames now live in memory (or loading failed); either way the
        # on-disk dump is no longer needed and must not pile up in TMP_DIR.
        shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
    features = cls._get_features(frames, gpu, features_batch_size)
    change_points, frames_per_segment = cls._get_segments(features)
    probs = cls._get_probs(features, gpu)
    chosen_frames = cls._get_chosen_frames(frames, probs, change_points, frames_per_segment)
    return chosen_frames
@staticmethod
def _get_all_frames(video):
all_frames_tmp_dir = uuid.uuid4()
os.mkdir(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
call(["ffmpeg", "-skip_frame", "nokey", "-i", f"{video.file.path}", "-vsync", "0", "-qscale:v", "1",
"-f", "image2", jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
call(["ffmpeg", "-i", f"{video.file.path}", "-vf", "select=not(mod(n\\,15))", "-vsync", "vfr", "-q:v", "2",
jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
frames_paths = []
for dirname, dirnames, filenames in os.walk(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")):
for filename in filenames:
@ -33,15 +46,114 @@ class KeyFramesExtractor():
return sorted(frames_paths), all_frames_tmp_dir
@staticmethod
def _count_interval(all_keyframes):
return int((len(all_keyframes) - settings.NUMBERS_OF_FRAMES_TO_SHOW) / (settings.NUMBERS_OF_FRAMES_TO_SHOW + 1))
def _get_frames(frames_paths):
    """Load every image in ``frames_paths`` with ``caffe.io.load_image``.

    :param frames_paths: iterable of image file paths.
    :return: list with one loaded image per path, in the same order.
    """
    return [caffe.io.load_image(path) for path in frames_paths]
@staticmethod
def _get_frames_with_interval(interval, all_keyframes):
def _get_features(frames, gpu=True, batch_size=1):
    """Extract one L2-normalised 1024-d GoogLeNet feature vector per frame.

    The network definition, weights and mean image are looked up under the
    directory named by the ``CAFFE_ROOT`` environment variable.

    :param frames: sequence of images as returned by ``_get_frames``.
    :param gpu: use Caffe GPU mode when true, CPU mode otherwise.
    :param batch_size: number of frames pushed through the net per forward pass.
    :return: ``float32`` array of shape ``(len(frames), 1024)``.
    :raises RuntimeError: if ``CAFFE_ROOT`` is not set.
    :raises FileNotFoundError: if the pretrained model file is missing.
    """
    caffe_root = os.environ.get("CAFFE_ROOT")
    if not caffe_root:
        # Fail with a clear message instead of a TypeError when building paths.
        raise RuntimeError("Caffe root path not found.")

    # os.path.join copes with CAFFE_ROOT values with or without a trailing slash.
    model_dir = os.path.join(caffe_root, "models", "bvlc_googlenet")
    model_file = os.path.join(model_dir, "deploy.prototxt")
    pretrained = os.path.join(model_dir, "bvlc_googlenet.caffemodel")
    if not os.path.isfile(pretrained):
        raise FileNotFoundError(f"PRETRAINED Model not found: {pretrained}")

    if gpu:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    net = caffe.Net(model_file, pretrained, caffe.TEST)
    net.blobs["data"].reshape(batch_size, 3, 224, 224)

    # Per-channel mean: average the mean image over its two trailing axes.
    mean_path = os.path.join(caffe_root, "python", "caffe", "imagenet", "ilsvrc_2012_mean.npy")
    mu = np.load(mean_path).mean(1).mean(1)

    transformer = caffe.io.Transformer({"data": net.blobs["data"].data.shape})
    transformer.set_transpose("data", (2, 0, 1))
    transformer.set_mean("data", mu)
    transformer.set_raw_scale("data", 255)
    transformer.set_channel_swap("data", (2, 1, 0))

    features = np.zeros(shape=(len(frames), 1024))
    for idx_batch, (n_batch, frames_batch) in enumerate(batch(frames, batch_size)):
        for i, frame in enumerate(frames_batch):
            net.blobs["data"].data[i, ...] = transformer.preprocess("data", frame)
        net.forward()
        # Only the first n_batch rows are valid: the last batch may be partial
        # and the remaining rows still hold data from the previous pass.
        pooled = net.blobs["pool5/7x7_s1"].data[0:n_batch]
        start = idx_batch * batch_size
        features[start:start + n_batch] = pooled.reshape(n_batch, -1)

    normalize(features, copy=False)  # in-place row-wise normalisation
    return features.astype(np.float32)
@staticmethod
def _get_probs(features, gpu=True):
    """Score every frame with the pretrained DSN model.

    The model is built once and stored in the Django cache under
    ``keyframes_rl_model_cache``; later calls reuse the cached instance.

    :param features: ``float32`` array of shape ``(n_frames, 1024)``.
    :param gpu: run the model on the GPU when true.
    :return: 1-D numpy array with one score per frame.
    """
    # NOTE(review): the cache key does not depend on ``gpu``, so a model cached
    # by a GPU call would be reused by a later CPU call (and vice versa).
    model_cache_key = "keyframes_rl_model_cache"
    model = cache.get(model_cache_key)  # get model from cache
    if model is None:
        model_path = "keyframes_rl/pretrained_model/model_epoch100.pth.tar"
        model = DSN(in_dim=1024, hid_dim=256, num_layers=1, cell="lstm")
        # On CPU-only runs remap tensors that were saved from a GPU.
        checkpoint = torch.load(model_path, map_location=None if gpu else 'cpu')
        model.load_state_dict(checkpoint)
        if gpu:
            model = nn.DataParallel(model).cuda()
        model.eval()
        cache.set(model_cache_key, model, None)

    seq = torch.from_numpy(features).unsqueeze(0)
    if gpu:
        seq = seq.cuda()
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        probs = model(seq)
    # reshape(-1) keeps the result 1-D even for a single frame, where
    # squeeze() would collapse it to a 0-d array that cannot be sliced.
    return probs.cpu().reshape(-1).numpy()
@staticmethod
def _get_chosen_frames(frames, probs, change_points, frames_per_segment, min_keyframes=10):
    """Pick the best frame from each of the highest-scoring segments.

    Segments are ranked by the mean score of their frames; from each of the
    top ``min_keyframes`` segments the single highest-scoring frame is taken.

    :param frames: sequence of frames, indexable by frame number.
    :param probs: 1-D array with one score per frame.
    :param change_points: list of ``[first, last]`` frame indices per segment,
        both bounds inclusive (as built by ``_get_segments``).
    :param frames_per_segment: number of frames in each segment.
    :param min_keyframes: number of segments (hence frames) to keep.
    :return: chosen frames in temporal order, converted with ``img_as_ubyte``
        and with the last (channel) axis reversed.
    """
    segment_scores = []
    start = 0
    for length in frames_per_segment:
        segment_scores.append(float(np.mean(probs[start:start + length])))
        start += length
    picks = np.argsort(np.array(segment_scores))[::-1][:min_keyframes]

    chosen = []
    for pick in picks:
        low, high = change_points[pick]
        # Bounds are inclusive, so the slice must run through ``high``;
        # ``probs[low:high]`` would never consider a segment's last frame.
        best = int(low + np.argmax(probs[low:high + 1]))
        chosen.append((best, frames[best]))

    # Restore temporal order before returning.
    chosen.sort(key=lambda item: item[0])
    return [img_as_ubyte(frame)[..., ::-1] for _, frame in chosen]
@staticmethod
def _get_segments(features):
    """Split the frame sequence into segments via ``cpd_auto``.

    :param features: array of shape ``(n_frames, dim)``; its Gram matrix is
        used as the kernel for change-point detection.
    :return: ``(change_points, frames_per_segment)`` where ``change_points``
        is a list of ``[first, last]`` frame indices (inclusive) and
        ``frames_per_segment`` the matching list of segment lengths.
    """
    kernel = np.dot(features, features.T)
    n_frames = int(kernel.shape[0])
    # At least 10 change points, roughly one per 10 frames, never more than n - 1.
    max_change_points = min(n_frames - 1, max(10, int(ceil(n_frames / 10))))
    cps, _scores = cpd_auto(kernel, max_change_points, 1)

    first_cp, last_cp = cps[0], cps[-1]
    last_frame = len(features) - 1

    # Each segment starts at a change point (or frame 0) and ends just before the next one.
    starts = [0] + list(cps)
    ends = [first_cp - 1] + [cp - 1 for cp in cps[1:]] + [last_frame]

    change_points = [[lo, hi] for lo, hi in zip(starts, ends)]
    frames_per_segment = [int(hi - lo + 1) for lo, hi in zip(starts, ends)]
    return change_points, frames_per_segment

125
keyframes/kts/__init__.py Normal file
View file

@ -0,0 +1,125 @@
import numpy as np
def calc_scatters(K):
    """
    Build the scatter matrix of a square kernel matrix ``K``.

    ``scatters[i, j]`` is the scatter of the sub-sequence that starts at
    frame ``i`` and ends at frame ``j`` (inclusive): the sum of the diagonal
    of ``K`` over that range minus the sum of the whole ``[i..j] x [i..j]``
    block divided by the range length. Entries with ``j < i`` are zero.
    """
    size = K.shape[0]

    # Prefix sums of the diagonal and 2-D prefix sums of the whole matrix.
    trace_prefix = np.cumsum([0] + list(np.diag(K)))
    block_prefix = np.zeros((size + 1, size + 1))
    block_prefix[1:, 1:] = np.cumsum(np.cumsum(K, 0), 1)
    block_diag = np.diag(block_prefix)

    start = np.arange(size).reshape((-1, 1))
    end = np.arange(size).reshape((1, -1))

    diag_sum = trace_prefix[1:].reshape((1, -1)) - trace_prefix[:-1].reshape((-1, 1))
    block_sum = (
        block_diag[1:].reshape((1, -1)) + block_diag[:-1].reshape((-1, 1))
        - block_prefix[1:, :-1].T - block_prefix[:-1, 1:]
    )
    # The extra term turns the zero length at end == start - 1 into 1 so the
    # division is safe; those cells are zeroed below anyway.
    length = (end - start + 1).astype(float) + (end == start - 1).astype(float)

    scatters = diag_sum - block_sum / length
    scatters[end < start] = 0
    return scatters
def cpd_nonlin(K, ncp, lmin=1, lmax=100000, backtrack=True, verbose=True, out_scatters=None):
    """Change point detection with dynamic programming.

    K - square kernel matrix
    ncp - number of change points to detect (ncp >= 0)
    lmin - minimal length of a segment
    lmax - maximal length of a segment
    backtrack - when False only the objective scores are evaluated (saves
        memory); the returned change points are then all zero
    verbose - print progress messages
    out_scatters - optional mutable sequence; its first element is set to the
        scatter matrix

    Returns: (cps, scores)
    cps - detected array of change points: mean is thought to be constant
        on [ cps[i], cps[i+1] )
    scores - values of the objective function for 0..ncp change points
        (``inf`` where no valid segmentation exists)
    """
    n_cp = int(ncp)  # prevent numpy.int64
    n_rows, n_cols = K.shape

    assert n_rows == n_cols, "Kernel matrix awaited."
    assert n_rows >= (n_cp + 1) * lmin
    assert n_rows <= (n_cp + 1) * lmax
    assert lmax >= lmin >= 1

    if verbose:
        print("Precomputing scatters...")
    scatter = calc_scatters(K)
    if out_scatters is not None:
        out_scatters[0] = scatter

    if verbose:
        print("Inferring best change points...")

    # cost[k, l] - value of the objective for k change points and l first frames
    cost = np.full((n_cp + 1, n_rows + 1), 1e101)
    cost[0, lmin:lmax] = scatter[0, lmin - 1:lmax - 1]

    # prev[k, l] - "previous change": best t[k] when t[k+1] equals l.
    # Only a dummy 1x1 table is allocated when backtracking is disabled.
    prev = np.zeros((n_cp + 1, n_rows + 1) if backtrack else (1, 1), dtype=int)

    for k in range(1, n_cp + 1):
        for end in range((k + 1) * lmin, n_rows + 1):
            first = max(k * lmin, end - lmax)
            last = end - lmin + 1
            candidates = scatter[first:last, end - 1].reshape(-1) + cost[k - 1, first:last].reshape(-1)
            cost[k, end] = np.min(candidates)
            if backtrack:
                prev[k, end] = np.argmin(candidates) + first

    # Collect change points by walking the "previous change" table backwards.
    cps = np.zeros(n_cp, dtype=int)
    if backtrack:
        cursor = n_rows
        for k in range(n_cp, 0, -1):
            cursor = prev[k, cursor]
            cps[k - 1] = cursor

    scores = cost[:, n_rows].copy()
    scores[scores > 1e99] = np.inf
    return cps, scores
def cpd_auto(K, ncp, vmax, desc_rate=15, min_segments=10, **kwargs):
    """Main interface

    Detect change points automatically selecting their number
    K - kernel between each pair of frames in video
    ncp - maximum ncp considered when selecting the best number
    vmax - special parameter (scales the penalty term)
    Optional arguments:
    desc_rate - rate of descriptor sampling (vmax always corresponds to 1x)
    min_segments - lower bound on the number of change points returned;
        it is applied after the selection and may therefore exceed ncp
    lmin - minimum segment length (forwarded to cpd_nonlin)
    lmax - maximum segment length (forwarded to cpd_nonlin)
    Note:
    - cps are always calculated in subsampled coordinates irrespective to
    desc_rate
    - lmin and m should be in agreement
    ---
    Returns: (cps, costs)
    cps - best selected change-points
    costs - costs for 0,1,2,...,m change-points
    Memory requirement: ~ (3*N*N + N*ncp)*4 bytes ~= 16 * N^2 bytes
    That is 1,6 Gb for the N=10000.
    """
    m = ncp
    (_, scores) = cpd_nonlin(K, m, backtrack=False, **kwargs)

    N = K.shape[0]
    N2 = N * desc_rate  # length of the video before subsampling

    penalties = np.zeros(m + 1)
    # Prevent division by zero (in case of 0 changes)
    counts = np.arange(1, m + 1)
    penalties[1:] = (vmax * counts / (2.0 * N2)) * (np.log(float(N2) / counts) + 1)

    costs = scores / float(N) + penalties
    m_best = int(np.argmin(costs))
    m_best = max(min_segments, m_best)
    # m change points need m + 1 segments of at least lmin frames each, so the
    # largest feasible value is N // lmin - 1. Clamping to N (as before) let
    # cpd_nonlin's ``n >= (m + 1) * lmin`` assertion fire on short inputs.
    lmin = kwargs.get("lmin", 1)
    m_best = min(m_best, max(N // lmin - 1, 0))
    (cps, scores2) = cpd_nonlin(K, m_best, **kwargs)

    return cps, scores2

View file

@ -1,3 +1,69 @@
from django.test import TestCase
# Create your tests here.
import numpy as np
from keyframes.keyframes import KeyFramesExtractor
from api.models import Video
from django.core.files import File
import shutil
from utils import jj
from django.conf import settings
from keyframes.utils import batch
VIDEO_PATH = "tmp/f1_short.mp4"
VIDEO_N_FRAMES = 47
class KeyframesTestCase(TestCase):
    """Step-by-step check of the keyframe extraction pipeline on a short video."""

    def setUp(self):
        # Defined up front so tearDown works even if the test fails early.
        self.all_frames_tmp_dir = None
        # Close the source file once the Video row (and its stored copy) exists.
        with open(VIDEO_PATH, 'rb') as f:
            self.video = Video.objects.create(file=File(f))

    def tearDown(self):
        # Only remove the frame dump if the test got far enough to create it.
        if self.all_frames_tmp_dir is not None:
            shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{self.all_frames_tmp_dir}"))

    def test_keyframes(self):
        """Keyframes are extracted correctly"""
        frames_paths, all_frames_tmp_dir = KeyFramesExtractor._get_all_frames(self.video)
        # Record the directory before asserting so it is cleaned up on failure too.
        self.all_frames_tmp_dir = all_frames_tmp_dir
        self.assertIsInstance(frames_paths[0], str)
        self.assertEqual(len(frames_paths), VIDEO_N_FRAMES)

        frames = KeyFramesExtractor._get_frames(frames_paths)
        self.assertEqual(len(frames), VIDEO_N_FRAMES)
        self.assertIsInstance(frames[0], np.ndarray)

        features = KeyFramesExtractor._get_features(frames, False)
        self.assertIsInstance(features, np.ndarray)
        self.assertEqual(features.shape, (VIDEO_N_FRAMES, 1024))

        change_points, frames_per_segment = KeyFramesExtractor._get_segments(features)
        self.assertIsInstance(change_points, list)
        self.assertIsInstance(frames_per_segment, list)
        for cp in frames_per_segment:
            with self.subTest(cp=cp):
                self.assertIsInstance(cp, int)

        probs = KeyFramesExtractor._get_probs(features, False)
        self.assertIsInstance(probs, np.ndarray)
        self.assertEqual(probs.shape, (VIDEO_N_FRAMES, ))

        chosen_frames = KeyFramesExtractor._get_chosen_frames(frames, probs, change_points, frames_per_segment)
        self.assertIsInstance(chosen_frames, list)
        self.assertEqual(len(chosen_frames), 10)
class UtilsTestCase(TestCase):
    """Unit tests for ``keyframes.utils.batch``."""

    def test_batch(self):
        """batch() yields (size, chunk) pairs with a shorter final chunk"""
        expected = [(2, [1, 1]), (2, [2, 2]), (2, [3, 3]), (1, [4])]
        self.assertEqual(list(batch([1, 1, 2, 2, 3, 3, 4], 2)), expected)

    def test_empty_batch(self):
        """batch() yields nothing for an empty input"""
        self.assertEqual(list(batch([], 2)), [])

6
keyframes/utils.py Normal file
View file

@ -0,0 +1,6 @@
def batch(iterable, n=1):
    """Yield ``(size, chunk)`` pairs covering ``iterable`` in slices of ``n``.

    ``iterable`` must support ``len()`` and slicing. Every chunk holds ``n``
    items except possibly the last one; ``size`` is the number of items in
    the chunk.
    """
    total = len(iterable)
    bounds = ((start, min(start + n, total)) for start in range(0, total, n))
    for start, stop in bounds:
        yield stop - start, iterable[start:stop]

21
keyframes_rl/models.py Normal file
View file

@ -0,0 +1,21 @@
import torch.nn as nn
from torch.nn import functional as F
__all__ = ['DSN']
class DSN(nn.Module):
    """Deep Summarization Network.

    A bidirectional recurrent layer followed by a linear layer and a sigmoid,
    producing one score in (0, 1) per time step.

    Input:  tensor of shape ``(batch, seq_len, in_dim)`` (``batch_first``).
    Output: tensor of shape ``(batch, seq_len, 1)``.
    """

    def __init__(self, in_dim=1024, hid_dim=256, num_layers=1, cell='lstm'):
        """
        :param in_dim: size of each input feature vector.
        :param hid_dim: hidden size of the recurrent layer (per direction).
        :param num_layers: number of stacked recurrent layers.
        :param cell: recurrent cell type, ``'lstm'`` or ``'gru'``.
        """
        super(DSN, self).__init__()
        assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru'"
        rnn_cls = nn.LSTM if cell == 'lstm' else nn.GRU
        # Attribute names ``rnn`` and ``fc`` must stay as they are: they are the
        # keys under which pretrained checkpoints store the weights.
        self.rnn = rnn_cls(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True)
        # Bidirectional output concatenates both directions, hence hid_dim * 2.
        self.fc = nn.Linear(hid_dim * 2, 1)

    def forward(self, x):
        """Return per-step scores for the input sequence batch ``x``."""
        h, _ = self.rnn(x)
        # Tensor.sigmoid() replaces the deprecated torch.nn.functional.sigmoid.
        return self.fc(h).sigmoid()

Binary file not shown.

View file

@ -10,3 +10,4 @@ pytz==2018.5
six==1.11.0
torch==0.4.1
torchvision==0.2.1
scikit-learn==0.19.2

View file

@ -140,3 +140,5 @@ MAX_FILE_SIZE = 50000000
NUMBERS_OF_FRAMES_TO_SHOW = 10
TMP_DIR = 'tmp/'
GPU = True
FEATURE_BATCH_SIZE = 32

BIN
tmp/f1_short.mp4 Normal file

Binary file not shown.