Keyframes (#6)

* Add keyframe model * Add segmentation utils * Add keyframes extraction pipeline * Add keyframe tests * Update dockerfile to include caffe * Add summe pretrained model * Add video for testing * Update keyframe pipeline, tests * Update settings to use in memory db for tests * Set keyframe number to 10, fix bugs * Fix keyframe order * Make requested changes * Fix Dockerfile * Make requested changes * Make requested changes * Add blank lines * Change dockerfile base cuda image to devel version * Add modified Cuda.cmake for Dockerfile * Add pyyaml dependency to dockerfile * Update dockerfile * Update dockerfile * Fix markdown version error * Fix markdown version error * Change caffe installation to make * Update dockerfile * Update dockerfile * Fix boost import * Fix boost not found bug * Add feature normalisation * Fix dateutil, fix caffe root slash * Fix slash bug * Add batching to feature extraction * Add model caching to keyframes extraction * Fix output images to be in proper range * Add time logging * Change feature batch to 128 * Change dockerfile * Fix dockerfile * Change feature batch to 10 * Add set mode gpu * Change feature batch to 64 * Change feature batch to 32 * Add I-frame frame sampling * Cleanup * Delete Cuda.cmake * Remove comments from Makefile.config * Cleanup * Fix color scheme switching * Remove cudnn.hpp, change caffe to 1.0 * Remove cudnn.hpp copy in dockerfile * Remove redundant runs in dockerfile * Change pretrained model
2018-10-01 22:27:06 +02:00 · 2018-10-01 22:27:06 +02:00 · b5dd5cffc0
commit b5dd5cffc0
parent 43bb8134fd
11 changed files with 409 additions and 24 deletions
--- a/Makefile.config
+++ b/Makefile.config
@ -0,0 +1,21 @@
+# Caffe build configuration. The Dockerfile copies this file over Caffe's
+# own Makefile.config before running `make`.
+
+# Build with cuDNN, using the CUDA toolkit installed at CUDA_DIR.
+USE_CUDNN := 1
+CUDA_DIR := /usr/local/cuda
+
+# GPU architectures to generate code for (sm_30 .. sm_61, plus compute_61 PTX).
+CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
+		-gencode arch=compute_35,code=sm_35 \
+		-gencode arch=compute_50,code=sm_50 \
+		-gencode arch=compute_52,code=sm_52 \
+		-gencode arch=compute_60,code=sm_60 \
+		-gencode arch=compute_61,code=sm_61 \
+		-gencode arch=compute_61,code=compute_61
+# BLAS backend (the Dockerfile installs libatlas-base-dev).
+BLAS := atlas
+# Python 3.6 bindings. `boost_python3` is resolved through the symlink the
+# Dockerfile creates in /usr/lib/x86_64-linux-gnu.
+PYTHON_LIBRARIES := boost_python3 python3.6m
+PYTHON_INCLUDE := /usr/include/python3.6 \
+                 /usr/local/lib/python3.6/dist-packages/numpy/core/include
+PYTHON_LIB := /usr/lib
+# Header and library search paths, including the serial HDF5 locations.
+INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
+LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial
+BUILD_DIR := build
+DISTRIBUTE_DIR := distribute
+TEST_GPUID := 0
+# NOTE(review): presumably the command prefix Caffe's Makefile uses to silence
+# command echoing -- confirm against the Caffe Makefile.
+Q ?= @
--- a/35
+++ b/35
@ -1,16 +1,47 @@
-FROM nvidia/cuda:9.0-cudnn7-runtime
+# The devel image is used because Caffe is compiled from source below
+# (presumably the runtime image lacks the CUDA compiler and headers -- confirm).
+FROM nvidia/cuda:9.0-cudnn7-devel

+# System packages: Python 3.6 from the jonathonf PPA, ffmpeg, and the
+# build-time dependencies of OpenCV and Caffe, followed by the Python stack.
 RUN apt-get update && apt-get install -y apt-utils software-properties-common && \
    add-apt-repository ppa:jonathonf/python-3.6 && \
    apt-get update && apt-get -y install python3 python3-pip python3.6 python3.6-dev python3.6-venv vim ffmpeg \
    build-essential cmake git libgtk2.0-dev pkg-config libavcodec-dev \
+    wget libatlas-base-dev libboost-all-dev libgflags-dev \
+    libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev \
+    liblmdb-dev libopencv-dev libprotobuf-dev \
+    libsnappy-dev protobuf-compiler \
+    python-numpy python-setuptools python-scipy \
    libavformat-dev libswscale-dev && \
    python3.6 -m pip install --upgrade pip && \
    python3.6 -m pip install jupyter ipywidgets jupyterlab && \
    python3.6 -m pip install tensorflow-gpu h5py keras && \
-    python3.6 -m pip install scikit-image opencv-contrib-python
+    python3.6 -m pip install scikit-image opencv-contrib-python pyyaml

 RUN mkdir /comixify
+# Only the Caffe build configuration is copied at this point, so changes to
+# the application sources do not invalidate the Caffe build layer.
+COPY ./Makefile.config /comixify/Makefile.config
+
+ENV CAFFE_ROOT=/opt/caffe
+WORKDIR $CAFFE_ROOT
+
+ENV CLONE_TAG=1.0
+
+# Build Caffe ${CLONE_TAG} with the project's Makefile.config:
+#  - install pycaffe's Python requirements (plus pydot) one by one,
+#  - replace line 415 of Caffe's Makefile so NVCCFLAGS carries -D_FORCE_INLINES,
+#  - expose the Python 3 boost library under the name `boost_python3`, which
+#    Makefile.config links against.
+# NOTE(review): the symlink targets the py35 boost build while the image uses
+# Python 3.6 -- confirm this pairing is intended.
+# The last command must not end with `&& \`: a dangling continuation would pull
+# the following ENV line into this RUN command and break the build.
+RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \
+    cp /comixify/Makefile.config ./Makefile.config && \
+    cd python && for req in $(cat requirements.txt) pydot; do python3.6 -m pip install $req; done && cd .. && \
+    sed -i '415s/.*/NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)/' Makefile && \
+    echo "# ---[ Includes" >> CMakeLists.txt && \
+    echo "set(${CMAKE_CXX_FLAGS} "-D_FORCE_INLINES ${CMAKE_CXX_FLAGS}")" >> CMakeLists.txt && \
+    ls -la /usr/lib/x86_64-linux-gnu && \
+    ln -s /usr/lib/x86_64-linux-gnu/libboost_python-py35.so /usr/lib/x86_64-linux-gnu/libboost_python3.so && \
+    make all -j"$(nproc)" && \
+    make distribute
+
+# Make pycaffe importable and the Caffe tools reachable on PATH.
+ENV PYCAFFE_ROOT $CAFFE_ROOT/python
+ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH
+ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH
+# Register the Caffe shared libraries, download the GoogLeNet weights used for
+# feature extraction, and pin/upgrade two Python packages.
+RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig && \
+    python3.6 $CAFFE_ROOT/scripts/download_model_binary.py $CAFFE_ROOT/models/bvlc_googlenet && \
+    python3.6 -m pip install markdown=="2.6.11" && \
+    python3.6 -m pip install python-dateutil --upgrade
+
 WORKDIR /comixify
 COPY . /comixify
 RUN python3.6 -m pip install -r requirements.txt
--- a/keyframes/keyframes.py
+++ b/keyframes/keyframes.py
@ -1,31 +1,44 @@
 import os
-import shutil
 import uuid
+import numpy as np
+import torch
+import torch.nn as nn
+os.environ['GLOG_minloglevel'] = '2' # Prevent caffe shell loging
+import caffe
+from datetime import datetime
 from subprocess import call
-
-import cv2
+from math import ceil
+from sklearn.preprocessing import normalize
 from django.conf import settings
+from django.core.cache import cache
+from skimage import img_as_ubyte
+import logging

 from utils import jj
+from keyframes_rl.models import DSN
+from keyframes.kts import cpd_auto
+from keyframes.utils import batch
+
+logger = logging.getLogger(__name__)


-class KeyFramesExtractor():
+class KeyFramesExtractor:
    @classmethod
-    def get_keyframes(cls, video):
-        all_keyframes, all_frames_tmp_dir = cls._get_all_frames(video)
-        interval = cls._count_interval(all_keyframes)
-        chosen_frames = cls._get_frames_with_interval(interval, all_keyframes)
-
-        shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
+    def get_keyframes(cls, video, gpu=settings.GPU, features_batch_size=settings.FEATURE_BATCH_SIZE):
+        """Extract the keyframes of ``video``.
+
+        Pipeline: dump sampled frames to a temporary directory, load them,
+        compute per-frame features, split the sequence into segments, score
+        every frame, and pick one frame from each of the best segments.
+
+        :param video: object whose ``file.path`` points at the video file.
+        :param gpu: run feature extraction and scoring on the GPU when true.
+        :param features_batch_size: frames per feature-extraction forward pass.
+        :return: list of chosen frames as produced by ``_get_chosen_frames``.
+        """
+        import shutil  # local import: the module-level import is not available
+
+        frames_paths, all_frames_tmp_dir = cls._get_all_frames(video)
+        try:
+            frames = cls._get_frames(frames_paths)
+        finally:
+            # The frames are held in memory from here on (or loading failed);
+            # either way the extracted JPEGs must not pile up in TMP_DIR.
+            shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"), ignore_errors=True)
+        features = cls._get_features(frames, gpu, features_batch_size)
+        change_points, frames_per_segment = cls._get_segments(features)
+        probs = cls._get_probs(features, gpu)
+        chosen_frames = cls._get_chosen_frames(frames, probs, change_points, frames_per_segment)
        return chosen_frames

    @staticmethod
    def _get_all_frames(video):
        all_frames_tmp_dir = uuid.uuid4()
        os.mkdir(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
-        call(["ffmpeg", "-skip_frame", "nokey", "-i", f"{video.file.path}", "-vsync", "0", "-qscale:v", "1",
-              "-f", "image2", jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
-
+        call(["ffmpeg", "-i", f"{video.file.path}", "-vf", "select=not(mod(n\\,15))", "-vsync", "vfr", "-q:v", "2",
+            jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
        frames_paths = []
        for dirname, dirnames, filenames in os.walk(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")):
            for filename in filenames:
@ -33,15 +46,114 @@ class KeyFramesExtractor():
        return sorted(frames_paths), all_frames_tmp_dir

    @staticmethod
-    def _count_interval(all_keyframes):
-        return int((len(all_keyframes) - settings.NUMBERS_OF_FRAMES_TO_SHOW) / (settings.NUMBERS_OF_FRAMES_TO_SHOW + 1))
+    def _get_frames(frames_paths):
+        """Load every image in ``frames_paths`` with ``caffe.io.load_image``.
+
+        :param frames_paths: iterable of image file paths.
+        :return: list of loaded images, in the same order as the paths.
+        """
+        return [caffe.io.load_image(path) for path in frames_paths]

    @staticmethod
-    def _get_frames_with_interval(interval, all_keyframes):
+    def _get_features(frames, gpu=True, batch_size=1):
+        """Compute one GoogLeNet ``pool5/7x7_s1`` feature vector per frame.
+
+        :param frames: sequence of images as returned by ``_get_frames``.
+        :param gpu: use Caffe's GPU mode when true, CPU mode otherwise.
+        :param batch_size: number of frames per forward pass.
+        :return: ``float32`` array of shape ``(len(frames), 1024)`` whose rows
+            were normalised with ``sklearn.preprocessing.normalize``.
+        :raises RuntimeError: if the ``CAFFE_ROOT`` environment variable is unset.
+        :raises FileNotFoundError: if the pretrained GoogLeNet weights are missing.
+        """
+        caffe_root = os.environ.get("CAFFE_ROOT")
+        if not caffe_root:
+            # Fail here with a clear message instead of a TypeError on the
+            # string concatenations below.
+            logger.error("Caffe root path not found.")
+            raise RuntimeError("CAFFE_ROOT environment variable is not set.")
+        if not gpu:
+            caffe.set_mode_cpu()
+        else:
+            caffe.set_mode_gpu()
+
+        model_file = caffe_root + "/models/bvlc_googlenet/deploy.prototxt"
+        pretrained = caffe_root + "/models/bvlc_googlenet/bvlc_googlenet.caffemodel"
+        if not os.path.isfile(pretrained):
+            logger.error("PRETRAINED Model not found.")
+            raise FileNotFoundError(pretrained)
+
+        net = caffe.Net(model_file, pretrained, caffe.TEST)
+        net.blobs["data"].reshape(batch_size, 3, 224, 224)
+
+        # Per-channel mean of the ImageNet mean image.
+        mu = np.load(caffe_root + "/python/caffe/imagenet/ilsvrc_2012_mean.npy")
+        mu = mu.mean(1).mean(1)
+        transformer = caffe.io.Transformer({"data": net.blobs["data"].data.shape})
+        transformer.set_transpose("data", (2, 0, 1))
+        transformer.set_mean("data", mu)
+        transformer.set_raw_scale("data", 255)
+        transformer.set_channel_swap("data", (2, 1, 0))
+
+        features = np.zeros(shape=(len(frames), 1024))
+        for idx_batch, (n_batch, frames_batch) in enumerate(batch(frames, batch_size)):
+            for i in range(n_batch):
+                net.blobs['data'].data[i, ...] = transformer.preprocess("data", frames_batch[i])
+            net.forward()
+            # The last batch may be shorter than batch_size; only its first
+            # n_batch rows are valid. An explicit reshape keeps the result 2-D
+            # even when n_batch == 1.
+            temp = net.blobs["pool5/7x7_s1"].data[0:n_batch].reshape(n_batch, -1)
+            start = idx_batch * batch_size
+            features[start:start + n_batch] = temp
+        normalize(features, copy=False)
+        return features.astype(np.float32)
+        
+    @staticmethod
+    def _get_probs(features, gpu=True):
+        """Score every frame with the DSN summarisation model.
+
+        The model is looked up in the Django cache first; on a miss it is
+        built, loaded from the checkpoint and stored without expiry.
+
+        :param features: ``float32`` array with one 1024-dim row per frame.
+        :param gpu: run the model on the GPU when true.
+        :return: numpy array with the model's output, squeezed.
+        """
+        model_cache_key = "keyframes_rl_model_cache"
+        model = cache.get(model_cache_key)
+
+        if model is None:
+            # NOTE(review): this commit touches model_epoch60.pth.tar while the
+            # path below names model_epoch100.pth.tar -- confirm the file exists.
+            model_path = "keyframes_rl/pretrained_model/model_epoch100.pth.tar"
+            model = DSN(in_dim=1024, hid_dim=256, num_layers=1, cell="lstm")
+            checkpoint = torch.load(model_path) if gpu else torch.load(model_path, map_location='cpu')
+            model.load_state_dict(checkpoint)
+            if gpu:
+                model = nn.DataParallel(model).cuda()
+            model.eval()
+            cache.set(model_cache_key, model, None)
+
+        # Add a leading batch dimension: the model expects batch-first input.
+        seq = torch.from_numpy(features).unsqueeze(0)
+        if gpu:
+            seq = seq.cuda()
+        return model(seq).data.cpu().squeeze().numpy()
+
+    @staticmethod
+    def _get_chosen_frames(frames, probs, change_points, frames_per_segment, min_keyframes=10):
+        """Pick the best frame from each of the highest-scoring segments.
+
+        Segments are ranked by the mean of their frame scores; from each of the
+        top ``min_keyframes`` segments the frame with the highest score is taken.
+
+        :param frames: all loaded frames.
+        :param probs: per-frame scores, aligned with ``frames``.
+        :param change_points: ``[first, last]`` frame index per segment, both
+            inclusive (as built by ``_get_segments``).
+        :param frames_per_segment: number of frames in each segment.
+        :param min_keyframes: upper bound on the number of frames returned
+            (fewer are returned when there are fewer segments).
+        :return: chosen frames in chronological order, converted with
+            ``img_as_ubyte`` and with the channel axis reversed.
+        """
+        gts = []
+        s = 0
+        for q in frames_per_segment:
+            gts.append(np.mean(probs[s:s + q]).astype(float))
+            s += q
+        gts = np.array(gts)
+        picks = np.argsort(gts)[::-1][:min_keyframes]
        chosen_frames = []
-
-        for i in range(settings.NUMBERS_OF_FRAMES_TO_SHOW):
-            frame = cv2.imread(all_keyframes[(i + 1) * interval])
-            chosen_frames.append(frame)
-
+        for pick in picks:
+            low, high = change_points[pick]
+            # `high` is inclusive, so the slice must run to high + 1; otherwise
+            # the last frame of every segment could never be selected.
+            x = low + int(np.argmax(probs[low:high + 1]))
+            chosen_frames.append({
+                "index": x,
+                "frame": frames[x]
+            })
+        chosen_frames.sort(key=lambda k: k['index'])
+        chosen_frames = [img_as_ubyte(o["frame"])[..., ::-1] for o in chosen_frames]
        return chosen_frames
+
+    @staticmethod
+    def _get_segments(features):
+        """Split the frame sequence into segments with ``cpd_auto``.
+
+        :param features: array with one feature row per frame.
+        :return: ``(change_points, frames_per_segment)`` where
+            ``change_points[i]`` is the inclusive ``[first, last]`` frame index
+            pair of segment ``i`` and ``frames_per_segment[i]`` its length.
+        """
+        K = np.dot(features, features.T)  # linear kernel between all frame pairs
+        n_frames = int(K.shape[0])
+        # Passed to cpd_auto as `ncp`: one per ten frames, at least 10, and
+        # never more than n_frames - 1.
+        min_segments = min(n_frames - 1, max(10, int(ceil(n_frames / 10))))
+        cps, scores = cpd_auto(K, min_segments, 1)
+
+        # First segment: frame 0 up to the frame before the first change point.
+        change_points = [[0, cps[0] - 1]]
+        frames_per_segment = [int(cps[0])]
+        # Inner segments: between consecutive change points.
+        for start, stop in zip(cps[:-1], cps[1:]):
+            change_points.append([start, stop - 1])
+            frames_per_segment.append(int(stop - start))
+        # Last segment: from the final change point to the last frame.
+        change_points.append([cps[-1], len(features) - 1])
+        frames_per_segment.append(int(len(features) - cps[-1]))
+        return change_points, frames_per_segment
--- a/keyframes/kts/__init__.py
+++ b/keyframes/kts/__init__.py
@ -0,0 +1,125 @@
+import numpy as np
+
+
+def calc_scatters(K):
+    """Return the matrix of within-segment scatters for kernel matrix ``K``.
+
+    ``scatters[i, j]`` is the scatter of the sequence that starts at frame
+    ``i`` and ends at frame ``j`` (inclusive); entries with ``j < i`` are 0.
+    """
+    n = K.shape[0]
+    # Prefix sums of the diagonal and 2-D prefix sums of the whole kernel,
+    # both padded with a leading zero.
+    diag_cumsum = np.cumsum([0] + list(np.diag(K)))
+    block_cumsum = np.zeros((n+1, n+1))
+    block_cumsum[1:, 1:] = np.cumsum(np.cumsum(K, 0), 1)
+    block_diag = np.diag(block_cumsum)
+
+    i = np.arange(n).reshape((-1, 1))
+    j = np.arange(n).reshape((1, -1))
+
+    # Sum of K[t, t] over the segment i..j.
+    diag_sum = diag_cumsum[1:].reshape((1, -1)) - diag_cumsum[:-1].reshape((-1, 1))
+    # Sum of K[s, t] over the square block i..j x i..j.
+    block_sum = (
+        block_diag[1:].reshape((1, -1)) + block_diag[:-1].reshape((-1, 1))
+        - block_cumsum[1:, :-1].T - block_cumsum[:-1, 1:]
+    )
+    # Segment length; the extra term avoids a zero divisor where j == i - 1.
+    seg_len = (j - i + 1).astype(float) + (j == i-1).astype(float)
+
+    scatters = diag_sum - block_sum / seg_len
+    scatters[j < i] = 0
+    return scatters
+
+
+def cpd_nonlin(K, ncp, lmin=1, lmax=100000, backtrack=True, verbose=True, out_scatters=None):
+    """Change point detection with dynamic programming.
+
+    Parameters:
+        K - square kernel matrix
+        ncp - number of change points to detect (ncp >= 0)
+        lmin - minimal length of a segment
+        lmax - maximal length of a segment
+        backtrack - when False - only evaluate objective scores (to save memory);
+            the returned change points are then all zeros
+        verbose - when True, print progress messages
+        out_scatters - optional mutable sequence; when given, its element 0 is
+            set to the precomputed scatter matrix
+    Returns: (cps, scores)
+        cps - detected array of change points: mean is thought to be constant on [ cps[i], cps[i+1] )
+        scores - values of the objective function for 0..m changepoints
+            (infeasible entries are reported as inf)
+    Raises:
+        AssertionError - if K is not square, or n, ncp, lmin and lmax are
+            inconsistent (n must lie in [(ncp + 1) * lmin, (ncp + 1) * lmax])
+    """
+    m = int(ncp)  # prevent numpy.int64
+
+    (n, n1) = K.shape
+    assert(n == n1), "Kernel matrix awaited."
+
+    # m change points give m + 1 segments, each between lmin and lmax long.
+    assert(n >= (m + 1) * lmin)
+    assert(n <= (m + 1) * lmax)
+    assert(lmax >= lmin >= 1)
+
+    if verbose:
+        print("Precomputing scatters...")
+    J = calc_scatters(K)
+
+    if out_scatters is not None:
+        out_scatters[0] = J
+
+    if verbose:
+        print("Inferring best change points...")
+    # I[k, l] - value of the objective for k change-points and l first frames
+    # (1e101 marks "not reachable"; it is mapped to inf before returning)
+    I = 1e101 * np.ones((m + 1, n + 1))
+    # Zero change points: the first l frames form a single segment.
+    I[0, lmin:lmax] = J[0, lmin - 1:lmax - 1]
+
+    if backtrack:
+        # p[k, l] --- "previous change" --- best t[k] when t[k+1] equals l
+        p = np.zeros((m + 1, n + 1), dtype=int)
+    else:
+        p = np.zeros((1, 1), dtype=int)
+
+    for k in range(1, m + 1):
+        for l in range((k + 1) * lmin, n + 1):
+            # Candidate positions t of the k-th change point: the last segment
+            # [t, l) must be between lmin and lmax frames long.
+            tmin = max(k * lmin, l - lmax)
+            tmax = l - lmin + 1
+            c = J[tmin:tmax, l - 1].reshape(-1) + I[k - 1, tmin:tmax].reshape(-1)
+            I[k, l] = np.min(c)
+            if backtrack:
+                p[k, l] = np.argmin(c)+tmin
+
+    # Collect change points
+    cps = np.zeros(m, dtype=int)
+
+    if backtrack:
+        # Walk back from the end of the sequence through the stored choices.
+        cur = n
+        for k in range(m, 0, -1):
+            cps[k - 1] = p[k, cur]
+            cur = cps[k - 1]
+
+    scores = I[:, n].copy()
+    scores[scores > 1e99] = np.inf
+    return cps, scores
+
+
+def cpd_auto(K, ncp, vmax, desc_rate=15, min_segments=10, **kwargs):
+    """Main interface
+    Detect change points automatically selecting their number
+        K       - kernel between each pair of frames in video
+        ncp     - maximum ncp
+        vmax    - special parameter
+    Optional arguments:
+        desc_rate - rate of descriptor sampling (vmax always corresponds to 1x)
+        min_segments - lower bound applied to the selected number of change
+                   points (capped so that it stays feasible for K)
+        lmin     - minimum segment length (forwarded to cpd_nonlin)
+        lmax     - maximum segment length (forwarded to cpd_nonlin)
+    Note:
+        - cps are always calculated in subsampled coordinates irrespective to
+            desc_rate
+        - lmin and m should be in agreement
+    ---
+    Returns: (cps, costs)
+        cps   - best selected change-points
+        costs - objective values for 0,1,...,m_best change-points, as
+                returned by the final cpd_nonlin run
+    Memory requirement: ~ (3*N*N + N*ncp)*4 bytes ~= 16 * N^2 bytes
+    That is 1,6 Gb for the N=10000.
+    """
+    m = int(ncp)  # prevent numpy.int64, as cpd_nonlin does
+    (_, scores) = cpd_nonlin(K, m, backtrack=False, **kwargs)
+
+    N = K.shape[0]
+    N2 = N * desc_rate  # length of the video before subsampling
+
+    penalties = np.zeros(m + 1)
+    # Prevent division by zero (in case of 0 changes)
+    counts = np.arange(1, m + 1)
+    penalties[1:] = (vmax * counts/(2.0 * N2)) * (np.log(float(N2) / counts) + 1)
+
+    costs = scores/float(N) + penalties
+    m_best = int(np.argmin(costs))
+    m_best = max(min_segments, m_best)
+    # cpd_nonlin requires N >= (m + 1) * lmin, so at most N // lmin - 1 change
+    # points fit; capping at N (as before) fails for short sequences.
+    lmin = int(kwargs.get("lmin", 1))
+    m_best = min(m_best, N // lmin - 1)
+    (cps, scores2) = cpd_nonlin(K, m_best, **kwargs)
+    return cps, scores2
--- a/keyframes/tests.py
+++ b/keyframes/tests.py
@ -1,3 +1,69 @@
 from django.test import TestCase

-# Create your tests here.
+import numpy as np
+from keyframes.keyframes import KeyFramesExtractor
+from api.models import Video
+from django.core.files import File
+import shutil
+from utils import jj
+from django.conf import settings
+from keyframes.utils import batch
+
+VIDEO_PATH = "tmp/f1_short.mp4" 
+VIDEO_N_FRAMES = 47
+
+
+class KeyframesTestCase(TestCase):
+    """Runs every stage of the keyframe extraction pipeline on a short video."""
+
+    def setUp(self):
+        # Stays None until _get_all_frames has created the directory, so
+        # tearDown also works when the test fails early.
+        self.all_frames_tmp_dir = None
+        # The file is stored by the model on create; close the handle afterwards.
+        with open(VIDEO_PATH, 'rb') as f:
+            self.video = Video.objects.create(file=File(f))
+
+    def tearDown(self):
+        if self.all_frames_tmp_dir is not None:
+            shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{self.all_frames_tmp_dir}"))
+
+    def test_keyframes(self):
+        """Keyframes are extracted correctly"""
+
+        frames_paths, all_frames_tmp_dir = KeyFramesExtractor._get_all_frames(self.video)
+        # Record the directory first so it is removed even if an assertion fails.
+        self.all_frames_tmp_dir = all_frames_tmp_dir
+        self.assertIsInstance(frames_paths[0], str)
+        self.assertEqual(len(frames_paths), VIDEO_N_FRAMES)
+
+        frames = KeyFramesExtractor._get_frames(frames_paths)
+        self.assertEqual(len(frames), VIDEO_N_FRAMES)
+        self.assertIsInstance(frames[0], np.ndarray)
+
+        features = KeyFramesExtractor._get_features(frames, False)
+        self.assertIsInstance(features, np.ndarray)
+        self.assertEqual(features.shape, (VIDEO_N_FRAMES, 1024))
+
+        change_points, frames_per_segment = KeyFramesExtractor._get_segments(features)
+        self.assertIsInstance(change_points, list)
+        self.assertIsInstance(frames_per_segment, list)
+
+        for cp in frames_per_segment:
+            with self.subTest(cp=cp):
+                self.assertIsInstance(cp, int)
+
+        probs = KeyFramesExtractor._get_probs(features, False)
+        self.assertIsInstance(probs, np.ndarray)
+        self.assertEqual(probs.shape, (VIDEO_N_FRAMES, ))
+
+        chosen_frames = KeyFramesExtractor._get_chosen_frames(frames, probs, change_points, frames_per_segment)
+        self.assertIsInstance(chosen_frames, list)
+        self.assertEqual(len(chosen_frames), 10)
+
+
+class UtilsTestCase(TestCase):
+    """Tests for the helpers in keyframes.utils."""
+
+    def test_batch(self):
+        """batch() yields (size, chunk) pairs, with a shorter final chunk"""
+        arr = [1, 1, 2, 2, 3, 3, 4]
+        batched_arr = batch(arr, 2)
+        self.assertEqual(list(batched_arr), [(2, [1, 1]), (2, [2, 2]), (2, [3, 3]), (1, [4])])
+
+    def test_empty_batch(self):
+        """batch() yields nothing for an empty input"""
+        arr = []
+        batched_arr = batch(arr, 2)
+        self.assertEqual(list(batched_arr), [])
--- a/keyframes/utils.py
+++ b/keyframes/utils.py
@ -0,0 +1,6 @@
+def batch(iterable, n=1):
+    """Yield ``(size, chunk)`` pairs covering ``iterable`` in order.
+
+    ``iterable`` must support ``len()`` and slicing. Every chunk holds ``n``
+    items except possibly the last one, which holds the remainder.
+    """
+    total = len(iterable)
+    for start in range(0, total, n):
+        chunk = iterable[start:start + n]  # slicing clamps at the end
+        yield len(chunk), chunk
--- a/keyframes_rl/models.py
+++ b/keyframes_rl/models.py
@ -0,0 +1,21 @@
+import torch.nn as nn
+from torch.nn import functional as F
+
+__all__ = ['DSN']
+
+
+class DSN(nn.Module):
+    """Deep Summarization Network.
+
+    A bidirectional recurrent layer followed by a linear layer and a sigmoid,
+    producing one score in [0, 1] per time step.
+
+    Args:
+        in_dim: size of each input feature vector.
+        hid_dim: hidden size of the recurrent layer (per direction).
+        num_layers: number of stacked recurrent layers.
+        cell: recurrent cell type, ``'lstm'`` or ``'gru'``.
+    """
+    def __init__(self, in_dim=1024, hid_dim=256, num_layers=1, cell='lstm'):
+        super(DSN, self).__init__()
+        assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru'"
+        rnn_cls = nn.LSTM if cell == 'lstm' else nn.GRU
+        self.rnn = rnn_cls(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True)
+        # Both directions are concatenated, hence hid_dim * 2 inputs.
+        self.fc = nn.Linear(hid_dim*2, 1)
+
+    def forward(self, x):
+        """Map ``x`` of shape (batch, seq, in_dim) to scores of shape (batch, seq, 1)."""
+        h, _ = self.rnn(x)
+        # Tensor.sigmoid() replaces the deprecated F.sigmoid, which emits a
+        # warning on every call.
+        p = self.fc(h).sigmoid()
+        return p
--- a/keyframes_rl/pretrained_model/model_epoch60.pth.tar
+++ b/keyframes_rl/pretrained_model/model_epoch60.pth.tar
--- a/requirements.txt
+++ b/requirements.txt
@ -10,3 +10,4 @@ pytz==2018.5
 six==1.11.0
 torch==0.4.1
 torchvision==0.2.1
+scikit-learn==0.19.2
--- a/settings/settings.py
+++ b/settings/settings.py
@ -140,3 +140,5 @@ MAX_FILE_SIZE = 50000000
 NUMBERS_OF_FRAMES_TO_SHOW = 10
 TMP_DIR = 'tmp/'
 GPU = True
+
+# Number of frames per forward pass of the Caffe feature extractor; used as
+# the default `features_batch_size` of KeyFramesExtractor.get_keyframes.
+FEATURE_BATCH_SIZE = 32
--- a/tmp/f1_short.mp4
+++ b/tmp/f1_short.mp4