diff --git a/Makefile.config b/Makefile.config new file mode 100644 index 0000000..b87aee7 --- /dev/null +++ b/Makefile.config @@ -0,0 +1,21 @@ +USE_CUDNN := 1 +CUDA_DIR := /usr/local/cuda + +CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \ + -gencode arch=compute_35,code=sm_35 \ + -gencode arch=compute_50,code=sm_50 \ + -gencode arch=compute_52,code=sm_52 \ + -gencode arch=compute_60,code=sm_60 \ + -gencode arch=compute_61,code=sm_61 \ + -gencode arch=compute_61,code=compute_61 +BLAS := atlas +PYTHON_LIBRARIES := boost_python3 python3.6m +PYTHON_INCLUDE := /usr/include/python3.6 \ + /usr/local/lib/python3.6/dist-packages/numpy/core/include +PYTHON_LIB := /usr/lib +INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial +LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial +BUILD_DIR := build +DISTRIBUTE_DIR := distribute +TEST_GPUID := 0 +Q ?= @ \ No newline at end of file diff --git a/dockerfile b/dockerfile index c88a9c7..4e0246d 100644 --- a/dockerfile +++ b/dockerfile @@ -1,16 +1,47 @@ -FROM nvidia/cuda:9.0-cudnn7-runtime +FROM nvidia/cuda:9.0-cudnn7-devel RUN apt-get update && apt-get install -y apt-utils software-properties-common && \ add-apt-repository ppa:jonathonf/python-3.6 && \ apt-get update && apt-get -y install python3 python3-pip python3.6 python3.6-dev python3.6-venv vim ffmpeg \ build-essential cmake git libgtk2.0-dev pkg-config libavcodec-dev \ + wget libatlas-base-dev libboost-all-dev libgflags-dev \ + libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev \ + liblmdb-dev libopencv-dev libprotobuf-dev \ + libsnappy-dev protobuf-compiler \ + python-numpy python-setuptools python-scipy \ libavformat-dev libswscale-dev && \ python3.6 -m pip install --upgrade pip && \ python3.6 -m pip install jupyter ipywidgets jupyterlab && \ python3.6 -m pip install tensorflow-gpu h5py keras && \ - python3.6 -m pip install scikit-image opencv-contrib-python + python3.6 -m pip 
install scikit-image opencv-contrib-python pyyaml RUN mkdir /comixify +COPY ./Makefile.config /comixify/Makefile.config + +ENV CAFFE_ROOT=/opt/caffe +WORKDIR $CAFFE_ROOT + +ENV CLONE_TAG=1.0 + +RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \ + cp /comixify/Makefile.config ./Makefile.config && \ + cd python && for req in $(cat requirements.txt) pydot; do python3.6 -m pip install $req; done && cd .. && \ + sed -i '415s/.*/NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)/' Makefile && \ + echo "# ---[ Includes" >> CMakeLists.txt && \ + echo "set(${CMAKE_CXX_FLAGS} "-D_FORCE_INLINES ${CMAKE_CXX_FLAGS}")" >> CMakeLists.txt && \ + ls -la /usr/lib/x86_64-linux-gnu && \ + ln -s /usr/lib/x86_64-linux-gnu/libboost_python-py35.so /usr/lib/x86_64-linux-gnu/libboost_python3.so && \ + make all -j"$(nproc)" && \ + make distribute && \ + +ENV PYCAFFE_ROOT $CAFFE_ROOT/python +ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH +ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH +RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig && \ + python3.6 $CAFFE_ROOT/scripts/download_model_binary.py $CAFFE_ROOT/models/bvlc_googlenet && \ + python3.6 -m pip install markdown=="2.6.11" && \ + python3.6 -m pip install python-dateutil --upgrade + WORKDIR /comixify COPY . 
/comixify RUN python3.6 -m pip install -r requirements.txt diff --git a/keyframes/keyframes.py b/keyframes/keyframes.py index 99830f9..c874715 100644 --- a/keyframes/keyframes.py +++ b/keyframes/keyframes.py @@ -1,31 +1,44 @@ import os -import shutil import uuid +import numpy as np +import torch +import torch.nn as nn +os.environ['GLOG_minloglevel'] = '2' # Prevent caffe shell logging +import caffe +from datetime import datetime from subprocess import call - -import cv2 +from math import ceil +from sklearn.preprocessing import normalize from django.conf import settings +from django.core.cache import cache +from skimage import img_as_ubyte +import logging from utils import jj +from keyframes_rl.models import DSN +from keyframes.kts import cpd_auto +from keyframes.utils import batch + +logger = logging.getLogger(__name__) -class KeyFramesExtractor(): +class KeyFramesExtractor: @classmethod - def get_keyframes(cls, video): - all_keyframes, all_frames_tmp_dir = cls._get_all_frames(video) - interval = cls._count_interval(all_keyframes) - chosen_frames = cls._get_frames_with_interval(interval, all_keyframes) - - shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")) + def get_keyframes(cls, video, gpu=settings.GPU, features_batch_size=settings.FEATURE_BATCH_SIZE): + frames_paths, all_frames_tmp_dir = cls._get_all_frames(video) + frames = cls._get_frames(frames_paths) + features = cls._get_features(frames, gpu, features_batch_size) + change_points, frames_per_segment = cls._get_segments(features) + probs = cls._get_probs(features, gpu) + chosen_frames = cls._get_chosen_frames(frames, probs, change_points, frames_per_segment) return chosen_frames @staticmethod def _get_all_frames(video): all_frames_tmp_dir = uuid.uuid4() os.mkdir(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")) - call(["ffmpeg", "-skip_frame", "nokey", "-i", f"{video.file.path}", "-vsync", "0", "-qscale:v", "1", - "-f", "image2", jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", 
"%06d.jpeg")]) - + call(["ffmpeg", "-i", f"{video.file.path}", "-vf", "select=not(mod(n\\,15))", "-vsync", "vfr", "-q:v", "2", + jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")]) frames_paths = [] for dirname, dirnames, filenames in os.walk(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")): for filename in filenames: @@ -33,15 +46,114 @@ class KeyFramesExtractor(): return sorted(frames_paths), all_frames_tmp_dir @staticmethod - def _count_interval(all_keyframes): - return int((len(all_keyframes) - settings.NUMBERS_OF_FRAMES_TO_SHOW) / (settings.NUMBERS_OF_FRAMES_TO_SHOW + 1)) + def _get_frames(frames_paths): + frames = [] + for frame_path in frames_paths: + frame = caffe.io.load_image(frame_path) + frames.append(frame) + return frames @staticmethod - def _get_frames_with_interval(interval, all_keyframes): + def _get_features(frames, gpu=True, batch_size=1): + caffe_root = os.environ.get("CAFFE_ROOT") + if not caffe_root: + print("Caffe root path not found.") + if not gpu: + caffe.set_mode_cpu() + else: + caffe.set_mode_gpu() + + model_file = caffe_root + "/models/bvlc_googlenet/deploy.prototxt" + pretrained = caffe_root + "/models/bvlc_googlenet/bvlc_googlenet.caffemodel" + if not os.path.isfile(pretrained): + print("PRETRAINED Model not found.") + + net = caffe.Net(model_file, pretrained, caffe.TEST) + net.blobs["data"].reshape(batch_size, 3, 224, 224) + + mu = np.load(caffe_root + "/python/caffe/imagenet/ilsvrc_2012_mean.npy") + mu = mu.mean(1).mean(1) + transformer = caffe.io.Transformer({"data": net.blobs["data"].data.shape}) + transformer.set_transpose("data", (2, 0, 1)) + transformer.set_mean("data", mu) + transformer.set_raw_scale("data", 255) + transformer.set_channel_swap("data", (2, 1, 0)) + + features = np.zeros(shape=(len(frames), 1024)) + for idx_batch, (n_batch, frames_batch) in enumerate(batch(frames, batch_size)): + for i in range(n_batch): + net.blobs['data'].data[i, ...] 
= transformer.preprocess("data", frames_batch[i]) + net.forward() + temp = net.blobs["pool5/7x7_s1"].data[0:n_batch] + temp = temp.squeeze().copy() + features[idx_batch * batch_size:idx_batch * batch_size + n_batch] = temp + normalize(features, copy=False) + return features.astype(np.float32) + + @staticmethod + def _get_probs(features, gpu=True): + model_cache_key = "keyframes_rl_model_cache" + model = cache.get(model_cache_key) # get model from cache + + if model is None: + model_path = "keyframes_rl/pretrained_model/model_epoch100.pth.tar" + model = DSN(in_dim=1024, hid_dim=256, num_layers=1, cell="lstm") + if gpu: + checkpoint = torch.load(model_path) + else: + checkpoint = torch.load(model_path, map_location='cpu') + model.load_state_dict(checkpoint) + if gpu: + model = nn.DataParallel(model).cuda() + model.eval() + cache.set(model_cache_key, model, None) + + seq = torch.from_numpy(features).unsqueeze(0) + if gpu: seq = seq.cuda() + probs = model(seq) + probs = probs.data.cpu().squeeze().numpy() + return probs + + @staticmethod + def _get_chosen_frames(frames, probs, change_points, frames_per_segment, min_keyframes=10): + gts = [] + s = 0 + for q in frames_per_segment: + gts.append(np.mean(probs[s:s + q]).astype(float)) + s += q + gts = np.array(gts) + picks = np.argsort(gts)[::-1][:min_keyframes] chosen_frames = [] - - for i in range(settings.NUMBERS_OF_FRAMES_TO_SHOW): - frame = cv2.imread(all_keyframes[(i + 1) * interval]) - chosen_frames.append(frame) - + for pick in picks: + cp = change_points[pick] + low = cp[0] + high = cp[1] + x = low + if low != high: + x = low + np.argmax(probs[low:high]) + chosen_frames.append({ + "index": x, + "frame": frames[x] + }) + chosen_frames.sort(key=lambda k: k['index']) + chosen_frames = [img_as_ubyte(o["frame"])[..., ::-1] for o in chosen_frames] return chosen_frames + + @staticmethod + def _get_segments(features): + K = np.dot(features, features.T) + n_frames = int(K.shape[0]) + min_segments = int(ceil(n_frames / 10)) + 
min_segments = max(10, min_segments) + min_segments = min(n_frames - 1, min_segments) + cps, scores = cpd_auto(K, min_segments, 1) + change_points = [ + [0, cps[0] - 1] + ] + frames_per_segment = [int(cps[0])] + for j in range(0, len(cps) - 1): + change_points.append([cps[j], cps[j + 1] - 1]) + frames_per_segment.append(int(cps[j + 1] - cps[j])) + frames_per_segment.append(int(len(features) - cps[len(cps) - 1])) + change_points.append([cps[len(cps) - 1], len(features) - 1]) + return change_points, frames_per_segment diff --git a/keyframes/kts/__init__.py b/keyframes/kts/__init__.py new file mode 100644 index 0000000..6ee03d4 --- /dev/null +++ b/keyframes/kts/__init__.py @@ -0,0 +1,125 @@ +import numpy as np + + +def calc_scatters(K): + """ + Calculate scatter matrix: + scatters[i,j] = {scatter of the sequence with starting frame i and ending frame j} + """ + n = K.shape[0] + K1 = np.cumsum([0] + list(np.diag(K))) + K2 = np.zeros((n+1, n+1)) + K2[1:, 1:] = np.cumsum(np.cumsum(K, 0), 1) + diagK2 = np.diag(K2) + i = np.arange(n).reshape((-1, 1)) + j = np.arange(n).reshape((1, -1)) + scatters = ( + K1[1:].reshape((1, -1)) - K1[:-1].reshape((-1, 1)) - ( + diagK2[1:].reshape((1, -1)) + diagK2[:-1].reshape((-1, 1)) + - K2[1:, :-1].T - K2[:-1, 1:] + ) / ((j - i + 1).astype(float) + (j == i-1).astype(float)) + ) + scatters[j < i] = 0 + return scatters + + +def cpd_nonlin(K, ncp, lmin=1, lmax=100000, backtrack=True, verbose=True, out_scatters=None): + """ Change point detection with dynamic programming + K - square kernel matrix + ncp - number of change points to detect (ncp >= 0) + lmin - minimal length of a segment + lmax - maximal length of a segment + backtrack - when False - only evaluate objective scores (to save memory) + Returns: (cps, obj) + cps - detected array of change points: mean is thought to be constant on [ cps[i], cps[i+1] ) + obj_vals - values of the objective function for 0..m changepoints + """ + m = int(ncp) # prevent numpy.int64 + + (n, n1) = K.shape + 
assert(n == n1), "Kernel matrix awaited." + + assert(n >= (m + 1) * lmin) + assert(n <= (m + 1) * lmax) + assert(lmax >= lmin >= 1) + + if verbose: + print("Precomputing scatters...") + J = calc_scatters(K) + + if out_scatters is not None: + out_scatters[0] = J + + if verbose: + print("Inferring best change points...") + # I[k, l] - value of the objective for k change-points and l first frames + I = 1e101 * np.ones((m + 1, n + 1)) + I[0, lmin:lmax] = J[0, lmin - 1:lmax - 1] + + if backtrack: + # p[k, l] --- "previous change" --- best t[k] when t[k+1] equals l + p = np.zeros((m + 1, n + 1), dtype=int) + else: + p = np.zeros((1, 1), dtype=int) + + for k in range(1, m + 1): + for l in range((k + 1) * lmin, n + 1): + tmin = max(k * lmin, l - lmax) + tmax = l - lmin + 1 + c = J[tmin:tmax, l - 1].reshape(-1) + I[k - 1, tmin:tmax].reshape(-1) + I[k, l] = np.min(c) + if backtrack: + p[k, l] = np.argmin(c)+tmin + + # Collect change points + cps = np.zeros(m, dtype=int) + + if backtrack: + cur = n + for k in range(m, 0, -1): + cps[k - 1] = p[k, cur] + cur = cps[k - 1] + + scores = I[:, n].copy() + scores[scores > 1e99] = np.inf + return cps, scores + + +def cpd_auto(K, ncp, vmax, desc_rate=15, min_segments=10, **kwargs): + """Main interface + Detect change points automatically selecting their number + K - kernel between each pair of frames in video + ncp - maximum ncp + vmax - special parameter + Optional arguments: + lmin - minimum segment length + lmax - maximum segment length + desc_rate - rate of descriptor sampling (vmax always corresponds to 1x) + Note: + - cps are always calculated in subsampled coordinates irrespective to + desc_rate + - lmin and m should be in agreement + --- + Returns: (cps, costs) + cps - best selected change-points + costs - costs for 0,1,2,...,m change-points + Memory requirement: ~ (3*N*N + N*ncp)*4 bytes ~= 16 * N^2 bytes + That is 1,6 Gb for the N=10000. 
+ """ + m = ncp + (_, scores) = cpd_nonlin(K, m, backtrack=False, **kwargs) + + N = K.shape[0] + N2 = N * desc_rate # length of the video before subsampling + + penalties = np.zeros(m + 1) + # Prevent division by zero (in case of 0 changes) + ncp = np.arange(1, m + 1) + penalties[1:] = (vmax * ncp/(2.0 * N2)) * (np.log(float(N2) / ncp) + 1) + + costs = scores/float(N) + penalties + m_best = int(np.argmin(costs)) + m_best = max(min_segments, m_best) + m_best = min(m_best, N) + (cps, scores2) = cpd_nonlin(K, m_best, **kwargs) + return cps, scores2 diff --git a/keyframes/tests.py b/keyframes/tests.py index 7ce503c..ddd43f6 100644 --- a/keyframes/tests.py +++ b/keyframes/tests.py @@ -1,3 +1,69 @@ from django.test import TestCase -# Create your tests here. +import numpy as np +from keyframes.keyframes import KeyFramesExtractor +from api.models import Video +from django.core.files import File +import shutil +from utils import jj +from django.conf import settings +from keyframes.utils import batch + +VIDEO_PATH = "tmp/f1_short.mp4" +VIDEO_N_FRAMES = 47 + + +class KeyframesTestCase(TestCase): + + def setUp(self): + f = open(VIDEO_PATH, 'rb') + self.video = Video.objects.create(file=File(f)) + + def tearDown(self): + shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{self.all_frames_tmp_dir}")) + + def test_keyframes(self): + """Keyframes are extracted correctly""" + + frames_paths, all_frames_tmp_dir = KeyFramesExtractor._get_all_frames(self.video) + self.assertIsInstance(frames_paths[0], str) + self.assertEqual(len(frames_paths), VIDEO_N_FRAMES) + self.all_frames_tmp_dir = all_frames_tmp_dir + + frames = KeyFramesExtractor._get_frames(frames_paths) + self.assertEqual(len(frames), VIDEO_N_FRAMES) + self.assertIsInstance(frames[0], np.ndarray) + + features = KeyFramesExtractor._get_features(frames, False) + self.assertIsInstance(features, np.ndarray) + self.assertEqual(features.shape, (VIDEO_N_FRAMES, 1024)) + + change_points, frames_per_segment = 
KeyFramesExtractor._get_segments(features) + self.assertIsInstance(change_points, list) + self.assertIsInstance(frames_per_segment, list) + + for cp in frames_per_segment: + with self.subTest(cp=cp): + self.assertIsInstance(cp, int) + + probs = KeyFramesExtractor._get_probs(features, False) + self.assertIsInstance(probs, np.ndarray) + self.assertEqual(probs.shape, (VIDEO_N_FRAMES, )) + + chosen_frames = KeyFramesExtractor._get_chosen_frames(frames, probs, change_points, frames_per_segment) + self.assertIsInstance(chosen_frames, list) + self.assertTrue(len(chosen_frames) == 10) + + +class UtilsTestCase(TestCase): + def test_batch(self): + """Batch is working""" + arr = [1, 1, 2, 2, 3, 3, 4] + batched_arr = batch(arr, 2) + self.assertEqual(list(batched_arr), [(2, [1, 1]), (2, [2, 2]), (2, [3, 3]), (1, [4])]) + + def test_empty_batch(self): + """Batch handles an empty input""" + arr = [] + batched_arr = batch(arr, 2) + self.assertEqual(list(batched_arr), []) diff --git a/keyframes/utils.py b/keyframes/utils.py new file mode 100644 index 0000000..f1d4b43 --- /dev/null +++ b/keyframes/utils.py @@ -0,0 +1,6 @@ +def batch(iterable, n=1): + length = len(iterable) + for ndx in range(0, length, n): + end_index = min(ndx + n, length) + n_elemnets = end_index - ndx + yield n_elemnets, iterable[ndx:end_index] diff --git a/keyframes_rl/models.py b/keyframes_rl/models.py new file mode 100644 index 0000000..cbbee4a --- /dev/null +++ b/keyframes_rl/models.py @@ -0,0 +1,21 @@ +import torch.nn as nn +from torch.nn import functional as F + +__all__ = ['DSN'] + + +class DSN(nn.Module): + """Deep Summarization Network""" + def __init__(self, in_dim=1024, hid_dim=256, num_layers=1, cell='lstm'): + super(DSN, self).__init__() + assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru'" + if cell == 'lstm': + self.rnn = nn.LSTM(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True) + else: + self.rnn = nn.GRU(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, 
batch_first=True) + self.fc = nn.Linear(hid_dim*2, 1) + + def forward(self, x): + h, _ = self.rnn(x) + p = F.sigmoid(self.fc(h)) + return p diff --git a/keyframes_rl/pretrained_model/model_epoch60.pth.tar b/keyframes_rl/pretrained_model/model_epoch60.pth.tar new file mode 100644 index 0000000..9b20b5a Binary files /dev/null and b/keyframes_rl/pretrained_model/model_epoch60.pth.tar differ diff --git a/requirements.txt b/requirements.txt index 44b32ff..085cdff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ pytz==2018.5 six==1.11.0 torch==0.4.1 torchvision==0.2.1 +scikit-learn==0.19.2 \ No newline at end of file diff --git a/settings/settings.py b/settings/settings.py index 0adac0a..d4bab31 100644 --- a/settings/settings.py +++ b/settings/settings.py @@ -140,3 +140,5 @@ MAX_FILE_SIZE = 50000000 NUMBERS_OF_FRAMES_TO_SHOW = 10 TMP_DIR = 'tmp/' GPU = True + +FEATURE_BATCH_SIZE = 32 diff --git a/tmp/f1_short.mp4 b/tmp/f1_short.mp4 new file mode 100644 index 0000000..218a2ba Binary files /dev/null and b/tmp/f1_short.mp4 differ