forked from prehistoric-systems/comixify
Keyframes (#6)
* Add keyframe model * Add segmentation utils * Add keyframes extraction pipeline * Add keyframe tests * Update dockerfile to include caffe * Add summe pretrained model * Add video for testing * Update keyframe pipeline, tests * Update settings to use in memory db for tests * Set keyframe number to 10, fix bugs * Fix keyframe order * Make requested changes * Fix Dockerfile * Make requested changes * Make requested changes * Add blank lines * Change dockerfile base cuda image to devel version * Add modified Cuda.cmake for Dockerfile * Add pyyaml dependecy to dockerfile * Update dockerfile * Update dockerfile * Fix markdown version error * Fix markdown version error * Change caffe installation to make * Update dockerfile * Update dockerfile * Fix boost imoprt * Fix boost not found bug * Add feature normalisation * Fix dateutil, fix caffe root slash * Fix slash bug * Add batching to feature extraction * Add model caching to keyframes extraction * Fix output images to be in proper range * Add time logging * Change feature batch to 128 * Change dockerfile * Fix dockerfile * Change feature batch to 10 * Add set mode gpu * Change feature batch to 64 * Change feature batch to 32 * Add I-frame frame sampling * Cleanup * Delete Cuda.cmake * Remove comments from Makefile.config * Cleanup * Fix color scheme switching * Remove cudnn.hpp, change caffe to 1.0 * Remove cudnn.hpp copy in dockerfile * Remove redundant run's in dockerfile * Change pretrained model
This commit is contained in:
parent
43bb8134fd
commit
b5dd5cffc0
11 changed files with 409 additions and 24 deletions
21
Makefile.config
Normal file
21
Makefile.config
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
USE_CUDNN := 1
|
||||
CUDA_DIR := /usr/local/cuda
|
||||
|
||||
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
|
||||
-gencode arch=compute_35,code=sm_35 \
|
||||
-gencode arch=compute_50,code=sm_50 \
|
||||
-gencode arch=compute_52,code=sm_52 \
|
||||
-gencode arch=compute_60,code=sm_60 \
|
||||
-gencode arch=compute_61,code=sm_61 \
|
||||
-gencode arch=compute_61,code=compute_61
|
||||
BLAS := atlas
|
||||
PYTHON_LIBRARIES := boost_python3 python3.6m
|
||||
PYTHON_INCLUDE := /usr/include/python3.6 \
|
||||
/usr/local/lib/python3.6/dist-packages/numpy/core/include
|
||||
PYTHON_LIB := /usr/lib
|
||||
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
|
||||
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial
|
||||
BUILD_DIR := build
|
||||
DISTRIBUTE_DIR := distribute
|
||||
TEST_GPUID := 0
|
||||
Q ?= @
|
||||
35
dockerfile
35
dockerfile
|
|
@ -1,16 +1,47 @@
|
|||
FROM nvidia/cuda:9.0-cudnn7-runtime
|
||||
FROM nvidia/cuda:9.0-cudnn7-devel
|
||||
|
||||
RUN apt-get update && apt-get install -y apt-utils software-properties-common && \
|
||||
add-apt-repository ppa:jonathonf/python-3.6 && \
|
||||
apt-get update && apt-get -y install python3 python3-pip python3.6 python3.6-dev python3.6-venv vim ffmpeg \
|
||||
build-essential cmake git libgtk2.0-dev pkg-config libavcodec-dev \
|
||||
wget libatlas-base-dev libboost-all-dev libgflags-dev \
|
||||
libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev \
|
||||
liblmdb-dev libopencv-dev libprotobuf-dev \
|
||||
libsnappy-dev protobuf-compiler \
|
||||
python-numpy python-setuptools python-scipy \
|
||||
libavformat-dev libswscale-dev && \
|
||||
python3.6 -m pip install --upgrade pip && \
|
||||
python3.6 -m pip install jupyter ipywidgets jupyterlab && \
|
||||
python3.6 -m pip install tensorflow-gpu h5py keras && \
|
||||
python3.6 -m pip install scikit-image opencv-contrib-python
|
||||
python3.6 -m pip install scikit-image opencv-contrib-python pyyaml
|
||||
|
||||
RUN mkdir /comixify
|
||||
COPY ./Makefile.config /comixify/Makefile.config
|
||||
|
||||
ENV CAFFE_ROOT=/opt/caffe
|
||||
WORKDIR $CAFFE_ROOT
|
||||
|
||||
ENV CLONE_TAG=1.0
|
||||
|
||||
RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \
|
||||
cp /comixify/Makefile.config ./Makefile.config && \
|
||||
cd python && for req in $(cat requirements.txt) pydot; do python3.6 -m pip install $req; done && cd .. && \
|
||||
sed -i '415s/.*/NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)/' Makefile && \
|
||||
echo "# ---[ Includes" >> CMakeLists.txt && \
|
||||
echo "set(${CMAKE_CXX_FLAGS} "-D_FORCE_INLINES ${CMAKE_CXX_FLAGS}")" >> CMakeLists.txt && \
|
||||
ls -la /usr/lib/x86_64-linux-gnu && \
|
||||
ln -s /usr/lib/x86_64-linux-gnu/libboost_python-py35.so /usr/lib/x86_64-linux-gnu/libboost_python3.so && \
|
||||
make all -j"$(nproc)" && \
|
||||
make distribute && \
|
||||
|
||||
ENV PYCAFFE_ROOT $CAFFE_ROOT/python
|
||||
ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH
|
||||
ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH
|
||||
RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig && \
|
||||
python3.6 $CAFFE_ROOT/scripts/download_model_binary.py $CAFFE_ROOT/models/bvlc_googlenet && \
|
||||
python3.6 -m pip install markdown=="2.6.11" && \
|
||||
python3.6 -m pip install python-dateutil --upgrade
|
||||
|
||||
WORKDIR /comixify
|
||||
COPY . /comixify
|
||||
RUN python3.6 -m pip install -r requirements.txt
|
||||
|
|
|
|||
|
|
@ -1,31 +1,44 @@
|
|||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
os.environ['GLOG_minloglevel'] = '2' # Prevent caffe shell loging
|
||||
import caffe
|
||||
from datetime import datetime
|
||||
from subprocess import call
|
||||
|
||||
import cv2
|
||||
from math import ceil
|
||||
from sklearn.preprocessing import normalize
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from skimage import img_as_ubyte
|
||||
import logging
|
||||
|
||||
from utils import jj
|
||||
from keyframes_rl.models import DSN
|
||||
from keyframes.kts import cpd_auto
|
||||
from keyframes.utils import batch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KeyFramesExtractor():
|
||||
class KeyFramesExtractor:
|
||||
@classmethod
|
||||
def get_keyframes(cls, video):
|
||||
all_keyframes, all_frames_tmp_dir = cls._get_all_frames(video)
|
||||
interval = cls._count_interval(all_keyframes)
|
||||
chosen_frames = cls._get_frames_with_interval(interval, all_keyframes)
|
||||
|
||||
shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
|
||||
def get_keyframes(cls, video, gpu=settings.GPU, features_batch_size=settings.FEATURE_BATCH_SIZE):
    """Run the full keyframe-extraction pipeline on ``video``.

    Steps: dump frames to a temporary directory (``_get_all_frames``), load
    them into memory (``_get_frames``), compute per-frame features
    (``_get_features``), split the sequence into segments (``_get_segments``),
    score every frame (``_get_probs``) and pick the best frame of the
    best-scoring segments (``_get_chosen_frames``).

    Args:
        video: object whose ``file.path`` points at the video on disk.
        gpu: run feature extraction and scoring on the GPU when true.
        features_batch_size: number of frames per feature-extraction batch.

    Returns:
        The list of chosen frames as produced by ``_get_chosen_frames``.
    """
    frames_paths, all_frames_tmp_dir = cls._get_all_frames(video)
    try:
        frames = cls._get_frames(frames_paths)
    finally:
        # The frames now live in memory, so the dumped image files are no
        # longer needed; remove them even when loading failed so repeated
        # requests do not fill TMP_DIR.
        shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"), ignore_errors=True)

    features = cls._get_features(frames, gpu, features_batch_size)
    change_points, frames_per_segment = cls._get_segments(features)
    probs = cls._get_probs(features, gpu)
    chosen_frames = cls._get_chosen_frames(frames, probs, change_points, frames_per_segment)
    return chosen_frames
|
||||
|
||||
@staticmethod
|
||||
def _get_all_frames(video):
|
||||
all_frames_tmp_dir = uuid.uuid4()
|
||||
os.mkdir(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
|
||||
call(["ffmpeg", "-skip_frame", "nokey", "-i", f"{video.file.path}", "-vsync", "0", "-qscale:v", "1",
|
||||
"-f", "image2", jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
|
||||
|
||||
call(["ffmpeg", "-i", f"{video.file.path}", "-vf", "select=not(mod(n\\,15))", "-vsync", "vfr", "-q:v", "2",
|
||||
jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
|
||||
frames_paths = []
|
||||
for dirname, dirnames, filenames in os.walk(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")):
|
||||
for filename in filenames:
|
||||
|
|
@ -33,15 +46,114 @@ class KeyFramesExtractor():
|
|||
return sorted(frames_paths), all_frames_tmp_dir
|
||||
|
||||
@staticmethod
|
||||
def _count_interval(all_keyframes):
|
||||
return int((len(all_keyframes) - settings.NUMBERS_OF_FRAMES_TO_SHOW) / (settings.NUMBERS_OF_FRAMES_TO_SHOW + 1))
|
||||
def _get_frames(frames_paths):
    """Load every image in ``frames_paths`` with ``caffe.io.load_image``.

    Returns a list of the loaded images, in the same order as the paths.
    """
    return [caffe.io.load_image(path) for path in frames_paths]
|
||||
|
||||
@staticmethod
|
||||
def _get_frames_with_interval(interval, all_keyframes):
|
||||
def _get_features(frames, gpu=True, batch_size=1):
    """Compute one L2-normalised 1024-d GoogLeNet feature vector per frame.

    The BVLC GoogLeNet definition and weights are read from
    ``$CAFFE_ROOT/models/bvlc_googlenet`` and the activations of the
    ``pool5/7x7_s1`` blob are used as features.

    Args:
        frames: sequence of images as returned by ``_get_frames``.
        gpu: select Caffe GPU mode when true, CPU mode otherwise.
        batch_size: number of frames pushed through the network at once.

    Returns:
        ``float32`` array of shape ``(len(frames), 1024)`` with rows
        normalised by ``sklearn.preprocessing.normalize``.

    Raises:
        RuntimeError: the ``CAFFE_ROOT`` environment variable is not set.
        FileNotFoundError: the pretrained ``.caffemodel`` file is missing.
    """
    caffe_root = os.environ.get("CAFFE_ROOT")
    if not caffe_root:
        # Fail here with a clear message instead of a TypeError on
        # ``None + str`` a few lines further down.
        raise RuntimeError("Caffe root path not found.")

    if gpu:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    model_file = caffe_root + "/models/bvlc_googlenet/deploy.prototxt"
    pretrained = caffe_root + "/models/bvlc_googlenet/bvlc_googlenet.caffemodel"
    if not os.path.isfile(pretrained):
        raise FileNotFoundError(f"PRETRAINED Model not found: {pretrained}")

    net = caffe.Net(model_file, pretrained, caffe.TEST)
    net.blobs["data"].reshape(batch_size, 3, 224, 224)

    # Per-channel mean: average the stored mean image over its two trailing axes.
    mu = np.load(caffe_root + "/python/caffe/imagenet/ilsvrc_2012_mean.npy")
    mu = mu.mean(1).mean(1)

    transformer = caffe.io.Transformer({"data": net.blobs["data"].data.shape})
    transformer.set_transpose("data", (2, 0, 1))
    transformer.set_mean("data", mu)
    transformer.set_raw_scale("data", 255)
    transformer.set_channel_swap("data", (2, 1, 0))

    features = np.zeros(shape=(len(frames), 1024))
    offset = 0
    for n_batch, frames_batch in batch(frames, batch_size):
        for i, frame in enumerate(frames_batch):
            net.blobs["data"].data[i, ...] = transformer.preprocess("data", frame)
        net.forward()
        # Only the first n_batch rows belong to this batch (the last batch
        # may be shorter than batch_size). reshape keeps the batch axis even
        # for a single-frame batch, unlike squeeze().
        activations = net.blobs["pool5/7x7_s1"].data[0:n_batch]
        features[offset:offset + n_batch] = activations.reshape(n_batch, -1)
        offset += n_batch

    normalize(features, copy=False)  # in-place row normalisation
    return features.astype(np.float32)
|
||||
|
||||
@staticmethod
|
||||
def _get_probs(features, gpu=True):
    """Score every frame with the pretrained DSN summarisation model.

    The model is built once and stored in the Django cache under
    ``"keyframes_rl_model_cache"`` with no expiry; later calls reuse it.

    Args:
        features: ``float32`` array of per-frame features, one row per frame.
        gpu: load and run the model on the GPU when true.

    Returns:
        1-D numpy array with one score per frame.
    """
    model_cache_key = "keyframes_rl_model_cache"
    model = cache.get(model_cache_key)  # get model from cache

    if model is None:
        model_path = "keyframes_rl/pretrained_model/model_epoch100.pth.tar"
        model = DSN(in_dim=1024, hid_dim=256, num_layers=1, cell="lstm")
        # On CPU-only runs the checkpoint tensors must be remapped to the CPU.
        checkpoint = torch.load(model_path, map_location=None if gpu else "cpu")
        model.load_state_dict(checkpoint)
        if gpu:
            model = nn.DataParallel(model).cuda()
        model.eval()
        cache.set(model_cache_key, model, None)

    seq = torch.from_numpy(features).unsqueeze(0)
    if gpu:
        seq = seq.cuda()

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        probs = model(seq)

    # reshape(-1) always yields a 1-D array, even for a single frame, where
    # squeeze() would collapse the result to a 0-d scalar.
    return probs.detach().cpu().numpy().reshape(-1)
|
||||
|
||||
@staticmethod
|
||||
def _get_chosen_frames(frames, probs, change_points, frames_per_segment, min_keyframes=10):
    """Pick the best-scoring frame from each of the top-scoring segments.

    Args:
        frames: loaded frames, indexable by frame number.
        probs: 1-D array of per-frame scores.
        change_points: list of ``[first, last]`` frame indices per segment,
            both ends inclusive.
        frames_per_segment: number of frames in each segment, in the same
            order as ``change_points``.
        min_keyframes: number of segments to pick from; despite the name it
            is an upper bound on the number of returned frames.

    Returns:
        List of frames in chronological order, converted with
        ``img_as_ubyte`` and with the last (channel) axis reversed.
    """
    # Mean score of every segment.
    segment_scores = []
    start = 0
    for n_frames in frames_per_segment:
        segment_scores.append(np.mean(probs[start:start + n_frames]).astype(float))
        start += n_frames

    # Indices of the highest-scoring segments, best first.
    picks = np.argsort(np.array(segment_scores))[::-1][:min_keyframes]

    chosen = []
    for pick in picks:
        low, high = change_points[pick]
        # ``high`` is inclusive, so the slice must reach high + 1; otherwise
        # the last frame of the segment can never be selected.
        best = int(low + np.argmax(probs[low:high + 1]))
        chosen.append({"index": best, "frame": frames[best]})

    chosen.sort(key=lambda entry: entry["index"])
    return [img_as_ubyte(entry["frame"])[..., ::-1] for entry in chosen]
|
||||
|
||||
@staticmethod
|
||||
def _get_segments(features):
    """Split the frame sequence into segments using kernel change-point detection.

    The kernel is the Gram matrix ``features @ features.T``. The number of
    change points requested from ``cpd_auto`` is ``ceil(n_frames / 10)``,
    raised to at least 10 and capped at ``n_frames - 1``.

    Args:
        features: 2-D array with one feature row per frame.

    Returns:
        ``(change_points, frames_per_segment)`` where ``change_points`` is a
        list of ``[first, last]`` frame indices (both inclusive) and
        ``frames_per_segment`` the matching list of segment lengths as ints.
    """
    K = np.dot(features, features.T)
    n_frames = int(K.shape[0])

    min_segments = int(ceil(n_frames / 10))
    min_segments = max(10, min_segments)
    min_segments = min(n_frames - 1, min_segments)

    cps, _ = cpd_auto(K, min_segments, 1)

    # Segment boundaries: every segment is [bounds[i], bounds[i + 1]).
    # Built this way an empty ``cps`` yields a single segment covering the
    # whole sequence instead of an IndexError on ``cps[0]``.
    bounds = [0] + [int(cp) for cp in cps] + [n_frames]
    change_points = [[lo, hi - 1] for lo, hi in zip(bounds, bounds[1:])]
    frames_per_segment = [hi - lo for lo, hi in zip(bounds, bounds[1:])]
    return change_points, frames_per_segment
|
||||
|
|
|
|||
125
keyframes/kts/__init__.py
Normal file
125
keyframes/kts/__init__.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def calc_scatters(K):
    """Calculate the scatter matrix of a kernel matrix.

    ``scatters[i, j]`` is the scatter of the sequence with starting frame
    ``i`` and ending frame ``j`` (inclusive):

        sum_{t=i..j} K[t, t] - (1 / (j - i + 1)) * sum_{s,t=i..j} K[s, t]

    Entries with ``j < i`` are set to 0.

    Args:
        K: square kernel matrix (any array-like accepted by ``np.asarray``).

    Returns:
        ``(n, n)`` float array of scatters.
    """
    K = np.asarray(K)
    n = K.shape[0]

    # Prefix sums of the diagonal, accumulated in float64, with a leading 0
    # so that diag_sums[j + 1] - diag_sums[i] is the trace over [i, j].
    diag_sums = np.concatenate(([0.0], np.cumsum(np.diag(K), dtype=float)))

    # 2-D prefix sums (summed-area table) with a zero first row/column.
    block_sums = np.zeros((n + 1, n + 1))
    block_sums[1:, 1:] = np.cumsum(np.cumsum(K, 0), 1)
    block_diag = np.diag(block_sums)

    i = np.arange(n).reshape((-1, 1))
    j = np.arange(n).reshape((1, -1))

    # Segment length; the (j == i - 1) term turns the single zero-length
    # case into 1 to avoid a division by zero (that entry is zeroed below).
    lengths = (j - i + 1).astype(float) + (j == i - 1).astype(float)

    trace = diag_sums[1:].reshape((1, -1)) - diag_sums[:-1].reshape((-1, 1))
    block_total = (
        block_diag[1:].reshape((1, -1)) + block_diag[:-1].reshape((-1, 1))
        - block_sums[1:, :-1].T - block_sums[:-1, 1:]
    )
    scatters = trace - block_total / lengths
    scatters[j < i] = 0
    return scatters
|
||||
|
||||
|
||||
def cpd_nonlin(K, ncp, lmin=1, lmax=100000, backtrack=True, verbose=True, out_scatters=None):
    """Change point detection with dynamic programming.

    Args:
        K: square kernel matrix.
        ncp: number of change points to detect (ncp >= 0).
        lmin: minimal length of a segment.
        lmax: maximal length of a segment.
        backtrack: when False only the objective scores are evaluated (to
            save memory) and the returned change points are all zero.
        verbose: print progress messages.
        out_scatters: optional mutable sequence; when given, its element 0
            is set to the computed scatter matrix.

    Returns:
        ``(cps, scores)`` where ``cps`` is the array of detected change
        points (mean is thought to be constant on ``[cps[i], cps[i+1])``)
        and ``scores`` holds the objective value for 0..ncp change points,
        with unreachable configurations reported as ``inf``.
    """
    n_changes = int(ncp)  # prevent numpy.int64

    n_frames, n_cols = K.shape
    assert n_frames == n_cols, "Kernel matrix awaited."

    assert n_frames >= (n_changes + 1) * lmin
    assert n_frames <= (n_changes + 1) * lmax
    assert lmax >= lmin >= 1

    if verbose:
        print("Precomputing scatters...")
    scatters = calc_scatters(K)

    if out_scatters is not None:
        out_scatters[0] = scatters

    if verbose:
        print("Inferring best change points...")
    # objective[k, l] - value of the objective for k change-points and l first frames
    objective = np.full((n_changes + 1, n_frames + 1), 1e101)
    objective[0, lmin:lmax] = scatters[0, lmin - 1:lmax - 1]

    # previous[k, l] --- "previous change" --- best t[k] when t[k+1] equals l.
    # Without backtracking a dummy 1x1 table is enough.
    table_shape = (n_changes + 1, n_frames + 1) if backtrack else (1, 1)
    previous = np.zeros(table_shape, dtype=int)

    for k in range(1, n_changes + 1):
        for end in range((k + 1) * lmin, n_frames + 1):
            first = max(k * lmin, end - lmax)
            last = end - lmin + 1
            candidates = (
                scatters[first:last, end - 1].reshape(-1)
                + objective[k - 1, first:last].reshape(-1)
            )
            objective[k, end] = np.min(candidates)
            if backtrack:
                previous[k, end] = np.argmin(candidates) + first

    # Collect change points by walking the table back from the last frame.
    cps = np.zeros(n_changes, dtype=int)
    if backtrack:
        position = n_frames
        for k in range(n_changes, 0, -1):
            position = previous[k, position]
            cps[k - 1] = position

    scores = objective[:, n_frames].copy()
    scores[scores > 1e99] = np.inf
    return cps, scores
|
||||
|
||||
|
||||
def cpd_auto(K, ncp, vmax, desc_rate=15, min_segments=10, **kwargs):
    """Main interface: detect change points, selecting their number automatically.

    Args:
        K: kernel between each pair of frames in video.
        ncp: maximum number of change points to consider.
        vmax: special parameter (weight of the penalty term).
        desc_rate: rate of descriptor sampling (vmax always corresponds to 1x).
        min_segments: lower bound applied to the selected number of change
            points.
        **kwargs: forwarded to ``cpd_nonlin`` (e.g. ``lmin`` - minimum
            segment length, ``lmax`` - maximum segment length).

    Note:
        - cps are always calculated in subsampled coordinates irrespective
          of desc_rate
        - lmin and ncp should be in agreement

    Returns:
        ``(cps, scores)``: the best selected change points and the objective
        scores returned by the final ``cpd_nonlin`` run.

    Memory requirement: ~ (3*N*N + N*ncp)*4 bytes ~= 16 * N^2 bytes.
    That is 1.6 GB for N=10000.
    """
    m = int(ncp)
    (_, scores) = cpd_nonlin(K, m, backtrack=False, **kwargs)

    N = K.shape[0]
    N2 = N * desc_rate  # length of the video before subsampling

    penalties = np.zeros(m + 1)
    # Start at 1 to prevent division by zero (in case of 0 changes).
    counts = np.arange(1, m + 1)
    penalties[1:] = (vmax * counts / (2.0 * N2)) * (np.log(float(N2) / counts) + 1)

    costs = scores / float(N) + penalties
    m_best = int(np.argmin(costs))
    m_best = max(min_segments, m_best)
    # cpd_nonlin requires N >= (m + 1) * lmin, so the largest feasible number
    # of change points is N // lmin - 1. Clamping to N (as before) let short
    # inputs trip that assertion once min_segments was applied.
    lmin = kwargs.get("lmin", 1)
    m_best = min(m_best, N // lmin - 1)
    (cps, scores2) = cpd_nonlin(K, m_best, **kwargs)
    return cps, scores2
|
||||
|
|
@ -1,3 +1,69 @@
|
|||
from django.test import TestCase
|
||||
|
||||
# Create your tests here.
|
||||
import numpy as np
|
||||
from keyframes.keyframes import KeyFramesExtractor
|
||||
from api.models import Video
|
||||
from django.core.files import File
|
||||
import shutil
|
||||
from utils import jj
|
||||
from django.conf import settings
|
||||
from keyframes.utils import batch
|
||||
|
||||
VIDEO_PATH = "tmp/f1_short.mp4"
|
||||
VIDEO_N_FRAMES = 47
|
||||
|
||||
|
||||
class KeyframesTestCase(TestCase):
    """Step-by-step test of the keyframe extraction pipeline on a short video."""

    def setUp(self):
        # Set before anything can fail so tearDown never hits AttributeError.
        self.all_frames_tmp_dir = None
        video_file = open(VIDEO_PATH, 'rb')
        # Close the handle when the test finishes, whatever the outcome.
        self.addCleanup(video_file.close)
        self.video = Video.objects.create(file=File(video_file))

    def tearDown(self):
        if self.all_frames_tmp_dir is not None:
            shutil.rmtree(jj(f"{settings.TMP_DIR}", f"{self.all_frames_tmp_dir}"))

    def test_keyframes(self):
        """Keyframes are extracted correctly"""

        frames_paths, all_frames_tmp_dir = KeyFramesExtractor._get_all_frames(self.video)
        # Record the directory first so it is cleaned up even if an
        # assertion below fails.
        self.all_frames_tmp_dir = all_frames_tmp_dir
        self.assertIsInstance(frames_paths[0], str)
        self.assertEqual(len(frames_paths), VIDEO_N_FRAMES)

        frames = KeyFramesExtractor._get_frames(frames_paths)
        self.assertEqual(len(frames), VIDEO_N_FRAMES)
        self.assertIsInstance(frames[0], np.ndarray)

        features = KeyFramesExtractor._get_features(frames, False)
        self.assertIsInstance(features, np.ndarray)
        self.assertEqual(features.shape, (VIDEO_N_FRAMES, 1024))

        change_points, frames_per_segment = KeyFramesExtractor._get_segments(features)
        self.assertIsInstance(change_points, list)
        self.assertIsInstance(frames_per_segment, list)

        for cp in frames_per_segment:
            with self.subTest(cp=cp):
                self.assertIsInstance(cp, int)

        probs = KeyFramesExtractor._get_probs(features, False)
        self.assertIsInstance(probs, np.ndarray)
        self.assertEqual(probs.shape, (VIDEO_N_FRAMES, ))

        chosen_frames = KeyFramesExtractor._get_chosen_frames(frames, probs, change_points, frames_per_segment)
        self.assertIsInstance(chosen_frames, list)
        # assertEqual reports both values on failure, unlike assertTrue.
        self.assertEqual(len(chosen_frames), 10)
|
||||
|
||||
|
||||
class UtilsTestCase(TestCase):
    """Checks for the ``batch`` helper."""

    def test_batch(self):
        """batch() yields (count, chunk) pairs, the last chunk being shorter"""
        expected = [(2, [1, 1]), (2, [2, 2]), (2, [3, 3]), (1, [4])]
        self.assertEqual(list(batch([1, 1, 2, 2, 3, 3, 4], 2)), expected)

    def test_empty_batch(self):
        """batch() yields nothing for an empty list"""
        self.assertEqual(list(batch([], 2)), [])
|
||||
|
|
|
|||
6
keyframes/utils.py
Normal file
6
keyframes/utils.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
def batch(iterable, n=1):
    """Yield consecutive chunks of at most ``n`` items from a sequence.

    Args:
        iterable: a sized, sliceable sequence (it must support ``len()``
            and slicing, e.g. a list, tuple or string).
        n: maximum chunk size; must be a positive integer.

    Yields:
        ``(count, chunk)`` tuples where ``chunk`` is the slice and ``count``
        its length; only the final chunk may be shorter than ``n``.

    Raises:
        ValueError: ``n`` is smaller than 1 (raised when iteration starts).
    """
    if n < 1:
        # Without this check n == 0 surfaces as an obscure range() error and
        # a negative n silently yields nothing.
        raise ValueError(f"batch size must be a positive integer, got {n!r}")
    length = len(iterable)
    for start in range(0, length, n):
        chunk = iterable[start:start + n]  # slicing clamps at the end
        yield len(chunk), chunk
|
||||
21
keyframes_rl/models.py
Normal file
21
keyframes_rl/models.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
__all__ = ['DSN']
|
||||
|
||||
|
||||
class DSN(nn.Module):
    """Deep Summarization Network.

    A bidirectional recurrent layer followed by a linear layer and a
    sigmoid, producing one score in [0, 1] per time step.

    Args:
        in_dim: size of each input feature vector.
        hid_dim: hidden size of the recurrent layer (per direction).
        num_layers: number of stacked recurrent layers.
        cell: recurrent cell type, either ``'lstm'`` or ``'gru'``.
    """

    def __init__(self, in_dim=1024, hid_dim=256, num_layers=1, cell='lstm'):
        super(DSN, self).__init__()
        assert cell in ['lstm', 'gru'], "cell must be either 'lstm' or 'gru'"
        rnn_class = nn.LSTM if cell == 'lstm' else nn.GRU
        self.rnn = rnn_class(in_dim, hid_dim, num_layers=num_layers, bidirectional=True, batch_first=True)
        # Bidirectional: forward and backward hidden states are concatenated.
        self.fc = nn.Linear(hid_dim*2, 1)

    def forward(self, x):
        """Return per-step scores of shape ``(batch, seq_len, 1)`` for ``x`` of shape ``(batch, seq_len, in_dim)``."""
        h, _ = self.rnn(x)
        # Tensor.sigmoid() replaces the deprecated torch.nn.functional.sigmoid.
        return self.fc(h).sigmoid()
|
||||
BIN
keyframes_rl/pretrained_model/model_epoch60.pth.tar
Normal file
BIN
keyframes_rl/pretrained_model/model_epoch60.pth.tar
Normal file
Binary file not shown.
|
|
@ -10,3 +10,4 @@ pytz==2018.5
|
|||
six==1.11.0
|
||||
torch==0.4.1
|
||||
torchvision==0.2.1
|
||||
scikit-learn==0.19.2
|
||||
|
|
@ -140,3 +140,5 @@ MAX_FILE_SIZE = 50000000
|
|||
NUMBERS_OF_FRAMES_TO_SHOW = 10
|
||||
TMP_DIR = 'tmp/'
|
||||
GPU = True
|
||||
|
||||
FEATURE_BATCH_SIZE = 32
|
||||
|
|
|
|||
BIN
tmp/f1_short.mp4
Normal file
BIN
tmp/f1_short.mp4
Normal file
Binary file not shown.
Loading…
Reference in a new issue