From 1b4486cbe62ffd982f58f2295ee1bcb21baeba74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20P=C4=99=C5=9Bko?= <mpesko3031@gmail.com>
Date: Mon, 1 Oct 2018 22:34:57 +0200
Subject: [PATCH] Add downloading videos from YouTube (#7)

- Add downloading videos from YouTube
- Minor cleanup
---
 README.md              |  3 ++-
 api/models.py          | 38 +++++++++++++++++++++++++++++++++-----
 api/serializers.py     |  4 ++++
 api/urls.py            |  8 ++++----
 api/views.py           | 38 +++++++++++++++++++++++++++++---------
 keyframes/keyframes.py | 17 +++++++++--------
 nginx.conf             |  4 ++++
 requirements.txt       |  4 +++-
 8 files changed, 88 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 9eb537d..a57dd7c 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,8 @@
 
 #TODO:
 - [ ] Prepare readme
-- [ ] Fix model loading to memory
+- [x] Download videos directly from YouTube
+- [x] Fix model loading to memory
 - [ ] Replace ffmpeg with some keyframes extractor
 - [ ] Prepare own ComixGAN
 - [ ] Prepare some more intelligent method for comic layout
diff --git a/api/models.py b/api/models.py
index b39b176..bf43720 100644
--- a/api/models.py
+++ b/api/models.py
@@ -2,15 +2,43 @@ import os
 import uuid
 
 import cv2
+import pafy
 from django.conf import settings
 from django.core.files import File
 from django.db import models
 
+from api.exceptions import TooLargeFile
+from comic_layout.comic_layout import LayoutGenerator
+from keyframes.keyframes import KeyFramesExtractor
+from style_transfer.style_transfer import StyleTransfer
+from utils import jj
+
 
 class Video(models.Model):
     file = models.FileField(blank=False, null=False, upload_to="raw_videos")
     timestamp = models.DateTimeField(auto_now_add=True)
 
+    def download_from_youtube(self, yt_url):
+        yt_pafy = pafy.new(yt_url)
+
+        # Use the first listed stream with file size < MAX_FILE_SIZE and height <= 480px (NOTE(review): "biggest" only if pafy orders streams best-first - confirm)
+        for stream in yt_pafy.videostreams:
+            if stream.get_filesize() < settings.MAX_FILE_SIZE and int(stream.quality.split("x")[1]) <= 480:
+                tmp_name = uuid.uuid4().hex + ".mp4"
+                relative_path = jj('raw_videos', tmp_name)
+                full_path = jj(settings.MEDIA_ROOT, relative_path)
+                stream.download(full_path)
+                self.file.name = relative_path
+                break
+        else:
+            raise TooLargeFile()
+
+    def create_comic(self):
+        keyframes = KeyFramesExtractor.get_keyframes(video=self)
+        stylized_keyframes = StyleTransfer.get_stylized_frames(frames=keyframes)
+        comic_image = LayoutGenerator.get_layout(frames=stylized_keyframes)
+        return comic_image
+
 
 class Comic(models.Model):
     file = models.FileField(blank=False, null=False, upload_to="comic")
@@ -20,10 +48,10 @@ class Comic(models.Model):
     def create_from_nparray(cls, nparray_file, video):
         if nparray_file.max() <= 1:
             nparray_file = (nparray_file * 255).astype(int)
-        tmp_name = uuid.uuid4()
-        cv2.imwrite(f"{settings.TMP_DIR}{tmp_name}.png", nparray_file)
-        with open(f"{settings.TMP_DIR}{tmp_name}.png", mode="rb") as tmp_file:
-            comic_image = File(tmp_file, name=f"{tmp_name}.png")
+        tmp_name = uuid.uuid4().hex + ".png"
+        cv2.imwrite(jj(settings.TMP_DIR, tmp_name), nparray_file)
+        with open(jj(settings.TMP_DIR, tmp_name), mode="rb") as tmp_file:
+            comic_image = File(tmp_file, name=tmp_name)
             comic = Comic.objects.create(file=comic_image, video=video)
-        os.remove(f"{settings.TMP_DIR}{tmp_name}.png")
+        os.remove(jj(settings.TMP_DIR, tmp_name))
         return comic
diff --git a/api/serializers.py b/api/serializers.py
index 2bebcef..ac2ef99 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -17,3 +17,7 @@ class VideoSerializer(serializers.ModelSerializer):
         if file.size > settings.MAX_FILE_SIZE:
             raise TooLargeFile
         return attrs
+
+
+class YouTubeDownloadSerializer(serializers.Serializer):
+    url = serializers.URLField()
diff --git a/api/urls.py b/api/urls.py
index c3bb986..826a8ff 100644
--- a/api/urls.py
+++ b/api/urls.py
@@ -1,8 +1,8 @@
-from django.conf.urls import url
-
-from .views import Comixify
+from django.urls import path
 
+from .views import Comixify, ComixifyFromYoutube
 
 urlpatterns = [
-    url(r'^$', Comixify.as_view(), name='annotate'),
+    path(r'', Comixify.as_view(), name='comixify'),
+    path(r'from_yt/', ComixifyFromYoutube.as_view(), name='comixify_from_yt'),
 ]
diff --git a/api/views.py b/api/views.py
index 9c1247d..df9f051 100644
--- a/api/views.py
+++ b/api/views.py
@@ -2,11 +2,8 @@ from rest_framework.parsers import FormParser, MultiPartParser
 from rest_framework.response import Response
 from rest_framework.views import APIView
 
-from comic_layout.comic_layout import LayoutGenerator
-from keyframes.keyframes import KeyFramesExtractor
-from style_transfer.style_transfer import StyleTransfer
 from .models import Video, Comic
-from .serializers import VideoSerializer
+from .serializers import VideoSerializer, YouTubeDownloadSerializer
 
 
 class Comixify(APIView):
@@ -22,12 +19,35 @@ class Comixify(APIView):
 
         video_file = serializer.validated_data["file"]
         video = Video.objects.create(file=video_file)
-
-        keyframes = KeyFramesExtractor.get_keyframes(video=video)
-        stylized_keyframes = StyleTransfer.get_stylized_frames(frames=keyframes)
-        comic_image = LayoutGenerator.get_layout(frames=stylized_keyframes)
-
+        comic_image = video.create_comic()
         comic = Comic.create_from_nparray(comic_image, video)
+
+        response = {
+            "status_message": "ok",
+            "comic": comic.file.url,
+        }
+        # Remove to spare storage
+        video.file.delete()
+        return Response(response)
+
+
+class ComixifyFromYoutube(APIView):
+
+    def post(self, request):
+        """
+        Receives a YouTube video URL, and returns the URL of the generated comic image
+        """
+
+        serializer = YouTubeDownloadSerializer(data=request.data)
+        serializer.is_valid(raise_exception=True)
+        yt_url = serializer.validated_data["url"]
+
+        video = Video()
+        video.download_from_youtube(yt_url)
+        video.save()
+        comic_image = video.create_comic()
+        comic = Comic.create_from_nparray(comic_image, video)
+
         response = {
             "status_message": "ok",
             "comic": comic.file.url,
diff --git a/keyframes/keyframes.py b/keyframes/keyframes.py
index c874715..dc170e3 100644
--- a/keyframes/keyframes.py
+++ b/keyframes/keyframes.py
@@ -1,11 +1,12 @@
 import os
 import uuid
+
 import numpy as np
 import torch
 import torch.nn as nn
-os.environ['GLOG_minloglevel'] = '2' # Prevent caffe shell loging
+
+os.environ['GLOG_minloglevel'] = '2'  # Prevent caffe shell logging
 import caffe
-from datetime import datetime
 from subprocess import call
 from math import ceil
 from sklearn.preprocessing import normalize
@@ -35,12 +36,12 @@ class KeyFramesExtractor:
 
     @staticmethod
     def _get_all_frames(video):
-        all_frames_tmp_dir = uuid.uuid4()
-        os.mkdir(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}"))
-        call(["ffmpeg", "-i", f"{video.file.path}", "-vf", "select=not(mod(n\\,15))", "-vsync", "vfr", "-q:v", "2",
-            jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}", "%06d.jpeg")])
+        all_frames_tmp_dir = uuid.uuid4().hex
+        os.mkdir(jj(settings.TMP_DIR, all_frames_tmp_dir))
+        call(["ffmpeg", "-i", video.file.path, "-vf", "select=not(mod(n\\,15))", "-vsync", "vfr", "-q:v", "2",
+              jj(settings.TMP_DIR, all_frames_tmp_dir, "%06d.jpeg")])
         frames_paths = []
-        for dirname, dirnames, filenames in os.walk(jj(f"{settings.TMP_DIR}", f"{all_frames_tmp_dir}")):
+        for dirname, dirnames, filenames in os.walk(jj(settings.TMP_DIR, all_frames_tmp_dir)):
             for filename in filenames:
                 frames_paths.append(jj(dirname, filename))
         return sorted(frames_paths), all_frames_tmp_dir
@@ -89,7 +90,7 @@ class KeyFramesExtractor:
             features[idx_batch * batch_size:idx_batch * batch_size + n_batch] = temp
         normalize(features, copy=False)
         return features.astype(np.float32)
-        
+
     @staticmethod
     def _get_probs(features, gpu=True):
         model_cache_key = "keyframes_rl_model_cache"
diff --git a/nginx.conf b/nginx.conf
index e380fb1..afb9a1f 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -22,6 +22,10 @@ server {
         proxy_set_header Host $host;
         proxy_redirect off;
         proxy_pass http://hello_server;
+        proxy_connect_timeout 300;
+        proxy_send_timeout 300;
+        proxy_read_timeout 300;
+        send_timeout 300;
     }
     location ^~ /.well-known {
         allow all;
diff --git a/requirements.txt b/requirements.txt
index 085cdff..2eeef33 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,10 +4,12 @@ djangorestframework==3.8.2
 gunicorn==19.9.0
 numpy==1.14.5
 opencv-python==3.4.2.17
+pafy==0.5.4
 Pillow==5.2.0
 psycopg2==2.7.5
 pytz==2018.5
 six==1.11.0
 torch==0.4.1
 torchvision==0.2.1
-scikit-learn==0.19.2
\ No newline at end of file
+scikit-learn==0.19.2
+youtube-dl==2018.9.18