Zeus-Scanner/lib/core/common.py

import os
import re
import json
import time
import shlex
import subprocess
try:
    from urllib import (  # python 2
        unquote
    )
except ImportError:
    from urllib.parse import (  # python 3
        unquote
    )

import requests
from lxml import etree

import lib.core.settings

# reference https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
STATUS_CODES = {
    100: "continue", 101: "switching protocols", 102: "processing",
    200: "OK", 201: "created", 202: "accepted", 203: "non-authoritative information",
    204: "no content", 205: "reset content", 206: "partial content",
    207: "multi-status", 208: "already reported", 226: "IM used",
    300: "multiple choices", 301: "moved permanently",  302: "not found",
    303: "see other", 304: "not modified", 305: "use proxy",
    306: "switch proxy", 308: "permanent redirect",
    400: "bad request", 401: "unauthorized", 402: "payment required",
    403: "forbidden", 404: "not found", 405: "method not allowed",
    406: "not acceptable", 407: "proxy authentication required", 408: "request timed out",
    409: "conflict", 410: "gone", 411: "length required", 412: "precondition failed",
    413: "payload to large", 414: "URI too long", 415: "unsupported media type",
    416: "range not satisfiable", 417: "expectation failed", 418: "im a teapot {EASTER EGG!}",
    421: "misdirected request", 422: "unprocesseable entity", 423: "locked",
    424: "failed dependency", 426: "upgrade requried", 428: "precondition required",
    429: "to many requests", 431: "request headers field too large",
    451: "unavailable for legal reasons",
    500: "internal server error", 501: "not implemented", 502: "bad gateway",
    503: "service unavailable", 504: "gateway timeout", 505: "HTTP version not supported",
    506: "variant also negotiable", 507: "insufficient storage", 508: "loop detected",
    510: "not extended", 511: "network authentication required", "other": "unexpected error code"
}


class HTTP_HEADER:
    ACCEPT = "Accept"
    ACCEPT_CHARSET = "Accept-Charset"
    ACCEPT_ENCODING = "Accept-Encoding"
    ACCEPT_LANGUAGE = "Accept-Language"
    AUTHORIZATION = "Authorization"
    CACHE_CONTROL = "Cache-Control"
    CONNECTION = "Connection"
    CONTENT_ENCODING = "Content-Encoding"
    CONTENT_LENGTH = "Content-Length"
    CONTENT_RANGE = "Content-Range"
    CONTENT_TYPE = "Content-Type"
    COOKIE = "Cookie"
    EXPIRES = "Expires"
    HOST = "Host"
    IF_MODIFIED_SINCE = "If-Modified-Since"
    LAST_MODIFIED = "Last-Modified"
    LOCATION = "Location"
    PRAGMA = "Pragma"
    PROXY_AUTHORIZATION = "Proxy-Authorization"
    PROXY_CONNECTION = "Proxy-Connection"
    RANGE = "Range"
    REFERER = "Referer"
    REFRESH = "Refresh"  # Reference: http://stackoverflow.com/a/283794
    SERVER = "Server"
    SET_COOKIE = "Set-Cookie"
    TRANSFER_ENCODING = "Transfer-Encoding"
    URI = "URI"
    USER_AGENT = "User-Agent"
    VIA = "Via"
    X_CACHE = "X-Cache"
    X_POWERED_BY = "X-Powered-By"
    X_DATA_ORIGIN = "X-Data-Origin"
    X_FRAME_OPT = "X-Frame-Options"
    X_FORWARDED_FOR = "X-Forwarded-For"


class URLParser(object):

    def __init__(self, url):
        self.url = url
        self.url_match_regex = re.compile(r"((https?):((//)|(\\\\))+([\w\d:#@%/;$()~_?\+-=\\\.&](#!)?)*)")
        self.webcache_regex = re.compile(r"cache:(.{,16})?:")
        self.possible_leftovers = ("<", ">", ";", ",")
        self.webcache_schema = "webcache"
        self.constant_ip_ban_splitter = "continue="
        self.content_ip_ban_seperator = ("Fid", "&gs_")

    def extract_webcache_url(self, splitter="+"):
        """
        extract the URL from Google's webcache URL
        """
        url = self.url
        data = self.webcache_regex.split(url)
        to_extract = data[2].split(splitter)
        extracted = to_extract[0]
        if self.url_match_regex.match(extracted):
            return extracted
        return None

    def extract_ip_ban_url(self):
        """
        extract the true URL from Google's IP ban URL
        """
        url = unquote(self.url)
        to_use_separator = None
        retval_url = None
        url_data_list = url.split(self.constant_ip_ban_splitter)
        for item in url_data_list:
            for sep in list(self.content_ip_ban_seperator):
                if sep in item:
                    to_use_separator = sep
            retval_url = item.split(to_use_separator)
        return unquote(retval_url[0])

    def strip_url_leftovers(self):
        """
        strip any leftovers that come up with the URL every now and then
        """
        url = self.url
        for possible in self.possible_leftovers:
            if possible in url:
                url = url.split(possible)[0]
        return url


def write_to_log_file(data_to_write, path, filename, blacklist=False):
    """
    write all found data to a log file
    """
    lib.core.settings.create_dir(path.format(os.getcwd()))
    full_file_path = "{}/{}".format(
        path.format(os.getcwd()), filename.format(len(os.listdir(path.format(
            os.getcwd()
        ))) + 1)
    )
    skip_log_schema = (
        "url-log", "blackwidow-log", "zeus-log",
        "extracted", ".blacklist", "sqli-sites"
    )
    to_search = filename.split("-")[0]
    amount = len([f for f in os.listdir(path) if to_search in f])
    new_filename = "{}({}).{}".format(
                    filename.split("-")[0], amount, filename.split(".")[-1]
                )
    with open(full_file_path, "a+") as log:
        data = re.sub(r'\s+', '', log.read())
        if re.match(r'^<.+>$', data):  # matches HTML and XML
            try:
                log.write(etree.tostring(data_to_write, pretty_print=True))
            except TypeError:
                return write_to_log_file(data_to_write, path, new_filename)
        elif amount > 0 and not any(_ in filename for _ in list(skip_log_schema)):
            return write_to_log_file(data_to_write, path, new_filename)
        elif blacklist:
            items = log.readlines()
            if any(d.strip() == data_to_write for d in items):
                lib.core.settings.logger.info(lib.core.settings.set_color(
                    "query already in blacklist..."
                ))
                return full_file_path
            else:
                log.write(data_to_write + "\n")
        else:
            if isinstance(data_to_write, list):
                for item in data_to_write:
                    item = item.strip()
                    log.write(str(item) + "\n")
            elif isinstance(data_to_write, (tuple, set)):
                for item in list(data_to_write):
                    item = item.strip()
                    log.write(str(item) + "\n")
            elif isinstance(data_to_write, dict):
                json.dump(data_to_write, log, sort_keys=True, indent=4)
            else:
                log.write(data_to_write + "\n")
    lib.core.settings.logger.info(lib.core.settings.set_color(
        "successfully wrote found items to '{}'...".format(full_file_path)
    ))
    return full_file_path


def start_up():
    """
    start the program and display the time it was started
    """
    print(
        "\n\n[*] starting up at {}..\n\n".format(time.strftime("%H:%M:%S"))
    )


def shutdown():
    """
    shut down the program and the time it stopped
    """
    print(
        "\n\n[*] shutting down at {}..\n\n".format(time.strftime("%H:%M:%S"))
    )
    exit(0)


def prompt(question, opts=None, default=None, paused=False):
    """
    ask a question
    """
    if opts is not None and default is None:
        options = '/'.join(opts)
        return raw_input(
            "[{} {}] {}[{}]: ".format(
                time.strftime("%H:%M:%S"),
                "PROMPT", question, options
            )
        )
    elif default is not None:
        if opts is not None:
            options = "/".join(opts)
            print(
                "[{} {}] {}[{}] {}".format(
                    time.strftime("%H:%M:%S"), "PROMPT",
                    question, options, default
                )
            )
            return default
        else:
            print(
                "[{} {}] {} {}".format(
                    time.strftime("%H:%M:%S"), "PROMPT",
                    question, default
                )
            )
            return default
    elif opts is None and default is None and paused:
        opts = "[(s)kip (e)xit]"
        question_ = raw_input(
            "[{} {}] {} {}: ".format(
                time.strftime("%H:%M:%S"), "PROMPT", question, opts
            )
        )
        if question_.lower().startswith("s"):
            return True
        return False
    else:
        return raw_input(
            "[{} {}] {} ".format(
                time.strftime("%H:%M:%S"), "PROMPT", question
            )
        )


def pause():
    """
    interactive pause function, as of now you are only able to skip and exit
    from this function
    """
    message = "program has been paused, how do you want to proceed?"
    return prompt(
        message, paused=True
    )


def run_fix(message, command, fail_message, exit_process=False):
    """
    run the fix script for the program
    """
    do_fix = prompt(
        message, opts="yN"
    )
    if do_fix.lower().startswith("y"):
        cmd = shlex.split(command)
        subprocess.call(cmd)
        if exit_process:
            lib.core.settings.logger.info(lib.core.settings.set_color(
                "command completed successfully, should be safe to re-run Zeus..."
            ))
    else:
        lib.core.settings.logger.fatal(lib.core.settings.set_color(
            fail_message, level=50
        ))


def get_page(url, **kwargs):
    agent = kwargs.get("agent", None)
    proxy = kwargs.get("proxy", None)
    xforward = kwargs.get("xforward", False)
    auth = kwargs.get("auth", None)
    skip_verf = kwargs.get("skip_verf", False)

    if agent is None:
        agent = lib.core.settings.DEFAULT_USER_AGENT

    if xforward:
        ip_list = (
            lib.core.settings.create_random_ip(),
            lib.core.settings.create_random_ip(),
            lib.core.settings.create_random_ip()
        )
        headers = {
            HTTP_HEADER.CONNECTION: "close",
            HTTP_HEADER.USER_AGENT: agent,
            HTTP_HEADER.X_FORWARDED_FOR: "{}, {}, {}".format(
                ip_list[0], ip_list[1], ip_list[2]
            )
        }
    elif auth:
        headers = {
            HTTP_HEADER.CONNECTION: "close",
            HTTP_HEADER.USER_AGENT: agent,
            HTTP_HEADER.AUTHORIZATION: "{}".format(
                auth
            )
        }
    else:
        headers = {
            HTTP_HEADER.CONNECTION: "close",
            HTTP_HEADER.USER_AGENT: agent
        }

    if proxy is not None:
        proxies = {
            "https": proxy,
            "http": proxy
        }
    else:
        proxies = {}

    req = requests.get(url, params=headers, proxies=proxies, verify=False if skip_verf else True, timeout=20)
    status = req.status_code
    html = req.content
    headers = req.headers
    return req, status, html, headers