#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2017 Slavek Kabrda
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

# The name and version of the training data. run() falls back to this
# value when it is called without a URL or file name.
TRAINING_DATA = "tests-train-1.jsonl.gz"

# The number of days in history to learn from. This is different from
# the amount of data we gather in tests-data, and can be adjusted
# independently.
# NOTE(review): SINCE is not referenced by any code visible in this file;
# confirm whether something else still consumes it.
SINCE = 21

import os
import socket
import ssl
import subprocess
import sys
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request

# Stop Python from writing .pyc files; this is set before the project-local
# modules below are imported so that it applies to them as well.
sys.dont_write_bytecode = True

import task

from machine import testvm

# Directory containing this script, with symlinks resolved. The helper
# programs "image-download" and "image-upload" and the CA bundle
# "images/files/ca.pem" are all looked up relative to it.
BOTS = os.path.dirname(os.path.realpath(__file__))

def run(url_or_file, verbose=False, dry=False, **kwargs):
    """Work out which training data to use and where to send it, then train.

    Args:
        url_or_file: Either a full ``http://`` / ``https://`` URL to store
            the data at, or a file name. A false value selects
            TRAINING_DATA.
        verbose: Passed through to train().
        dry: When true, the "image-download" step is skipped. train() is
            still called afterwards.
            NOTE(review): confirm that uploading during a dry run is
            intended.
        **kwargs: Accepted and ignored.

    Raises:
        subprocess.CalledProcessError: If "image-download" exits non-zero
            (train() runs a further subprocess of its own).
    """
    # Default set of training data, retrieve it and use from data directory
    if not url_or_file:
        url_or_file = TRAINING_DATA

    # A URL was provided directly, just use it. Match the full scheme prefix
    # so that a local file whose name merely begins with "http" is still
    # treated as a file.
    if url_or_file.startswith(("http://", "https://")):
        filename = os.path.basename(url_or_file)
        url = url_or_file

    else:
        # Build the store URL from the learn service location, which can be
        # overridden through the environment; port 443 selects https.
        host = os.environ.get("COCKPIT_LEARN_SERVICE_HOST", "learn-cockpit.apps.ci.centos.org")
        port = os.environ.get("COCKPIT_LEARN_SERVICE_PORT", "443")
        url = "{0}://{1}:{2}/train/{3}".format("https" if port == "443" else "http", host, port, os.path.basename(url_or_file))
        filename = url_or_file

    # A bare name that is not present in the current directory refers to a
    # file in the images data directory; download it there first.
    if "/" not in filename and not os.path.exists(filename):
        if not dry:
            subprocess.check_call([ os.path.join(BOTS, "image-download"), "--state", filename ])
        filename = os.path.join(testvm.get_images_data_dir(), filename)
    train(filename, url, verbose)

# Does 'tail -F' on an HTTP URL
def tail(url, until, verbose=False):
    """Follow the resource at *url* the way ``tail -F`` follows a file.

    Every ten seconds the URL is requested again with a ``Range`` header
    that starts at the number of bytes already received, so each poll only
    asks for data that has not been seen yet.

    Args:
        url: The HTTP(S) URL to poll.
        until: Zero-argument callable invoked after each poll. Once it
            returns a true value, one more poll is made and the loop ends.
        verbose: When true, the received bytes are copied to stderr.

    HTTP 404 and 416 responses are ignored silently. Other HTTP errors and
    connection-level failures are reported on stderr and polling continues.
    """
    # urlopen()'s cafile= argument was deprecated in Python 3.6 and removed
    # in 3.13; an explicit SSLContext built from the same CA file is the
    # supported equivalent.
    context = ssl.create_default_context(
        cafile=os.path.join(BOTS, "images", "files", "ca.pem"))
    stop = False
    at = 0

    while True:
        time.sleep(10)

        try:
            req = urllib.request.Request(url, headers={ "Range": "bytes={0}-".format(at) })
            with urllib.request.urlopen(req, context=context) as f:
                while True:
                    data = f.read(2048)
                    if not data:
                        break
                    at += len(data)
                    if verbose:
                        sys.stderr.buffer.write(data)
            if verbose:
                # The binary layer of stderr is not line-buffered, so push
                # the new output out now instead of letting it lag behind.
                sys.stderr.buffer.flush()
        except urllib.error.HTTPError as ex:
            # 404: nothing at the URL; 416: no bytes at or past offset "at"
            if ex.code != 404 and ex.code != 416:
                sys.stderr.write("{0}: {1}\n".format(url, ex))
        except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
            sys.stderr.write("{0}: {1}\n".format(url, ex))

        if stop:
            break

        # Note that we do one more loop after we stop, to make sure to get all of url
        stop = until()

def train(filename, url, verbose=False):
    """Upload *filename* to *url* and follow the log until the file is gone.

    Args:
        filename: Path handed to "image-upload" via ``--state``.
        url: Store URL handed to "image-upload" via ``--store``. It is also
            polled with HEAD requests to detect completion.
        verbose: When true, the target URL and the followed log output are
            written to stderr.

    Raises:
        subprocess.CalledProcessError: If "image-upload" exits non-zero.
    """
    if verbose:
        sys.stderr.write(" ^ {0}\n".format(url))

    cmd = [ os.path.join(BOTS, "image-upload"), "--state", filename, "--store", url ]

    # The child inherits this process's stdout; nothing is captured here.
    # NOTE(review): the original comment said that passing through a non
    # terminal stdout is necessary to make progress work - confirm that this
    # still applies.
    subprocess.check_call(cmd)

    # urlopen()'s cafile= argument was deprecated in Python 3.6 and removed
    # in 3.13; an explicit SSLContext built from the same CA file is the
    # supported equivalent.
    context = ssl.create_default_context(
        cafile=os.path.join(BOTS, "images", "files", "ca.pem"))

    # We run until the file disappears, which means training has taken place
    def until():
        """Return True once a HEAD request for *url* is answered with 404."""
        try:
            req = urllib.request.Request(url, method="HEAD")
            with urllib.request.urlopen(req, context=context) as f:
                f.read()
        except urllib.error.HTTPError as ex:
            if ex.code == 404:
                return True
            sys.stderr.write("{0}: {1}\n".format(url, ex))
        except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
            # Connection trouble is reported but does not count as "done"
            sys.stderr.write("{0}: {1}\n".format(url, ex))
        return False

    # Now tail the logs until above happens
    log = urllib.parse.urljoin(url, "../log")
    tail(log, until, verbose)

if __name__ == "__main__":
    # Script entry point: hand run() to the shared task driver.
    task.main(
        function=run,
        title="Learn from testing data",
        verbose=True,
    )