#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2017 Slavek Kabrda
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

# The name and version of the training data
TRAINING_DATA = "tests-train-1.jsonl.gz"

# The number of days in history to learn from. This is different from
# the amount of data we gather in tests-data, and can be adjusted
# independently.
SINCE = 21

import os
import socket
import ssl
import subprocess
import sys
import time
# A bare "import urllib" does not load the submodules used below, so
# import each of them explicitly.
import urllib.error
import urllib.parse
import urllib.request

# Must be set before the project imports below so they leave no .pyc files
sys.dont_write_bytecode = True

import task
from machine import testvm

BOTS = os.path.dirname(os.path.realpath(__file__))


def _urlopen(req):
    """Open *req*, verifying HTTPS peers against the bundled CA file.

    urlopen() no longer accepts the ``cafile`` keyword (deprecated in
    Python 3.6, removed in 3.13), so an equivalent SSL context is built
    and passed via ``context`` instead.
    """
    cafile = os.path.join(BOTS, "images", "files", "ca.pem")
    context = ssl.create_default_context(cafile=cafile)
    return urllib.request.urlopen(req, context=context)


def run(url_or_file, verbose=False, dry=False, **kwargs):
    """Entry point handed to task.main(): upload training data and wait.

    url_or_file: an "http..." URL to store the data at, or the name/path of
        a training data file. When empty, TRAINING_DATA is used.
    verbose: passed through to train().
    dry: when true, the image-download step is skipped.
    kwargs: accepted and ignored.
    """
    # Default set of training data, retrieve it and use from data directory
    if not url_or_file:
        url_or_file = TRAINING_DATA

    # A URL was provided directly, just use it
    if url_or_file.startswith("http"):
        filename = os.path.basename(url_or_file)
        url = url_or_file

    # Otherwise build the URL of the learn service from the environment
    else:
        host = os.environ.get("COCKPIT_LEARN_SERVICE_HOST", "learn-cockpit.apps.ci.centos.org")
        port = os.environ.get("COCKPIT_LEARN_SERVICE_PORT", "443")
        scheme = "https" if port == "443" else "http"
        url = "{0}://{1}:{2}/train/{3}".format(scheme, host, port, os.path.basename(url_or_file))
        filename = url_or_file

    # A bare file name that does not exist locally is fetched via
    # image-download and then looked up in the images data directory
    if "/" not in filename and not os.path.exists(filename):
        if not dry:
            subprocess.check_call([os.path.join(BOTS, "image-download"), "--state", filename])
        filename = os.path.join(testvm.get_images_data_dir(), filename)

    # NOTE(review): with dry=True only the download above is skipped; the
    # upload in train() still happens -- confirm this is intended.
    train(filename, url, verbose)


# Does 'tail -F' on an HTTP URL
def tail(url, until, verbose=False):
    """Repeatedly fetch new bytes from *url* until *until()* returns true.

    url: the URL to poll; each request asks only for bytes not yet seen.
    until: callable without arguments; a true result ends the tailing.
    verbose: when true, fetched data is copied to stderr.
    """
    stop = False
    at = 0  # Number of bytes read from url so far

    while True:
        time.sleep(10)

        try:
            req = urllib.request.Request(url, headers={"Range": "bytes={0}-".format(at)})
            with _urlopen(req) as f:
                while True:
                    data = f.read(2048)
                    if not data:
                        break
                    at += len(data)
                    if verbose:
                        sys.stderr.buffer.write(data)
        except urllib.error.HTTPError as ex:
            # 404 and 416 are not reported; any other HTTP error is printed
            if ex.code != 404 and ex.code != 416:
                sys.stderr.write("{0}: {1}\n".format(url, ex))
        except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
            sys.stderr.write("{0}: {1}\n".format(url, ex))

        if stop:
            break

        # Note that we do one more loop after we stop, to make sure to get all of url
        stop = until()


def train(filename, url, verbose=False):
    """Upload *filename* to *url* and tail the log until *url* returns 404.

    filename: path passed to image-upload as --state.
    url: location passed to image-upload as --store, then polled with HEAD.
    verbose: when true, print the URL and copy the log to stderr.

    Raises subprocess.CalledProcessError if image-upload fails.
    """
    if verbose:
        sys.stderr.write(" ^ {0}\n".format(url))

    cmd = [os.path.join(BOTS, "image-upload"), "--state", filename, "--store", url]

    # Passing through a non terminal stdout is necessary to make progress work
    subprocess.check_call(cmd)

    # We run until the file disappears, which means training has taken place
    def until():
        """Return True once a HEAD request for url answers 404."""
        try:
            req = urllib.request.Request(url, method="HEAD")
            with _urlopen(req) as f:
                f.read()
        except urllib.error.HTTPError as ex:
            if ex.code == 404:
                return True
            sys.stderr.write("{0}: {1}\n".format(url, ex))
        except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
            sys.stderr.write("{0}: {1}\n".format(url, ex))
        return False

    # Now tail the logs until above happens
    log = urllib.parse.urljoin(url, "../log")
    tail(log, until, verbose)


if __name__ == '__main__':
    task.main(function=run, title="Learn from testing data", verbose=True)