123 lines
4.3 KiB
Python
Executable file
123 lines
4.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
# This file is part of Cockpit.
#
# Copyright (C) 2017 Slavek Kabrda
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# The name and version of the training data. run() falls back to this
# file name when it is called without a URL or file.
TRAINING_DATA = "tests-train-1.jsonl.gz"

# The number of days in history to learn from. This is different from
# the amount of data we gather in tests-data, and can be adjusted
# independently.
# NOTE(review): SINCE is not referenced anywhere else in this file --
# confirm whether it is still needed.
SINCE = 21
|
|
|
|
import os
import socket
import ssl
import subprocess
import sys
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request
|
|
|
|
# Keep Python from writing .pyc files. This is set before the project-local
# imports below so that importing them does not leave bytecode behind.
sys.dont_write_bytecode = True

import task

from machine import testvm

# Directory containing this script (symlinks resolved). The helper programs
# and the CA certificate used further down are located relative to it.
BOTS = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
def run(url_or_file, verbose=False, dry=False, **kwargs):
    """Work out which data file to upload and where, then start training.

    url_or_file: Either a complete http:// or https:// URL to store the
        data at, or the name/path of a local data file. When empty,
        TRAINING_DATA is used.
    verbose: Forwarded to train().
    dry: When true, the "image-download" step for a missing file is skipped.
    kwargs: Accepted for compatibility with the caller and ignored.
    """
    # Default set of training data, retrieve it and use from data directory
    if not url_or_file:
        url_or_file = TRAINING_DATA

    # A URL was provided directly, just use it. Match the full scheme prefix
    # so that a local file whose name merely starts with "http" is not
    # mistaken for a URL.
    if url_or_file.startswith(("http://", "https://")):
        filename = os.path.basename(url_or_file)
        url = url_or_file

    else:
        # Build the upload URL from the learning service location, which can
        # be overridden through the environment.
        host = os.environ.get("COCKPIT_LEARN_SERVICE_HOST", "learn-cockpit.apps.ci.centos.org")
        port = os.environ.get("COCKPIT_LEARN_SERVICE_PORT", "443")
        scheme = "https" if port == "443" else "http"
        url = "{0}://{1}:{2}/train/{3}".format(scheme, host, port, os.path.basename(url_or_file))
        filename = url_or_file

    # A bare file name that does not exist in the current directory is looked
    # up in the images data directory instead; image-download is run for it
    # first (presumably to fetch it there -- see that script).
    if "/" not in filename and not os.path.exists(filename):
        if not dry:
            subprocess.check_call([os.path.join(BOTS, "image-download"), "--state", filename])
        filename = os.path.join(testvm.get_images_data_dir(), filename)

    # NOTE(review): dry only skips the download above; the upload and the
    # wait for training still happen -- confirm that this is intended.
    train(filename, url, verbose)
|
|
|
|
# Does 'tail -F' on an HTTP URL
|
|
def tail(url, until, verbose=False, interval=10):
    """Repeatedly fetch url, asking only for the bytes not yet seen.

    url: The HTTP(S) URL to follow.
    until: Callable taking no arguments. Once it returns a true value, one
        more fetch is performed and then the function returns.
    verbose: When true, the fetched bytes are copied to stderr.
    interval: Seconds to sleep before every fetch.

    HTTP 404 and 416 responses are ignored silently; other HTTP errors and
    connection problems are reported on stderr and polling continues.
    """
    # urlopen()'s cafile argument was deprecated in Python 3.6 and removed in
    # 3.13. An SSL context loading the same CA file is the replacement, and
    # it only needs to be built once for all requests.
    context = ssl.create_default_context(cafile=os.path.join(BOTS, "images", "files", "ca.pem"))

    stop = False
    at = 0

    while True:
        time.sleep(interval)

        try:
            # Only request what lies beyond the offset already consumed
            req = urllib.request.Request(url, headers={"Range": "bytes={0}-".format(at)})
            with urllib.request.urlopen(req, context=context) as f:
                for data in iter(lambda: f.read(2048), b""):
                    at += len(data)
                    if verbose:
                        sys.stderr.buffer.write(data)
                if verbose:
                    # The binary stderr layer is buffered; flush so that the
                    # output shows up while we are still polling
                    sys.stderr.buffer.flush()
        except urllib.error.HTTPError as ex:
            # 404: nothing at url; 416: no bytes available past offset 'at'
            if ex.code != 404 and ex.code != 416:
                sys.stderr.write("{0}: {1}\n".format(url, ex))
        except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
            sys.stderr.write("{0}: {1}\n".format(url, ex))

        if stop:
            break

        # Note that we do one more loop after we stop, to make sure to get all of url
        stop = until()
|
|
|
|
def train(filename, url, verbose=False):
    """Upload filename to url and follow the service log until url is gone.

    filename: Path of the data file handed to image-upload.
    url: Where image-upload stores the file. The same URL is polled
        afterwards; a 404 for it is taken to mean training has taken place.
    verbose: When true, print the target URL and copy the log to stderr.

    Raises subprocess.CalledProcessError when image-upload fails.
    """
    if verbose:
        sys.stderr.write(" ^ {0}\n".format(url))

    cmd = [os.path.join(BOTS, "image-upload"), "--state", filename, "--store", url]

    # Passing through a non terminal stdout is necessary to make progress work
    # (check_call leaves the child's stdout inherited from this process)
    subprocess.check_call(cmd)

    # urlopen()'s cafile argument was deprecated in Python 3.6 and removed in
    # 3.13; use an SSL context that loads the same CA file instead.
    context = ssl.create_default_context(cafile=os.path.join(BOTS, "images", "files", "ca.pem"))

    # We run until the file disappears, which means training has taken place
    def until():
        try:
            req = urllib.request.Request(url, method="HEAD")
            with urllib.request.urlopen(req, context=context) as f:
                f.read()
        except urllib.error.HTTPError as ex:
            if ex.code == 404:
                return True
            sys.stderr.write("{0}: {1}\n".format(url, ex))
        except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
            sys.stderr.write("{0}: {1}\n".format(url, ex))
        # Still present, or the check itself failed: keep waiting
        return False

    # Now tail the logs until above happens. The log URL is resolved relative
    # to url, e.g. https://host/train/data.gz -> https://host/log
    log = urllib.parse.urljoin(url, "../log")
    tail(log, until, verbose)
|
|
|
|
# Script entry point: hand run() to task.main() from the project's task
# module. Presumably that parses the command line and supplies run()'s
# arguments -- see task.main for the details.
if __name__ == '__main__':
    task.main(function=run, title="Learn from testing data", verbose=True)
|