#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2018 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import json
import os
import re
import sys
import time
import urllib
import urllib.error
import urllib.request

# Must be set before importing the local "task" module so that importing
# it does not write bytecode files.
sys.dont_write_bytecode = True

import task

NUMBER_OPEN_ISSUES = 7  # How many issues do we want to have open at a given time?

# How far back does our data go? If a flake gets fixed but is still
# flaky after this many days, the bots open another issue.
WINDOW_DAYS = 21


# This parses the output JSONL format discussed here, where various
# values are grouped:
#
# https://github.com/cockpit-project/cockpituous/blob/master/learn/README.md

def value(record, field):
    """Return the value of ``field`` if the record holds exactly one.

    Each field of a record is a list of ``(value, count)`` pairs. When the
    list has exactly one pair, its value is returned (a falsy value such as
    ``None`` becomes ``""``). When the field is missing or holds zero or
    several pairs, ``None`` is returned.
    """
    values = record.get(field, [])
    if len(values) == 1:
        return values[0][0] or ""
    return None


def count(record, field, only):
    """Return the count stored for the value ``only`` in ``field``.

    Scans the ``(value, count)`` pairs of ``field`` and returns the count of
    the first pair whose value equals ``only``; returns 0 when there is no
    such pair or the field is missing.
    """
    for candidate, occurrences in record.get(field, []):
        if candidate == only:
            return occurrences
    return 0


# For linking flakes to test logs

def slurp_one(url, n, logs):
    """Load ``<url><n>/items.jsonl`` and add its log URLs to ``logs``.

    Every JSON line contributes ``record["url"]`` to the list stored under
    ``record["test"]`` in ``logs`` (mutated in place). Lines that fail to
    decode or parse are reported on stderr and skipped.

    Returns ``False`` when the server answers 404 for the file, ``True``
    once the file has been read. Any other HTTP or connection error
    propagates to the caller.
    """
    items_url = url + str(n) + "/items.jsonl"
    try:
        with urllib.request.urlopen(items_url) as f:
            for line in f:
                try:
                    record = json.loads(line.decode('utf-8'))
                    logs.setdefault(record["test"], []).append(record["url"])
                except ValueError as ex:
                    sys.stderr.write("{0}: {1}\n".format(url, ex))
    # Only HTTPError carries a status code; a plain URLError (for example a
    # refused connection) has no "code" attribute and must simply propagate.
    except urllib.error.HTTPError as ex:
        if ex.code == 404:
            return False
        raise
    return True


def slurp_failure_logs(url):
    """Collect log URLs from ``<url>0/``, ``<url>1/``, ... until one is missing.

    Returns a dict mapping test name to the list of log URLs gathered by
    slurp_one().
    """
    logs = {}
    n = 0
    while slurp_one(url, n, logs):
        n += 1
    return logs


def get_failure_logs(failure_logs, name, context):
    """Return up to ten log URLs of test ``name`` that belong to ``context``.

    A URL belongs to the context when it contains the context name with
    every "/" replaced by "-". The result is sorted in descending order.
    A test without any recorded logs yields an empty list, and a missing
    (``None``) context does not filter anything out.
    """
    match = (context or "").replace("/", "-")
    matching = [url for url in failure_logs.get(name, []) if match in url]
    return sorted(matching, reverse=True)[0:10]


def _find_in_issues(issues, name):
    """Tell whether any issue in ``issues`` mentions ``name`` in its title."""
    return any(name in issue["title"] for issue in issues)


def _url_desc(url):
    """Return the "pull-<number>" part of ``url``, or the whole URL if absent."""
    m = re.search(r"pull-[0-9]+", url)
    return m.group(0) if m else url


def _failure_description(name, failure, logs):
    """Format one ``(ratio, context, record)`` failure as issue body text.

    The first line shows the ratio as a percentage and the context; it is
    followed by one list entry per log URL from get_failure_logs().
    """
    header = "%s%% on %s\n" % (int(failure[0] * 100), failure[1])
    links = "".join(" - [%s](%s)\n" % (_url_desc(url), url)
                    for url in get_failure_logs(logs, name, failure[1]))
    return header + links


# Main

def run(context, verbose=False, **kwargs):
    """Open "flake" issues for the flakiest tests that are not tracked yet.

    Issues are only created while fewer than NUMBER_OPEN_ISSUES issues
    labelled "flake" are open. Tests are ranked by how often their failures
    were merged anyway; a test is skipped when an open issue, or an issue
    closed within the last WINDOW_DAYS days, already names it in its title.

    ``context`` and ``kwargs`` are accepted but not used by this function.
    ``verbose`` enables progress messages on stderr. Always returns 0.
    """
    api = task.github.GitHub()

    open_issues = api.issues(labels=["flake"])
    create_count = NUMBER_OPEN_ISSUES - len(open_issues)
    if create_count <= 0:
        return 0

    if verbose:
        sys.stderr.write("Going to create %s new flake issue(s)\n" % create_count)

    host = os.environ.get("COCKPIT_LEARN_SERVICE_HOST", "learn-cockpit.apps.ci.centos.org")
    port = os.environ.get("COCKPIT_LEARN_SERVICE_PORT", "443")
    url = "{0}://{1}:{2}/active/".format("https" if port == "443" else "http", host, port)

    failure_logs = slurp_failure_logs(url)

    # Retrieve the statistics, one JSON record per line
    statistics = []
    with urllib.request.urlopen(url + "statistics.jsonl") as f:
        for line in f:
            try:
                statistics.append(json.loads(line.decode('utf-8')))
            except ValueError as ex:
                sys.stderr.write("{0}: {1}\n".format(url, ex))

    tests = {}
    for record in statistics:
        test = value(record, "test")
        test_context = value(record, "context")
        status = value(record, "status")
        tracker = value(record, "tracker")

        # Flaky tests only score on those that fail and are not tracked
        if test is None or status != "failure" or tracker:
            continue

        merged = count(record, "merged", True)
        not_merged = count(record, "merged", False)
        null_merged = count(record, "merged", None)
        total = merged + not_merged + null_merged

        # And the key is that they were merged anyway
        if total > 10:
            tests.setdefault(test, []).append((merged / total, test_context, record))

    # Score of a test: mean merged ratio over all of its failure records
    scores = [
        (sum(failure[0] for failure in failures) / len(failures), name, failures)
        for name, failures in tests.items()
    ]

    closed_issues = api.issues(labels=["flake"], state="closed",
                               since=(time.time() - (WINDOW_DAYS * 86400)))

    # Highest score first; test names are unique, so ties never compare
    # the failure lists themselves.
    scores.sort(reverse=True)
    for score, name, failures in scores:
        if _find_in_issues(open_issues, name) or _find_in_issues(closed_issues, name):
            continue

        if verbose:
            sys.stderr.write("Opening issue for %s\n" % name)

        # NOTE(review): this literal was split over several physical lines in
        # the file, which is not valid inside a "..." string; the line breaks
        # are written as \n here. Presumably wrapping markup around
        # "Source material" was lost at those points -- confirm against the
        # upstream version.
        source = "\nSource material\n\n```json\n%s\n```\n\n\n" % "\n".join(
            json.dumps(failure[2], indent=2) for failure in failures)
        data = {
            "title": "%s is flaky" % name,
            "body": ("\n".join(_failure_description(name, failure, failure_logs)
                               for failure in failures)
                     + "\n\n" + source),
            "labels": ["flake"]
        }
        api.post("issues", data)

        create_count -= 1
        if create_count == 0:
            break

    return 0


if __name__ == '__main__':
    task.main(function=run, title="Create issues for test flakes")