#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2018 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import json
import os
import socket
import sys
import urllib
import urllib.error
import urllib.request

sys.dont_write_bytecode = True

import task

# This parses the output JSONL format discussed here, where various
# values are grouped:
#
# https://github.com/cockpit-project/cockpituous/blob/master/learn/README.md

# Here we're looking for a field in a record that only has one value
def value(record, field):
    values = record.get(field, [])
    if len(values) == 1:
        return values[0][0] or ""
    return None

# Here we're looking for the count of a specific field/value in the record
def count(record, field, only):
    values = record.get(field, [])
    for value, count in values:
        if value != only:
            continue
        return count
    return 0

def run(url, verbose=False, dry=False, **kwargs):
    if not url:
        host = os.environ.get("COCKPIT_LEARN_SERVICE_HOST", "learn-cockpit.apps.ci.centos.org")
        port = os.environ.get("COCKPIT_LEARN_SERVICE_PORT", "443")
        url = "{0}://{1}:{2}/active/statistics.jsonl".format("https" if port == "443" else "http", host, port)

    statistics = [ ]

    # Retrieve the URL
    try:
        req = urllib.request.Request(url)
        with urllib.request.urlopen(req) as f:
            for line in f.readlines():
                try:
                    record = json.loads(line.decode('utf-8'))
                    statistics.append(record)
                except ValueError as ex:
                    sys.stderr.write("{0}: {1}\n".format(url, ex))
    except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
        sys.stderr.write("{0}: {1}\n".format(url, ex))
        return False

    # Now go through and look for tests that have never flaked
    score = []
    tests = { }
    failure = set()

    for record in statistics:
        test = value(record, "test")
        context = value(record, "context")
        status = value(record, "status")
        tracker = value(record, "tracker")

        # There should only be one of all of these values
        if test is None or status is None:
            continue

        # Make note of all the tests we've seen
        contexts = tests.get(test)
        if not contexts:
            tests[test] = contexts = [ ]
        if context:
            contexts.append(context)

        # Flaky tests only score on those that fail and are not tracked
        if status == "failure" and not tracker:
            merged = count(record, "merged", True)
            not_merged = count(record, "merged", False)
            null_merged = count(record, "merged", None)
            total = merged + not_merged + null_merged

            # And the key is that they were merged anyway
            if total > 0:
                score.append((context, merged / total, test))
                failure.add(test)

    # Tests that never failed (wow) get a zero score
    for test, contexts in tests.items():
        if test not in failure:
            for context in contexts:
                score.append((context, 0, test))

    score.sort()

    for context, fraction, test in score:
        sys.stdout.write("{:>5} {:>20} {}\n".format(int(fraction * 100), context, test))

# When executed as a script, hand run() over to the task module's main()
if __name__ == '__main__':
    task.main(function=run, title="Get flakiness score for tests", verbose=True)