#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2017 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import gzip
import json
import os
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import urllib.error
import urllib.parse
import urllib.request
import zlib

import html.parser

sys.dont_write_bytecode = True

import task

from machine import testvm

# The number of days of previous closed pull requests to learn from
SINCE_DAYS = 120

BOTS = os.path.abspath(os.path.dirname(__file__))
SEEDED = set()
SINKS = { }
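
# SEEDED tracks revisions and URLs already covered by previously seeded data,
# so they are not retrieved again; SINKS caches the link listing of each log
# sink that has been scanned.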

def run(filename, verbose=False, dry=False, **kwargs):
    since = time.time() - 60 * 60 * 24 * SINCE_DAYS
    pulls = Pulls(since)

    # Seed with our input data
    if filename:
        if "/" not in filename and not os.path.exists(filename):
            if not dry:
                subprocess.check_call([ os.path.join(BOTS, "image-download"), "--state", filename ])
            filename = os.path.join(testvm.get_images_data_dir(), filename)
        (outfd, outname) = tempfile.mkstemp(prefix=os.path.basename(filename), dir=os.path.dirname(filename))
        os.close(outfd)
        output = gzip.open(outname, 'wb')
        if os.path.exists(filename):
            with gzip.open(filename, 'rb') as fp:
                seed(since, fp, pulls, output)
    else:
        output = sys.stdout.buffer
        outname = None

    def write(**kwargs):
        line = json.dumps(kwargs).encode('utf-8') + b"\n"
        output.write(line)

    # Iterate through all revisions and pull requests on this branch
    for (commit, merged, created, pull) in commits("master", pulls, since, verbose):
        logged = False
        if verbose:
            sys.stderr.write("- {0}\n".format(commit))
        for (context, created, url, log) in logs(commit):
            if verbose:
                sys.stderr.write(" - {0} {1}\n".format(created, context))
            for (status, name, body, tracker) in tap(log):
                write(pull=pull, revision=commit, status=status,
                      context=context, date=created, merged=merged,
                      test=name, url=url, tracker=tracker, log=body)
                logged = True

            # Nothing found for this log
            if not logged:
                write(pull=pull, revision=commit, status="unknown", date=created,
                      merged=merged, url=url, log=log)
                logged = True

        # Nothing found for this revision
        if not logged:
            write(pull=pull, revision=commit, status="unknown", date=created, merged=merged)
            logged = True

    sys.stdout.flush()
    if output:
        output.close()
    if outname:
        os.rename(outname, filename)

    if not dry and outname and filename:
        upload = [ os.path.join(BOTS, "image-upload"), "--state", filename ]
        subprocess.check_call(upload)
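
# For illustration only: each record that write() emits above is one JSON
# object per line. The field values here are made up:
#
#   {"pull": "https://api.github.com/repos/owner/repo/pulls/123",
#    "revision": "8d3c...", "status": "failure", "context": "verify/fedora-25",
#    "date": "2017-06-01T12:00:00Z", "merged": true, "test": "testBasic",
#    "url": "https://sink.example.com/logs/pull-123-8d3c/log", "tracker": null,
#    "log": "not ok 1 testBasic"}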

# An HTML parser that just pulls out all the <a href="...">
# link hrefs in a given page of content. We also qualify these
# hrefs with the base url, in case they're relative
class HrefParser(html.parser.HTMLParser):
    def __init__(self, base, hrefs):
        html.parser.HTMLParser.__init__(self)
        self.hrefs = hrefs
        self.base = base

    def handle_starttag(self, tag, attrs):
        if tag.lower() == "a":
            for (name, value) in attrs:
                if name.lower() == "href":
                    url = urllib.parse.urljoin(self.base, value)
                    self.hrefs.append(url)
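
# A minimal sketch of how HrefParser is used (the page content here is
# hypothetical):
#
#   hrefs = [ ]
#   HrefParser("https://example.com/logs/", hrefs).feed('<a href="pull-1-abc/">x</a>')
#   # hrefs == ["https://example.com/logs/pull-1-abc/"]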

# Check if a given pull request was included in its base
# branch via merging or otherwise
class Pulls():
    def __init__(self, since):
        self.fetched = { }
        self.checked = { }
        self.pulls = { }
        self.listing = [ ]
        self.since = since

    # Get all the pull requests since a given time
    def __iter__(self):
        if self.listing:
            iterate = self.pulls.values()
        else:
            iterate = task.api.pulls(state="all", since=self.since)
        listing = [ ]
        for pull in iterate:
            self.pulls[pull["number"]] = pull
            listing.append(pull)
            yield pull
        self.listing = listing

    # Turn a string/int pull number into a pull object
    def normalize(self, pull):
        if isinstance(pull, int):
            pull = str(pull)
        if isinstance(pull, str):
            if "/" not in pull:
                pull = qualify("pulls/{0}".format(pull))
            if pull in self.pulls:
                pull = self.pulls[pull]
            else:
                pull = task.api.get(pull)
                self.pulls[pull["url"]] = pull
        elif not isinstance(pull, dict):
            raise ValueError("Invalid pull request: {0}".format(repr(pull)))
        return pull
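
    # For illustration, normalize() accepts any of the following and returns
    # the same pull request object, fetching and caching it when necessary:
    #
    #   pulls.normalize(1234)
    #   pulls.normalize("1234")
    #   pulls.normalize("https://api.github.com/repos/owner/repo/pulls/1234")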

    def merged(self, pull):
        pull = self.normalize(pull)
        number = pull["number"]

        if number in self.checked:
            return self.checked[number]

        if pull.get("state") != "closed":
            return None

        # GitHub is telling us this was merged
        if pull.get("merged"):
            return True

        # Fetch git data about this branch
        cwd = os.path.dirname(__file__)
        base = pull["base"]["ref"]
        if base not in self.fetched:
            try:
                subprocess.check_call([ "git", "fetch", "-q", "--", "origin", base ], cwd=cwd)
            except subprocess.CalledProcessError:
                return None  # error already printed by process
            self.fetched[base] = base

        # Look for git commits up until a year before the pull request
        when = time.mktime(time.strptime(pull["created_at"], "%Y-%m-%dT%H:%M:%SZ"))
        when -= 60 * 60 * 24 * 365
        since = time.strftime("%Y-%m-%d", time.gmtime(when))

        # Check if it's referred to in this branch
        match = "(Closes|Fixes|closes|fixes).*{0}".format(number)
        cmd = [
            "git", "log", "--extended-regexp", "--grep", match,
            "--since=" + since, "origin/" + base
        ]
        output = subprocess.check_output(cmd, cwd=cwd)
        self.checked[number] = bool(output)
        return self.checked[number]
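
# As an illustration, checking whether pull #1234 against master was merged
# "or otherwise" boils down to running roughly (the date is made up):
#
#   git fetch -q -- origin master
#   git log --extended-regexp --grep '(Closes|Fixes|closes|fixes).*1234' \
#       --since=2016-06-01 origin/master
#
# Any output at all is taken to mean the branch refers to the pull request.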

# Retrieves the content of the given URL
def retrieve(url):
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    req = urllib.request.urlopen(url, context=ctx)
    return req.read().decode('utf-8', 'replace')
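
# Note: certificate verification is disabled above, presumably because log
# sinks may use self-signed certificates; only public test logs are read.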

# Returns a list of all the link hrefs found at the given URL
def links(url):
    result = [ ]
    parser = HrefParser(url, result)
    try:
        parser.feed(retrieve(url))
    except urllib.error.HTTPError as ex:
        if ex.code != 404:
            raise
    except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
        sys.stderr.write("{0}: {1}\n".format(url, ex))
    return result

# Parses seed input data and passes it through to the output,
# all the while recording the fact that certain URLs have
# already been seen
def seed(since, fp, pulls, output):
    seeded = None
    known = re.compile("# SKIP Known issue #([0-9]+)", re.IGNORECASE)

    while True:
        try:
            line = fp.readline()
        except (OSError, zlib.error) as ex:
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            break
        if not line:
            break
        try:
            item = json.loads(line.decode('utf-8'))
        except ValueError as ex:
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            continue

        # Once we see a new pull, treat the old one as complete and seeded.
        # As a failsafe, just to make sure we didn't miss something, we
        # don't treat the last pull request as completely seeded
        pull = item.get("pull")
        if pull and pull != seeded:
            SEEDED.add(seeded)
            seeded = None

        if pull and item.get("merged") not in [ True, False ]:
            item["merged"] = pulls.merged(pull)

        # Note that we've already retrieved this URL
        url = item.get("url")
        if url and item.get("log") is not None:
            SEEDED.add(url)
            SEEDED.add(urllib.parse.urljoin(url, "./"))

        # If the pull request had a known merged value it can be seeded.
        # This forces us to retrieve data about open pull requests again
        if item.get("merged") in [ True, False ]:
            seeded = pull
            SEEDED.add(item["revision"])

        date = item.get("date")
        if not date or since > time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ")):
            continue

        # COMPAT: Fix data that wasn't yet valid
        if item["status"] == "skip":
            match = known.search(item["log"])
            if match:
                item["status"] = "failure"
                item["tracker"] = qualify("issues/{0}".format(match.group(1)))

        line = json.dumps(item).encode('utf-8') + b"\n"
        output.write(line)
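
# For illustration, the COMPAT block above rewrites a legacy record such as
#
#   {"status": "skip", "log": "ok 1 test # SKIP Known issue #678", ...}
#
# into status "failure" with a tracker of ".../issues/678".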

# Generates (revision, merged, created, url) tuples for the given branch.
# This includes pull requests targeting the branch in question
#
# revision: the SHA of a commit
# merged: True/False/None whether it was merged or not
# created: the creation date of the commit or pull request
# url: the URL for the pull request, or None
def commits(branch, pulls, since, verbose=False):
    if verbose:
        sys.stderr.write("{0}\n".format(branch))

    # Iterate through commits on the branch
    for commit in task.api.commits(branch, since=since):
        revision = commit["sha"].lower()
        if revision not in SEEDED:
            yield revision, True, commit["commit"]["committer"]["date"], None

    # Iterate through pull requests
    for pull in pulls:
        if pull["number"] in SEEDED:
            continue
        if pull["base"]["ref"] != branch:
            continue
        if verbose:
            sys.stderr.write("pull-{0}\n".format(pull["number"]))
        merged = pulls.merged(pull)

        for revision in revisions(pull):
            yield revision, merged, pull["created_at"], pull["url"]

            # Revisions after the first one produced by revisions() are not
            # the ones that got merged; only the current head counts
            if merged:
                merged = False
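
# Illustrative values yielded by commits("master", ...): first plain branch
# commits, then pull request revisions, where only the current head of a
# merged pull keeps merged=True:
#
#   ("9f0c...", True, "2017-05-30T10:00:00Z", None)
#   ("ab12...", True, "2017-05-28T09:00:00Z", ".../pulls/123")
#   ("cd34...", False, "2017-05-28T09:00:00Z", ".../pulls/123")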

# Get all the revisions in a pull request. GitHub doesn't help
# us here so we have to use silly tricks
def revisions(pull):
    head = pull.get("head", { }).get("sha")
    if not head:
        return

    # First give back the main pull request revision
    head = head.lower()
    yield head

    # All the revisions we've seen
    seen = set([ head ])

    # Seed the set of sinks. We use these sinks to figure out additional
    # revisions for the pull request. Unfortunately GitHub doesn't give us
    # a list of the revisions that this pull request used to reflect, so
    # we have to look to our sinks for that info.
    data = task.api.get("commits/{0}/status?page=1&per_page=100".format(head))
    for status in data.get("statuses", [ ]):
        url = status["target_url"]
        if url:
            SEEDED.add(urllib.parse.urljoin(url, "./"))
            sink = urllib.parse.urljoin(url, "../")
            if sink not in SINKS:
                SINKS[sink] = links(sink)

    # Now ask each sink for its set of urls
    name = "pull-{0}".format(pull["number"])
    for sink in SINKS:
        for link in SINKS[sink]:

            # We only care about stuff at the sink where pull-XXXX is in
            # the URL. This is how we figure out whether things are related
            if name not in link:
                continue

            # Already retrieved this one
            if link in SEEDED:
                continue

            # Build a URL for the cockpituous sink /status file and read it
            target = urllib.parse.urljoin(link, "status")
            try:
                data = json.loads(retrieve(target))
            except (ValueError, ConnectionError) as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            except urllib.error.HTTPError as ex:
                if ex.code != 404:
                    raise
            except urllib.error.URLError as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            else:
                # The status file contains a "revision" field which is the
                # git revision of what was tested during that test run.
                # This is what we're after
                if "revision" in data:
                    revision = data["revision"].lower()
                    if revision not in seen:
                        seen.add(revision)
                        yield revision
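
# For example (hypothetical sink layout), a status target_url of
#
#   https://sink.example.com/logs/pull-123-8d3c-verify-fedora-25/log.html
#
# gives urljoin(url, "./")  -> .../logs/pull-123-8d3c-verify-fedora-25/
# and   urljoin(url, "../") -> .../logs/, the sink listing that is scanned
# for further pull-123 result directories, each holding a JSON "status" file.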

# Pull out all statuses (context, created, url, log) for a given revision.
# This includes multiple test runs for a given revision, and all the
# various status contexts
def logs(revision):
    page = 1
    count = 100
    while count == 100:
        data = task.api.get("commits/{0}/status?page={1}&per_page={2}".format(revision, page, count))
        count = 0
        for status in data.get("statuses", [ ]):
            count += 1

            # Make sure to not consider "state": "success" as a success
            # here because individual tests may have failed, or been retried.
            #
            # Always only consider tests individually to have run or failed,
            # not entire test suite statuses
            if status["state"] in [ "pending" ]:
                continue
            target = status.get("target_url")
            if not target:
                continue
            if target.endswith(".html"):
                target = target[:-5]
            if target in SEEDED:
                continue
            log = None
            try:
                log = retrieve(target)
            except urllib.error.HTTPError as ex:
                if ex.code != 404:
                    raise
                log = ""
            except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            if log is not None:
                yield (status["context"], status["created_at"], target, log)
        page += 1
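
# Roughly the shape of the combined status payload consumed by logs() above
# (values are illustrative):
#
#   {"statuses": [{"state": "failure", "context": "verify/fedora-25",
#                  "created_at": "2017-06-01T12:00:00Z",
#                  "target_url": "https://sink.example.com/logs/pull-123-8d3c/log.html"}]}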

# Generate (status, name, body, tracker) for each Test Anything Protocol test
# in the content.
#
# status: possible values "success", "failure", "skip"
# name: the name of the test
# body: full log of the test
# tracker: url tracking the failure, or None
def tap(content):
    name = status = tracker = None
    prefix = None
    body = [ ]
    blocks = False
    for line in content.split('\n'):

        # The test intro, everything before here is fluff
        if not prefix and line.startswith("1.."):
            prefix = line
            body = [ ]
            name = status = tracker = None

        # A TAP test status line
        elif line.startswith("ok ") or line.startswith("not ok "):
            body.append(line)

            # Parse out the status
            if line.startswith("not ok "):
                status = "failure"
                line = line[7:]
            else:
                line = line[3:]
                if "# SKIP KNOWN ISSUE" in line.upper():
                    status = "failure"
                    (unused, delim, issue) = line.rpartition("#")
                    tracker = qualify("issues/{0}".format(issue.strip()))
                elif "# SKIP" in line.upper():
                    status = "skip"
                else:
                    status = "success"

            # Parse out the name
            while line and (line[0].isspace() or line[0].isdigit()):
                line = line[1:]
            (name, delim, directive) = line.partition("#")
            (name, delim, directive) = name.partition("duration")
            name = name.strip()

            # Old Cockpit tests had strange blocks
            if not blocks:
                yield (status, name, "\n".join(body), tracker)
                status = name = tracker = None
                body = [ ]

        else:
            # Old Cockpit tests didn't bound their output blocks properly
            if line.startswith("# --------------------"):
                blocks = True
                if status:
                    yield (status, name, "\n".join(body), tracker)
                    name = status = tracker = None
                    body = [ ]
            body.append(line)
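
# A tiny illustrative TAP fragment and the tuples tap() yields for it:
#
#   1..2
#   ok 1 testBasic # duration: 12
#   not ok 2 testOther
#
# -> ("success", "testBasic", "ok 1 testBasic # duration: 12", None)
#    ("failure", "testOther", "not ok 2 testOther", None)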

# Qualify a URL into the GitHub repository
def qualify(path):
    return "https://api.github.com" + task.api.qualify(path)
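
# For example, qualify("issues/678") returns something like
# "https://api.github.com/repos/owner/repo/issues/678"; the exact repository
# path comes from task.api.qualify.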

if __name__ == '__main__':
    task.main(function=run, title="Pull out test data for pull requests", verbose=True)