parent 044b8da55a
commit d5a822884f
288 changed files with 13040 additions and 1 deletion

bots/tests-data (481 lines, new executable file)

@@ -0,0 +1,481 @@
#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2017 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; if not, see <http://www.gnu.org/licenses/>.

import gzip
import json
import os
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import urllib.error
import urllib.parse
import urllib.request
import zlib

import html.parser

sys.dont_write_bytecode = True

import task

from machine import testvm

# The number of days of previous closed pull requests to learn from
SINCE_DAYS = 120

BOTS = os.path.abspath(os.path.dirname(__file__))
SEEDED = set()
SINKS = { }

def run(filename, verbose=False, dry=False, **kwargs):
    since = time.time() - 60 * 60 * 24 * SINCE_DAYS
    pulls = Pulls(since)

    # Seed with our input data
    if filename:
        if "/" not in filename and not os.path.exists(filename):
            if not dry:
                subprocess.check_call([ os.path.join(BOTS, "image-download"), "--state", filename ])
            filename = os.path.join(testvm.get_images_data_dir(), filename)
        (outfd, outname) = tempfile.mkstemp(prefix=os.path.basename(filename), dir=os.path.dirname(filename))
        os.close(outfd)
        output = gzip.open(outname, 'wb')
        if os.path.exists(filename):
            with gzip.open(filename, 'rb') as fp:
                seed(since, fp, pulls, output)
    else:
        output = sys.stdout.buffer
        outname = None

    def write(**kwargs):
        line = json.dumps(kwargs).encode('utf-8') + b"\n"
        output.write(line)

    # Iterate through all revisions and pull requests on this branch
    for (commit, merged, created, pull) in commits("master", pulls, since, verbose):
        logged = False
        if verbose:
            sys.stderr.write("- {0}\n".format(commit))
        for (context, created, url, log) in logs(commit):
            if verbose:
                sys.stderr.write(" - {0} {1}\n".format(created, context))
            for (status, name, body, tracker) in tap(log):
                write(pull=pull, revision=commit, status=status,
                      context=context, date=created, merged=merged,
                      test=name, url=url, tracker=tracker, log=body)
                logged = True

            # Nothing found for this log
            if not logged:
                write(pull=pull, revision=commit, status="unknown", date=created,
                      merged=merged, url=url, log=log)
                logged = True

        # Nothing found for this revision
        if not logged:
            write(pull=pull, revision=commit, status="unknown", date=created, merged=merged)
            logged = True

    sys.stdout.flush()
    if output:
        output.close()
        if outname:
            os.rename(outname, filename)

    if not dry and outname and filename:
        upload = [ os.path.join(BOTS, "image-upload"), "--state", filename ]
        subprocess.check_call(upload)
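
# A sketch (values illustrative, not real data): every call to write()
# above emits one self-contained JSON object per line, roughly:
#
#   {"pull": "https://api.github.com/repos/.../pulls/1234",
#    "revision": "8d7c...", "status": "failure", "context": "verify/fedora-25",
#    "date": "2017-01-01T00:00:00Z", "merged": true, "test": "testBasic",
#    "url": "https://.../log", "tracker": null, "log": "..."}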

# An HTML parser that just pulls out all the <a href="...">
# link hrefs in a given page of content. We also qualify these
# hrefs with a base url, in case they're relative
class HrefParser(html.parser.HTMLParser):
    def __init__(self, base, hrefs):
        html.parser.HTMLParser.__init__(self)
        self.hrefs = hrefs
        self.base = base

    def handle_starttag(self, tag, attrs):
        if tag.lower() == "a":
            for (name, value) in attrs:
                if name.lower() == "href":
                    url = urllib.parse.urljoin(self.base, value)
                    self.hrefs.append(url)
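
# Usage sketch (hypothetical input): feeding the markup
#   <a href="pull-1234/">logs</a>
# through HrefParser with base "https://sink.example.com/logs/" appends
# "https://sink.example.com/logs/pull-1234/" to the hrefs list, since
# relative hrefs are joined against the base url.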

# Check if a given pull request was included in its base
# branch via merging or otherwise
class Pulls():
    def __init__(self, since):
        self.fetched = { }
        self.checked = { }
        self.pulls = { }
        self.listing = [ ]
        self.since = since

    # Get all the pull requests since a given time
    def __iter__(self):
        if self.listing:
            iterate = self.pulls.values()
        else:
            iterate = task.api.pulls(state="all", since=self.since)
        listing = [ ]
        for pull in iterate:
            self.pulls[pull["number"]] = pull
            listing.append(pull)
            yield pull
        self.listing = listing

    # Turn a string/int pull number into a pull object
    def normalize(self, pull):
        if isinstance(pull, int):
            pull = str(pull)
        if isinstance(pull, str):
            if "/" not in pull:
                pull = qualify("pulls/{0}".format(pull))
            if pull in self.pulls:
                pull = self.pulls[pull]
            else:
                pull = task.api.get(pull)
                self.pulls[pull["url"]] = pull
        elif not isinstance(pull, dict):
            raise ValueError("Invalid pull request: {0}".format(repr(pull)))
        return pull

    def merged(self, pull):
        pull = self.normalize(pull)

        number = pull["number"]

        if number in self.checked:
            return self.checked[number]

        if pull.get("state") != "closed":
            return None

        # GitHub is telling us this was merged
        if pull.get("merged"):
            return True

        # Fetch git data about this branch
        cwd = os.path.dirname(__file__)
        base = pull["base"]["ref"]
        if base not in self.fetched:
            try:
                subprocess.check_call([ "git", "fetch", "-q", "--", "origin", base ], cwd=cwd)
            except subprocess.CalledProcessError:
                return None  # error already printed by process
            self.fetched[base] = base

        # Look for git commits up until a year before the pull request
        when = time.mktime(time.strptime(pull["created_at"], "%Y-%m-%dT%H:%M:%SZ"))
        when -= 60 * 60 * 24 * 365
        since = time.strftime("%Y-%m-%d", time.gmtime(when))

        # Check if it's referred to in this branch
        match = "(Closes|Fixes|closes|fixes).*{0}".format(number)
        cmd = [
            "git", "log", "--extended-regexp", "--grep", match,
            "--since=" + since, "origin/" + base
        ]
        output = subprocess.check_output(cmd, cwd=cwd)
        self.checked[number] = bool(output)
        return self.checked[number]
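
# Sketch of the fallback check above, for a hypothetical pull #1234 against
# master created in May 2017:
#
#   git log --extended-regexp --grep '(Closes|Fixes|closes|fixes).*1234' \
#       --since=2016-05-01 origin/master
#
# Any output at all means some commit on the base branch refers to the pull,
# so it counts as merged even without GitHub's "merged" flag.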

# Retrieves the content of the given URL
def retrieve(url):
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    req = urllib.request.urlopen(url, context=ctx)
    return req.read().decode('utf-8', 'replace')

# Returns a list of all link targets found at the given URL
def links(url):
    result = [ ]
    parser = HrefParser(url, result)
    try:
        parser.feed(retrieve(url))
    except urllib.error.HTTPError as ex:
        if ex.code != 404:
            raise
    except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
        sys.stderr.write("{0}: {1}\n".format(url, ex))
    return result

# Parses seed input data and passes it through to output,
# all the while recording that certain URLs have
# already been seen
def seed(since, fp, pulls, output):
    seeded = None
    known = re.compile("# SKIP Known issue #([0-9]+)", re.IGNORECASE)

    while True:
        try:
            line = fp.readline()
        except (OSError, zlib.error) as ex:
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            break
        if not line:
            break
        try:
            item = json.loads(line.decode('utf-8'))
        except ValueError as ex:
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            continue

        # Once we see a new pull, treat the old one as complete and seeded.
        # As a failsafe, just to make sure we didn't miss something,
        # we don't treat the last pull request as completely seeded
        pull = item.get("pull")
        if pull and pull != seeded:
            SEEDED.add(seeded)
            seeded = None

        if pull and item.get("merged") not in [ True, False ]:
            item["merged"] = pulls.merged(pull)

        # Note that we've already retrieved this URL
        url = item.get("url")
        if url and item.get("log") is not None:
            SEEDED.add(url)
            SEEDED.add(urllib.parse.urljoin(url, "./"))

        # If the pull request had a known merged value it can be seeded.
        # This forces us to retrieve data about open pull requests again
        if item["merged"] in [ True, False ]:
            seeded = pull
            SEEDED.add(item["revision"])

        date = item.get("date")
        if not date or since > time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ")):
            continue

        # COMPAT: Fix data that wasn't yet valid
        if item["status"] == "skip":
            match = known.search(item["log"])
            if match:
                item["status"] = "failure"
                item["tracker"] = qualify("issues/{0}".format(match.group(1)))

        line = json.dumps(item).encode('utf-8') + b"\n"
        output.write(line)
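
# Example of the COMPAT rewrite above (hypothetical record): a legacy line
#   {"status": "skip", "log": "not ok 5 testX # SKIP Known issue #7842", ...}
# is rewritten to "status": "failure" with a tracker URL built from the
# issue number, roughly "https://api.github.com/.../issues/7842".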

# Generate a stream of (revision, merged, created, url) for the given branch.
# This includes pull requests targeting the branch in question
#
# revision: the SHA of a commit
# merged: True/False/None whether merged or not
# created: the creation date of the commit or pull request
# url: the URL for the pull request or None
def commits(branch, pulls, since, verbose=False):
    if verbose:
        sys.stderr.write("{0}\n".format(branch))

    # Iterate through commits on the branch itself
    for commit in task.api.commits(branch, since=since):
        revision = commit["sha"].lower()
        if revision not in SEEDED:
            yield revision, True, commit["commit"]["committer"]["date"], None

    # Iterate through pull requests
    for pull in pulls:
        if pull["number"] in SEEDED:
            continue
        if pull["base"]["ref"] != branch:
            continue
        if verbose:
            sys.stderr.write("pull-{0}\n".format(pull["number"]))
        merged = pulls.merged(pull)

        for revision in revisions(pull):
            yield revision, merged, pull["created_at"], pull["url"]

            # Later revisions of the pull request are not the ones that
            # got merged; only the first one produced by revisions() is
            if merged:
                merged = False

# Get all the revisions in a pull request. GitHub doesn't help
# us here so we have to use silly tricks
def revisions(pull):
    head = pull.get("head", { }).get("sha")
    if not head:
        return

    # First give back the main pull request revision
    head = head.lower()
    yield head

    # All the revisions we've seen
    seen = set([ head ])

    # Seed the set of sinks. We use these sinks to figure out additional
    # revisions for the pull request. Unfortunately GitHub doesn't give us
    # the list of revisions that this pull request used to point at, so
    # we have to look to our sink for that info.
    data = task.api.get("commits/{0}/status?page=1&per_page=100".format(head))
    for status in data.get("statuses", [ ]):
        url = status["target_url"]
        if url:
            SEEDED.add(urllib.parse.urljoin(url, "./"))
            sink = urllib.parse.urljoin(url, "../")
            if sink not in SINKS:
                SINKS[sink] = links(sink)

    # Now ask each sink for its set of urls
    name = "pull-{0}".format(pull["number"])
    for sink in SINKS:
        for link in SINKS[sink]:

            # We only care about stuff at the sink where pull-XXXX is in
            # the URL. This is how we figure out whether things are related
            if name not in link:
                continue

            # Already retrieved this one
            if link in SEEDED:
                continue

            # Build a URL for the cockpituous sink /status file and read it
            target = urllib.parse.urljoin(link, "status")
            try:
                data = json.loads(retrieve(target))
            except (ValueError, ConnectionError) as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            except urllib.error.HTTPError as ex:
                if ex.code != 404:
                    raise
            except urllib.error.URLError as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            else:
                # The status file contains a "revision" field which is the
                # git revision of what was tested during that test run.
                # This is what we're after
                if "revision" in data:
                    revision = data["revision"].lower()
                    if revision not in seen:
                        seen.add(revision)
                        yield revision
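
# Sketch of the sink trick above (hypothetical sink layout): a status
# target_url such as
#   https://sink.example.com/logs/pull-1234-20170101/log.html
# yields the sink "https://sink.example.com/logs/"; every link under that
# sink containing "pull-1234" is then probed for a "status" JSON file whose
# "revision" field names an earlier revision of the same pull request.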

# Pull out all (context, created, url, log) status data for a given revision.
# This includes multiple test runs for a given revision, and all the various
# status contexts
def logs(revision):
    page = 1
    count = 100
    while count == 100:
        data = task.api.get("commits/{0}/status?page={1}&per_page={2}".format(revision, page, count))
        count = 0
        for status in data.get("statuses", [ ]):
            count += 1
            # Make sure to not consider "state": "success" as a success
            # here because individual tests may have failed, or been retried.
            #
            # Always only consider tests individually to have run or failed,
            # not entire test suite statuses
            if status["state"] in [ "pending" ]:
                continue
            target = status.get("target_url")
            if not target:
                continue
            if target.endswith(".html"):
                target = target[:-5]
            if target in SEEDED:
                continue
            log = None
            try:
                log = retrieve(target)
            except urllib.error.HTTPError as ex:
                if ex.code != 404:
                    raise
                log = ""
            except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            if log is not None:
                yield (status["context"], status["created_at"], target, log)
        # Advance to the next page; a short page (count < 100) ends the loop
        page += 1
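
# Shape of the combined-status payload consumed above (abridged, values
# illustrative):
#   {"statuses": [{"state": "failure", "context": "verify/fedora-25",
#                  "created_at": "2017-01-01T00:00:00Z",
#                  "target_url": "https://sink.example.com/logs/pull-1234/log.html"}]}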

# Generate (status, name, body, tracker) for each Test Anything Protocol
# test in the content.
#
# status: possible values "success", "failure", "skip"
# name: the name of the test
# body: full log of the test
# tracker: url tracking the failure, or None
def tap(content):
    name = status = tracker = None
    prefix = None
    body = [ ]
    blocks = False
    for line in content.split('\n'):

        # The test intro, everything before here is fluff
        if not prefix and line.startswith("1.."):
            prefix = line
            body = [ ]
            name = status = tracker = None

        # A TAP test status line
        elif line.startswith("ok ") or line.startswith("not ok "):
            body.append(line)
            # Parse out the status
            if line.startswith("not ok "):
                status = "failure"
                line = line[7:]
            else:
                line = line[3:]
                if "# SKIP KNOWN ISSUE" in line.upper():
                    status = "failure"
                    # The issue number follows the last '#' on the line
                    (unused, delim, issue) = line.rpartition("#")
                    tracker = qualify("issues/{0}".format(issue.strip()))
                elif "# SKIP" in line.upper():
                    status = "skip"
                else:
                    status = "success"
            # Parse out the name
            while line and (line[0].isspace() or line[0].isdigit()):
                line = line[1:]
            (name, delim, directive) = line.partition("#")
            (name, delim, directive) = name.partition("duration")
            name = name.strip()
            # Old Cockpit tests used explicit separator blocks instead
            if not blocks:
                yield (status, name, "\n".join(body), tracker)
                status = name = tracker = None
                body = [ ]
        else:
            # Old Cockpit tests didn't delimit their output properly
            if line.startswith("# --------------------"):
                blocks = True
                if status:
                    yield (status, name, "\n".join(body), tracker)
                name = status = tracker = None
                body = [ ]
            body.append(line)
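
# Illustrative TAP input (hypothetical) and what tap() yields for it:
#   1..2
#   ok 1 testBasic
#   not ok 2 testOther
# ->
#   ("success", "testBasic", "ok 1 testBasic", None)
#   ("failure", "testOther", "not ok 2 testOther", None)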

# Qualify a URL into the GitHub repository
def qualify(path):
    return "https://api.github.com" + task.api.qualify(path)

if __name__ == '__main__':
    task.main(function=run, title="Pull out test data for pull requests", verbose=True)