#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import argparse
import os
import re
import subprocess
import sys
import time
import urllib.request

from contextlib import contextmanager

from task import github
from machine import testvm

# Days after which images expire if not in use
IMAGE_EXPIRE = 14

BOTS = os.path.dirname(os.path.realpath(__file__))

# threshold in G below which unreferenced qcow2 images will be pruned,
# even if they aren't old
PRUNE_THRESHOLD_G = float(os.environ.get("PRUNE_THRESHOLD_G", 15))


def enough_disk_space():
    """Return True if free space in our image data store is at least PRUNE_THRESHOLD_G GiB."""
    st = os.statvfs(testvm.get_images_data_dir())
    free = st.f_bavail * st.f_frsize / (1024 * 1024 * 1024)
    return free >= PRUNE_THRESHOLD_G


def get_refs(open_pull_requests=True, offline=False):
    """Return (refs, considerable) for the available git refs.

    refs maps a human-readable ref name to its commit sha, e.g.
    {'rhel-7.4': 'ad50328990e44c22501bd5e454746d4b5e561b7c'}; open pull
    requests appear as 'pull request #N (title)'.  considerable maps a pull
    request head sha to the list of image names its patch touches.

    Expects to be called from the top level of the git checkout.
    If offline is true, git show-ref is used instead of listing the remote.

    Raises Exception when open_pull_requests and offline are both requested.
    """
    refs = {}
    considerable = {}

    if open_pull_requests:
        if offline:
            raise Exception("Unable to consider open pull requests when in offline mode")
        for p in github.GitHub().pulls():
            images = []
            with urllib.request.urlopen(p["patch_url"]) as f:
                # enough to look at the git commit header, it lists all changed files
                changed = f.read(4000).decode('utf-8').split("\n")
            for line in changed:
                # diffstat line for a one-line change to a bots/images/* link
                m = re.match(r"^ bots/images/([^\/]*)\| 2 \+\-$", line)
                if m:
                    images.append(m.group(1).strip())
            if images:
                sha = p["head"]["sha"]
                considerable[sha] = images
                # make the pull request head available locally so ls-tree/show work
                subprocess.call(["git", "fetch", "origin", "pull/{0}/head".format(p["number"])])
                refs["pull request #{} ({})".format(p["number"], p["title"])] = sha

    # get all remote heads and filter empty lines
    # output of ls-remote has the format
    #
    # d864d3792db442e3de3d1811fa4bc371793a8f4f	refs/heads/master
    # ad50328990e44c22501bd5e454746d4b5e561b7c	refs/heads/rhel-7.4
    git_cmd = "show-ref" if offline else "ls-remote"
    ref_output = subprocess.check_output(["git", git_cmd],
                                         universal_newlines=True).splitlines()
    # filter out the "refs/heads/" prefix and generate a dictionary
    prefix = "refs/heads/"
    for ln in ref_output:
        [ref, name] = ln.split()
        if name.startswith(prefix):
            refs[name[len(prefix):]] = ref

    return (refs, considerable)


def get_image_links(ref, git_path):
    """Return all image links (file contents ending in .qcow2) for the given git ref.

    Expects to be called from the top level of the git checkout.
    Returns [] (and prints a note) when the ref has no such tree.
    """
    # trailing slash on path is important for ls-tree to list the directory contents
    if not git_path.endswith("/"):
        git_path = "{0}/".format(git_path)
    try:
        entries = subprocess.check_output(["git", "ls-tree", "--name-only", ref, git_path],
                                          universal_newlines=True).splitlines()
    except subprocess.CalledProcessError as e:
        # 128: ref does not have that tree; anything else is a real error
        if e.returncode == 128:
            sys.stderr.write("Skipping {0} due to tree error.\n".format(ref))
            return []
        raise
    links = [subprocess.check_output(["git", "show", "{0}:{1}".format(ref, entry)],
                                     universal_newlines=True)
             for entry in entries]
    return [link for link in links if link.endswith(".qcow2")]


@contextmanager
def remember_cwd():
    """Context manager that restores the current working directory on exit."""
    curdir = os.getcwd()
    try:
        yield
    finally:
        os.chdir(curdir)


def get_image_names(quiet=False, open_pull_requests=True, offline=False):
    """Return the set of image names used by all branches and optionally open pull requests."""
    images = set()
    # iterate over visible refs (mostly branches)
    # this hinges on being in the top level directory of the git checkout
    with remember_cwd():
        os.chdir(os.path.join(BOTS, ".."))
        (refs, considerable) = get_refs(open_pull_requests, offline)
        # list images present in each branch / pull request
        for name, ref in refs.items():
            if not quiet:
                sys.stderr.write("Considering images from {0} ({1})\n".format(name, ref))
            for link in get_image_links(ref, "bots/images"):
                if ref in considerable:
                    # for pull requests only keep images the patch actually touches
                    for consider in considerable[ref]:
                        if link.startswith(consider):
                            images.add(link)
                else:
                    images.add(link)
    return images


def prune_images(force, dryrun, quiet=False, open_pull_requests=True, offline=False, checkout_only=False):
    """Prune images that are expired and not referenced by any branch, pull request or checkout.

    force:         delete unreferenced images even if they are not old
    dryrun:        only report what would be deleted
    checkout_only: consider only the current checkout, not branches or pull requests
    """
    now = time.time()

    # everything we want to keep
    if checkout_only:
        targets = set()
    else:
        targets = get_image_names(quiet, open_pull_requests, offline)

    # what we have in the current checkout might already have been added by
    # its branch, but check anyway
    for filename in os.listdir(testvm.IMAGES_DIR):
        path = os.path.join(testvm.IMAGES_DIR, filename)
        # only consider original image entries as trustworthy sources and ignore non-links
        if path.endswith(".qcow2") or path.endswith(".partial") or not os.path.islink(path):
            continue
        target = os.readlink(path)
        targets.add(target)

    expiry_threshold = now - IMAGE_EXPIRE * 86400
    for filename in os.listdir(testvm.get_images_data_dir()):
        path = os.path.join(testvm.get_images_data_dir(), filename)
        # keep recent images while disk space is plentiful, unless forced
        if not force and (enough_disk_space() and os.lstat(path).st_mtime > expiry_threshold):
            continue
        if (os.path.isfile(path) and
                (path.endswith(".xz") or path.endswith(".qcow2") or path.endswith(".partial")) and
                filename not in targets):
            if not quiet or dryrun:
                sys.stderr.write("Pruning {0}\n".format(filename))
            if not dryrun:
                os.unlink(path)

    # now prune broken links
    for filename in os.listdir(testvm.IMAGES_DIR):
        path = os.path.join(testvm.IMAGES_DIR, filename)

        # don't prune original image entries and ignore non-links
        if not path.endswith(".qcow2") or not os.path.islink(path):
            continue

        # if the link isn't valid, prune
        if not os.path.isfile(path):
            if not quiet or dryrun:
                sys.stderr.write("Pruning link {0}\n".format(path))
            if not dryrun:
                os.unlink(path)


def every_image():
    """Return the names of all image links in the images directory."""
    result = []
    for filename in os.listdir(testvm.IMAGES_DIR):
        link = os.path.join(testvm.IMAGES_DIR, filename)
        if os.path.islink(link):
            result.append(filename)
    return result


def main():
    """Parse command line arguments and prune downloaded images accordingly."""
    parser = argparse.ArgumentParser(description='Prune downloaded images')
    parser.add_argument("--force", action="store_true",
                        help="Delete images even if they aren't old")
    parser.add_argument("--quiet", action="store_true",
                        help="Make downloading quieter")
    parser.add_argument("-d", "--dry-run-prune", dest="dryrun", action="store_true",
                        help="Don't actually delete images and links")
    parser.add_argument("-b", "--branches-only", dest="branches_only", action="store_true",
                        help="Don't consider pull requests on GitHub, only look at branches")
    parser.add_argument("-c", "--checkout-only", dest="checkout_only", action="store_true",
                        help="Consider neither pull requests on GitHub nor branches, only look at the current checkout")
    parser.add_argument("-o", "--offline", dest="offline", action="store_true",
                        help="Don't access external sources such as GitHub")
    args = parser.parse_args()
    try:
        prune_images(args.force, args.dryrun, quiet=args.quiet,
                     open_pull_requests=(not args.branches_only),
                     offline=args.offline,
                     checkout_only=args.checkout_only)
    except RuntimeError as ex:
        sys.stderr.write("image-prune: {0}\n".format(str(ex)))
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())