#!/usr/bin/env python3

# Copyright (C) 2014 by Thomas Petazzoni

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import sys
import os
import os.path
import argparse
import csv
import collections
import math

try:
    import matplotlib
    # Select the non-interactive 'Agg' backend before pyplot is imported.
    matplotlib.use('Agg')
    import matplotlib.font_manager as fm
    import matplotlib.pyplot as plt
except ImportError:
    sys.stderr.write("You need python-matplotlib to generate the size graph\n")
    # sys.exit() rather than the bare exit() helper: the latter is only
    # injected by the 'site' module and is not guaranteed to exist.
    sys.exit(1)


#
# Global settings of the script. main() overwrites biggest_first, iec
# and size_limit from the command line options.
#
class Config:
    # Sort the packages by decreasing size instead of increasing size
    biggest_first = False
    # Use IEC (powers of 1024) prefixes instead of SI (powers of 1000)
    iec = False
    # Packages below this ratio of the total size are accounted to "Other"
    size_limit = 0.01
    # Colors passed to the pie chart
    colors = ['#e60004', '#f28e00', '#ffed00', '#940084',
              '#2e1d86', '#0068b5', '#009836', '#97c000']


#
# This function adds a new file to 'filesdict', after checking its
# size. The 'filesdict' contain the relative path of the file as the
# key, and as the value a tuple containing the name of the package to
# which the file belongs and the size of the file.
#
# filesdict: the dict to which the file is added
# relpath: relative path of the file
# abspath: absolute path to the file
# pkg: package to which the file belongs
#
def add_file(filesdict, relpath, abspath, pkg):
    # os.path.exists() follows symbolic links, so missing files and
    # dangling links are both skipped here.
    if not os.path.exists(abspath):
        return
    # Symbolic links are never recorded.
    if os.path.islink(abspath):
        return
    filesdict[relpath] = (pkg, os.stat(abspath).st_size)


#
# This function returns a dict where each key is the path of a file in
# the root filesystem, and the value is a tuple containing two
# elements: the name of the package to which this file belongs and the
# size of the file.
#
# The list of files is read from build/packages-file-list.txt in
# 'builddir', one "<package>,<path>" entry per line. Blank lines are
# ignored, and lines without a comma are reported and skipped.
#
# builddir: path to the Buildroot output directory
#
def build_package_dict(builddir):
    filesdict = {}
    targetdir = os.path.join(builddir, "target")
    listpath = os.path.join(builddir, "build", "packages-file-list.txt")
    with open(listpath) as f:
        for line in f:
            if not line.strip():
                continue
            # Split on the first comma only: the path may contain commas
            pkg, sep, fpath = line.partition(",")
            if not sep:
                print("WARNING: ignoring malformed line: %s" % line.strip())
                continue
            fpath = fpath.strip()
            # remove the initial './' in each file path, when present
            if fpath.startswith("./"):
                fpath = fpath[2:]
            add_file(filesdict, fpath, os.path.join(targetdir, fpath), pkg)
    return filesdict


#
# This function builds a dictionary that contains the name of a
# package as key, and the size of the files installed by this package
# as the value.
#
# filesdict: dictionary with the name of the files as key, and as
# value a tuple containing the name of the package to which the files
# belongs, and the size of the file. As returned by
# build_package_dict.
# # builddir: path to the Buildroot output directory # def build_package_size(filesdict, builddir): pkgsize = collections.defaultdict(int) seeninodes = set() for root, _, files in os.walk(os.path.join(builddir, "target")): for f in files: fpath = os.path.join(root, f) if os.path.islink(fpath): continue st = os.stat(fpath) if st.st_ino in seeninodes: # hard link continue else: seeninodes.add(st.st_ino) frelpath = os.path.relpath(fpath, os.path.join(builddir, "target")) if frelpath not in filesdict: print("WARNING: %s is not part of any package" % frelpath) pkg = "unknown" else: pkg = filesdict[frelpath][0] pkgsize[pkg] += st.st_size return pkgsize # # Given a dict returned by build_package_size(), this function # generates a pie chart of the size installed by each package. # # pkgsize: dictionary with the name of the package as a key, and the # size as the value, as returned by build_package_size. # # outputf: output file for the graph # def draw_graph(pkgsize, outputf): def size2string(sz): if Config.iec: divider = 1024.0 prefixes = ['', 'Ki', 'Mi', 'Gi', 'Ti'] else: divider = 1000.0 prefixes = ['', 'k', 'M', 'G', 'T'] while sz > divider and len(prefixes) > 1: prefixes = prefixes[1:] sz = sz/divider # precision is made so that there are always at least three meaningful # digits displayed (e.g. 
'3.14' and '10.4', not just '3' and '10') precision = int(2-math.floor(math.log10(sz))) if sz < 1000 else 0 return '{:.{prec}f} {}B'.format(sz, prefixes[0], prec=precision) total = sum(pkgsize.values()) labels = [] values = [] other_value = 0 unknown_value = 0 for (p, sz) in sorted(pkgsize.items(), key=lambda x: x[1], reverse=Config.biggest_first): if sz < (total * Config.size_limit): other_value += sz elif p == "unknown": unknown_value = sz else: labels.append("%s (%s)" % (p, size2string(sz))) values.append(sz) if unknown_value != 0: labels.append("Unknown (%s)" % (size2string(unknown_value))) values.append(unknown_value) if other_value != 0: labels.append("Other (%s)" % (size2string(other_value))) values.append(other_value) plt.figure() patches, texts, autotexts = plt.pie(values, labels=labels, autopct='%1.1f%%', shadow=True, colors=Config.colors) # Reduce text size proptease = fm.FontProperties() proptease.set_size('xx-small') plt.setp(autotexts, fontproperties=proptease) plt.setp(texts, fontproperties=proptease) plt.suptitle("Filesystem size per package", fontsize=18, y=.97) plt.title("Total filesystem size: %s" % (size2string(total)), fontsize=10, y=.96) plt.savefig(outputf) # # Generate a CSV file with statistics about the size of each file, its # size contribution to the package and to the overall system. # # filesdict: dictionary with the name of the files as key, and as # value a tuple containing the name of the package to which the files # belongs, and the size of the file. As returned by # build_package_dict. # # pkgsize: dictionary with the name of the package as a key, and the # size as the value, as returned by build_package_size. 
#
# outputf: output CSV file
#
def gen_files_csv(filesdict, pkgsizes, outputf):
    total = sum(pkgsizes.values())
    # newline='' is what the csv module expects for the files it writes
    with open(outputf, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(["File name",
                     "Package name",
                     "File size",
                     "Package size",
                     "File size in package (%)",
                     "File size in system (%)"])
        for f, (pkgname, filesize) in filesdict.items():
            # .get() rather than indexing: 'pkgsizes' may be a
            # defaultdict, and indexing it would add a new entry for
            # every package that has no accounted size.
            pkgsize = pkgsizes.get(pkgname, 0)
            wr.writerow([f, pkgname, filesize, pkgsize,
                         "%.1f" % _percent(filesize, pkgsize),
                         "%.1f" % _percent(filesize, total)])


#
# Return 'part' as a percentage of 'whole', or 0 when 'whole' is 0.
#
def _percent(part, whole):
    if whole == 0:
        return 0.0
    return float(part) / whole * 100


#
# Generate a CSV file with statistics about the size of each package,
# and their size contribution to the overall system.
#
# pkgsizes: dictionary with the name of the package as a key, and the
# size as the value, as returned by build_package_size.
#
# outputf: output CSV file
#
def gen_packages_csv(pkgsizes, outputf):
    total = sum(pkgsizes.values())
    # newline='' is what the csv module expects for the files it writes
    with open(outputf, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(["Package name", "Package size",
                     "Package size in system (%)"])
        for (pkg, size) in pkgsizes.items():
            wr.writerow([pkg, size, "%.1f" % _percent(size, total)])


#
# Our special action for --iec, --binary, --si, --decimal
#
# The destination is set to True when the option was spelled --iec or
# --binary, and to False for the other spellings.
#
class PrefixAction(argparse.Action):
    def __init__(self, option_strings, dest, **kwargs):
        # 'type' and 'nargs' are imposed below, callers cannot set them
        for key in ["type", "nargs"]:
            if key in kwargs:
                raise ValueError('"{}" not allowed'.format(key))
        super().__init__(option_strings, dest, nargs=0, type=bool, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, option_string in ["--iec", "--binary"])


#
# Parse the command line, collect the size of the files and packages
# of the Buildroot output directory, and generate the requested
# outputs (graph, per-file CSV, per-package CSV).
#
def main():
    parser = argparse.ArgumentParser(description='Draw size statistics graphs')

    parser.add_argument("--builddir", '-i', metavar="BUILDDIR", required=True,
                        help="Buildroot output directory")
    parser.add_argument("--graph", '-g', metavar="GRAPH",
                        help="Graph output file (.pdf or .png extension)")
    parser.add_argument("--file-size-csv", '-f', metavar="FILE_SIZE_CSV",
                        help="CSV output file with file size statistics")
    parser.add_argument("--package-size-csv", '-p', metavar="PKG_SIZE_CSV",
                        help="CSV output file with package size statistics")
    parser.add_argument("--biggest-first", action='store_true',
                        help="Sort packages in decreasing size order, "
                             "rather than in increasing size order")
    parser.add_argument("--iec", "--binary", "--si", "--decimal",
                        action=PrefixAction,
                        help="Use IEC (binary, powers of 1024) or SI (decimal, "
                             "powers of 1000, the default) prefixes")
    parser.add_argument("--size-limit", "-l", type=float,
                        help='Under this size ratio, files are accounted to '
                             'the generic "Other" package. Default: 0.01 (1%%)')
    args = parser.parse_args()

    Config.biggest_first = args.biggest_first
    # args.iec is None when none of the prefix options was given
    Config.iec = bool(args.iec)
    if args.size_limit is not None:
        # Written as a chained comparison so that NaN is rejected too
        if not 0.0 <= args.size_limit <= 1.0:
            parser.error("--size-limit must be in [0.0..1.0]")
        Config.size_limit = args.size_limit

    # Find out which package installed what files
    pkgdict = build_package_dict(args.builddir)

    # Collect the size installed by each package
    pkgsize = build_package_size(pkgdict, args.builddir)

    if args.graph:
        draw_graph(pkgsize, args.graph)
    if args.file_size_csv:
        gen_files_csv(pkgdict, pkgsize, args.file_size_csv)
    if args.package_size_csv:
        gen_packages_csv(pkgsize, args.package_size_csv)


if __name__ == "__main__":
    main()