#!/usr/bin/env python

# Copyright (C) 2016 Thomas De Schampheleire <thomas.de.schampheleire@gmail.com>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO (improvements)
# - support K,M,G size suffixes for threshold
# - output CSV file in addition to stdout reporting

import csv
import argparse
import sys


def read_file_size_csv(inputf, detail=None):
    """Extract package or file sizes from CSV file into size dictionary.

    inputf is an open text file (or any iterable of CSV lines) whose first
    row must start with the columns 'File name', 'Package name',
    'File size' and 'Package size'.

    If detail is true, the result maps each file name (column 1) to its
    file size (column 3); otherwise it maps each package name (column 2)
    to its package size (column 4). When a key occurs on several rows, the
    value of the last such row is kept.

    Prints an explanatory message and exits with status 1 when the input is
    empty or does not start with the expected header.
    """
    expected_header = ['File name', 'Package name', 'File size',
                       'Package size']
    reader = csv.reader(inputf)

    # Use a default so that an empty file yields None rather than raising
    # StopIteration; slicing keeps a too-short header from raising
    # IndexError while still accepting extra trailing columns.
    header = next(reader, None)
    if header is None or header[:4] != expected_header:
        # Not every file-like object carries a name (e.g. in-memory buffers)
        print(("Input file %s does not contain the expected header. Are you "
               "sure this file corresponds to the file-size-stats.csv "
               "file created by 'make graph-size'?")
              % getattr(inputf, 'name', '<input>'))
        sys.exit(1)

    # Select the (name, size) column pair once instead of on every row
    if detail:
        name_col, size_col = 0, 2
    else:
        name_col, size_col = 1, 3

    sizes = {}
    for row in reader:
        # csv.reader returns an empty list for a blank line; ignore those
        if not row:
            continue
        sizes[row[name_col]] = int(row[size_col])

    return sizes


def compare_sizes(old, new):
    """Return delta/added/removed dictionaries based on two input size
    dictionaries"""
    delta = {}
    oldkeys = set(old.keys())
    newkeys = set(new.keys())

    # packages/files in both
    for entry in newkeys.intersection(oldkeys):
        delta[entry] = ('', new[entry] - old[entry])
    # packages/files only in new
    for entry in newkeys.difference(oldkeys):
        delta[entry] = ('added', new[entry])
    # packages/files only in old
    for entry in oldkeys.difference(newkeys):
        delta[entry] = ('removed', -old[entry])

    return delta


def print_results(result, threshold):
    """Print the given result dictionary sorted by size, ignoring any entries
    below or equal to threshold.

    result maps a name to a (flag, size difference) tuple. Entries are
    printed one per line in ascending order of size difference, formatted
    as '<size difference> <flag> <name>'.

    threshold is compared against the absolute value of the size
    difference; pass None to print every entry.
    """
    # dict.items() exists on both Python 2 and Python 3, so no compatibility
    # helper from a third-party package is needed here.
    # Each item is a tuple: (name, (flag, size difference))
    for name, (flag, size) in sorted(result.items(),
                                     key=lambda item: item[1][1]):
        if threshold is not None and abs(size) <= threshold:
            continue
        print('%12s %7s %s' % (size, flag, name))


# main #########################################################################
#
# Parse the command line, load both CSV files, then print the per-package (or
# per-file) size differences followed by the overall total.

description = """
Compare rootfs size between Buildroot compilations, for example after changing
configuration options or after switching to another Buildroot release.

This script compares the file-size-stats.csv file generated by 'make graph-size'
with the corresponding file from another Buildroot compilation.
The size differences can be reported per package or per file.
Size differences smaller or equal than a given threshold can be ignored.
"""

# RawDescriptionHelpFormatter keeps the line breaks of the description above
parser = argparse.ArgumentParser(description=description,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)

parser.add_argument('-d', '--detail', action='store_true',
                    help='''report differences for individual files rather than
                            packages''')
parser.add_argument('-t', '--threshold', type=int,
                    help='''ignore size differences smaller or equal than this
                            value (bytes)''')
parser.add_argument('old_file_size_csv', type=argparse.FileType('r'),
                    metavar='old-file-size-stats.csv',
                    help="""old CSV file with file and package size statistics,
                            generated by 'make graph-size'""")
parser.add_argument('new_file_size_csv', type=argparse.FileType('r'),
                    metavar='new-file-size-stats.csv',
                    help='new CSV file with file and package size statistics')
args = parser.parse_args()

# Word used in the report heading for the kind of entry being compared
keyword = 'file' if args.detail else 'package'

# argparse.FileType opens both files while parsing the arguments but never
# closes them; close them explicitly once they have been read, also when
# read_file_size_csv() bails out on an unexpected header.
try:
    old_sizes = read_file_size_csv(args.old_file_size_csv, args.detail)
    new_sizes = read_file_size_csv(args.new_file_size_csv, args.detail)
finally:
    args.old_file_size_csv.close()
    args.new_file_size_csv.close()

delta = compare_sizes(old_sizes, new_sizes)

print('Size difference per %s (bytes), threshold = %s' % (keyword, args.threshold))
print(80*'-')
print_results(delta, args.threshold)
print(80*'-')
# The total is never filtered, hence threshold=None
print_results({'TOTAL': ('', sum(new_sizes.values()) - sum(old_sizes.values()))},
              threshold=None)