5563a1c6a4
Currently, when a filename contains characters not representable in the user's locale, we fail hard, especially when the host python is python3. This is because python2 and python3 handle encoding/decoding strings differently, with python3 presumably doing the right thing, but it breaks on some systems, while python2 presumably does the wrong thing, but it works everywhere. (Just joking, obviously...) Part of the issue is that the csv reader in python2 is broken with UTF8. We fix the issue by ditching the csv reader, and simply reading the file in binary mode, manually partitioning the lines on the first comma. Then, we use the binary-encoded (really, un-encoded) package names and filenames as values and keys, respectively. Finally, for each filename or package we need to print, we try to decode them with the defaults for the user settings, but catch any decoding exception and fall back to dumping the raw, binary values. Which codec is used by default differs between Python versions, but in all cases something sane is printed at least. Thanks a lot to Arnout for the live help doing this patch. :-) Reported-by: Jaap Crezee <jaap@jcz.nl> Signed-off-by: "Yann E. MORIN" <yann.morin.1998@free.fr> Cc: Arnout Vandecappelle <arnout@mind.be> Cc: Jaap Crezee <jaap@jcz.nl> [Arnout: commit log improvement] Signed-off-by: Arnout Vandecappelle (Essensium/Mind) <arnout@mind.be>
50 lines
1.7 KiB
Python
Executable File
50 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import sys
|
|
import csv
|
|
import argparse
|
|
from collections import defaultdict
|
|
|
|
# Message template: {0} is the file type given with -t/--type, {1} the file
# name and {2} the list of packages that listed that file.
warn = 'Warning: {0} file "{1}" is touched by more than one package: {2}\n'


def _read_file_owners(list_path):
    """Return a mapping of file name to the distinct packages listing it.

    The list is read in binary mode so that names which cannot be decoded
    never abort the run.  Each line is split on its first comma into a
    package name and a file name; both are kept as raw byte strings.

    Lines that contain no comma (blank lines included) carry no file name
    and are skipped, so that they are not all lumped together under the
    empty file name.  A package is recorded only once per file, in
    first-seen order, so a file listed twice by the same package is not
    mistaken for a file shared between packages.
    """
    owners = defaultdict(list)
    with open(list_path, 'rb') as pkg_file_list:
        # Iterate the file object directly instead of readlines(): there is
        # no need to hold the whole list in memory.
        for line in pkg_file_list:
            pkg, sep, filename = line.rstrip(b'\n').partition(b',')
            if not sep:
                continue
            if pkg not in owners[filename]:
                owners[filename].append(pkg)
    return owners


def _format_warning(file_type, filename, pkgs):
    """Build the warning text for one file and the packages listing it.

    The byte strings are decoded with Python's default codec for
    bytes.decode() when possible; if any of them cannot be decoded, the raw
    values are formatted instead so that something is always printed.
    """
    try:
        return warn.format(file_type, filename.decode(),
                           [p.decode() for p in pkgs])
    except UnicodeDecodeError:
        return warn.format(file_type, filename, pkgs)


def main():
    """Warn on stderr about every file listed by more than one package.

    Command line: exactly one packages-file-list path, plus -t/--type TYPE
    naming the kind of file being reported.

    Returns False (after writing a message to stderr) when the
    packages-file-list or the type is missing, and None otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('packages_file_list', nargs='*',
                        help='The packages-file-list to check from')
    parser.add_argument('-t', '--type', metavar="TYPE",
                        help='Report as a TYPE file (TYPE is either target, staging, or host)')

    args = parser.parse_args()

    if len(args.packages_file_list) != 1:
        sys.stderr.write('No packages-file-list was provided.\n')
        return False

    if args.type is None:
        sys.stderr.write('No type was provided\n')
        return False

    owners = _read_file_owners(args.packages_file_list[0])

    for filename, pkgs in owners.items():
        if len(pkgs) > 1:
            sys.stderr.write(_format_warning(args.type, filename, pkgs))
|
|
|
|
|
|
if __name__ == "__main__":
    # main() returns False on a usage error and None otherwise.  Handing
    # False straight to sys.exit() would exit with status 0 (False == 0),
    # which hides the failure from the caller, so map it explicitly to a
    # non-zero exit status.
    sys.exit(1 if main() is False else 0)
|