implement suite structure to orchestrate merging - amprolla - devuan's apt repo merger
 (HTM) git clone git://parazyd.org/amprolla.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit b0080eb9b53c778c81f33cb5f6a978945ee939d1
 (DIR) parent cea8d90386986eecc2ae3cecbd2f87a8470dfa88
 (HTM) Author: parazyd <parazyd@dyne.org>
       Date:   Mon, 29 May 2017 16:54:24 +0200
       
       implement suite structure to orchestrate merging
       
       Diffstat:
         M amprolla-merge                      |     100 ++++++++++++++++++++++---------
         M lib/config.py                       |       8 +++-----
         M lib/package.py                      |      20 ++++++++++++++------
       
       3 files changed, 88 insertions(+), 40 deletions(-)
       ---
 (DIR) diff --git a/amprolla-merge b/amprolla-merge
       @@ -3,40 +3,46 @@
        Amprolla main module
        """
        
       -import sys
        from os.path import join
        from time import time
        
        from lib.package import (write_packages, load_packages_file,
                                 merge_packages_many)
       -from lib.config import banpkgs
       +from lib.config import (aliases, banpkgs, repo_order, repos,
       +                        spooldir, suites)
        
       -roots = {
       -    'devuan': 'spool/devuan/dists/jessie',
       -    'debian': 'spool/debian/dists/jessie',
       -    'debian-sec': 'spool/dists/jessie/updates',
       -}
        
       -#devuan_release_contents = open(join(roots['devuan'], 'Release')).read()
       -#debian_release_contents = open(join(roots['debian'], 'Release')).read()
       -#devuan_release = parse_release(devuan_release_contents)
       -#debian_release = parse_release(debian_release_contents)
       -#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys()))
       -#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys()))
       +def prepare_merge_dict():
       +    """
       +    This function will prepare a dict of lists that contain the repos
       +    that need to be merged in an ordered fashion. Orders them using the
       +    repo_order list found in lib.config
       +    Example output:
       +        { ascii: ['ascii', None, 'stretch'] },
       +    """
       +    merge_dict = {}
        
       -#packages_file = 'main/binary-armhf/Packages.gz'
       -packages_file = sys.argv[1]
       +    for suite in suites:
       +        for i in suites[suite]:
       +            merge_dict[i] = []
        
       -t1 = time()
       -print('Loading packages: %s' % packages_file)
       +    for suite in merge_dict:
       +        for repo in repo_order:
       +            tmpsuite = suite
       +            if repos[repo]['aliases'] is True:
       +                if tmpsuite in aliases[repos[repo]['name']]:
       +                    tmpsuite = aliases[repos[repo]['name']][suite]
       +                elif repos[repo]['skipmissing'] is True:
       +                    tmpsuite = None
       +                skips = ['jessie-security', 'ascii-security']
       +                if repo == 'debian' and suite in skips:
       +                    tmpsuite = None
       +            if tmpsuite:  # make it a proper path
       +                tmpsuite = join(spooldir, repos[repo]['dists'], tmpsuite)
       +            merge_dict[suite].append(tmpsuite)
        
       -devuan = load_packages_file(join(roots['devuan'], packages_file))
       -debian = load_packages_file(join(roots['debian'], packages_file))
       -debian_sec = load_packages_file(join(roots['debian-sec'], packages_file))
       +    return merge_dict
        
       -all_repos = [{'name': 'devuan', 'packages': devuan},
       -             {'name': 'debian-sec', 'packages': debian_sec},
       -             {'name': 'debian', 'packages': debian}]
        
        def devuan_rewrite(pkg, repo_name):
            """
       @@ -51,11 +57,47 @@ def devuan_rewrite(pkg, repo_name):
            return pkg
        
        
       -print('Merging packages')
       -new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs, rewriter=devuan_rewrite)
       +def merge(packages_list):
       +    t1 = time()
       +
       +    all_repos = []
       +    print('Loading packages: %s' % packages_list)
       +
       +    devuan = load_packages_file(packages_list[0])
       +    if devuan:
       +        all_repos.append({'name': 'devuan', 'packages': devuan})
       +
       +    debian_sec = load_packages_file(packages_list[1])
       +    if debian_sec:
       +        all_repos.append({'name': 'debian-sec', 'packages': debian_sec})
       +
       +    debian = load_packages_file(packages_list[2])
       +    if debian:
       +        all_repos.append({'name': 'debian', 'packages': debian})
       +
       +    print('Merging packages')
       +    new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs, rewriter=devuan_rewrite)
       +
       +    print('Writing packages')
       +    write_packages(new_pkgs, 'Packages.merged')
       +
       +    t2 = time()
       +    print('time:', t2-t1)
       +
       +
       +packages_file = 'main/binary-armhf/Packages.gz'
       +to_merge = prepare_merge_dict()
       +
       +tt1 = time()
       +for suite in to_merge:
       +    pkg_list = []
       +    for rep in to_merge[suite]:
       +        if rep:
       +            pkg_list.append(join(rep, packages_file))
       +        else:
       +            pkg_list.append(None)
        
       -print('Writing packages')
       -write_packages(new_pkgs, 'Packages.merged')
       +    merge(pkg_list)
        
       -t2 = time()
       -print('time:', t2-t1)
       +tt2 = time()
       +print('total time:', tt2-tt1)
 (DIR) diff --git a/lib/config.py b/lib/config.py
       @@ -7,7 +7,9 @@ sign_key = 'fa1b0274'
        mergedir = './merged'
        mergedsubdirs = ['dists', 'pool']
        banpkgs = {'systemd', 'systemd-sysv'}
       -#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
       +# checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
       +
       +repo_order = ['devuan', 'debian-sec', 'debian']
        
        repos = {
            'devuan': {
       @@ -17,7 +19,6 @@ repos = {
                'pool': 'devuan/pool',
                'aliases': False,
                'skipmissing': False,
       -        'priority': 0,
            },
            'debian-sec': {
                'name': 'DEBIAN-SECURITY',
       @@ -26,17 +27,14 @@ repos = {
                'pool': 'pool',
                'aliases': True,
                'skipmissing': True,
       -        'priority': 1,
            },
            'debian': {
                'name': 'DEBIAN',
       -        #'host': 'httpredir.debian.org',
                'host': 'http://ftp.debian.org',
                'dists': 'debian/dists',
                'pool': 'debian/pool',
                'aliases': True,
                'skipmissing': False,
       -        'priority': 2,
            }
        }
        
 (DIR) diff --git a/lib/package.py b/lib/package.py
       @@ -3,6 +3,7 @@ from gzip import open as gzip_open
        from lib.parse import (parse_packages, parse_dependencies)
        from lib.config import packages_keys
        
       +
        def write_packages(packages, filename, sort=True):
            """
            Writes `packages` to a file (per debian Packages format)
       @@ -22,20 +23,24 @@ def write_packages(packages, filename, sort=True):
        
            f.close()
        
       +
        def load_packages_file(filename):
            """ Load a gzip'd packages file.
            Returns a dictionary of package name and package key-values.
            """
       -    packages_contents = gzip_open(filename).read()
       -    packages_contents = packages_contents.decode('utf-8')
       -    return parse_packages(packages_contents)
       +    if filename is not None:
       +        packages_contents = gzip_open(filename).read()
       +        packages_contents = packages_contents.decode('utf-8')
       +        return parse_packages(packages_contents)
       +
       +    return None
        
        
        def package_banned(pkg, banned_pkgs):
            """
            Returns True if the package contains a banned dependency.
       -    Currently checks and parses both the 'Depends:' and the 'Pre-Depends' fields
       -    of the package.
       +    Currently checks and parses both the 'Depends:' and the 'Pre-Depends'
       +    fields of the package.
            """
            if pkg.get('Package') in banned_pkgs:
                return True
       @@ -85,13 +90,16 @@ def merge_packages(pkg1, pkg2, name1, name2, banned_packages=set(),
        
            return new_pkgs
        
       +
        def merge_packages_many(packages, banned_packages=set(), rewriter=None):
            """
            Merges two (or more) previously loaded/parsed (using load_packages_file)
            packages dictionaries, priority is defined by the order of the `packages`
            list, optionally discarding any banned packages.
            """
       -    assert len(packages) > 1
       +    assert len(packages) > 1  # TODO: what to do when there is only one?
       +    # a situation arises when the file exists, but it just has the gzip
       +    # header, rather than any content
        
            new_pkgs = {}