From patchwork Tue Oct 27 17:14:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?Peter_M=C3=BCller?= X-Patchwork-Id: 3614 Return-Path: Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by web04.haj.ipfire.org (Postfix) with ESMTPS id 4CLJGq214Vz3wgn for ; Tue, 27 Oct 2020 17:14:43 +0000 (UTC) Received: from mail02.haj.ipfire.org (mail02.haj.ipfire.org [172.28.1.201]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CLJGp0WN7zsS; Tue, 27 Oct 2020 17:14:42 +0000 (UTC) Received: from mail02.haj.ipfire.org (localhost [127.0.0.1]) by mail02.haj.ipfire.org (Postfix) with ESMTP id 4CLJGn6np6z2xlB; Tue, 27 Oct 2020 17:14:41 +0000 (UTC) Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail02.haj.ipfire.org (Postfix) with ESMTPS id 4CLJGm30fMz2xjV for ; Tue, 27 Oct 2020 17:14:40 +0000 (UTC) Received: from location02.haj.ipfire.org (location02.haj.ipfire.org [172.28.1.170]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "location02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CLJGl4mQMzsS; Tue, 27 Oct 2020 17:14:39 +0000 (UTC) Received: by location02.haj.ipfire.org (Postfix, from userid 0) id 4CLJGl393Sz13Kb; Tue, 27 Oct 2020 17:14:39 +0000 (UTC) From: =?utf-8?q?Peter_M=C3=BCller?= To: location@lists.ipfire.org Subject: [PATCH] export: Flatten the tree before exporting it Date: Tue, 27 Oct 2020 17:14:30 +0000 Message-Id: <20201027171430.20785-1-peter.mueller@ipfire.org> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 X-BeenThere: location@lists.ipfire.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: location-bounces@lists.ipfire.org Sender: "Location" From: Michael Tremer This patch removes the possibility that any IP address ranges might show up in multiple exported files. If this was content from the database: * 10.0.0.0/16 - DE * 10.0.1.0/24 - FR Then the IP address 10.0.1.1 would match for both countries. The algorithm will now break the larger /16 subnet apart into smaller subnets so that 10.0.1.0/24 is no longer overlapped. There was some time spent on this to make this as efficient as possible. Signed-off-by: Michael Tremer --- src/python/export.py | 154 ++++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 44 deletions(-) diff --git a/src/python/export.py b/src/python/export.py index 5eaf43f..dd44332 100644 --- a/src/python/export.py +++ b/src/python/export.py @@ -89,28 +89,55 @@ class OutputWriter(object): def write(self, network, subnets): if self.flatten and self._flatten(network): - log.debug("Skipping writing network %s" % network) + log.debug("Skipping writing network %s (last one was %s)" % (network, self._last_network)) return + # Convert network into a Python object + _network = ipaddress.ip_network("%s" % network) + # Write the network when it has no subnets if not subnets: - network = ipaddress.ip_network("%s" % network) - return self._write_network(network) + log.debug("Writing %s to %s" % (_network, self.f)) + return self._write_network(_network) + + # Convert subnets into Python objects + _subnets = [ipaddress.ip_network("%s" % subnet) for subnet in subnets] + + # Split the network into smaller bits so that + # we can accomodate for any gaps in it later + to_check = set() + for _subnet in _subnets: + to_check.update( + _network.address_exclude(_subnet) + ) + + # Clear the list of all subnets + subnets = [] + + # Check if all subnets to not overlap with anything else + while to_check: + subnet_to_check = to_check.pop() - # Collect all matching subnets - matching_subnets = [] + for _subnet in _subnets: + # Drop this subnet if it equals one of the subnets + # or if it is subnet of one of them + if subnet_to_check == _subnet or subnet_to_check.subnet_of(_subnet): + break - for subnet in sorted(subnets): - # Try to find the subnet in the database - n = self.db.lookup("%s" % subnet.network_address) + # Break it down if it overlaps + if subnet_to_check.overlaps(_subnet): + to_check.update( + subnet_to_check.address_exclude(_subnet) + ) + break - # No entry or matching country means those IP addresses belong here - if not n or n.country_code == network.country_code: - matching_subnets.append(subnet) + # Add the subnet again as it passed the check + else: + subnets.append(subnet_to_check) # Write all networks as compact as possible - for network in ipaddress.collapse_addresses(matching_subnets): - log.debug("Writing %s to database" % network) + for network in ipaddress.collapse_addresses(subnets): + log.debug("Writing %s to %s" % (network, self.f)) self._write_network(network) def finish(self): @@ -206,40 +233,40 @@ class Exporter(object): # Get all networks that match the family networks = self.db.search_networks(family=family) - # Materialise the generator into a list (uses quite some memory) - networks = list(networks) + # Create a stack with all networks in order where we can put items back + # again and retrieve them in the next iteration. + networks = BufferedStack(networks) # Walk through all networks - for i, network in enumerate(networks): - _network = ipaddress.ip_network("%s" % network) - - # Search for all subnets - subnets = set() - - while i < len(networks): - subnet = networks[i+1] - - if subnet.is_subnet_of(network): - _subnet = ipaddress.ip_network("%s" % subnet) - - subnets.add(_subnet) - subnets.update(_network.address_exclude(_subnet)) - - i += 1 - else: + for network in networks: + # Collect all networks which are a subnet of network + subnets = [] + for subnet in networks: + # If the next subnet was not a subnet, we have to push + # it back on the stack and break this loop + if not subnet.is_subnet_of(network): + networks.push(subnet) break + subnets.append(subnet) + # Write matching countries - try: - writers[network.country_code].write(network, subnets) - except KeyError: - pass + if network.country_code and network.country_code in writers: + # Mismatching subnets + gaps = [ + subnet for subnet in subnets if not network.country_code == subnet.country_code + ] + + writers[network.country_code].write(network, gaps) # Write matching ASNs - try: - writers[network.asn].write(network, subnets) - except KeyError: - pass + if network.asn and network.asn in writers: + # Mismatching subnets + gaps = [ + subnet for subnet in subnets if not network.asn == subnet.asn + ] + + writers[network.asn].write(network, gaps) # Handle flags for flag in flags: @@ -247,10 +274,19 @@ class Exporter(object): # Fetch the "fake" country code country = flags[flag] - try: - writers[country].write(network, subnets) - except KeyError: - pass + if not country in writers: + continue + + gaps = [ + subnet for subnet in subnets + if not subnet.has_flag(flag) + ] + + writers[country].write(network, gaps) + + # Push all subnets back onto the stack + for subnet in reversed(subnets): + networks.push(subnet) # Write everything to the filesystem for writer in writers.values(): @@ -262,3 +298,33 @@ class Exporter(object): ) return os.path.join(directory, filename) + + +class BufferedStack(object): + """ + This class takes an iterator and when being iterated + over it returns objects from that iterator for as long + as there are any. + + It additionally has a function to put an item back on + the back so that it will be returned again at the next + iteration. + """ + def __init__(self, iterator): + self.iterator = iterator + self.stack = [] + + def __iter__(self): + return self + + def __next__(self): + if self.stack: + return self.stack.pop(0) + + return next(self.iterator) + + def push(self, elem): + """ + Takes an element and puts it on the stack + """ + self.stack.insert(0, elem)