export: Flatten the tree before exporting it
Commit Message
From: Michael Tremer <michael.tremer@ipfire.org>
This patch removes the possibility that any IP address ranges
might show up in multiple exported files.
If this was content from the database:
* 10.0.0.0/16 - DE
* 10.0.1.0/24 - FR
Then the IP address 10.0.1.1 would match for both countries.
The algorithm will now break the larger /16 subnet apart into
smaller subnets so that 10.0.1.0/24 is no longer overlapped.
There was some time spent on this to make this as efficient
as possible.
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
---
src/python/export.py | 154 ++++++++++++++++++++++++++++++-------------
1 file changed, 110 insertions(+), 44 deletions(-)
@@ -89,28 +89,55 @@ class OutputWriter(object):
def write(self, network, subnets):
if self.flatten and self._flatten(network):
- log.debug("Skipping writing network %s" % network)
+ log.debug("Skipping writing network %s (last one was %s)" % (network, self._last_network))
return
+ # Convert network into a Python object
+ _network = ipaddress.ip_network("%s" % network)
+
# Write the network when it has no subnets
if not subnets:
- network = ipaddress.ip_network("%s" % network)
- return self._write_network(network)
+ log.debug("Writing %s to %s" % (_network, self.f))
+ return self._write_network(_network)
+
+ # Convert subnets into Python objects
+ _subnets = [ipaddress.ip_network("%s" % subnet) for subnet in subnets]
+
+ # Split the network into smaller bits so that
+ # we can accomodate for any gaps in it later
+ to_check = set()
+ for _subnet in _subnets:
+ to_check.update(
+ _network.address_exclude(_subnet)
+ )
+
+ # Clear the list of all subnets
+ subnets = []
+
+ # Check if all subnets to not overlap with anything else
+ while to_check:
+ subnet_to_check = to_check.pop()
- # Collect all matching subnets
- matching_subnets = []
+ for _subnet in _subnets:
+ # Drop this subnet if it equals one of the subnets
+ # or if it is subnet of one of them
+ if subnet_to_check == _subnet or subnet_to_check.subnet_of(_subnet):
+ break
- for subnet in sorted(subnets):
- # Try to find the subnet in the database
- n = self.db.lookup("%s" % subnet.network_address)
+ # Break it down if it overlaps
+ if subnet_to_check.overlaps(_subnet):
+ to_check.update(
+ subnet_to_check.address_exclude(_subnet)
+ )
+ break
- # No entry or matching country means those IP addresses belong here
- if not n or n.country_code == network.country_code:
- matching_subnets.append(subnet)
+ # Add the subnet again as it passed the check
+ else:
+ subnets.append(subnet_to_check)
# Write all networks as compact as possible
- for network in ipaddress.collapse_addresses(matching_subnets):
- log.debug("Writing %s to database" % network)
+ for network in ipaddress.collapse_addresses(subnets):
+ log.debug("Writing %s to %s" % (network, self.f))
self._write_network(network)
def finish(self):
@@ -206,40 +233,40 @@ class Exporter(object):
# Get all networks that match the family
networks = self.db.search_networks(family=family)
- # Materialise the generator into a list (uses quite some memory)
- networks = list(networks)
+ # Create a stack with all networks in order where we can put items back
+ # again and retrieve them in the next iteration.
+ networks = BufferedStack(networks)
# Walk through all networks
- for i, network in enumerate(networks):
- _network = ipaddress.ip_network("%s" % network)
-
- # Search for all subnets
- subnets = set()
-
- while i < len(networks):
- subnet = networks[i+1]
-
- if subnet.is_subnet_of(network):
- _subnet = ipaddress.ip_network("%s" % subnet)
-
- subnets.add(_subnet)
- subnets.update(_network.address_exclude(_subnet))
-
- i += 1
- else:
+ for network in networks:
+ # Collect all networks which are a subnet of network
+ subnets = []
+ for subnet in networks:
+ # If the next subnet was not a subnet, we have to push
+ # it back on the stack and break this loop
+ if not subnet.is_subnet_of(network):
+ networks.push(subnet)
break
+ subnets.append(subnet)
+
# Write matching countries
- try:
- writers[network.country_code].write(network, subnets)
- except KeyError:
- pass
+ if network.country_code and network.country_code in writers:
+ # Mismatching subnets
+ gaps = [
+ subnet for subnet in subnets if not network.country_code == subnet.country_code
+ ]
+
+ writers[network.country_code].write(network, gaps)
# Write matching ASNs
- try:
- writers[network.asn].write(network, subnets)
- except KeyError:
- pass
+ if network.asn and network.asn in writers:
+ # Mismatching subnets
+ gaps = [
+ subnet for subnet in subnets if not network.asn == subnet.asn
+ ]
+
+ writers[network.asn].write(network, gaps)
# Handle flags
for flag in flags:
@@ -247,10 +274,19 @@ class Exporter(object):
# Fetch the "fake" country code
country = flags[flag]
- try:
- writers[country].write(network, subnets)
- except KeyError:
- pass
+ if not country in writers:
+ continue
+
+ gaps = [
+ subnet for subnet in subnets
+ if not subnet.has_flag(flag)
+ ]
+
+ writers[country].write(network, gaps)
+
+ # Push all subnets back onto the stack
+ for subnet in reversed(subnets):
+ networks.push(subnet)
# Write everything to the filesystem
for writer in writers.values():
@@ -262,3 +298,33 @@ class Exporter(object):
)
return os.path.join(directory, filename)
+
+
+class BufferedStack(object):
+ """
+ This class takes an iterator and when being iterated
+ over it returns objects from that iterator for as long
+ as there are any.
+
+ It additionally has a function to put an item back on
+ the back so that it will be returned again at the next
+ iteration.
+ """
+ def __init__(self, iterator):
+ self.iterator = iterator
+ self.stack = []
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.stack:
+ return self.stack.pop(0)
+
+ return next(self.iterator)
+
+ def push(self, elem):
+ """
+ Takes an element and puts it on the stack
+ """
+ self.stack.insert(0, elem)