From patchwork Wed Oct 21 14:47:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Peter_M=C3=BCller?= X-Patchwork-Id: 3594 Return-Path: Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by web04.haj.ipfire.org (Postfix) with ESMTPS id 4CGYJ94BMqz3wgF for ; Wed, 21 Oct 2020 14:47:53 +0000 (UTC) Received: from mail02.haj.ipfire.org (mail02.haj.ipfire.org [172.28.1.201]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CGYJ92CfGz11M; Wed, 21 Oct 2020 14:47:52 +0000 (UTC) Received: from mail02.haj.ipfire.org (localhost [127.0.0.1]) by mail02.haj.ipfire.org (Postfix) with ESMTP id 4CGYJ869NNz2xlV; Wed, 21 Oct 2020 14:47:52 +0000 (UTC) Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail02.haj.ipfire.org (Postfix) with ESMTPS id 4CGYJ75MPlz2xkC for ; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) Received: from location02.haj.ipfire.org (location02.haj.ipfire.org [172.28.1.170]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN 
"location02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CGYJ72Gwrzt1; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) Received: by location02.haj.ipfire.org (Postfix, from userid 0) id 4CGYJ70Vj7z13Zw; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) From: =?utf-8?q?Peter_M=C3=BCller?= To: location@lists.ipfire.org Subject: [PATCH 3/8] export.py: fix exporting IP networks for crappy xt_geoip module Date: Wed, 21 Oct 2020 14:47:38 +0000 Message-Id: <20201021144743.18083-3-peter.mueller@ipfire.org> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201021144743.18083-1-peter.mueller@ipfire.org> References: <20201021144743.18083-1-peter.mueller@ipfire.org> MIME-Version: 1.0 X-BeenThere: location@lists.ipfire.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: location-bounces@lists.ipfire.org Sender: "Location" In contrast to the location database itself, the xt_geoip module consumes a list of IP networks for each country, and returns after the first match. We therefore need to... 
(a) sort IP networks by their size, to allow matches that are as precise as possible (b) export _any_ IP networks - including inverted subnets - to prevent undefined overlaps (c) do the entire thing as fast as possible, consuming as little disk space as possible, which is why we can't just iterate over all /24 chunks Partially fixes: #12499 Signed-off-by: Michael Tremer Signed-off-by: Peter Müller --- src/python/export.py | 69 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/src/python/export.py b/src/python/export.py index d15c6f0..5eaf43f 100644 --- a/src/python/export.py +++ b/src/python/export.py @@ -39,8 +39,8 @@ class OutputWriter(object): suffix = "networks" mode = "w" - def __init__(self, f, prefix=None, flatten=True): - self.f, self.prefix, self.flatten = f, prefix, flatten + def __init__(self, db, f, prefix=None, flatten=True): + self.db, self.f, self.prefix, self.flatten = db, f, prefix, flatten # The previously written network self._last_network = None @@ -49,13 +49,13 @@ class OutputWriter(object): self._write_header() @classmethod - def open(cls, filename, **kwargs): + def open(cls, db, filename, **kwargs): """ Convenience function to open a file """ f = open(filename, cls.mode) - return cls(f, **kwargs) + return cls(db, f, **kwargs) def __repr__(self): return "<%s f=%s>" % (self.__class__.__name__, self.f) @@ -87,13 +87,31 @@ class OutputWriter(object): def _write_network(self, network): self.f.write("%s\n" % network) - def write(self, network): + def write(self, network, subnets): if self.flatten and self._flatten(network): log.debug("Skipping writing network %s" % network) return - # Write the network to file - self._write_network(network) + # Write the network when it has no subnets + if not subnets: + network = ipaddress.ip_network("%s" % network) + return self._write_network(network) + + # Collect all matching subnets + matching_subnets = [] + + for subnet in sorted(subnets): + # Try to find the 
subnet in the database + n = self.db.lookup("%s" % subnet.network_address) + + # No entry or matching country means those IP addresses belong here + if not n or n.country_code == network.country_code: + matching_subnets.append(subnet) + + # Write all networks as compact as possible + for network in ipaddress.collapse_addresses(matching_subnets): + log.debug("Writing %s to database" % network) + self._write_network(network) def finish(self): """ @@ -143,10 +161,10 @@ class XTGeoIPOutputWriter(OutputWriter): mode = "wb" def _write_network(self, network): - for address in (network.first_address, network.last_address): + for address in (network.network_address, network.broadcast_address): # Convert this into a string of bits bytes = socket.inet_pton( - network.family, address, + socket.AF_INET6 if network.version == 6 else socket.AF_INET, "%s" % address, ) self.f.write(bytes) @@ -175,7 +193,7 @@ class Exporter(object): directory, prefix=country_code, suffix=self.writer.suffix, family=family, ) - writers[country_code] = self.writer.open(filename, prefix="CC_%s" % country_code) + writers[country_code] = self.writer.open(self.db, filename, prefix="CC_%s" % country_code) # Create writers for ASNs for asn in asns: @@ -183,22 +201,43 @@ class Exporter(object): directory, "AS%s" % asn, suffix=self.writer.suffix, family=family, ) - writers[asn] = self.writer.open(filename, prefix="AS%s" % asn) + writers[asn] = self.writer.open(self.db, filename, prefix="AS%s" % asn) # Get all networks that match the family networks = self.db.search_networks(family=family) + # Materialise the generator into a list (uses quite some memory) + networks = list(networks) + # Walk through all networks - for network in networks: + for i, network in enumerate(networks): + _network = ipaddress.ip_network("%s" % network) + + # Search for all subnets + subnets = set() + + while i < len(networks): + subnet = networks[i+1] + + if subnet.is_subnet_of(network): + _subnet = ipaddress.ip_network("%s" % subnet) 
+ + subnets.add(_subnet) + subnets.update(_network.address_exclude(_subnet)) + + i += 1 + else: + break + # Write matching countries try: - writers[network.country_code].write(network) + writers[network.country_code].write(network, subnets) except KeyError: pass # Write matching ASNs try: - writers[network.asn].write(network) + writers[network.asn].write(network, subnets) except KeyError: pass @@ -209,7 +248,7 @@ class Exporter(object): country = flags[flag] try: - writers[country].write(network) + writers[country].write(network, subnets) except KeyError: pass