From patchwork Wed Oct 21 14:47:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Peter_M=C3=BCller?= X-Patchwork-Id: 3595 Return-Path: Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by web04.haj.ipfire.org (Postfix) with ESMTPS id 4CGYJ94YpWz3wgl for ; Wed, 21 Oct 2020 14:47:53 +0000 (UTC) Received: from mail02.haj.ipfire.org (mail02.haj.ipfire.org [172.28.1.201]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CGYJ92Cvnz11j; Wed, 21 Oct 2020 14:47:53 +0000 (UTC) Received: from mail02.haj.ipfire.org (localhost [127.0.0.1]) by mail02.haj.ipfire.org (Postfix) with ESMTP id 4CGYJ86KlTz2yP9; Wed, 21 Oct 2020 14:47:52 +0000 (UTC) Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail02.haj.ipfire.org (Postfix) with ESMTPS id 4CGYJ75bxQz2xlV for ; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) Received: from location02.haj.ipfire.org (location02.haj.ipfire.org [172.28.1.170]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "location02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CGYJ72cfRz11D; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) Received: by location02.haj.ipfire.org (Postfix, from userid 0) id 4CGYJ70dsBz13cW; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) From: =?utf-8?q?Peter_M=C3=BCller?= To: location@lists.ipfire.org Subject: [PATCH 4/8] location-importer.in: filter bogus IP networks for both Whois and extended sources Date: Wed, 21 Oct 2020 14:47:39 +0000 Message-Id: <20201021144743.18083-4-peter.mueller@ipfire.org> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201021144743.18083-1-peter.mueller@ipfire.org> References: <20201021144743.18083-1-peter.mueller@ipfire.org> MIME-Version: 1.0 X-BeenThere: location@lists.ipfire.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: location-bounces@lists.ipfire.org Sender: "Location" Sanity checks for parsed networks have been put into a separate function to avoid boilerplate code for extended sources. This makes the location database less vulnerable to garbage written into RIR databases on purpose or by chance. Fixes: #12500 Signed-off-by: Peter Müller --- src/python/location-importer.in | 83 ++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/src/python/location-importer.in b/src/python/location-importer.in index d249a35..20eb052 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -459,6 +459,69 @@ class CLI(object): for line in f: self._parse_line(line) + def _check_parsed_network(self, network): + """ + Assistive function to detect and subsequently sort out parsed + networks from RIR data (both Whois and so-called "extended sources"), + which are or have... + + (a) not globally routable (RFC 1918 space, et al.) + (b) covering a too large chunk of the IP address space (prefix length + is < 7 for IPv4 networks, and < 10 for IPv6) + (c) "0.0.0.0" or "::" as a network address + (d) are too small for being publicly announced (we have decided not to + process them at the moment, as they significantly enlarge our + database without providing very helpful additional information) + + This unfortunately is necessary due to brain-dead clutter across + various RIR databases, causing mismatches and eventually disruptions. + + We will return False in case a network is not suitable for adding + it to our database, and True otherwise. + """ + + if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)): + return False + + if not network.is_global: + logging.warning("Skipping non-globally routable network: %s" % network) + return False + + if network.version == 4: + if network.prefixlen < 7: + logging.warning("Skipping too big IP chunk: %s" % network) + return False + + if network.prefixlen > 24: + logging.info("Skipping network too small to be publicly announced: %s" % network) + return False + + if str(network.network_address) == "0.0.0.0": + logging.warning("Skipping network based on 0.0.0.0: %s" % network) + return False + + elif network.version == 6: + if network.prefixlen < 10: + logging.warning("Skipping too big IP chunk: %s" % network) + return False + + if network.prefixlen > 48: + logging.info("Skipping network too small to be publicly announced: %s" % network) + return False + + if str(network.network_address) == "::": + logging.warning("Skipping network based on '::': %s" % network) + return False + + else: + # This should not happen... + logging.warning("Skipping network of unknown family, this should not happen: %s" % network) + return False + + # In case we have made it here, the network is considered to + # be suitable for libloc consumption... + return True + def _parse_block(self, block): # Get first line to find out what type of block this is line = block[0] @@ -549,22 +612,7 @@ class CLI(object): network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False) - # Bail out in case we have processed a network covering the entire IP range, which - # is necessary to work around faulty (?) IPv6 network processing - if network.prefixlen == 0: - logging.warning("Skipping network covering the entire IP adress range: %s" % network) - return - - # Bail out in case we have processed a network whose prefix length indicates it is - # not globally routable (we have decided not to process them at the moment, as they - # significantly enlarge our database without providing very helpful additional information) - if (network.prefixlen > 24 and network.version == 4) or (network.prefixlen > 48 and network.version == 6): - logging.info("Skipping network too small to be publicly announced: %s" % network) - return - - # Bail out in case we have processed a non-public IP network - if network.is_private: - logging.warning("Skipping non-globally routable network: %s" % network) + if not self._check_parsed_network(network): return self.db.execute("INSERT INTO _rirdata(network, country) \ @@ -648,6 +696,9 @@ class CLI(object): log.warning("Invalid IP address: %s" % address) return + if not self._check_parsed_network(network): + return + self.db.execute("INSERT INTO networks(network, country) \ VALUES(%s, %s) ON CONFLICT (network) DO \ UPDATE SET country = excluded.country",