From patchwork Fri Oct 30 14:35:10 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Peter_M=C3=BCller?= X-Patchwork-Id: 3620 Return-Path: Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by web04.haj.ipfire.org (Postfix) with ESMTPS id 4CN4bT3CMKz3wgn for ; Fri, 30 Oct 2020 14:35:17 +0000 (UTC) Received: from mail02.haj.ipfire.org (mail02.haj.ipfire.org [172.28.1.201]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CN4bT0Zzyzm3; Fri, 30 Oct 2020 14:35:17 +0000 (UTC) Received: from mail02.haj.ipfire.org (localhost [127.0.0.1]) by mail02.haj.ipfire.org (Postfix) with ESMTP id 4CN4bS6xxKz2xq4; Fri, 30 Oct 2020 14:35:16 +0000 (UTC) Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail02.haj.ipfire.org (Postfix) with ESMTPS id 4CN4bR3ZHmz2xcS for ; Fri, 30 Oct 2020 14:35:15 +0000 (UTC) Received: from location02.haj.ipfire.org (location02.haj.ipfire.org [172.28.1.170]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "location02.haj.ipfire.org", Issuer "Let's 
Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CN4bR0ZFSzm3; Fri, 30 Oct 2020 14:35:15 +0000 (UTC) Received: by location02.haj.ipfire.org (Postfix, from userid 0) id 4CN4bQ6mQyz139Y; Fri, 30 Oct 2020 14:35:14 +0000 (UTC) From: =?utf-8?q?Peter_M=C3=BCller?= To: location@lists.ipfire.org Subject: [PATCH] location-importer.in: skip networks with unknown country codes Date: Fri, 30 Oct 2020 14:35:10 +0000 Message-Id: <20201030143510.6514-1-peter.mueller@ipfire.org> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 X-BeenThere: location@lists.ipfire.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: location-bounces@lists.ipfire.org Sender: "Location" There is no sense in parsing and storing networks whose country codes cannot be found in the ISO-3166-x country code table. This avoids side effects in applications using the location database, and introduces another sanity check to compensate for bogus RIR data. On location02, this affects some networks from APNIC (country code: ZZ) as well as a bunch of smaller allocations within the RIPE region still tagged to CS or YU (Yugoslavia). To my surprise, no network tagged as SU (Soviet Union) was found - while the NIC for the .su TLD is still operational. :-) Fixes: #12510 Signed-off-by: Peter Müller --- src/python/location-importer.in | 42 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/python/location-importer.in b/src/python/location-importer.in index 864eab1..89b556a 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -388,10 +388,17 @@ class CLI(object): TRUNCATE TABLE networks; """) + # Fetch all valid country codes to check parsed networks against... 
+ rows = self.db.query("SELECT * FROM countries ORDER BY country_code") + validcountries = [] + + for row in rows: + validcountries.append(row.country_code) + for source in location.importer.WHOIS_SOURCES: with downloader.request(source, return_blocks=True) as f: for block in f: - self._parse_block(block) + self._parse_block(block, validcountries) # Process all parsed networks from every RIR we happen to have access to, # insert the largest network chunks into the networks table immediately... @@ -467,7 +474,7 @@ class CLI(object): # Download data with downloader.request(source) as f: for line in f: - self._parse_line(line) + self._parse_line(line, validcountries) def _check_parsed_network(self, network): """ @@ -532,7 +539,7 @@ class CLI(object): # be suitable for libloc consumption... return True - def _parse_block(self, block): + def _parse_block(self, block, validcountries = None): # Get first line to find out what type of block this is line = block[0] @@ -542,7 +549,7 @@ class CLI(object): # inetnum if line.startswith("inet6num:") or line.startswith("inetnum:"): - return self._parse_inetnum_block(block) + return self._parse_inetnum_block(block, validcountries) # organisation elif line.startswith("organisation:"): @@ -573,7 +580,7 @@ class CLI(object): autnum.get("asn"), autnum.get("org"), ) - def _parse_inetnum_block(self, block): + def _parse_inetnum_block(self, block, validcountries = None): log.debug("Parsing inetnum block:") inetnum = {} @@ -624,17 +631,17 @@ class CLI(object): if not inetnum or not "country" in inetnum: return - # Skip objects with bogus country code 'ZZ' - if inetnum.get("country") == "ZZ": - log.warning("Skipping network with bogus country 'ZZ': %s" % \ - (inetnum.get("inet6num") or inetnum.get("inetnum"))) - return - network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False) if not self._check_parsed_network(network): return + # Skip objects with unknown country codes + if validcountries and 
inetnum.get("country") not in validcountries: + log.warning("Skipping network with bogus country '%s': %s" % \ + (inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) + return + self.db.execute("INSERT INTO _rirdata(network, country) \ VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", "%s" % network, inetnum.get("country"), @@ -659,7 +666,7 @@ class CLI(object): org.get("organisation"), org.get("org-name"), ) - def _parse_line(self, line): + def _parse_line(self, line, validcountries = None): # Skip version line if line.startswith("2"): return @@ -674,8 +681,15 @@ class CLI(object): log.warning("Could not parse line: %s" % line) return - # Skip any lines that are for stats only - if country_code == "*": + # Skip any lines that are for stats only or do not have a country + # code at all (avoids log spam below) + if not country_code or country_code == '*': + return + + # Skip objects with unknown country codes + if validcountries and country_code not in validcountries: + log.warning("Skipping line with bogus country '%s': %s" % \ + (country_code, line)) return if type in ("ipv6", "ipv4"):