From patchwork Wed Oct 21 14:47:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Peter_M=C3=BCller?= X-Patchwork-Id: 3601 Return-Path: Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by web04.haj.ipfire.org (Postfix) with ESMTPS id 4CGYJB65d8z3wgw for ; Wed, 21 Oct 2020 14:47:54 +0000 (UTC) Received: from mail02.haj.ipfire.org (mail02.haj.ipfire.org [172.28.1.201]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CGYJB23k8z1W1; Wed, 21 Oct 2020 14:47:54 +0000 (UTC) Received: from mail02.haj.ipfire.org (localhost [127.0.0.1]) by mail02.haj.ipfire.org (Postfix) with ESMTP id 4CGYJB03Skz30ZK; Wed, 21 Oct 2020 14:47:54 +0000 (UTC) Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail01.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail02.haj.ipfire.org (Postfix) with ESMTPS id 4CGYJ85pj4z2xZr for ; Wed, 21 Oct 2020 14:47:52 +0000 (UTC) Received: from location02.haj.ipfire.org (location02.haj.ipfire.org [172.28.1.170]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "location02.haj.ipfire.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4CGYJ71xw9zsJ; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) Received: by location02.haj.ipfire.org (Postfix, from userid 0) id 4CGYJ70K8MzylJ; Wed, 21 Oct 2020 14:47:51 +0000 (UTC) From: =?utf-8?q?Peter_M=C3=BCller?= To: location@lists.ipfire.org Subject: [PATCH 2/8] Revert "Revert "location-importer.in: only import relevant data from AFRINIC, APNIC and RIPE"" Date: Wed, 21 Oct 2020 14:47:37 +0000 Message-Id: <20201021144743.18083-2-peter.mueller@ipfire.org> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201021144743.18083-1-peter.mueller@ipfire.org> References: <20201021144743.18083-1-peter.mueller@ipfire.org> MIME-Version: 1.0 X-BeenThere: location@lists.ipfire.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: location-bounces@lists.ipfire.org Sender: "Location" This reverts commit 13f67f285856e8eabfeff2daf1be3aeaa36a82cc. Signed-off-by: Peter Müller --- src/python/location-importer.in | 89 ++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/src/python/location-importer.in b/src/python/location-importer.in index b220eaf..d249a35 100644 --- a/src/python/location-importer.in +++ b/src/python/location-importer.in @@ -165,6 +165,7 @@ class CLI(object): -- networks CREATE TABLE IF NOT EXISTS networks(network inet, country text); CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); + CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); -- overrides @@ -365,6 +366,16 @@ class CLI(object): CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL) ON COMMIT DROP; CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); + + CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL) + ON COMMIT DROP; + CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); + CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); + """) + + # Remove all previously imported content + self.db.execute(""" + TRUNCATE TABLE networks; """) for source in location.importer.WHOIS_SOURCES: @@ -372,6 +383,67 @@ class CLI(object): for block in f: self._parse_block(block) + # Process all parsed networks from every RIR we happen to have access to, + # insert the largest network chunks into the networks table immediately... + families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)") + + for family in (row.family for row in families): + smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family) + + self.db.execute("INSERT INTO networks(network, country) \ + SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) + + # ... determine any other prefixes for this network family, ... + prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ + WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family) + + # ... and insert networks with this prefix in case they provide additional + # information (i. e. subnet of a larger chunk with a different country) + for prefix in (row.prefix for row in prefixes): + self.db.execute(""" + WITH candidates AS ( + SELECT + _rirdata.network, + _rirdata.country + FROM + _rirdata + WHERE + family(_rirdata.network) = %s + AND + masklen(_rirdata.network) = %s + ), + filtered AS ( + SELECT + DISTINCT ON (c.network) + c.network, + c.country, + masklen(networks.network), + networks.country AS parent_country + FROM + candidates c + LEFT JOIN + networks + ON + c.network << networks.network + ORDER BY + c.network, + masklen(networks.network) DESC NULLS LAST + ) + INSERT INTO + networks(network, country) + SELECT + network, + country + FROM + filtered + WHERE + parent_country IS NULL + OR + country <> parent_country + ON CONFLICT DO NOTHING""", + family, prefix, + ) + self.db.execute(""" INSERT INTO autnums(number, name) SELECT _autnums.number, _organizations.name FROM _autnums @@ -472,17 +544,30 @@ class CLI(object): inetnum[key] = val.upper() # Skip empty objects - if not inetnum: + if not inetnum or not "country" in inetnum: return network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False) + # Bail out in case we have processed a network covering the entire IP range, which + # is necessary to work around faulty (?) IPv6 network processing + if network.prefixlen == 0: + logging.warning("Skipping network covering the entire IP adress range: %s" % network) + return + + # Bail out in case we have processed a network whose prefix length indicates it is + # not globally routable (we have decided not to process them at the moment, as they + # significantly enlarge our database without providing very helpful additional information) + if (network.prefixlen > 24 and network.version == 4) or (network.prefixlen > 48 and network.version == 6): + logging.info("Skipping network too small to be publicly announced: %s" % network) + return + # Bail out in case we have processed a non-public IP network if network.is_private: logging.warning("Skipping non-globally routable network: %s" % network) return - self.db.execute("INSERT INTO networks(network, country) \ + self.db.execute("INSERT INTO _rirdata(network, country) \ VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", "%s" % network, inetnum.get("country"), )