[4/8] location-importer.in: filter bogus IP networks for both Whois and extended sources

Message ID 20201021144743.18083-4-peter.mueller@ipfire.org
State Accepted
Commit bd341642fc6bbcc050e9b4ec5124585c83cab84d
Headers
Series [1/8] Revert "Revert "Revert "Revert "importer: Import raw sources for inetnum's again"""" |

Commit Message

Peter Müller Oct. 21, 2020, 2:47 p.m. UTC
  Sanity checks for parsed networks have been put into a separate function
to avoid boilerplate code for extended sources. This makes the location
database less vulnerable to garbage written into RIR databases on
purpose or by chance.

Fixes: #12500

Signed-off-by: Peter Müller <peter.mueller@ipfire.org>
---
 src/python/location-importer.in | 83 ++++++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 16 deletions(-)
  

Patch

diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index d249a35..20eb052 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -459,6 +459,69 @@  class CLI(object):
 					for line in f:
 						self._parse_line(line)
 
+	def _check_parsed_network(self, network):
+		"""
+			Assistive function to detect and subsequently sort out parsed
+			networks from RIR data (both Whois and so-called "extended sources"),
+			which...
+
+			(a) are not globally routable (RFC 1918 space, et al.)
+			(b) cover too large a chunk of the IP address space (prefix length
+				is < 7 for IPv4 networks, and < 10 for IPv6)
+			(c) have "0.0.0.0" or "::" as a network address
+			(d) are too small for being publicly announced (prefix length is > 24
+				for IPv4 networks, and > 48 for IPv6); we do not process them at the
+				moment, as they enlarge our database without adding much information
+
+			This unfortunately is necessary due to brain-dead clutter across
+			various RIR databases, causing mismatches and eventually disruptions.
+
+			Returns False if the network is not suitable for our database (or is not
+			an IPv4Network/IPv6Network object at all), and True otherwise.
+		"""
+
+		if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
+			return False
+
+		if not network.is_global:
+			logging.warning("Skipping non-globally routable network: %s" % network)
+			return False
+
+		if network.version == 4:
+			if network.prefixlen < 7:
+				logging.warning("Skipping too big IP chunk: %s" % network)
+				return False
+
+			if network.prefixlen > 24:
+				logging.info("Skipping network too small to be publicly announced: %s" % network)
+				return False
+
+			if str(network.network_address) == "0.0.0.0":
+				logging.warning("Skipping network based on 0.0.0.0: %s" % network)
+				return False
+
+		elif network.version == 6:
+			if network.prefixlen < 10:
+				logging.warning("Skipping too big IP chunk: %s" % network)
+				return False
+
+			if network.prefixlen > 48:
+				logging.info("Skipping network too small to be publicly announced: %s" % network)
+				return False
+
+			if str(network.network_address) == "::":
+				logging.warning("Skipping network based on '::': %s" % network)
+				return False
+
+		else:
+			# Neither IPv4 nor IPv6: should be unreachable after the isinstance check above
+			logging.warning("Skipping network of unknown family, this should not happen: %s" % network)
+			return False
+
+		# In case we have made it here, the network is considered to
+		# be suitable for libloc consumption...
+		return True
+
 	def _parse_block(self, block):
 		# Get first line to find out what type of block this is
 		line = block[0]
@@ -549,22 +612,7 @@  class CLI(object):
 
 		network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
 
-		# Bail out in case we have processed a network covering the entire IP range, which
-		# is necessary to work around faulty (?) IPv6 network processing
-		if network.prefixlen == 0:
-			logging.warning("Skipping network covering the entire IP adress range: %s" % network)
-			return
-
-		# Bail out in case we have processed a network whose prefix length indicates it is
-		# not globally routable (we have decided not to process them at the moment, as they
-		# significantly enlarge our database without providing very helpful additional information)
-		if (network.prefixlen > 24 and network.version == 4) or (network.prefixlen > 48 and network.version == 6):
-			logging.info("Skipping network too small to be publicly announced: %s" % network)
-			return
-
-		# Bail out in case we have processed a non-public IP network
-		if network.is_private:
-			logging.warning("Skipping non-globally routable network: %s" % network)
+		if not self._check_parsed_network(network):
 			return
 
 		self.db.execute("INSERT INTO _rirdata(network, country) \
@@ -648,6 +696,9 @@  class CLI(object):
 			log.warning("Invalid IP address: %s" % address)
 			return
 
+		if not self._check_parsed_network(network):
+			return
+
 		self.db.execute("INSERT INTO networks(network, country) \
 			VALUES(%s, %s) ON CONFLICT (network) DO \
 			UPDATE SET country = excluded.country",