Re-introducing inetnum parser, first attempt

Message ID 46ac5f3c-36e4-c1a5-a9f6-fd9f6f4d21e9@ipfire.org
State Superseded
Headers show
Series
  • Re-introducing inetnum parser, first attempt
Related show

Commit Message

Peter Müller Sept. 1, 2020, 7:44 p.m. UTC
Good evening Michael,

below is the diff of a rather hacky attempt to bring back the inetnum parser we
had in the past. Since I am not quite sure about the INSERT INTO networks() SQL statement
and about handling conflicts with the extended source files there, I thought letting you have a
look at it might be a good idea. :-)

What do you think? Is this the right direction?

Thanks, and best regards,
Peter Müller

Patch

diff --git a/src/python/importer.py b/src/python/importer.py
index de20f37..586bd97 100644
--- a/src/python/importer.py
+++ b/src/python/importer.py
@@ -30,8 +30,8 @@  WHOIS_SOURCES = (
 	"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
 
 	# Asia Pacific Network Information Centre
-	#"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
-	#"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
+	"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
+	"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
 	#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
 	#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
 	"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
@@ -45,8 +45,8 @@  WHOIS_SOURCES = (
 	# XXX ???
 
 	# Réseaux IP Européens
-	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
-	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
+	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
+	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
 	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
 	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
 	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index f5ae4a9..4d7cec4 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -393,6 +393,10 @@  class CLI(object):
 		if line.startswith("aut-num:"):
 			return self._parse_autnum_block(block)
 
+		# inetnum
+		if line.startswith("inet6num:") or line.startswith("inetnum:"):
+			return self._parse_inetnum_block(block)
+
 		# organisation
 		elif line.startswith("organisation:"):
 			return self._parse_org_block(block)
@@ -422,6 +426,78 @@  class CLI(object):
 			autnum.get("asn"), autnum.get("org"),
 		)
 
+	def _parse_inetnum_block(self, block):
+		logging.debug("Parsing inetnum block:")
+
+		inetnum = {}
+		for line in block:
+			logging.debug(line)
+
+			# Split line
+			key, val = split_line(line)
+
+			if key == "inetnum":
+				start_address, delim, end_address = val.partition("-")
+
+				# Strip any excess space
+				start_address, end_address = start_address.rstrip(), end_address.strip()
+
+				# Skip invalid blocks
+				if start_address in ["0.0.0.0", "::/0", "0::/0",]:
+					return
+
+				# Convert to IP address
+				try:
+					start_address = ipaddress.ip_address(start_address)
+					end_address   = ipaddress.ip_address(end_address)
+				except ValueError:
+					logging.warning("Could not parse line: %s" % line)
+					return
+
+				# Set prefix to default
+				prefix = 32
+
+				# Count number of addresses in this subnet
+				num_addresses = int(end_address) - int(start_address)
+				if num_addresses:
+					prefix -= math.log(num_addresses, 2)
+
+				inetnum["inetnum"] = "%s/%.0f" % (start_address, prefix)
+
+			elif key == "inet6num":
+				# Skip invalid blocks
+				if val in ["0.0.0.0", "::/0", "0::/0",]:
+					return
+
+				inetnum[key] = val
+
+			elif key == "netname":
+				inetnum[key] = val
+
+			elif key == "country":
+				if val == "UNITED STATES":
+					val = "US"
+
+				inetnum[key] = val.upper()
+
+			elif key == "descr":
+				if key in inetnum:
+					inetnum[key] += "\n%s" % val
+				else:
+					inetnum[key] = val
+
+		# Skip empty objects
+		if not inetnum:
+			return
+
+		network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
+
+		self.db.execute("INSERT INTO networks(network, country) \
+			VALUES(%s, %s) ON CONFLICT (network) DO \
+			UPDATE SET country = excluded.country",
+			"%s" % str(network), inetnum.get("country"),
+		)
+
 	def _parse_org_block(self, block):
 		org = {}
 		for line in block: