[v2,2/2] location-importer.in: import additional IP information for Amazon AWS IP networks

Message ID 20210608095541.5050-2-peter.mueller@ipfire.org
State Accepted
Commit dcef2ba4773bf1692849823dccf0121dba23cb4e
Headers
Series [v2,1/2] location-importer.in: add source column for overrides as well |

Commit Message

Peter Müller June 8, 2021, 9:55 a.m. UTC
  Amazon publishes information regarding some of their IP networks
primarily used for AWS cloud services in a machine-readable format. To
improve libloc lookup results for these, we have little choice other
than importing and parsing them.

Unfortunately, there seems to be no machine-readable list of the
locations of their data centers or availability zones available. If
there _is_ any, please let the author know.

The second version of this patch adds a meaningful description for the
"source" column in the overrides tables, to make introduced changes
more transparent.

Fixes: #12594

Signed-off-by: Peter Müller <peter.mueller@ipfire.org>
---
 src/python/location-importer.in | 114 ++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
  

Patch

diff --git a/src/python/location-importer.in b/src/python/location-importer.in
index 78bfd55..4acd972 100644
--- a/src/python/location-importer.in
+++ b/src/python/location-importer.in
@@ -19,6 +19,7 @@ 
 
 import argparse
 import ipaddress
+import json
 import logging
 import math
 import re
@@ -976,6 +977,10 @@  class CLI(object):
 				TRUNCATE TABLE network_overrides;
 			""")
 
+			# Update overrides for various cloud providers big enough to publish their own IP
+			# network allocation lists in a machine-readable format...
+			self._update_overrides_for_aws()
+
 			for file in ns.files:
 				log.info("Reading %s..." % file)
 
@@ -1051,6 +1056,115 @@  class CLI(object):
 						else:
 							log.warning("Unsupported type: %s" % type)
 
+	def _update_overrides_for_aws(self):
+		"""Import the Amazon AWS IP range feed into the network_overrides table.
+
+		Each prefix is mapped to a country code derived from its AWS region;
+		"GLOBAL" prefixes are flagged as anycast instead. A failed download or
+		parse is logged and aborts the import; unusable prefixes are skipped.
+		"""
+		downloader = location.importer.Downloader()
+
+		try:
+			with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
+				aws_ip_dump = json.load(f.body)
+		except Exception as e:
+			log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
+			return
+
+		# XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
+		# there seems to be no machine-readable version available of this other than
+		# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
+		# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
+		# was helpful here as well.
+		aws_region_country_map = {
+				"af-south-1": "ZA",
+				"ap-east-1": "HK",
+				"ap-south-1": "IN",
+				"ap-south-2": "IN",
+				"ap-northeast-3": "JP",
+				"ap-northeast-2": "KR",
+				"ap-southeast-1": "SG",
+				"ap-southeast-2": "AU",
+				"ap-southeast-3": "ID",  # Jakarta, per the AWS region list
+				"ap-southeast-4": "AU",
+				"ap-northeast-1": "JP",
+				"ca-central-1": "CA",
+				"eu-central-1": "DE",
+				"eu-central-2": "CH",
+				"eu-west-1": "IE",
+				"eu-west-2": "GB",
+				"eu-south-1": "IT",
+				"eu-south-2": "ES",
+				"eu-west-3": "FR",
+				"eu-north-1": "SE",
+				"me-central-1": "AE",
+				"me-south-1": "BH",
+				"sa-east-1": "BR"
+				}
+
+		# Fetch all valid country codes (as a set, for cheap lookups) to check parsed networks against...
+		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
+		validcountries = {row.country_code for row in rows}
+
+		with self.db.transaction():
+			for snetwork in aws_ip_dump.get("prefixes", []) + aws_ip_dump.get("ipv6_prefixes", []):
+				try:
+					network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
+				except ValueError:
+					log.warning("Unable to parse line: %s" % snetwork)
+					continue
+
+				# Sanitize parsed networks...
+				if not self._check_parsed_network(network):
+					continue
+
+				# Determine region of this network (a missing region is treated as unknown below)...
+				region = snetwork.get("region") or ""
+				cc = None
+				is_anycast = False
+
+				# Any region name starting with "us-" or "cn-" gets its country code assigned straight away...
+				if region.startswith("us-"):
+					cc = "US"
+				elif region.startswith("cn-"):
+					cc = "CN"
+				elif region == "GLOBAL":
+					# ... funny region name for anycast-like networks ...
+					is_anycast = True
+				elif region in aws_region_country_map:
+					# ... assign looked up country code otherwise ...
+					cc = aws_region_country_map[region]
+				else:
+					# ... and bail out if we are missing something here
+					log.warning("Unable to determine country code for line: %s" % snetwork)
+					continue
+
+				# Skip networks with unknown country codes (only this one; the rest of the feed is still imported)
+				if not is_anycast and validcountries and cc not in validcountries:
+					log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % (cc, network))
+					continue
+
+				# Conduct SQL statement...
+				self.db.execute("""
+					INSERT INTO network_overrides(
+						network,
+						country,
+						source,
+						is_anonymous_proxy,
+						is_satellite_provider,
+						is_anycast
+					) VALUES (%s, %s, %s, %s, %s, %s)
+					ON CONFLICT (network) DO NOTHING""",
+					"%s" % network,
+					cc,
+					"Amazon AWS IP feed",
+					None,
+					None,
+					is_anycast,
+				)
+
+
 	@staticmethod
 	def _parse_bool(block, key):
 		val = block.get(key)