From patchwork Tue Sep 27 16:48:39 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael Tremer X-Patchwork-Id: 6015 Return-Path: Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384 client-signature ECDSA (P-384) client-digest SHA384) (Client CN "mail01.haj.ipfire.org", Issuer "R3" (verified OK)) by web04.haj.ipfire.org (Postfix) with ESMTPS id 4McQZ20wJ4z3wdD for ; Tue, 27 Sep 2022 16:48:58 +0000 (UTC) Received: from mail02.haj.ipfire.org (mail02.haj.ipfire.org [172.28.1.201]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail02.haj.ipfire.org", Issuer "R3" (verified OK)) by mail01.ipfire.org (Postfix) with ESMTPS id 4McQZ0562Tz2M1; Tue, 27 Sep 2022 16:48:56 +0000 (UTC) Received: from mail02.haj.ipfire.org (localhost [127.0.0.1]) by mail02.haj.ipfire.org (Postfix) with ESMTP id 4McQZ03T0lz30M1; Tue, 27 Sep 2022 16:48:56 +0000 (UTC) Received: from mail01.ipfire.org (mail01.haj.ipfire.org [172.28.1.202]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) client-signature ECDSA (P-384)) (Client CN "mail01.haj.ipfire.org", Issuer "R3" (verified OK)) by mail02.haj.ipfire.org (Postfix) with ESMTPS id 4McQYy4JxBz2xNS for ; Tue, 27 Sep 2022 16:48:54 +0000 (UTC) Received: from [127.0.0.1] (localhost [127.0.0.1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (P-384) server-digest SHA384) (No client certificate requested) by mail01.ipfire.org (Postfix) with ESMTPSA id 4McQYy1K0Rzh6; Tue, 27 Sep 2022 16:48:54 +0000 (UTC) DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=ipfire.org; s=202003ed25519; t=1664297334; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=VyCrmy9Y66nBGJShUO/ob0Tn3fKXbNvjHfQiSMiecGs=; b=cOaPLXF9zPIfR2DM7l3Bjw2dmfddCdbud9D2yKMMnqVkTReR83kR7+yXs41R77bBQKjrjZ mlW1deSzjH3X6hBQ== DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=ipfire.org; s=202003rsa; t=1664297334; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=VyCrmy9Y66nBGJShUO/ob0Tn3fKXbNvjHfQiSMiecGs=; b=d56h6EHxHrFk7FukA2y1JsRO6tCMAuUzt+rDiG/oej9tOqwiVEjY5BStxPbZK1TjVOCO/p 58XFN48c5OW8goy1UvI4FHsnwv3eFHpQXGWlb87Y9K1DoYHkMSCCiZcBsV03m0GaBEv4CU f2zs7E5wl2ttUawsGF3lDXOfA31di0zhz1K/eRSyGhv29JAtqvNGIbNlRCWram6DgnEYPI kKqcQGDLdtPIWBj9InWHHFKWNlTaWXq4ioShfImOefUIOcPe77MXm4Ta37hL6Iwk+oeaqf D3+vCpM0froQrVsl65IUNjMRjrefulXOY+Hy0heayqUcbfDupiSyynvZe8TlLA== From: Michael Tremer To: location@lists.ipfire.org Subject: [PATCH 02/10] importer: Add command to import geofeeds into the database Date: Tue, 27 Sep 2022 16:48:39 +0000 Message-Id: <20220927164847.3409646-2-michael.tremer@ipfire.org> In-Reply-To: <20220927164847.3409646-1-michael.tremer@ipfire.org> References: <20220927164847.3409646-1-michael.tremer@ipfire.org> MIME-Version: 1.0 X-BeenThere: location@lists.ipfire.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Michael Tremer Errors-To: location-bounces@lists.ipfire.org Sender: "Location" Signed-off-by: Michael Tremer --- src/scripts/location-importer.in | 143 +++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in index 5bd5da3..ddec376 100644 --- a/src/scripts/location-importer.in +++ b/src/scripts/location-importer.in @@ -18,6 +18,7 @@ ############################################################################### import argparse +import concurrent.futures import ipaddress import json import logging @@ -95,6 +96,11 @@ class CLI(object): update_announcements.add_argument("server", nargs=1, help=_("Route Server to connect to"), metavar=_("SERVER")) + # Update geofeeds + update_geofeeds = subparsers.add_parser("update-geofeeds", + help=_("Update Geofeeds")) + update_geofeeds.set_defaults(func=self.handle_update_geofeeds) + # Update overrides update_overrides = subparsers.add_parser("update-overrides", help=_("Update overrides"), @@ -183,6 +189,25 @@ class CLI(object): CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); -- geofeeds + CREATE TABLE IF NOT EXISTS geofeeds( + id serial primary key, + url text, + status integer default null, + updated_at timestamp without time zone default null + ); + CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique + ON geofeeds(url); + CREATE TABLE IF NOT EXISTS geofeed_networks( + geofeed_id integer references geofeeds(id) on delete cascade, + network inet, + country text, + region text, + city text + ); + CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id + ON geofeed_networks(geofeed_id); + CREATE INDEX IF NOT EXISTS geofeed_networks_search + ON geofeed_networks(network); CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text); CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique ON network_geofeeds(network); @@ -1253,6 +1278,124 @@ class CLI(object): # Otherwise return the line yield line + def handle_update_geofeeds(self, ns): + # Fetch all Geofeeds that require an update + geofeeds = self.db.query(""" + SELECT + id, + url + FROM + geofeeds + WHERE + updated_at IS NULL + OR + updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week' + ORDER BY + id + """) + + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + results = executor.map(self._fetch_geofeed, geofeeds) + + for result in results: + print(result) + + def _fetch_geofeed(self, geofeed): + log.debug("Fetching Geofeed %s" % geofeed.url) + + with self.db.transaction(): + # Open the URL + try: + req = urllib.request.Request(geofeed.url, headers={ + "User-Agent" : "location/%s" % location.__version__, + + # We expect some plain text file in CSV format + "Accept" : "text/csv, text/plain", + }) + + # XXX set proxy + + # Send the request + with urllib.request.urlopen(req, timeout=10) as f: + # Remove any previous data + self.db.execute("DELETE FROM geofeed_networks \ + WHERE geofeed_id = %s", geofeed.id) + + # Read the output line by line + for line in f: + line = line.decode() + + # Strip any newline + line = line.rstrip() + + # Skip empty lines + if not line: + continue + + # Try to parse the line + try: + fields = line.split(",", 5) + except ValueError: + log.debug("Could not parse line: %s" % line) + continue + + # Check if we have enough fields + if len(fields) < 4: + log.debug("Not enough fields in line: %s" % line) + continue + + # Fetch all fields + network, country, region, city, = fields[:4] + + # Try to parse the network + try: + network = ipaddress.ip_network(network, strict=False) + except ValueError: + log.debug("Could not parse network: %s" % network) + continue + + # XXX Check the country code + + # Write this into the database + self.db.execute(""" + INSERT INTO + geofeed_networks ( + geofeed_id, + network, + country, + region, + city + ) + VALUES (%s, %s, %s, %s, %s)""", + geofeed.id, + "%s" % network, + country, + region, + city, + ) + + # Catch any HTTP errors + except urllib.request.HTTPError as e: + self.db.execute("UPDATE geofeeds SET status = %s \ + WHERE id = %s", e.code, geofeed.id) + + # Catch any other errors + except urllib.request.URLError as e: + log.error("Could not fetch URL %s: %s" % (geofeed.url, e)) + + # Mark the geofeed as updated + else: + self.db.execute(""" + UPDATE + geofeeds + SET + updated_at = CURRENT_TIMESTAMP, + status = NULL + WHERE + id = %s""", + geofeed.id, + ) + def handle_update_overrides(self, ns): with self.db.transaction(): # Only drop manually created overrides, as we can be reasonably sure to have them,