From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <gentoo-commits+bounces-1673993-garchives=archives.gentoo.org@lists.gentoo.org> Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id 118D615808B for <garchives@archives.gentoo.org>; Fri, 27 Sep 2024 00:52:28 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id F15C62BC044; Fri, 27 Sep 2024 00:52:26 +0000 (UTC) Received: from smtp.gentoo.org (woodpecker.gentoo.org [140.211.166.183]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id D26DF2BC044 for <gentoo-commits@lists.gentoo.org>; Fri, 27 Sep 2024 00:52:26 +0000 (UTC) Received: from oystercatcher.gentoo.org (oystercatcher.gentoo.org [148.251.78.52]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id D1C8D34347B for <gentoo-commits@lists.gentoo.org>; Fri, 27 Sep 2024 00:52:25 +0000 (UTC) Received: from localhost.localdomain (localhost [IPv6:::1]) by oystercatcher.gentoo.org (Postfix) with ESMTP id E4AB62702 for <gentoo-commits@lists.gentoo.org>; Fri, 27 Sep 2024 00:52:23 +0000 (UTC) From: "Matt Jolly" <kangie@gentoo.org> To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Matt Jolly" <kangie@gentoo.org> Message-ID: <1727398331.e0adc1721f392c89c8262c4f864f6b1edf796edc.kangie@gentoo> Subject: [gentoo-commits] proj/chromium-tools:master commit in: / X-VCS-Repository: proj/chromium-tools X-VCS-Files: get-opera-version-mapping.py X-VCS-Directories: / X-VCS-Committer: kangie X-VCS-Committer-Name: Matt Jolly X-VCS-Revision: e0adc1721f392c89c8262c4f864f6b1edf796edc X-VCS-Branch: master Date: Fri, 27 Sep 2024 00:52:23 +0000 (UTC) Precedence: bulk List-Post: <mailto:gentoo-commits@lists.gentoo.org> List-Help: <mailto:gentoo-commits+help@lists.gentoo.org> List-Unsubscribe: <mailto:gentoo-commits+unsubscribe@lists.gentoo.org> List-Subscribe: <mailto:gentoo-commits+subscribe@lists.gentoo.org> List-Id: Gentoo Linux mail <gentoo-commits.gentoo.org> X-BeenThere: gentoo-commits@lists.gentoo.org X-Auto-Response-Suppress: DR, RN, NRN, OOF, AutoReply X-Archives-Salt: de4f70a7-0b94-4da3-a2ce-1584a23645ff X-Archives-Hash: 9d42525c61e40bf48eb65b7b6006e2e3 commit: e0adc1721f392c89c8262c4f864f6b1edf796edc Author: Matt Jolly <kangie <AT> gentoo <DOT> org> AuthorDate: Fri Sep 27 00:46:24 2024 +0000 Commit: Matt Jolly <kangie <AT> gentoo <DOT> org> CommitDate: Fri Sep 27 00:52:11 2024 +0000 URL: https://gitweb.gentoo.org/proj/chromium-tools.git/commit/?id=e0adc172 get-opera-version-mapping: major refactor - Rework the logic to get a better result when remediating - Also store the version mapping in a dataclass (why not). - Use packaging.version.Version to make sorting versions trivial - Accept positional arguments for the max and min versions. Signed-off-by: Matt Jolly <kangie <AT> gentoo.org> get-opera-version-mapping.py | 118 +++++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 32 deletions(-) diff --git a/get-opera-version-mapping.py b/get-opera-version-mapping.py index 6d6f3de..015fd21 100755 --- a/get-opera-version-mapping.py +++ b/get-opera-version-mapping.py @@ -1,6 +1,32 @@ #!/usr/bin/env python + +# SPDX-License-Identifier: GPL-2.0-or-later +# This script is used to extract Opera and Chromium versions from the Opera changelog (blog) +# This is incomplete data, so we need to fill in the gaps with the Chromium version from the previous known version +# The intent here is to have _some_ sort of datasource to identify a potentially-fixed version of Opera based on +# the Chromium version it includes. +# High level logic: +# We can fetch the opera blog posts that relate to a major version of Opera as long as they don't change their URIs. +# We iterate over H4 elements to get the Opera version (and date, though we throw that away) +# We then iterate over child elements until we find an "Update Chromium" entry, which we can use to get the +# Chromium version (in which case we bail early) Or we exhaust the children and give up. +# Lather, rinse, repeat. + +import argparse, dataclasses + import requests from bs4 import BeautifulSoup +from packaging.version import Version + + +@dataclasses.dataclass +class OperaChromiumVersion: + opera_version: Version + chromium_version: Version + + def __str__(self): + chromium_version_str = 'unknown' if self.chromium_version == Version('0.0.0.0') else str(self.chromium_version) + return f"Opera Version: {self.opera_version}, Chromium Version: {chromium_version_str}" def get_opera_chromium_versions(base_url, start_version, end_version): @@ -15,16 +41,11 @@ def get_opera_chromium_versions(base_url, start_version, end_version): end_version: The ending version to extract information for (inclusive). Returns: - A dictionary mapping Opera version to Chromium version. - If no update is mentioned, the previous Chromium version is used. - For missing data or errors, "unknown" is used. + A list of OperaChromiumVersion objects containing the extracted version information. """ - versions = {} - chromium_version = None + versions: list[OperaChromiumVersion] = [] for version in range(start_version, end_version + 1): - # Fix formatting issue: - # OR url = base_url.format(version) url = base_url.format(version) print(f"Processing version {version}") @@ -38,8 +59,8 @@ def get_opera_chromium_versions(base_url, start_version, end_version): # Iterate through each section starting with an H4 element for section in content.find_all('h4'): + chromium_version = None version_str, date_str = section.text.strip().split(' – ') - versions[version_str] = chromium_version # Process all content elements (including nested ones) until the next H4 next_sibling = section.find_next_sibling( @@ -63,7 +84,12 @@ def get_opera_chromium_versions(base_url, start_version, end_version): # Handle missing Chromium version if not chromium_version: - chromium_version = "unknown" + chromium_version = '0.0.0.0' + + versions.append(OperaChromiumVersion( + Version(version_str), + Version(chromium_version) + )) except requests.exceptions.RequestException as e: if e.args and e.args[0] == 404: @@ -76,41 +102,69 @@ def get_opera_chromium_versions(base_url, start_version, end_version): print(f"Unexpected error: {e}") chromium_version = None # Reset chromium_version for next iteration - return versions + # We're broadly sorted by major version, but within each major version we get newer entries first + # Sort by Opera version to get the correct order + sorted_versions = sorted(versions, key=lambda x: x.opera_version) + return sorted_versions def remediate_unknown_versions(versions): """ - Remediates entries with "unknown" values in the versions dictionary by + Remediates entries with '0.0.0.0' values in the versions dictionary by assuming no change from the previous known version. Args: - versions: A dictionary mapping Opera version to Chromium version. + versions: A list of OperaChromiumVersion objects containing the extracted version information. Returns: - The modified versions dictionary with "unknown" values replaced based on previous entries. + A list of OperaChromiumVersion objects with '0.0.0.0' values replaced + by the previous known version if available. """ - previous_version = None - for version, chromium_version in versions.items(): - if chromium_version == "unknown": - if previous_version is not None: - # Update with previous version - versions[version] = previous_version + previous_version: Version = Version('0.0.0.0') + fixed_versions: list[OperaChromiumVersion] = [] + + for mapping in versions: + if mapping.chromium_version == Version('0.0.0.0') and previous_version is not Version('0.0.0.0'): + # Update with previous version + fixed_versions.append(OperaChromiumVersion(mapping.opera_version, previous_version)) else: - previous_version = chromium_version # Update known version for future references - return versions + # This should be fine, we're always parsing from oldest to newest + if previous_version < mapping.chromium_version: + previous_version = mapping.chromium_version + fixed_versions.append(mapping) + + return fixed_versions + + +def parse_arguments(): + """ + Parses the command line arguments and returns the parsed values. + + Returns: + The parsed command line arguments. + """ + parser = argparse.ArgumentParser(description='Get Opera and Chromium versions.') + parser.add_argument('start_ver', type=int, help='starting version', default=110) + parser.add_argument('end_ver', type=int, help='ending version', default=115) + return parser.parse_args() + + +def main(): + args = parse_arguments() + + # Base URL with version placeholder + base_url = "https://blogs.opera.com/desktop/changelog-for-{}/" + opera_chromium_versions = get_opera_chromium_versions(base_url, args.start_ver, args.end_ver) + fixed_versions = remediate_unknown_versions(opera_chromium_versions) -# Example usage -# Base URL with version placeholder -base_url = "https://blogs.opera.com/desktop/changelog-for-{}/" -opera_chromium_versions = get_opera_chromium_versions(base_url, 110, 115) + # Print the versions + if fixed_versions: + for mapping in fixed_versions: + print(mapping) + else: + print("Failed to extract any versions.") -opera_chromium_versions = remediate_unknown_versions(opera_chromium_versions) -if opera_chromium_versions: - for opera_version, chromium_version in opera_chromium_versions.items(): - print( - f"Opera Version: {opera_version}, Chromium Version: {chromium_version}") -else: - print("Failed to extract any versions.") +if __name__ == "__main__": + main()