Fix linting and type checking issues following black, ruff and pyright update
openzim · Feb 14, 2025 · 91f1100 · 91f1100
1 parent 1242168
commit 91f1100
Show file tree

Hide file tree

Showing 21 changed files with 124 additions and 104 deletions.
diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py
@@ -1,6 +1,6 @@
-""" Files manipulation tools
+"""Files manipulation tools
 
-    Shortcuts to retrieve mime type using magic"""
+Shortcuts to retrieve mime type using magic"""
 
 import pathlib
 from contextlib import contextmanager

diff --git a/src/zimscraperlib/fix_ogvjs_dist.py b/src/zimscraperlib/fix_ogvjs_dist.py
@@ -1,4 +1,4 @@
-""" quick script to fix videojs-ogvjs so that it triggers on webm mimetype """
+"""quick script to fix videojs-ogvjs so that it triggers on webm mimetype"""
 
 import logging
 import pathlib

diff --git a/src/zimscraperlib/html.py b/src/zimscraperlib/html.py
@@ -1,4 +1,4 @@
-""" Tools to work with HTML contents """
+"""Tools to work with HTML contents"""
 
 import pathlib
 from typing import BinaryIO, TextIO
@@ -43,11 +43,13 @@ def find_language_in(content: str | BinaryIO | TextIO, mime_type: str) -> str:
                     continue
                 if (
                     nodename == "meta"
-                    and not node.attrs.get("http-equiv", "").lower()
+                    and not node.attrs.get(
+                        "http-equiv", ""
+                    ).lower()  # pyright:ignore[reportUnknownMemberType, reportAttributeAccessIssue]
                     == "content-language"
                 ):
                     continue
-                return node.attrs[key]
+                return node.attrs[key]  # pyright:ignore[reportReturnType]
     return ""
 
 

diff --git a/src/zimscraperlib/image/optimization.py b/src/zimscraperlib/image/optimization.py
@@ -1,22 +1,22 @@
-""" An image optimization module to optimize the following image formats:
+"""An image optimization module to optimize the following image formats:
 
-    - JPEG (using optimize-images)
-    - PNG (using optimize-images)
-    - GIF (using gifsicle with lossy optimization)
-    - WebP (using Pillow)
+- JPEG (using optimize-images)
+- PNG (using optimize-images)
+- GIF (using gifsicle with lossy optimization)
+- WebP (using Pillow)
 
-    Some important notes:
-    - This makes use of the --lossy option from gifsicle which is present
-     only in versions above 1.92.
-      If the package manager has a lower version, you can build gifsicle
-      from source and install or
-      do not use the lossiness option.
+Some important notes:
+- This makes use of the --lossy option from gifsicle which is present
+ only in versions above 1.92.
+  If the package manager has a lower version, you can build gifsicle
+  from source and install or
+  do not use the lossiness option.
 
-    - Presets for the optimizer are available in zimscraperlib.image.presets.
+- Presets for the optimizer are available in zimscraperlib.image.presets.
 
-    - If no options for an image optimization is passed, the optimizer
-    can still run on default settings which give
-      a bit less size than the original images but maintain a high quality. """
+- If no options for an image optimization is passed, the optimizer
+can still run on default settings which give
+  a bit less size than the original images but maintain a high quality."""
 
 import io
 import os

diff --git a/src/zimscraperlib/misc.py b/src/zimscraperlib/misc.py
@@ -1,4 +1,4 @@
-""" Miscelaneous utils"""
+"""Miscelaneous utils"""
 
 from typing import TypeVar
 

diff --git a/src/zimscraperlib/rewriting/css.py b/src/zimscraperlib/rewriting/css.py
@@ -1,4 +1,4 @@
-""" CSS Rewriting
+"""CSS Rewriting
 
 This modules contains tools to rewrite CSS retrieved from an online source so that it
 can safely operate within a ZIM, linking only to ZIM entries everytime a URL is used.

diff --git a/src/zimscraperlib/rewriting/html.py b/src/zimscraperlib/rewriting/html.py
@@ -1,4 +1,4 @@
-""" HTML Rewriting
+"""HTML Rewriting
 
 This modules contains tools to rewrite HTML retrieved from an online source so that it
 can safely operate within a ZIM.
@@ -101,8 +101,12 @@ def extract_base_href(content: str) -> str | None:
     if not soup.head:
         return None
     for base in soup.head.find_all("base"):
-        if base.has_attr("href"):
-            return base["href"]
+        if base.has_attr(  # pyright:ignore[reportUnknownMemberType, reportAttributeAccessIssue]
+            "href"
+        ):
+            return base[  # pyright:ignore[reportIndexIssue, reportUnknownVariableType, reportArgumentType, reportReturnType]
+                "href"
+            ]
     return None
 
 

diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py
@@ -1,4 +1,4 @@
-""" JS Rewriting
+"""JS Rewriting
 
 This modules contains tools to rewrite JS retrieved from an online source so that it
 can safely operate within a ZIM. It is based on the assumption that wombat.js will be

diff --git a/src/zimscraperlib/rewriting/url_rewriting.py b/src/zimscraperlib/rewriting/url_rewriting.py
@@ -1,4 +1,4 @@
-""" URL rewriting tools
+"""URL rewriting tools
 
 This module is about url and entry path rewriting.
 

diff --git a/src/zimscraperlib/types.py b/src/zimscraperlib/types.py
@@ -1,16 +1,16 @@
-""" File extensions to MIME-Type  mapping
+"""File extensions to MIME-Type  mapping
 
-    All libzim *articles* contains the mime-type of their content, for the libzim
-    reader to properly return it.
+All libzim *articles* contains the mime-type of their content, for the libzim
+reader to properly return it.
 
-    Providing accurate mime-type for ZIM Article is important to prevent broken features
-    upon reading.
-    Ex.: youtube scraper uses Web Assembly files (.wasm) for the WebM codecs.
-    Without the proper mime-type, wasm files are returned as octet-stream and thus
-    not loaded efficiently.
+Providing accurate mime-type for ZIM Article is important to prevent broken features
+upon reading.
+Ex.: youtube scraper uses Web Assembly files (.wasm) for the WebM codecs.
+Without the proper mime-type, wasm files are returned as octet-stream and thus
+not loaded efficiently.
 
-    Should your scraper need additional mapping, use mimetypes.add_type() and it will
-    be automatically used. """
+Should your scraper need additional mapping, use mimetypes.add_type() and it will
+be automatically used."""
 
 import mimetypes
 import pathlib

diff --git a/src/zimscraperlib/uri.py b/src/zimscraperlib/uri.py
@@ -1,4 +1,4 @@
-""" URI handling module"""
+"""URI handling module"""
 
 import urllib.parse
 

diff --git a/src/zimscraperlib/zim/__init__.py b/src/zimscraperlib/zim/__init__.py
@@ -1,10 +1,10 @@
-""" ZIM file creation tools
+"""ZIM file creation tools
 
-    zim.creator: create files by manually adding each article
-    zim.filesystem: zimwriterfs-like creation from a build folder
-    zim.providers: contentProvider for serving libzim with data
-    zim.items: item to add to creator
-    zim.archive: read ZIM files, accessing or searching its content"""
+zim.creator: create files by manually adding each article
+zim.filesystem: zimwriterfs-like creation from a build folder
+zim.providers: contentProvider for serving libzim with data
+zim.items: item to add to creator
+zim.archive: read ZIM files, accessing or searching its content"""
 
 from libzim.writer import Blob  # pyright: ignore[reportMissingModuleSource]
 

diff --git a/src/zimscraperlib/zim/_libkiwix.py b/src/zimscraperlib/zim/_libkiwix.py
@@ -1,4 +1,4 @@
-r""" [INTERNAL] libkiwix's internal features copies
+r"""[INTERNAL] libkiwix's internal features copies
 
 CAUTION: this is __not__ part of zimscraperlib's API. Don't use outside scraperlib!
 

diff --git a/src/zimscraperlib/zim/archive.py b/src/zimscraperlib/zim/archive.py
@@ -1,10 +1,10 @@
-""" ZIM Archive helper
+"""ZIM Archive helper
 
-    Convenient subclass of libzim.reader.Archive with:
-    - direct access to Item from path
-    - direct access to suggestions and suggestions count
-    - direct access to search results and number of results
-    - public Entry access by Id"""
+Convenient subclass of libzim.reader.Archive with:
+- direct access to Item from path
+- direct access to suggestions and suggestions count
+- direct access to search results and number of results
+- public Entry access by Id"""
 
 from collections.abc import Iterable
 from types import TracebackType

diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py
@@ -1,18 +1,18 @@
-""" ZIM Creator helper
-
-    Convenient subclass of libzim.writer.Creator with:
-    - easier configuration of commonly set props during init
-    - start/stop methods to bypass the contextmanager
-    - method to create an entry directly from args
-    - direct method to add redirects without title
-    - prevent exeption on double call to close()
-
-    Convenient subclasses of libzim.writer.Item with:
-    - metadata set on initialization
-    - metadata stored on object
-    Sister subclass StaticItem (inheriting from it) with:
-    - content stored on object
-    - can be used to store a filepath and content read from it (not stored) """
+"""ZIM Creator helper
+
+Convenient subclass of libzim.writer.Creator with:
+- easier configuration of commonly set props during init
+- start/stop methods to bypass the contextmanager
+- method to create an entry directly from args
+- direct method to add redirects without title
+- prevent exeption on double call to close()
+
+Convenient subclasses of libzim.writer.Item with:
+- metadata set on initialization
+- metadata stored on object
+Sister subclass StaticItem (inheriting from it) with:
+- content stored on object
+- can be used to store a filepath and content read from it (not stored)"""
 
 import io
 import logging

diff --git a/src/zimscraperlib/zim/filesystem.py b/src/zimscraperlib/zim/filesystem.py
@@ -1,27 +1,27 @@
-""" zimwriterfs-like tools to convert a build folder into a ZIM
+"""zimwriterfs-like tools to convert a build folder into a ZIM
 
-    make_zim_file behaves in a similar way to zimwriterfs and expects the same options:
+make_zim_file behaves in a similar way to zimwriterfs and expects the same options:
 
-    - Guesses file mime-type from filenames
-    - Add all files to respective namespaces based on mime type
-    - Add redirects from a zimwriterfs-compatible redirects TSV
-    - Adds common metadata
+- Guesses file mime-type from filenames
+- Add all files to respective namespaces based on mime type
+- Add redirects from a zimwriterfs-compatible redirects TSV
+- Adds common metadata
 
-    Also included:
-    - Add redirect from a list of (source, destination, title) strings
+Also included:
+- Add redirect from a list of (source, destination, title) strings
 
-    Note: due to the lack of a cancel() method in the libzim itself, it is not possible
-    to stop a zim creation process. Should an error occur in your code, a Zim file
-    with up-to-that-moment content will be created at destination.
+Note: due to the lack of a cancel() method in the libzim itself, it is not possible
+to stop a zim creation process. Should an error occur in your code, a Zim file
+with up-to-that-moment content will be created at destination.
 
-    To prevent this (creating an unwanted Zim file) from happening,
-    a workaround is in place. It prevents the libzim from finishing its process.
-    While it results in no Zim file being created, it results in the zim temp folder
-    to be left on disk and very frequently leads to a segmentation fault at garbage
-    collection (on exit mostly).
+To prevent this (creating an unwanted Zim file) from happening,
+a workaround is in place. It prevents the libzim from finishing its process.
+While it results in no Zim file being created, it results in the zim temp folder
+to be left on disk and very frequently leads to a segmentation fault at garbage
+collection (on exit mostly).
 
-    Meaning you should exit right after an exception in your code (during zim creation)
-    Use workaround_nocancel=False to disable the workaround. """
+Meaning you should exit right after an exception in your code (during zim creation)
+Use workaround_nocancel=False to disable the workaround."""
 
 import datetime
 import pathlib

diff --git a/src/zimscraperlib/zim/indexing.py b/src/zimscraperlib/zim/indexing.py
@@ -1,4 +1,4 @@
-""" Special item with customized index data and helper classes """
+"""Special item with customized index data and helper classes"""
 
 import io
 import pathlib

diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py
@@ -1,4 +1,4 @@
-""" libzim Item helpers """
+"""libzim Item helpers"""
 
 import io
 import pathlib

diff --git a/src/zimscraperlib/zim/providers.py b/src/zimscraperlib/zim/providers.py
@@ -1,10 +1,10 @@
-""" libzim Providers accepting a `ref` arg to keep it away from garbage collection
+"""libzim Providers accepting a `ref` arg to keep it away from garbage collection
 
-    Use case is to pass it the Item instance that created the Provider so that the
-    Item lives longer than the provider, thus allowing:
-    - to keep a single copy of the data if it is to be indexed
-        (and thus Provider instanced twice)
-    - to release whatever needs to be once we know data won't be fetched anymore """
+Use case is to pass it the Item instance that created the Provider so that the
+Item lives longer than the provider, thus allowing:
+- to keep a single copy of the data if it is to be indexed
+    (and thus Provider instanced twice)
+- to release whatever needs to be once we know data won't be fetched anymore"""
 
 import io
 import pathlib

diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py
@@ -14,7 +14,7 @@
 
 @pytest.fixture
 def simple_js_rewriter(
-    simple_url_rewriter_gen: Callable[[str], ArticleUrlRewriter]
+    simple_url_rewriter_gen: Callable[[str], ArticleUrlRewriter],
 ) -> JsRewriter:
     return JsRewriter(
         url_rewriter=simple_url_rewriter_gen("http://www.example.com"),

diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py
@@ -337,24 +337,38 @@ def test_mandatory_value(metadata_init: MetadataInitConfig):
         if issubclass(metadata_init.a_type, metadata.TextListBasedMetadata):
             metadata_init.a_type([])
             metadata_init.a_type(["", " "])
-        elif issubclass(metadata_init.a_type, metadata.TextBasedMetadata):
+        elif issubclass(
+            metadata_init.a_type,  # pyright:ignore[reportUnknownMemberType, reportUnknownArgumentType]
+            metadata.TextBasedMetadata,
+        ):
             metadata_init.a_type("")
             metadata_init.a_type(" ")
-        elif issubclass(metadata_init.a_type, metadata.DateBasedMetadata):
+        elif issubclass(
+            metadata_init.a_type,  # pyright:ignore[reportUnknownMemberType, reportUnknownArgumentType]
+            metadata.DateBasedMetadata,
+        ):
             pytest.skip("Cannot set an empty Date")
-        elif issubclass(metadata_init.a_type, metadata.DefaultIllustrationMetadata):
+        elif issubclass(
+            metadata_init.a_type,  # pyright:ignore[reportUnknownMemberType, reportUnknownArgumentType]
+            metadata.DefaultIllustrationMetadata,
+        ):
             metadata_init.a_type(b"")  # pyright:ignore[reportUnknownMemberType]
             metadata_init.a_type(b" ")  # pyright:ignore[reportUnknownMemberType]
-        elif get_classvar_value_type(metadata_init.a_type) is bytes:
+        elif (
+            get_classvar_value_type(
+                metadata_init.a_type  # pyright:ignore[reportUnknownMemberType, reportUnknownArgumentType]
+            )
+            is bytes
+        ):
             if metadata_init.nb_args == 1:
-                metadata_init.a_type(b"")  # pyright: ignore[reportCallIssue]
-                metadata_init.a_type(b" ")  # pyright: ignore[reportCallIssue]
+                metadata_init.a_type(b"")  # pyright: ignore[reportUnknownMemberType]
+                metadata_init.a_type(b" ")  # pyright: ignore[reportUnknownMemberType]
             else:
-                metadata_init.a_type(
-                    b"", name="Foo"  # pyright: ignore[reportCallIssue]
+                metadata_init.a_type(  # pyright: ignore[reportUnknownMemberType]
+                    b"", name="Foo"
                 )
-                metadata_init.a_type(
-                    b" ", name="Foo"  # pyright: ignore[reportCallIssue]
+                metadata_init.a_type(  # pyright: ignore[reportUnknownMemberType]
+                    b" ", name="Foo"
                 )
         else:
             raise OSError("WTF")