Skip to content

File Utilities

sahi.utils.file

File I/O utilities for SAHI.

Classes

NumpyEncoder

Bases: JSONEncoder

JSON encoder for numpy types.

Source code in sahi/utils/file.py
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy values into plain Python types.

    Pass this class as ``cls=`` to ``json.dump`` / ``json.dumps`` so that numpy
    scalars and arrays inside the data are converted instead of raising
    ``TypeError``.
    """

    def default(self, obj: object) -> object:
        """Return a JSON-serializable equivalent of ``obj``.

        Args:
            obj: The value the standard encoder could not serialize.

        Returns:
            ``bool`` for numpy booleans, ``int`` for numpy integers, ``float``
            for numpy floats and a (nested) list for numpy arrays.

        Raises:
            TypeError: Raised by ``json.JSONEncoder.default`` for any other type.
        """
        if isinstance(obj, np.bool_):
            # np.bool_ is neither np.integer nor np.floating, so it needs its own branch.
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
Functions
default(obj)

Encode numpy types as JSON-serializable Python types.

Source code in sahi/utils/file.py
def default(self, obj: object) -> object:
    """Return a JSON-serializable equivalent of ``obj``.

    Args:
        obj: The value the standard encoder could not serialize.

    Returns:
        ``bool`` for numpy booleans, ``int`` for numpy integers, ``float``
        for numpy floats and a (nested) list for numpy arrays.

    Raises:
        TypeError: Raised by the parent encoder's ``default`` for any other type.
    """
    if isinstance(obj, np.bool_):
        # np.bool_ is neither np.integer nor np.floating, so it needs its own branch.
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return super().default(obj)

Functions

download_from_url(from_url, to_path)

Downloads a file from the given URL and saves it to the specified path.

Parameters:

Name Type Description Default
from_url
str

The URL of the file to download.

required
to_path
str

The path where the downloaded file should be saved.

required

Returns:

Type Description
None

None

Source code in sahi/utils/file.py
def download_from_url(from_url: str, to_path: str) -> None:
    """Download a file from a URL unless the destination already exists.

    The parent directory of ``to_path`` is created when missing. The data is
    first written to ``"<to_path>.part"`` and only renamed to ``to_path`` once
    the transfer has finished, so an interrupted download does not leave a
    truncated file that later calls would mistake for a complete one.

    Args:
        from_url (str): The URL of the file to download.
        to_path (str): The path where the downloaded file should be saved.

    Returns:
        None

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` is propagated
        after the partial file has been removed.
    """
    Path(to_path).parent.mkdir(parents=True, exist_ok=True)

    # An existing destination is treated as an already completed download.
    if os.path.exists(to_path):
        return

    import urllib.request

    partial_path = f"{to_path}.part"
    try:
        urllib.request.urlretrieve(from_url, partial_path)
        os.replace(partial_path, to_path)
    finally:
        # After a successful rename nothing is left here; after a failure this
        # removes the incomplete download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

get_base_filename(path)

Takes a file path, returns (base_filename_with_extension, base_filename_without_extension).

Source code in sahi/utils/file.py
def get_base_filename(path: str) -> tuple[str, str]:
    """Return the final path component with and without its extension.

    The final component is taken with ``ntpath.basename``; the extension is
    whatever ``os.path.splitext`` splits off that component.

    Args:
        path: The file path.

    Returns:
        A ``(name_with_extension, name_without_extension)`` tuple.
    """
    name = ntpath.basename(path)
    stem = os.path.splitext(name)[0]
    return name, stem

get_file_extension(path)

Get the file extension from a given file path.

Parameters:

Name Type Description Default
path
str

The file path.

required

Returns:

Name Type Description
str str

The file extension.

Source code in sahi/utils/file.py
def get_file_extension(path: str) -> str:
    """Return the extension part of a file path.

    Args:
        path (str): The file path.

    Returns:
        str: The second element of ``os.path.splitext(path)``: the extension
            including its leading dot, or an empty string when there is none.
    """
    return os.path.splitext(path)[1]

import_model_class(model_type, class_name)

Import a predefined detection model class by name.

Parameters:

Name Type Description Default
model_type
str

Framework type ("yolov5", "detectron2", "mmdet", etc).

required
class_name
str

Name of the detection model class (e.g., "MmdetDetectionModel").

required

Returns:

Name Type Description
class_ type

The class named class_name, looked up in the sahi.models.{model_type} module.

Source code in sahi/utils/file.py
def import_model_class(model_type: str, class_name: str) -> type:
    """Import a predefined detection model class by name.

    Args:
        model_type: Framework type ("yolov5", "detectron2", "mmdet", etc). It is
            used as the submodule name in ``sahi.models.<model_type>``.
        class_name: Name of the detection model class (e.g., "MmdetDetectionModel").

    Returns:
        class_: The attribute called ``class_name`` of the imported module.

    Raises:
        ImportError: If ``sahi.models.<model_type>`` cannot be imported.
        AttributeError: If the module has no attribute called ``class_name``.
    """
    # importlib.import_module is the documented replacement for calling
    # __import__ directly; it returns the submodule itself.
    import importlib

    module = importlib.import_module(f"sahi.models.{model_type}")
    return getattr(module, class_name)

increment_path(path, exist_ok=True, sep='')

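Increment path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3 etc. The path is only incremented when it already exists and exist_ok is False; numbering starts at 2, or one above the highest number already in use.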

Parameters:

Name Type Description Default
path
str | Path

The base path to increment.

required
exist_ok
bool

If True, return the path as is even if it already exists. If False, return an incremented path when the path already exists.

True
sep
str

The separator to use between the base path and the increment number.

''

Returns:

Name Type Description
str str

The incremented path.

Example

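>>> increment_path("runs/exp", exist_ok=False, sep="_")  # "runs/exp" exists, no numbered siblings yet
'runs/exp_2'
>>> increment_path("runs/exp", exist_ok=False, sep="_")  # "runs/exp_2" now exists as well
'runs/exp_3'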

Source code in sahi/utils/file.py
def increment_path(path: str | Path, exist_ok: bool = True, sep: str = "") -> str:
    """Increment path, i.e. runs/exp --> runs/exp{sep}0, runs/exp{sep}1 etc.

    Args:
        path: str
            The base path to increment.
        exist_ok: bool
            If True, return the path as is if it already exists. If False, increment the path.
        sep: str
            The separator to use between the base path and the increment number.

    Returns:
        str: The incremented path.

    Example:
        >>> increment_path("runs/exp", sep="_")
        'runs/exp_0'
        >>> increment_path("runs/exp_0", sep="_")
        'runs/exp_1'
    """
    path = Path(path)  # os-agnostic
    if (path.exists() and exist_ok) or (not path.exists()):
        return str(path)
    else:
        dirs = glob.glob(f"{path}{sep}*")  # similar paths
        matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
        indices = [int(m.groups()[0]) for m in matches if m]  # indices
        n = max(indices) + 1 if indices else 2  # increment number
        return f"{path}{sep}{n}"  # update path

is_colab()

Check if the current environment is a Google Colab instance.

Returns:

Name Type Description
bool bool

True if the environment is a Google Colab instance, False otherwise.

Source code in sahi/utils/file.py
def is_colab() -> bool:
    """Report whether the ``google.colab`` module is currently loaded.

    The check looks for the key ``"google.colab"`` in ``sys.modules``, which is
    taken as the sign of running inside a Google Colab instance.

    Returns:
        bool: True if ``google.colab`` is present in ``sys.modules``, False otherwise.
    """
    from sys import modules as loaded_modules

    return "google.colab" in loaded_modules

list_files(directory, contains=['.json'], verbose=1)

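List the entries directly inside the given directory (sub-directories are not searched) and return the paths of those whose lower-cased names contain any of the given strings.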

Parameters:

Name Type Description Default
directory
str

Directory to list, e.g. "data/coco/".

required
contains
list

A list of strings to check if the target file name contains any of them, example: ["coco.png", ".jpg", "jpeg"]

['.json']
verbose
int

0: print nothing; 1: print the number of listed files.

1

Returns:

Name Type Description
filepath_list list[str]

List of file paths.

Source code in sahi/utils/file.py
def list_files(
    directory: str,
    contains: list = [".json"],
    verbose: int = 1,
) -> list[str]:
    """List the entries of a directory whose names contain any of the given terms.

    Only the direct children of ``directory`` are inspected (``os.listdir``);
    sub-directories are not descended into. Matching is case-insensitive: both
    the entry name and the search terms are lower-cased before comparison.

    Args:
        directory: str
            "data/coco/"
        contains: list
            A list of strings to check if the target file contains them, example: ["coco.png", ".jpg", "jpeg"]
            (the default list is only read, never modified)
        verbose: int
            0: no print
            1: print number of files

    Returns:
        filepath_list: List of file paths, each one ``directory`` joined with the entry name.
    """
    # Entry names are lower-cased before matching, so the terms must be too;
    # otherwise a term such as ".JPG" could never match anything.
    terms = [term.lower() for term in contains]

    filepath_list: list[str] = [
        str(os.path.join(directory, entry))
        for entry in os.listdir(directory)
        if any(term in entry.lower() for term in terms)
    ]

    if verbose:
        folder_name = Path(directory).name
        print(f"There are {len(filepath_list)!s} listed files in folder: {folder_name}/")

    return filepath_list

list_files_recursively(directory, contains=['.json'], verbose=True)

Walk the given directory recursively and return the paths of the files whose names contain any of the given strings.

Parameters:

Name Type Description Default
directory
str

Directory path to walk, e.g. "data/coco/".

required
contains
list[str]

A list of strings to check if the target file contains them, example: ["coco.png", ".jpg", "jpeg"].

['.json']
verbose
bool

If true, prints some results.

True

Returns:

Name Type Description
relative_filepath_list list[str]

List of file paths relative to given directory.

abs_filepath_list list[str]

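List of file paths joined with the walked directory; they are absolute only when the given directory is itself an absolute path.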

Source code in sahi/utils/file.py
def list_files_recursively(
    directory: str, contains: list[str] = [".json"], verbose: bool = True
) -> tuple[list[str], list[str]]:
    """Walk a directory tree and collect files whose names contain any of the given terms.

    Matching is case-insensitive: both the file name and the search terms are
    lower-cased before comparison.

    Args:
        directory: Directory path to walk, e.g. "data/coco/".
        contains: A list of strings to check if the target file contains them,
            example: ["coco.png", ".jpg", "jpeg"]. The default list is only read,
            never modified.
        verbose: If true, prints the number of listed files.

    Returns:
        relative_filepath_list: Each matched path with the leading ``directory``
            string removed. When ``directory`` has no trailing separator the
            entries therefore start with a path separator.
        abs_filepath_list: Each matched path joined with the walked root; these
            are absolute only when ``directory`` itself is absolute.
    """
    # File names are lower-cased before matching, so the terms must be too;
    # otherwise a term such as ".JPG" could never match anything.
    terms = [term.lower() for term in contains]

    abs_filepath_list = []
    relative_filepath_list = []
    prefix_length = len(directory)

    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            if not any(term in filename.lower() for term in terms):
                continue
            abs_filepath = os.path.join(root, filename)
            abs_filepath_list.append(abs_filepath)
            # Every walked path starts with `directory`, so slice that prefix
            # off. Splitting on `directory` instead would cut at its last
            # occurrence and break for paths such as "data/data/x.json".
            relative_filepath_list.append(abs_filepath[prefix_length:])

    if verbose:
        number_of_files = len(relative_filepath_list)
        folder_name = directory.split(os.sep)[-1]
        print(f"There are {number_of_files} listed files in folder {folder_name}.")

    return relative_filepath_list, abs_filepath_list

load_json(load_path, encoding='utf-8')

Load JSON formatted data from file.

Encoding type can be specified with 'encoding' argument.

Parameters:

Name Type Description Default
load_path
str

Path of the JSON file to read, e.g. "dirname/coco.json".

required
encoding
str

Encoding used to open the file; the default is 'utf-8'.

'utf-8'
Example inputs

load_path: "dirname/coco.json"

Source code in sahi/utils/file.py
def load_json(load_path: str, encoding: str = "utf-8") -> object:
    """Read a JSON file and return the decoded data.

    Args:
        load_path: str
            Path of the JSON file, e.g. "dirname/coco.json"
        encoding: str
            Text encoding used to open the file, default is 'utf-8'

    Returns:
        The Python object produced by ``json.load``.

    Example inputs:
        load_path: "dirname/coco.json"
    """
    with open(load_path, encoding=encoding) as source:
        return json.load(source)

load_pickle(load_path)

Loads pickle formatted data from load_path and returns it.

Parameters:

Name Type Description Default
load_path
str | Path

Path of the pickle file to read, e.g. "dirname/coco.pickle".

required
Example inputs

load_path: "dirname/coco.pickle"

Source code in sahi/utils/file.py
def load_pickle(load_path: str | Path) -> object:
    """Loads pickle formatted data (given as "data") from load_path.

    Args:
        load_path: str
            "dirname/coco.pickle"

    Example inputs:
        load_path: "dirname/coco.pickle"
    """
    with open(load_path, "rb") as json_file:
        data = pickle.load(json_file)
    return data

save_json(data, save_path, indent=None)

Saves the given data in JSON format to save_path.

Parameters:

Name Type Description Default
data
object

Data to be saved as JSON, typically a dict.

required
save_path
str | Path

Path of the JSON file to write, e.g. "dirname/coco.json".

required
indent
int | None

int or None Indentation level for pretty-printing the JSON data. If None, the most compact representation will be used. If an integer is provided, it specifies the number of spaces to use for indentation. Example: indent=4 will format the JSON data with an indentation of 4 spaces per level.

None
Example inputs

data: {"image_id": 5}
save_path: "dirname/coco.json"
indent: Train json files with indent=None, val json files with indent=4

Source code in sahi/utils/file.py
def save_json(data: object, save_path: str | Path, indent: int | None = None) -> None:
    """Write ``data`` to ``save_path`` as UTF-8 encoded JSON.

    Missing parent directories are created first. numpy values inside ``data``
    are converted through ``NumpyEncoder``. The separators ``(",", ":")`` are
    always used, so no space follows a comma or colon even when ``indent`` is set.

    Args:
        data: dict
            Data to be saved as json
        save_path: str
            "dirname/coco.json"
        indent: int or None
            Indentation level for pretty-printing the JSON data. If None, the most compact representation
            will be used. If an integer is provided, it specifies the number of spaces to use for indentation.
            Example: indent=4 will format the JSON data with an indentation of 4 spaces per level.

    Example inputs:
        data: {"image_id": 5}
        save_path: "dirname/coco.json"
        indent: Train json files with indent=None, val json files with indent=4
    """
    # make sure the target directory exists
    destination_dir = Path(save_path).parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    # serialize straight into the file
    compact_separators = (",", ":")
    with open(save_path, "w", encoding="utf-8") as json_out:
        json.dump(data, json_out, separators=compact_separators, cls=NumpyEncoder, indent=indent)

save_pickle(data, save_path)

Saves the given data in pickle format to save_path.

Parameters:

Name Type Description Default
data
object

Data to be saved as pickle, typically a dict.

required
save_path
str | Path

Path of the pickle file to write, e.g. "dirname/coco.pickle".

required
Example inputs

data: {"image_id": 5}
save_path: "dirname/coco.pickle"

Source code in sahi/utils/file.py
def save_pickle(data: object, save_path: str | Path) -> None:
    """Saves pickle formatted data (given as "data") as save_path.

    Args:
        data: dict
            Data to be saved as pickle
        save_path: str
            "dirname/coco.pickle"

    Example inputs:
        data: {"image_id": 5}
        save_path: "dirname/coco.pickle"
    """
    # create dir if not present
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    # export as json
    with open(save_path, "wb") as outfile:
        pickle.dump(data, outfile)

unzip(file_path, dest_dir)

Unzips compressed .zip file.

Example inputs

file_path: 'data/01_alb_id.zip'
dest_dir: 'data/'

Source code in sahi/utils/file.py
def unzip(file_path: str, dest_dir: str) -> None:
    """Extract every member of a .zip archive into a directory.

    Args:
        file_path: Path of the archive to open.
        dest_dir: Directory the archive members are extracted into.

    Example inputs:
        file_path: 'data/01_alb_id.zip'
        dest_dir: 'data/'
    """
    archive = zipfile.ZipFile(file_path)
    # the context manager closes the archive once extraction is done
    with archive:
        archive.extractall(dest_dir)