Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo | Install | Sign in
Socket

binpickle

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

binpickle - npm Package Compare versions

Comparing version
0.4.0a1
to
0.4.0a2
binpickle/py.typed
+2
from . import abc
from . import registry
from abc import ABC
from typing import Any
from typing_extensions import Buffer, Optional, Self
class Codec(ABC):
    """
    Typing stub for a codec base class.

    Only signatures are declared here; every method body is elided.
    NOTE(review): presumably mirrors ``numcodecs.abc.Codec`` -- confirm
    against the installed numcodecs release.
    """

    # Identifier of the codec; declared optional, so it may be ``None``.
    codec_id: Optional[str]

    def encode(self, buf: Buffer) -> Buffer:
        """Accept a buffer and return a buffer (presumably the encoded data)."""
        ...

    def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
        """
        Accept a buffer and return a buffer (presumably the decoded data).

        ``out`` is an optional buffer argument defaulting to ``None``;
        presumably a destination for the result -- TODO confirm.
        """
        ...

    def get_config(self) -> dict[str, Any]:
        """Return a string-keyed dictionary (presumably the codec's configuration)."""
        ...

    @classmethod
    def from_config(cls, cfg: dict[str, Any]) -> Self:
        """Alternate constructor: build an instance from a string-keyed dictionary."""
        ...
from typing import Any
from .abc import Codec
codec_registry: dict[str, Codec]
def get_codec(config: dict[str, Any]) -> Codec: ...
+2
-1
# Changes here will be overwritten by Copier
_commit: 0e64af4
_commit: 8d75d6c
_src_path: https://github.com/lenskit/lk-project-template

@@ -10,1 +10,2 @@ package_name: binpickle

start_year: 2020
typecheck: true

@@ -14,62 +14,70 @@ name: Validate Source Rules

lint:
name: Check Source Style
name: Check Source Code
runs-on: ubuntu-latest
steps:
- name: 📥 Check out source code
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: 📥 Check out source code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: 🐍 Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: 'pip'
- name: 🐍 Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: "pip"
- name: 🛠️ Install tools
run: |
pip install ruff
- name: 🛠️ Install development tools and dependencies
run: |
pip install -e .[dev]
- name: 🪮 Check source code formatting
id: format
run: |
if pipx run ruff format --diff $PKG_DIR; then
echo passed=yes >>"$GITHUB_OUTPUT"
else
echo passed=no >>"$GITHUB_OUTPUT"
echo "::error::source code not formatted"
fi
env:
PKG_DIR: binpickle
- name: 🪮 Check source code formatting
id: format
run: |
if ruff format --diff $PKG_DIR; then
echo passed=yes >>"$GITHUB_OUTPUT"
else
echo passed=no >>"$GITHUB_OUTPUT"
echo "::error::source code not formatted"
fi
env:
PKG_DIR: binpickle
- name: 🐜 Check source code lint rules
id: lint
run: |
if pipx run ruff check --output-format=github $PKG_DIR; then
echo passed=yes >>"$GITHUB_OUTPUT"
else
echo passed=no >>"$GITHUB_OUTPUT"
echo "::error::source code lint check failed"
fi
env:
PKG_DIR: binpickle
- name: 🐜 Check source code lint rules
id: lint
run: |
if ruff check --output-format=github $PKG_DIR; then
echo passed=yes >>"$GITHUB_OUTPUT"
else
echo passed=no >>"$GITHUB_OUTPUT"
echo "::error::source code lint check failed"
fi
env:
PKG_DIR: binpickle
- name: 🧾 Checking results
run: |
if [ "$FMT_PASSED" = no ]; then
echo "::error::format failed, failing build"
exit 1
fi
if [ "$LINT_PASSED" = no ]; then
if [ "$LINT_REQUIRED" = true ]; then
echo "::error::lint failed, failing build"
exit 2
else
echo "::error::lint failed but non-mandatory"
fi
fi
env:
FMT_PASSED: ${{ steps.fmt.outputs.passed }}
LINT_PASSED: ${{ steps.lint.outputs.passed }}
LINT_REQUIRED: True
- name: 🧾 Checking lint and format results
run: |
if [ "$FMT_PASSED" = no ]; then
echo "::error::format failed, failing build"
exit 1
fi
if [ "$LINT_PASSED" = no ]; then
if [ "$LINT_REQUIRED" = true ]; then
echo "::error::lint failed, failing build"
exit 2
else
echo "::error::lint failed but non-mandatory"
fi
fi
env:
FMT_PASSED: ${{ steps.fmt.outputs.passed }}
LINT_PASSED: ${{ steps.lint.outputs.passed }}
LINT_REQUIRED: True
- name: 📐 Check types
id: typecheck
uses: jakebailey/pyright-action@v1
with:
extra-args: binpickle
{
"mypy-type-checker.reportingScope": "workspace",
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.codeActionsOnSave": {
"source.organizeImports": true
},
"editor.formatOnSave": true,
},
"[jsonc]": {
"editor.formatOnSave": true,
},
"[yaml]": {
"editor.defaultFormatter": "redhat.vscode-yaml",
"editor.formatOnSave": true,
},
"[toml]": {
"editor.defaultFormatter": "tamasfe.even-better-toml",
"editor.formatOnSave": true,
},
}
Metadata-Version: 2.1
Name: binpickle
Version: 0.4.0a1
Version: 0.4.0a2
Summary: Optimized format for pickling binary data.

@@ -33,2 +33,3 @@ Author-email: Michael Ekstrand <mdekstrand@drexel.edu>

Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Operating System :: OS Independent

@@ -44,5 +45,9 @@ Requires-Python: >=3.10

Requires-Dist: setuptools_scm>=8; extra == "dev"
Requires-Dist: build; extra == "dev"
Requires-Dist: twine; extra == "dev"
Requires-Dist: ruff; extra == "dev"
Requires-Dist: mypy~=1.5; extra == "dev"
Requires-Dist: pyright; extra == "dev"
Requires-Dist: copier; extra == "dev"
Requires-Dist: ipython; extra == "dev"
Requires-Dist: pyproject2conda; extra == "dev"
Requires-Dist: sphinx-autobuild; extra == "dev"

@@ -49,0 +54,0 @@ Requires-Dist: humanize~=4.0; extra == "dev"

@@ -8,5 +8,9 @@ msgpack>=1.0

setuptools_scm>=8
build
twine
ruff
mypy~=1.5
pyright
copier
ipython
pyproject2conda
sphinx-autobuild

@@ -13,0 +17,0 @@ humanize~=4.0

@@ -19,2 +19,3 @@ .copier-answers.yml

binpickle/format.py
binpickle/py.typed
binpickle/read.py

@@ -33,5 +34,2 @@ binpickle/write.py

docs/_templates/base.html
stubs/numcodecs/__init__.pyi
stubs/numcodecs/abc.pyi
stubs/numcodecs/registry.pyi
tests/test_file_info.py

@@ -41,2 +39,5 @@ tests/test_format.py

tests/test_util.py
tests/test_validation.py
tests/test_validation.py
typings/numcodecs/__init__.pyi
typings/numcodecs/abc.pyi
typings/numcodecs/registry.pyi

@@ -5,6 +5,6 @@ """

from importlib.metadata import version, PackageNotFoundError
from importlib.metadata import PackageNotFoundError, version
from .write import dump, BinPickler
from .read import load, BinPickleFile, file_info
from .read import BinPickleFile, file_info, load
from .write import BinPickler, dump

@@ -11,0 +11,0 @@ try:

@@ -5,4 +5,6 @@ """

from __future__ import annotations
from typing import Optional, Any
import hashlib
from typing import Any, Optional
from typing_extensions import Buffer

@@ -9,0 +11,0 @@

@@ -6,7 +6,8 @@ """

from __future__ import annotations
from typing import Optional, TypeAlias, Callable, overload
from typing_extensions import Buffer
from typing import Callable, Optional, TypeAlias, overload
from numcodecs.abc import Codec
from numcodecs.registry import get_codec
from typing_extensions import Buffer

@@ -13,0 +14,0 @@ from binpickle.format import CodecSpec

@@ -17,1 +17,8 @@ class BinPickleError(Exception):

"""
class FormatWarning(UserWarning):
    """
    Warning category for a probable problem in the file format that does
    not stop processing: we can still proceed without correctness errors.
    """

@@ -5,5 +5,9 @@ """

from __future__ import annotations
import enum
import io
import struct
from dataclasses import dataclass, field, fields
import struct
from typing import TypeAlias
from typing import Any, TypeAlias

@@ -29,2 +33,22 @@ from binpickle.errors import FormatError

class Flags(enum.Flag):
    """
    Bit flags that may be set in the BinPickle file header.
    """

    #: The file was created on a big-endian system; when this flag is absent
    #: the data is little-endian.  Only the serialized buffer data is
    #: affected: lengths and offsets in the file format are always stored in
    #: network byte order (big-endian) or encoded with MsgPack.
    BIG_ENDIAN = 0x1

    #: The file is designed to be memory-mapped.
    MAPPABLE = 0x2
@dataclass

@@ -37,3 +61,3 @@ class FileHeader:

1. File version (2 bytes, big-endian).
2. Flags (2 bytes). Currently no flags are defined, so this is set to 0.
2. Flags (2 bytes), as defined in :class:`Flags`.
3. File length (8 bytes, big-endian). Length is signed; if the file length is not known,

@@ -47,2 +71,3 @@ this field is set to -1.

"The NumPy file version."
flags: Flags = Flags(0)
length: int = -1

@@ -53,6 +78,6 @@ "The length of the file (-1 for unknown)."

"Encode the file header as bytes."
return HEADER_FORMAT.pack(MAGIC, self.version, 0, self.length)
return HEADER_FORMAT.pack(MAGIC, self.version, self.flags._value_, self.length)
@classmethod
def decode(cls, buf: bytes, *, verify=True):
def decode(cls, buf: bytes | bytearray | memoryview, *, verify: bool = True) -> FileHeader:
"Decode a file header from bytes."

@@ -67,8 +92,10 @@ if len(buf) != HEADER_FORMAT.size:

raise FormatError("invalid version {}".format(v))
if verify and flags != 0:
raise FormatError("unsupported flags")
return cls(v, off)
try:
flags = Flags(flags)
except ValueError as e:
raise FormatError("unsupported flags", e)
return cls(v, flags, off)
@classmethod
def read(cls, file, **kwargs):
def read(cls, file: io.FileIO | io.BufferedReader, **kwargs: bool) -> FileHeader:
buf = file.read(HEADER_FORMAT.size)

@@ -110,3 +137,3 @@ return cls.decode(buf, **kwargs)

@classmethod
def decode(cls, buf, *, verify=True):
def decode(cls, buf: bytes | bytearray | memoryview, *, verify: bool = True) -> FileTrailer:
"Decode a file trailer from bytes."

@@ -141,3 +168,3 @@ off, len, ck = TRAILER_FORMAT.unpack(buf)

@classmethod
def from_repr(cls, repr):
def from_repr(cls, repr: dict[str, Any]):
"Convert an index entry from its MsgPack-compatible representation"

@@ -144,0 +171,0 @@ if not isinstance(repr, dict):

@@ -0,18 +1,21 @@

import hashlib
import io
import logging
import mmap
import pickle
import sys
import warnings
from dataclasses import dataclass
from enum import Enum
import hashlib
import mmap
import logging
import io
from os import PathLike
from typing import Optional
from typing import Any, Optional
import msgpack
from typing_extensions import Buffer
import pickle
import msgpack
from binpickle.encode import resolve_codec
from binpickle.errors import BinPickleError, FormatError, IntegrityError
from binpickle.errors import BinPickleError, FormatError, FormatWarning, IntegrityError
from .format import FileHeader, IndexEntry, FileTrailer
from ._util import hash_buffer
from .format import FileHeader, FileTrailer, Flags, IndexEntry

@@ -45,7 +48,8 @@ _log = logging.getLogger(__name__)

The name of the file to load.
direct(bool):
direct(bool or str):
If ``True``, returned objects zero-copy when possible, but cannot
outlast the :class:`BinPickleFile` instance. If ``False``, they
are copied from the file and do not need to be freed before
:meth:`close` is called.
:meth:`close` is called. If the string ``'nowarn'``, open in
direct mode but do not warn if the file is unmappable.
verify(bool):

@@ -55,4 +59,4 @@ If ``True`` (the default), verify file checksums while reading.

filename: str | PathLike
direct: bool
filename: str | PathLike[str]
direct: bool | str
verify: bool

@@ -66,8 +70,10 @@ header: FileHeader

def __init__(self, filename, *, direct: bool = False, verify: bool = True):
def __init__(
self, filename: str | PathLike[str], *, direct: bool | str = False, verify: bool = True
):
self.filename = filename
self.direct = direct
self.verify = verify
with open(filename, "rb") as bpf:
self.header = FileHeader.read(bpf)
with open(filename, "rb", buffering=0) as bpf:
self._read_header(bpf)
self._map = mmap.mmap(bpf.fileno(), self.header.length, access=mmap.ACCESS_READ)

@@ -80,3 +86,3 @@ self._mv = memoryview(self._map)

def __exit__(self, *args):
def __exit__(self, *args: Any):
self.close()

@@ -114,3 +120,3 @@ return False

"""
errors = []
errors: list[str] = []
assert self._index_buf is not None, "file not loaded"

@@ -147,2 +153,13 @@

def _read_header(self, bpf: io.FileIO) -> None:
    """
    Read the file header from ``bpf`` into ``self.header`` and check its flags.

    Args:
        bpf: The open binary file to read the header from.

    Raises:
        FormatError: If the byte order recorded in the header flags does
            not match the byte order of the running host.

    Warns:
        FormatWarning: If ``self.direct`` is truthy and not ``"nowarn"``
            but the header does not carry the ``MAPPABLE`` flag.
    """
    self.header = FileHeader.read(bpf)
    flags = self.header.flags

    # Buffer data is stored in the writer's native byte order, so a mismatch
    # with this host is rejected outright.
    file_is_big_endian = Flags.BIG_ENDIAN in flags
    if sys.byteorder == "big" and not file_is_big_endian:
        raise FormatError("attempting to load little-endian file on big-endian host")
    if sys.byteorder == "little" and file_is_big_endian:
        raise FormatError("attempting to load big-endian file on little-endian host")

    # ``direct`` may be a bool or the string "nowarn"; the string still
    # enables direct mode but silences this warning.
    if self.direct and self.direct != "nowarn" and Flags.MAPPABLE not in flags:
        warnings.warn(
            "direct mode requested but file is not marked as mappable",
            FormatWarning,
            stacklevel=3,
        )
def _read_index(self) -> None:

@@ -168,3 +185,3 @@ tpos = self.header.trailer_pos()

self.entries = [IndexEntry.from_repr(e) for e in msgpack.unpackb(self._index_buf)]
self.entries = [IndexEntry.from_repr(e) for e in msgpack.unpackb(self._index_buf)] # type: ignore
_log.debug("read %d entries from file", len(self.entries))

@@ -180,4 +197,4 @@

end = start + length
if direct is None:
direct = self.direct
if direct is None and self.direct:
direct = True

@@ -216,3 +233,3 @@ buf = self._mv[start:end]

def load(file: str | PathLike) -> object:
def load(file: str | PathLike[str]) -> object:
"""

@@ -229,3 +246,3 @@ Load an object from a BinPickle file.

def file_info(file: str | PathLike) -> BPKInfo:
def file_info(file: str | PathLike[str]) -> BPKInfo:
"""

@@ -232,0 +249,0 @@ Test whether a file is a BinPickle file, and if so, return basic information

@@ -0,17 +1,18 @@

import hashlib
import io
import logging
import mmap
import pickle
import sys
import warnings
from os import PathLike
import warnings
import logging
import io
import hashlib
import pickle
from typing import Any
import msgpack
import numpy as np
from typing_extensions import Buffer, List, Optional, Self
import numpy as np
from .format import CodecSpec, FileHeader, FileTrailer, IndexEntry
from .encode import ResolvedCodec, resolve_codec, CodecArg
from ._util import human_size
from .encode import CodecArg, ResolvedCodec, resolve_codec
from .format import CodecSpec, FileHeader, FileTrailer, Flags, IndexEntry

@@ -45,10 +46,18 @@ _log = logging.getLogger(__name__)

If ``True``, align buffers to the page size.
codec:
The codec to use for encoding buffers. This can be anything that can be
passed to :func:`binpickle.codecs.make_codec`, or it can be a function
that takes a buffer and returns the codec to use for that buffer (to
use different codecs for different types or sizes of buffers).
codecs(list of CodecArg):
The list of codecs to use for encoding buffers. The codecs are
applied in sequence to encode a buffer, and in reverse order to
decode the buffer. There are 4 ways to specify a codec:
* A :class:`numcodecs.abc.Codec` instance.
* A dictionary specifying a codec configuration, suitable for
use by :func:`numcodecs.get_config`.
* A string, which is used as a codec ID to look up the codec (the
``id`` field recognized by :func:`~numcodecs.get_config`)
* A function that takes a buffer and returns any of the above (or
``None``, to skip the step for that buffer), allowing encoding
to vary from buffer to buffer.
"""
filename: str | PathLike
filename: str | PathLike[str]
align: bool

@@ -61,5 +70,5 @@ codecs: list[ResolvedCodec]

self,
filename: str | PathLike,
filename: str | PathLike[str],
*,
align=False,
align: bool = False,
codecs: Optional[list[CodecArg]] = None,

@@ -81,3 +90,3 @@ ):

@classmethod
def mappable(cls, filename: str | PathLike):
def mappable(cls, filename: str | PathLike[str]):
"Convenience method to construct a pickler for memory-mapped use."

@@ -87,3 +96,3 @@ return cls(filename, align=True)

@classmethod
def compressed(cls, filename: str | PathLike, codec: CodecArg = "gzip"):
def compressed(cls, filename: str | PathLike[str], codec: CodecArg = "gzip"):
"Convenience method to construct a pickler for compressed storage."

@@ -120,3 +129,3 @@ return cls(filename, codecs=[codec])

def __exit__(self, *args):
def __exit__(self, *args: Any):
self.close()

@@ -130,3 +139,7 @@ return False

h = FileHeader()
_log.debug("initializing header for %s", self.filename)
if sys.byteorder == "big":
h.flags |= Flags.BIG_ENDIAN
if self.align and not self.codecs:
h.flags |= Flags.MAPPABLE
_log.debug("initializing header for %s: %s", self.filename, h)
self._file.write(h.encode())

@@ -169,3 +182,3 @@ assert self._file.tell() == pos + FileHeader.SIZE

if isinstance(mv.obj, np.ndarray):
binfo = ("ndarray", str(mv.obj.dtype), mv.obj.shape)
binfo = ("ndarray", str(mv.obj.dtype), mv.obj.shape) # type: ignore

@@ -214,3 +227,9 @@ _log.debug("writing %d bytes at position %d", length, offset)

def dump(obj, file: str | PathLike, *, mappable: bool = False, codecs: list[CodecArg] = ["gzip"]):
def dump(
obj: object,
file: str | PathLike[str],
*,
mappable: bool = False,
codecs: list[CodecArg] = ["gzip"],
):
"""

@@ -217,0 +236,0 @@ Dump an object to a BinPickle file. This is a convenience wrapper

@@ -47,2 +47,4 @@ Format

.. autoclass:: Flags
.. autoclass:: FileTrailer

@@ -49,0 +51,0 @@

Metadata-Version: 2.1
Name: binpickle
Version: 0.4.0a1
Version: 0.4.0a2
Summary: Optimized format for pickling binary data.

@@ -33,2 +33,3 @@ Author-email: Michael Ekstrand <mdekstrand@drexel.edu>

Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Operating System :: OS Independent

@@ -44,5 +45,9 @@ Requires-Python: >=3.10

Requires-Dist: setuptools_scm>=8; extra == "dev"
Requires-Dist: build; extra == "dev"
Requires-Dist: twine; extra == "dev"
Requires-Dist: ruff; extra == "dev"
Requires-Dist: mypy~=1.5; extra == "dev"
Requires-Dist: pyright; extra == "dev"
Requires-Dist: copier; extra == "dev"
Requires-Dist: ipython; extra == "dev"
Requires-Dist: pyproject2conda; extra == "dev"
Requires-Dist: sphinx-autobuild; extra == "dev"

@@ -49,0 +54,0 @@ Requires-Dist: humanize~=4.0; extra == "dev"

@@ -9,3 +9,3 @@ [build-system]

authors = [
{name="Michael Ekstrand", email="mdekstrand@drexel.edu"}
{ name = "Michael Ekstrand", email = "mdekstrand@drexel.edu" },
]

@@ -17,2 +17,3 @@ classifiers = [

"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Operating System :: OS Independent",

@@ -25,3 +26,3 @@ ]

dependencies = [
"msgpack >= 1.0",
"msgpack >= 1.0", # p2c: -s msgpack-python
"numcodecs >= 0.12",

@@ -35,8 +36,12 @@ "typing-extensions ~= 4.8",

"setuptools_scm>=8",
"build",
"twine",
"ruff",
"mypy ~=1.5",
"pyright",
"copier",
"ipython",
"pyproject2conda",
"sphinx-autobuild",
"humanize ~=4.0",
"msgpack-types",
"msgpack-types", # p2c: -p
"pandas-stubs",

@@ -66,3 +71,3 @@ ]

[tool.setuptools_scm]
version_scheme = "release-branch-semver"
version_scheme = "guess-next-dev"

@@ -76,2 +81,3 @@ # settings for generating conda environments for dev & CI, when needed

target-version = "py310"
select = ["E", "F", "I"]
exclude = [

@@ -85,4 +91,24 @@ ".git",

[tool.ruff.lint.isort]
section-order = [
"future",
"standard-library",
"third-party",
"first-party",
"local-folder",
]
[tool.ruff.lint.isort.sections]
test = ["pytest", "hypothesis"]
[tool.mypy]
mypy_path = "$MYPY_CONFIG_FILE_DIR/stubs"
exclude = "^docs/"
[tool.pyright]
typeCheckingMode = "strict"
exclude = [
"docs/*",
]
reportUnnecessaryIsInstance = false
reportMissingImports = true
reportMissingTypeStubs = false
from pytest import raises
from binpickle.errors import FormatError
from binpickle.format import FileHeader, FileTrailer, HEADER_FORMAT, TRAILER_FORMAT
from binpickle.format import FileHeader, FileTrailer, HEADER_FORMAT, TRAILER_FORMAT, Flags

@@ -40,2 +40,13 @@

def test_flags_round_trip():
    """A header carrying a flag survives an encode/decode cycle unchanged."""
    original = FileHeader(length=57, flags=Flags.BIG_ENDIAN)

    encoded = original.encode()
    assert len(encoded) == 16

    decoded = FileHeader.decode(encoded)
    assert decoded.length == 57
    assert decoded.flags == Flags.BIG_ENDIAN
    assert decoded == original
def test_catch_bad_magic():

@@ -53,5 +64,5 @@ with raises(FormatError) as exc:

def test_catch_bad_padding():
def test_catch_bad_flags():
with raises(FormatError) as exc:
FileHeader.decode(b"BPCK\x00\x02\x00\xff" + (b"\x00" * 8))
assert "unsupported flags" in str(exc.value)

@@ -15,2 +15,3 @@ import itertools as it

from hypothesis.extra.numpy import arrays, scalar_dtypes
from binpickle.errors import FormatWarning

@@ -221,3 +222,8 @@ from binpickle.read import BinPickleFile, load

# make sure we get a warning when opening a compressed file as direct
with pytest.warns(FormatWarning):
with BinPickleFile(file, direct=True) as bpf:
assert not bpf.find_errors()
@settings(deadline=None)

@@ -224,0 +230,0 @@ @given(arrays(scalar_dtypes(), st.integers(500, 10000)))

from . import abc
from . import registry
from abc import ABC
from typing_extensions import Buffer, Optional, Self
class Codec(ABC):
    """
    Typing stub for a codec base class.

    Only signatures are declared here; every method body is elided.
    NOTE(review): presumably mirrors ``numcodecs.abc.Codec`` -- confirm
    against the installed numcodecs release.
    """

    # Identifier of the codec; declared optional, so it may be ``None``.
    codec_id: Optional[str]

    def encode(self, buf: Buffer) -> Buffer:
        """Accept a buffer and return a buffer (presumably the encoded data)."""
        ...

    def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
        """
        Accept a buffer and return a buffer (presumably the decoded data).

        ``out`` is an optional buffer argument defaulting to ``None``;
        presumably a destination for the result -- TODO confirm.
        """
        ...

    def get_config(self) -> dict:
        """Return a dictionary (presumably the codec's configuration)."""
        ...

    @classmethod
    def from_config(cls, cfg: dict) -> Self:
        """Alternate constructor: build an instance from a dictionary."""
        ...
from .abc import Codec
codec_registry: dict[str, Codec]
def get_codec(config: dict) -> Codec: ...