Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,35 @@ Homepage = "https://libraryofcongress.github.io/bagit-python/"
[tool]

[tool.ruff]
target-version = "py38"
target-version = "py39"

lint.select = [
"A", # flake8-builtins
"ASYNC", # flake8-async
"B", # flake8-bugbear
"BLE", # flake8-blind-except
"C4", # flake8-comprehensions
"ERA", # eradicate
"EXE", # flake8-executable
"F", # Pyflakes
"G", # flake8-logging-format
"ICN", # flake8-import-conventions
"INP", # flake8-no-pep420
"INT", # flake8-gettext
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PIE", # flake8-pie
"PT", # flake8-pytest-style
"PYI", # flake8-pyi
"RET", # flake8-return
"RSE", # flake8-raise
"RUF", # Ruff-specific rules
"TCH", # flake8-type-checking
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"W", # pycodestyle
]


[tool.isort]
line_length = 110
Expand Down
136 changes: 66 additions & 70 deletions src/bagit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import codecs
Expand Down Expand Up @@ -30,11 +29,11 @@
from urlparse import urlparse


def find_locale_dir():
    """Return the first existing ``locale`` directory, or ``None``.

    Two candidate locations are checked in order: a ``locale`` directory
    next to this module, then a ``locale`` directory under ``sys.prefix``.

    Returns:
        The path of the first candidate that is an existing directory, or
        ``None`` when neither candidate exists.
    """
    for prefix in (os.path.dirname(__file__), sys.prefix):
        locale_dir = os.path.join(prefix, "locale")
        if os.path.isdir(locale_dir):
            return locale_dir

    # Make the "nothing found" result explicit instead of relying on the
    # implicit fall-through return (ruff RET503).
    return None

Check failure on line 36 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (RET503)

src/bagit/__init__.py:32:1: RET503 Missing explicit `return` at the end of function able to return non-`None` value help: Add explicit `return` statement


TRANSLATION_CATALOG = gettext.translation(
Expand Down Expand Up @@ -149,7 +148,7 @@
"""

if checksum is not None:
warnings.warn(

Check failure on line 151 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (B028)

src/bagit/__init__.py:151:9: B028 No explicit `stacklevel` keyword argument found help: Set `stacklevel=2`
_(
"The `checksum` argument for `make_bag` should be replaced with `checksums`"
),
Expand Down Expand Up @@ -208,74 +207,73 @@
raise BagError(
_("Read permissions are required to calculate file fixities")
)
else:
LOGGER.info(_("Creating data directory"))

# FIXME: if we calculate full paths we won't need to deal with changing directories
os.chdir(bag_dir)
cwd = os.getcwd()
temp_data = tempfile.mkdtemp(dir=cwd)

for f in os.listdir("."):
if os.path.abspath(f) == temp_data:
continue
new_f = os.path.join(temp_data, f)
LOGGER.info(
_("Moving %(source)s to %(destination)s"),
{"source": f, "destination": new_f},
)
os.rename(f, new_f)
LOGGER.info(_("Creating data directory"))

# FIXME: if we calculate full paths we won't need to deal with changing directories
os.chdir(bag_dir)
cwd = os.getcwd()
temp_data = tempfile.mkdtemp(dir=cwd)

for f in os.listdir("."):
if os.path.abspath(f) == temp_data:
continue
new_f = os.path.join(temp_data, f)
LOGGER.info(
_("Moving %(source)s to %(destination)s"),
{"source": temp_data, "destination": "data"},
{"source": f, "destination": new_f},
)
while True:
try:
os.rename(temp_data, "data")
break
except PermissionError as e:
if hasattr(e, "winerror") and e.winerror == 5:
LOGGER.warning(
_(
"PermissionError [WinError 5] when renaming temp folder. Retrying in 10 seconds..."
)
os.rename(f, new_f)

LOGGER.info(
_("Moving %(source)s to %(destination)s"),
{"source": temp_data, "destination": "data"},
)
while True:
try:
os.rename(temp_data, "data")
break
except PermissionError as e:
if hasattr(e, "winerror") and e.winerror == 5:
LOGGER.warning(
_(
"PermissionError [WinError 5] when renaming temp folder. Retrying in 10 seconds..."
)
time.sleep(10)
else:
raise
)
time.sleep(10)
else:
raise

# permissions for the payload directory should match those of the
# original directory
os.chmod("data", os.stat(cwd).st_mode)
# permissions for the payload directory should match those of the
# original directory
os.chmod("data", os.stat(cwd).st_mode)

total_bytes, total_files = make_manifests(
"data", processes, algorithms=checksums, encoding=encoding
)
total_bytes, total_files = make_manifests(
"data", processes, algorithms=checksums, encoding=encoding
)

LOGGER.info(_("Creating bagit.txt"))
txt = """BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n"""
with open_text_file("bagit.txt", "w") as bagit_file:
bagit_file.write(txt)

LOGGER.info(_("Creating bag-info.txt"))
if bag_info is None:
bag_info = {}

# allow 'Bagging-Date' and 'Bag-Software-Agent' to be overridden
if "Bagging-Date" not in bag_info:
bag_info["Bagging-Date"] = date.strftime(date.today(), "%Y-%m-%d")
if "Bag-Software-Agent" not in bag_info:
bag_info["Bag-Software-Agent"] = "bagit.py v%s <%s>" % (
VERSION,
PROJECT_URL,
)
LOGGER.info(_("Creating bagit.txt"))
txt = """BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n"""
with open_text_file("bagit.txt", "w") as bagit_file:
bagit_file.write(txt)

LOGGER.info(_("Creating bag-info.txt"))
if bag_info is None:
bag_info = {}

# allow 'Bagging-Date' and 'Bag-Software-Agent' to be overridden
if "Bagging-Date" not in bag_info:
bag_info["Bagging-Date"] = date.strftime(date.today(), "%Y-%m-%d")
if "Bag-Software-Agent" not in bag_info:
bag_info["Bag-Software-Agent"] = "bagit.py v%s <%s>" % (
VERSION,
PROJECT_URL,
)

Check failure on line 270 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (UP031)

src/bagit/__init__.py:267:46: UP031 Use format specifiers instead of percent format help: Replace with format specifiers

bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
_make_tag_file("bag-info.txt", bag_info)
bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)

Check failure on line 272 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (UP031)

src/bagit/__init__.py:272:36: UP031 Use format specifiers instead of percent format help: Replace with format specifiers
_make_tag_file("bag-info.txt", bag_info)

for c in checksums:
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
for c in checksums:
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
except Exception:
LOGGER.exception(_("An error occurred creating a bag in %s"), bag_dir)
raise
Expand All @@ -285,14 +283,14 @@
return Bag(bag_dir)


class Bag(object):
class Bag:
"""A representation of a bag."""

valid_files = ["bagit.txt", "fetch.txt"]

Check failure on line 289 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (RUF012)

src/bagit/__init__.py:289:19: RUF012 Mutable default value for class attribute help: Consider initializing in `__init__` or annotating with `typing.ClassVar`
valid_directories = ["data"]

Check failure on line 290 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (RUF012)

src/bagit/__init__.py:290:25: RUF012 Mutable default value for class attribute help: Consider initializing in `__init__` or annotating with `typing.ClassVar`

def __init__(self, path):
super(Bag, self).__init__()
super().__init__()
self.tags = {}
self.info = {}
#: Dictionary of manifest entries and the checksum values for each
Expand Down Expand Up @@ -322,12 +320,12 @@

@property
def algs(self):
warnings.warn(_("Use Bag.algorithms instead of Bag.algs"), DeprecationWarning)

Check failure on line 323 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (B028)

src/bagit/__init__.py:323:9: B028 No explicit `stacklevel` keyword argument found help: Set `stacklevel=2`
return self.algorithms

@property
def version(self):
warnings.warn(

Check failure on line 328 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (B028)

src/bagit/__init__.py:328:9: B028 No explicit `stacklevel` keyword argument found help: Set `stacklevel=2`
_("Use the Bag.version_info tuple instead of Bag.version"),
DeprecationWarning,
)
Expand Down Expand Up @@ -359,10 +357,10 @@
try:
self.version_info = tuple(int(i) for i in self._version.split(".", 1))
except ValueError:
raise BagError(
_("Bag version numbers must be MAJOR.MINOR numbers, not %s")
% self._version
)

Check failure on line 363 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (B904)

src/bagit/__init__.py:360:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling

if (0, 93) <= self.version_info <= (0, 95):
self.tag_file_name = "package-info.txt"
Expand All @@ -376,7 +374,7 @@
try:
codecs.lookup(self.encoding)
except LookupError:
raise BagValidationError(_("Unsupported encoding: %s") % self.encoding)

Check failure on line 377 in src/bagit/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (B904)

src/bagit/__init__.py:377:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling

info_file_path = os.path.join(self.path, self.tag_file_name)
if os.path.exists(info_file_path):
Expand Down Expand Up @@ -723,8 +721,7 @@
)
if self.version_info >= (1,):
raise BagError(msg % warning_ctx)
else:
LOGGER.warning(msg, warning_ctx)
LOGGER.warning(msg, warning_ctx)
else:
raise BagError(
_(
Expand Down Expand Up @@ -960,7 +957,7 @@

class BagValidationError(BagError):
def __init__(self, message, details=None):
super(BagValidationError, self).__init__()
super().__init__()

if details is None:
details = []
Expand All @@ -977,14 +974,14 @@

class ManifestErrorDetail(BagError):
def __init__(self, path):
super(ManifestErrorDetail, self).__init__()
super().__init__()

self.path = path


class ChecksumMismatch(ManifestErrorDetail):
def __init__(self, path, algorithm=None, expected=None, found=None):
super(ChecksumMismatch, self).__init__(path)
super().__init__(path)

self.path = path
self.algorithm = algorithm
Expand Down Expand Up @@ -1021,7 +1018,7 @@
"""

def __init__(self, file_a, file_b):
super(FileNormalizationConflict, self).__init__()
super().__init__()

self.file_a = file_a
self.file_b = file_b
Expand Down Expand Up @@ -1079,8 +1076,7 @@
normalized_filename = normalize_unicode(filename)
if normalized_filename in output:
raise FileNormalizationConflict(filename, output[normalized_filename])
else:
output[normalized_filename] = filename
output[normalized_filename] = filename

return output

Expand Down Expand Up @@ -1153,7 +1149,7 @@
break
for i in f_hashers.values():
i.update(block)
except (OSError, IOError) as e:
except OSError as e:
raise BagValidationError(
_("Could not read %(filename)s: %(error)s")
% {"filename": full_path, "error": str(e)}
Expand Down
16 changes: 7 additions & 9 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# encoding: utf-8

from __future__ import absolute_import, division, print_function, unicode_literals

import codecs
import datetime
Expand Down Expand Up @@ -38,7 +36,7 @@ class SelfCleaningTestCase(unittest.TestCase):
"""TestCase subclass which cleans up self.tmpdir after each test"""

def setUp(self):
super(SelfCleaningTestCase, self).setUp()
super().setUp()

self.starting_directory = (
os.getcwd()
Expand All @@ -62,7 +60,7 @@ def tearDown(self):

shutil.rmtree(self.tmpdir)

super(SelfCleaningTestCase, self).tearDown()
super().tearDown()


@mock.patch(
Expand Down Expand Up @@ -267,7 +265,7 @@ def test_validation_completeness_error_details(self):
def test_bom_in_bagit_txt(self):
bag = bagit.make_bag(self.tmpdir)
BOM = codecs.BOM_UTF8.decode("utf-8")
with open(j(self.tmpdir, "bagit.txt"), "r") as bf:
with open(j(self.tmpdir, "bagit.txt")) as bf:
bagfile = BOM + bf.read()
with open(j(self.tmpdir, "bagit.txt"), "w") as bf:
bf.write(bagfile)
Expand Down Expand Up @@ -332,7 +330,7 @@ def test_mixed_case_checksums(self):
hasher = hashlib.new("md5")
contents = slurp_text_file(j(self.tmpdir, "manifest-md5.txt")).encode("utf-8")
hasher.update(contents)
with open(j(self.tmpdir, "tagmanifest-md5.txt"), "r") as tagmanifest:
with open(j(self.tmpdir, "tagmanifest-md5.txt")) as tagmanifest:
tagman_contents = tagmanifest.read()
tagman_contents = tagman_contents.replace(
bag.entries["manifest-md5.txt"]["md5"], hasher.hexdigest()
Expand Down Expand Up @@ -424,7 +422,7 @@ def test_validate_optional_tagfile_in_directory(self):
self.assertRaises(bagit.BagValidationError, self.validate, bag)

hasher = hashlib.new("md5")
with open(j(tagdir, "tagfolder", "tagfile"), "r") as tf:
with open(j(tagdir, "tagfolder", "tagfile")) as tf:
contents = tf.read().encode("utf-8")
hasher.update(contents)
with open(j(self.tmpdir, "tagmanifest-md5.txt"), "w") as tagman:
Expand Down Expand Up @@ -454,7 +452,7 @@ def test_validate_unreadable_file(self):

class TestMultiprocessValidation(TestSingleProcessValidation):
def validate(self, bag, *args, **kwargs):
return super(TestMultiprocessValidation, self).validate(
return super().validate(
bag, *args, processes=2, **kwargs
)

Expand Down Expand Up @@ -1014,7 +1012,7 @@ def test_open_bag_with_unknown_encoding(self):

class TestFetch(SelfCleaningTestCase):
def setUp(self):
super(TestFetch, self).setUp()
super().setUp()

# All of these tests will involve fetch.txt usage with an existing bag
# so we'll simply create one:
Expand Down
8 changes: 4 additions & 4 deletions utils/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
ftp.retrlines("NLST", files.append)

for file in files:
print(("fetching %s" % file))
print("fetching %s" % file)
fh = open(os.path.join("bench-data", file), "wb")
ftp.retrbinary("RETR %s" % file, fh.write)
fh.close()
Expand All @@ -49,7 +49,7 @@
for p in range(1, 9):
t = timeit.Timer(statement % p)
print(
("create w/ %s processes: %.2f seconds " % (p, (10 * t.timeit(number=10) / 10)))
"create w/ %s processes: %.2f seconds " % (p, (10 * t.timeit(number=10) / 10))
)


Expand All @@ -71,10 +71,10 @@
for p in range(1, 9):
t = timeit.Timer(statement % p)
print(
(

"validate w/ %s processes: %.2f seconds "
% (p, (10 * t.timeit(number=10) / 10))
)

)

shutil.rmtree("bench-data-bag")
1 change: 0 additions & 1 deletion utils/locales.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# encoding: utf-8

import sys
import subprocess
Expand Down