quilt
Advanced tools
+101
| # Copyright 2018, Quilt Data Inc. | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Convenience functions for displaying images in Jupyter notebooks. | ||
| `pip install quilt[img]` | ||
| Or, in development: | ||
| `pip install -e ./[img]` | ||
| """ | ||
| from math import ceil, floor, sqrt | ||
| from six import string_types | ||
| import matplotlib.image as mpimg | ||
| import matplotlib.pyplot as plt | ||
| from quilt.tools.const import ELLIPSIS | ||
| from quilt.nodes import DataNode, GroupNode | ||
| from quilt.tools.build import splitext_no_dot | ||
def plot(figsize=None, formats=None, limit=100, titlelen=10, **kwargs):
    """Display an image [in a Jupyter Notebook] from a Quilt fragment path.
    Intended for use with `%matplotlib inline`.

    Convenience method that loops over subplots that call
    `plt.imshow(image.imread(FRAG_PATH))`.

    Keyword arguments
    * figsize=None # None means auto, else provide (HEIGHT_INCHES, WIDTH_INCHES)
    * formats=None # List of extensions as strings ['jpg', 'png', ...]
    * limit=100 # maximum number of images to display
    * titlelen=10 # max number of characters in subplot title
    * **kwargs - all remaining kwargs are passed to plt.subplots;
      see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.subplots.html

    Returns a callable `_plot(node, paths)` suitable for Quilt's `asa=`
    mechanism: given a package node and its fragment paths, it renders the
    matching image fragments in a grid of subplots.
    """
    # pylint: disable=protected-access
    def _plot(node, paths):
        # Lower-case the requested extensions once so the filter below is
        # case-insensitive; None means "accept any extension".
        lower_formats = set((x.lower() for x in formats)) if formats is not None else None
        def node_filter(frag, meta):
            # Keep only string fragment paths whose file extension matches
            # the requested formats (if any were given).
            filepath = meta.get('_system', {}).get('filepath', None)
            # don't try to read DataFrames as images
            if isinstance(frag, string_types) and filepath:
                _, ext = splitext_no_dot(filepath)
                if lower_formats is None or ext.lower() in lower_formats:
                    return True
            return False
        # assume DataNode has one path; doesn't work with multi-fragment images
        display = [('', paths[0], node._meta)]
        # for GroupNodes, display all DataNode children
        if isinstance(node, GroupNode):
            datanodes = [(x, y) for (x, y) in node._items() if isinstance(y, DataNode)]
            display = [(x, y._data(), y._meta) for (x, y) in datanodes]
        # sort by name so iteration is reproducible (and unit tests pass)
        display = sorted(display, key=lambda rec: rec[0])
        display = [x for x in display if node_filter(x[1], x[2])]
        # Truncate to `limit` entries, telling the user how many were dropped.
        if len(display) > limit:
            print('Displaying {} of {} images{}'.format(limit, len(display), ELLIPSIS))
            display = display[:limit]
        # display can be empty e.g. if no DataNode children
        if not display:
            print('No images to display.')
            return
        # cast to int to avoid downstream complaints of
        # 'float' object cannot be interpreted as an index
        floatlen = float(len(display))  # prevent integer division in 2.7
        # Grid layout: at most 8 columns, enough rows to hold every image.
        cols = min(int(floor(sqrt(floatlen))), 8)
        rows = int(ceil(floatlen/cols))
        plt.tight_layout()
        plt.subplots(
            rows,
            cols,
            figsize=(cols*2, rows*2) if not figsize else figsize,
            **kwargs)
        # Iterate over every grid cell; cells beyond len(display) stay blank
        # (axes are still created so their frames can be turned off).
        for i in range(rows*cols):
            axes = plt.subplot(rows, cols, i + 1)  # subplots start at 1, not 0
            axes.axis('off')
            if i < len(display):
                (name, frag, meta) = display[i]
                # Truncate long names so titles don't overlap neighbors.
                plt.title(name[:titlelen] + ELLIPSIS if len(name) > titlelen else name)
                filepath = meta.get('_system', {}).get('filepath', None)
                _, ext = splitext_no_dot(filepath)
                try:
                    bits = mpimg.imread(frag, format=ext)
                    plt.imshow(bits)
                # Mac throws OSError, Linux IOError if file not recognizable
                except (IOError, OSError) as err:
                    print('{}: {}'.format(name, str(err)))
                    continue
    return _plot
| # Copyright 2018, Quilt Data Inc. | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Present Quilt packages as PyTorch Datasets | ||
| `pip install quilt[pytorch]` | ||
| Or, in development: | ||
| `pip install -e ./[pytorch]` | ||
| """ | ||
| from torch.utils.data import Dataset | ||
| from quilt.nodes import GroupNode | ||
def dataset(
        node_parser,
        include=lambda x: True,
        input_transform=None,
        target_transform=None):
    """Convert immediate children of a GroupNode into a torch.data.Dataset

    Keyword arguments
    * node_parser=callable that converts a DataNode to a Dataset item
    * include=lambda x: True
        lambda(quilt.nodes.GroupNode) => {True, False}
        intended to filter nodes based on metadata
    * input_transform=None; optional callable that takes the item as its argument
    * target_transform=None; optional callable that takes the item as its argument;
        implementation may make its own copy of item to avoid side effects

    Dataset.__getitem__ returns the following tuple
        item = node_parser(node)
        (input_transform(item), target_transform(item))
    Or, if no _transform functions are provided:
        (item, item)

    Returns a callable `_dataset(node, paths)` suitable for Quilt's `asa=`
    mechanism.
    """
    # Fixed docstring: it previously documented `output_transform`, a keyword
    # that does not exist; the actual parameter is `target_transform`.
    def _dataset(node, paths):  # pylint: disable=unused-argument
        # `paths` is part of the asa= callback signature but is unused here;
        # DatasetFromGroupNode reads fragments from the nodes themselves.
        return DatasetFromGroupNode(
            node,
            node_parser=node_parser,
            include=include,
            input_transform=input_transform,
            target_transform=target_transform)
    return _dataset
# pylint: disable=too-few-public-methods
# reason: this interface is baked by torch
class DatasetFromGroupNode(Dataset):
    """Present immediate children of a GroupNode as a torch.dataset"""
    def __init__(
            self,
            group,
            include,
            node_parser,
            input_transform,
            target_transform):
        """
        Arguments
        * group: GroupNode whose immediate children become dataset items
        * include: callable(node) -> bool used to filter children
        * node_parser: callable(node) -> item
        * input_transform, target_transform: optional callables applied to
          the parsed item in __getitem__
        """
        super(DatasetFromGroupNode, self).__init__()
        # Bug fix: the original passed the offending value as a second
        # TypeError argument instead of formatting it into the message.
        if not isinstance(group, GroupNode):
            raise TypeError('Expected group to be GroupNode, got {}'.format(group))
        if not callable(include):
            raise TypeError('Expected include to be callable, got {}'.format(include))
        self.nodes = [x for x in group if include(x)]
        self.node_parser = node_parser
        self.input_transform = input_transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return (input, target) for the node at `index`.

        Both halves start from the same parsed item; each optional transform
        is applied independently to its half.
        """
        item = self.node_parser(self.nodes[index])
        target = item
        if self.input_transform:
            item = self.input_transform(item)
        if self.target_transform:
            target = self.target_transform(target)
        return item, target

    def __len__(self):
        """Number of children that passed the `include` filter."""
        return len(self.nodes)
"""test class against quilt.asa.plot"""
import os
import numpy as np
import pytest
from quilt.tools import command
from .utils import QuiltTestCase, try_require
# Skip the whole module unless the optional [img] extras resolve;
# allow_module_level is required to call pytest.skip at import time.
if not try_require('quilt[img]'):
    # pylint: disable=unexpected-keyword-arg
    pytest.skip(
        "only test if [img] extras installed",
        allow_module_level=True)
# pylint: disable=no-self-use
class ImportTest(QuiltTestCase):
    """Integration tests for quilt.asa.img.plot rendering."""
    # the following two lines must happen first: select a non-interactive
    # backend so headless unit tests (no display server) don't barf
    import matplotlib as mpl
    mpl.use('Agg')

    def test_asa_plot(self):
        """plot() should raise no exceptions on any node shape."""
        from quilt.asa.img import plot
        mydir = os.path.dirname(__file__)
        # consistency fix: drop the redundant './' prefix so the path is
        # built the same way as in the other tests below
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/imgtest', build_path)
        pkg = command.load('foo/imgtest')
        # expect no exceptions on root
        pkg(asa=plot())
        # pylint: disable=no-member
        # expect no exceptions on GroupNode with only DF children
        pkg.dataframes(asa=plot())
        # expect no exceptions on GroupNode with mixed children
        pkg.mixed(asa=plot())
        # expect no exceptions on dir of images
        pkg.mixed.img(asa=plot())
        pkg.mixed.img(asa=plot(formats=['jpg', 'png']))
        # assert images != filtered, 'Expected only .jpg and .png images'
        # expect no exceptions on single images
        pkg.mixed.img.sf(asa=plot())
        pkg.mixed.img.portal(asa=plot())

    def _are_similar(self, ima, imb, error=0.01):
        """predicate to see if images differ by less than
        the given error; uses mean squared error; see also
        https://www.pyimagesearch.com/2014/09/15/python-compare-two-images/
        ima, imb: PIL.Image instances (anything np.array() accepts)
        """
        ima_ = np.array(ima).astype('float')
        imb_ = np.array(imb).astype('float')
        assert ima_.shape == imb_.shape, 'ima and imb must have same shape'
        # generalization: check only the first two (spatial) dimensions so
        # grayscale (2-D) images work as well as multi-channel (3-D) ones;
        # the original unpacked (x, y, _) and required exactly 3 dimensions
        for shape in (ima_.shape, imb_.shape):
            assert shape[0] > 0 and shape[1] > 0, \
                'unexpected image dimension: {}'.format(shape[:2])
        # sum of normalized channel differences squared
        error_ = np.sum(((ima_ - imb_)/255) ** 2)
        # normalize by total number of pixels; shapes are equal per the
        # assert above, so use ima_ consistently (the original mixed
        # ima_.shape[0] with imb_.shape[1])
        error_ /= float(ima_.shape[0] * ima_.shape[1])
        return error_ < error

    def test_asa_plot_output(self):
        """rendered grid should match the checked-in reference image"""
        from PIL import Image
        from matplotlib import pyplot as plt
        from quilt.asa.img import plot
        mydir = os.path.dirname(__file__)
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/imgtest', build_path)
        pkg = command.load('foo/imgtest')
        outfile = os.path.join('.', 'temp-plot.png')
        # pylint: disable=no-member
        pkg.mixed.img(asa=plot(figsize=(10, 10)))
        # size * dpi = 1000 x 1000 pixels
        plt.savefig(outfile, dpi=100, format='png', transparent=False)
        ref_path = os.path.join(mydir, 'data', 'ref-asa-plot.png')
        ref_img = Image.open(ref_path)
        tst_img = Image.open(outfile)
        assert self._are_similar(ref_img, tst_img), \
            'render differs from reference: {}'.format(ref_img)

    def test_asa_plot_formats_output(self):
        """rendered grid with a formats= filter should match its reference"""
        from PIL import Image
        from matplotlib import pyplot as plt
        from quilt.asa.img import plot
        mydir = os.path.dirname(__file__)
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/imgtest', build_path)
        pkg = command.load('foo/imgtest')
        outfile = os.path.join('.', 'temp-formats-plot.png')
        # pylint: disable=no-member
        pkg.mixed.img(asa=plot(figsize=(10, 10), formats=['png']))
        # size * dpi = 1000 x 1000 pixels
        plt.savefig(outfile, dpi=100, format='png', transparent=False)
        ref_path = os.path.join(mydir, 'data', 'ref-asa-formats.png')
        ref_img = Image.open(ref_path)
        tst_img = Image.open(outfile)
        assert self._are_similar(ref_img, tst_img), \
            'render differs from reference: {}'.format(ref_img)
"""test class against quilt.asa.torch"""
import os
import pytest
from six import string_types
from quilt.tools import command
from quilt.nodes import DataNode
from .utils import QuiltTestCase, try_require
# Skip the whole module unless all optional extras resolve (torch and
# torchvision may be unavailable on some platforms); allow_module_level is
# required to call pytest.skip at import time.
if not try_require('quilt[img,pytorch,torchvision]'):
    # pylint: disable=unexpected-keyword-arg
    pytest.skip("only test if [img,pytorch,torchvision] extras installed",
                allow_module_level=True)
# pylint: disable=no-self-use
class ImportTest(QuiltTestCase):
    """Integration test for the quilt.asa.pytorch Dataset adapter."""
    def test_asa_pytorch(self):
        """test asa.torch interface by converting a GroupNode with asa="""
        from torchvision.transforms import Compose, CenterCrop, ToTensor, Resize
        from torch.utils.data import Dataset
        from PIL import Image
        from torch import Tensor
        from quilt.asa.pytorch import dataset
        # pylint: disable=missing-docstring
        # helper functions to simulate real pytorch dataset usage
        def calculate_valid_crop_size(crop_size, upscale_factor):
            # largest size <= crop_size that is a multiple of upscale_factor
            return crop_size - (crop_size % upscale_factor)

        def node_parser(node):
            # extract the luma (Y) channel from the image at the node's path
            path = node()
            if isinstance(path, string_types):
                img = Image.open(path).convert('YCbCr')
                chan, _, _ = img.split()
                return chan
            else:
                raise TypeError('Expected string path to an image fragment')

        def input_transform(crop_size, upscale_factor):
            return Compose([
                CenterCrop(crop_size),
                Resize(crop_size // upscale_factor),
                ToTensor(),
            ])

        def target_transform(crop_size):
            def _inner(img):
                # copy so the target transform can't mutate the shared item
                img_ = img.copy()
                return Compose([
                    CenterCrop(crop_size),
                    ToTensor(),
                ])(img_)
            return _inner

        # pylint: disable=protected-access
        def is_image(node):
            """file extension introspection on Quilt nodes"""
            if isinstance(node, DataNode):
                filepath = node._meta.get('_system', {}).get('filepath')
                if filepath:
                    return any(
                        filepath.endswith(extension)
                        for extension in [".png", ".jpg", ".jpeg"])
            # explicit False instead of implicit None for non-image nodes
            return False
        # end helper functions

        mydir = os.path.dirname(__file__)
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/torchtest', build_path)
        pkg = command.load('foo/torchtest')

        upscale_factor = 3
        crop_size = calculate_valid_crop_size(256, upscale_factor)
        # pylint: disable=no-member
        my_dataset = pkg.mixed.img(asa=dataset(
            include=is_image,
            node_parser=node_parser,
            input_transform=input_transform(crop_size, upscale_factor),
            target_transform=target_transform(crop_size)
        ))
        # fix: the message used type(Dataset) (the metaclass) instead of the
        # Dataset class itself
        assert isinstance(my_dataset, Dataset), \
            'expected type {}, got {}'.format(Dataset, type(my_dataset))
        # idiom: len()/indexing instead of calling dunders directly
        assert len(my_dataset) == 2, \
            'expected two images in mixed.img, got {}'.format(len(my_dataset))
        for i in range(len(my_dataset)):
            tens = my_dataset[i]
            assert all(isinstance(x, Tensor) for x in tens), \
                'Expected all torch.Tensors in tuple, got {}'.format(tens)
+2
-2
| Metadata-Version: 1.1 | ||
| Name: quilt | ||
| Version: 2.9.6 | ||
| Version: 2.9.7 | ||
| Summary: Quilt is a data package manager | ||
@@ -9,3 +9,3 @@ Home-page: https://github.com/quiltdata/quilt | ||
| License: LICENSE | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.6 | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.7 | ||
| Description-Content-Type: UNKNOWN | ||
@@ -12,0 +12,0 @@ Description: |
| Metadata-Version: 1.1 | ||
| Name: quilt | ||
| Version: 2.9.6 | ||
| Version: 2.9.7 | ||
| Summary: Quilt is a data package manager | ||
@@ -9,3 +9,3 @@ Home-page: https://github.com/quiltdata/quilt | ||
| License: LICENSE | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.6 | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.7 | ||
| Description-Content-Type: UNKNOWN | ||
@@ -12,0 +12,0 @@ Description: |
@@ -18,4 +18,12 @@ appdirs>=1.4.0 | ||
| [img] | ||
| matplotlib>=2.2.2 | ||
| Pillow>=5.1.0 | ||
| [pytorch] | ||
| torch>=0.4.0 | ||
| [tests] | ||
| pytest | ||
| pytest-cov | ||
| responses>=0.7.0 | ||
@@ -28,1 +36,4 @@ | ||
| funcsigs | ||
| [torchvision] | ||
| torchvision>=0.2.1 |
@@ -14,5 +14,10 @@ README.md | ||
| quilt.egg-info/top_level.txt | ||
| quilt/asa/__init__.py | ||
| quilt/asa/img.py | ||
| quilt/asa/pytorch.py | ||
| quilt/test/__init__.py | ||
| quilt/test/gen_data.py | ||
| quilt/test/integration.py | ||
| quilt/test/test_asa_plot.py | ||
| quilt/test/test_asa_torch.py | ||
| quilt/test/test_build.py | ||
@@ -19,0 +24,0 @@ quilt/test/test_checks.py |
+1
-1
@@ -66,3 +66,3 @@ """ | ||
| child = _from_core_node(package, core_child) | ||
| setattr(node, name, child) | ||
| node[name] = child | ||
@@ -69,0 +69,0 @@ return node |
+69
-17
@@ -8,3 +8,3 @@ """ | ||
| import pandas as pd | ||
| from six import iteritems, string_types | ||
| from six import iteritems, itervalues, string_types | ||
@@ -88,8 +88,62 @@ from .tools import core | ||
| """ | ||
| def __init__(self, meta): | ||
| super(GroupNode, self).__init__(meta) | ||
| self._children = {} | ||
| def __getattr__(self, name): | ||
| if name.startswith('_'): | ||
| return super(GroupNode, self).__getattr__(name) | ||
| else: | ||
| try: | ||
| return self[name] | ||
| except KeyError: | ||
| raise AttributeError | ||
| def __setattr__(self, name, value): | ||
| if name.startswith('_') or isinstance(value, Node): | ||
| if name.startswith('_'): | ||
| super(GroupNode, self).__setattr__(name, value) | ||
| else: | ||
| raise AttributeError("{val} is not a valid package node".format(val=value)) | ||
| self[name] = value | ||
| def __delattr__(self, name): | ||
| if name.startswith('_'): | ||
| super(GroupNode, self).__detattr__(name, value) | ||
| else: | ||
| try: | ||
| del self[name] | ||
| except KeyError: | ||
| raise AttributeError | ||
| def __dir__(self): | ||
| attrs = set() | ||
| try: | ||
| attrs.update(super(GroupNode, self).__dir__()) | ||
| except AttributeError: | ||
| # Fallback for PY2 | ||
| attrs.update(dir(type(self))) | ||
| attrs.update(self.__dict__) | ||
| attrs.update(self._children) | ||
| return sorted(attrs) | ||
| def __getitem__(self, name): | ||
| return self._children[name] | ||
| def __setitem__(self, name, value): | ||
| if not isinstance(value, Node): | ||
| raise TypeError("{val} is not a valid package node".format(val=value)) | ||
| self._children[name] = value | ||
| def __delitem__(self, name): | ||
| del self._children[name] | ||
| def __contains__(self, name): | ||
| return name in self._children | ||
| def _get(self, name, default=None): | ||
| return self._children.get(name, default) | ||
| def __len__(self): | ||
| return len(self._children) | ||
| def __repr__(self): | ||
@@ -104,8 +158,6 @@ pinfo = super(GroupNode, self).__repr__() | ||
| def __iter__(self): | ||
| for _, child in self._items(): | ||
| yield child | ||
| return itervalues(self._children) | ||
| def _items(self): | ||
| return ((name, child) for name, child in iteritems(self.__dict__) | ||
| if not name.startswith('_')) | ||
| return self._children.items() | ||
@@ -116,3 +168,3 @@ def _data_keys(self): | ||
| """ | ||
| return [name for name, child in self._items() if not isinstance(child, GroupNode)] | ||
| return [name for name, child in iteritems(self._children) if not isinstance(child, GroupNode)] | ||
@@ -123,13 +175,13 @@ def _group_keys(self): | ||
| """ | ||
| return [name for name, child in self._items() if isinstance(child, GroupNode)] | ||
| return [name for name, child in iteritems(self._children) if isinstance(child, GroupNode)] | ||
| def _keys(self): | ||
| """ | ||
| keys directly accessible on this object via getattr or . | ||
| keys directly accessible on this object via [] | ||
| """ | ||
| return [name for name in self.__dict__ if not name.startswith('_')] | ||
| return self._children.keys() | ||
| def _add_group(self, groupname): | ||
| child = GroupNode({}) | ||
| setattr(self, groupname, child) | ||
| self[groupname] = child | ||
@@ -266,6 +318,6 @@ def _data(self, asa=None): | ||
| for key in path[:-1]: | ||
| child = getattr(node, key, None) | ||
| child = node._get(key) | ||
| if not isinstance(child, GroupNode): | ||
| child = GroupNode({}) | ||
| setattr(node, key, child) | ||
| node[key] = child | ||
@@ -276,3 +328,3 @@ node = child | ||
| data_node = DataNode(self._package, core_node, value, metadata) | ||
| setattr(node, key, data_node) | ||
| node[key] = data_node | ||
@@ -299,3 +351,3 @@ def _filter(self, lambda_or_dict): | ||
| if filtered_child is not None: | ||
| setattr(filtered, child_name, filtered_child) | ||
| filtered[child_name] = filtered_child | ||
@@ -306,3 +358,3 @@ # Return the group if: | ||
| # 3) It's the package itself. | ||
| if matched or next(filtered._items(), None) or node == self: | ||
| if matched or len(filtered) or node == self: | ||
| return filtered | ||
@@ -309,0 +361,0 @@ else: |
@@ -38,2 +38,3 @@ """ | ||
| def test_build_from_cache(self): | ||
@@ -40,0 +41,0 @@ """ |
| """ | ||
| Tests for magic imports. | ||
| """ | ||
| import os | ||
| import platform | ||
| import time | ||
| import numpy as np | ||
| import pandas as pd | ||
| import pytest | ||
| from six import string_types | ||
| from quilt.nodes import GroupNode, DataNode | ||
| from quilt.tools import command | ||
| from quilt.nodes import DataNode, GroupNode | ||
| from quilt.tools.const import PACKAGE_DIR_NAME | ||
@@ -18,2 +20,3 @@ from quilt.tools.package import Package | ||
| # pylint: disable=protected-access | ||
| class ImportTest(QuiltTestCase): | ||
@@ -41,2 +44,5 @@ def test_imports(self): | ||
| assert package['dataframes'] == dataframes | ||
| assert package['README'] == README | ||
| assert set(dataframes._keys()) == {'csv', 'nulls'} | ||
@@ -46,4 +52,7 @@ assert set(dataframes._group_keys()) == set() | ||
| assert len(package) == 2 | ||
| assert len(list(package)) == 2 | ||
| assert 'dataframes' in dir(package) | ||
| for item in package: | ||
@@ -64,2 +73,7 @@ assert isinstance(item, (GroupNode, DataNode)) | ||
| # Store data is read-only | ||
| with self.assertRaises(IOError): | ||
| with open(README(), 'w'): | ||
| pass | ||
| # Bad attributes of imported packages | ||
@@ -364,3 +378,3 @@ | ||
| df = pd.DataFrame(dict(a=[1, 2, 3])) | ||
| with self.assertRaises(AttributeError): | ||
| with self.assertRaises(TypeError): | ||
| package4.newdf = df | ||
@@ -388,3 +402,3 @@ | ||
| # current spec requires that build() *not* update the in-memory module tree. | ||
| newpath1 = getattr(module, newfilename)() | ||
| newpath1 = module[newfilename]() | ||
| assert newpath1 == newfilename | ||
@@ -397,3 +411,3 @@ | ||
| assert reloaded_module is not module | ||
| newpath2 = getattr(reloaded_module, newfilename)() | ||
| newpath2 = reloaded_module[newfilename]() | ||
| assert 'myfile' not in newpath2 | ||
@@ -419,3 +433,3 @@ | ||
| assert getattr(package6, newfilename1)() == newfilename2 | ||
| assert package6[newfilename1]() == newfilename2 | ||
@@ -447,3 +461,3 @@ def test_team_non_team_imports(self): | ||
| df = pd.DataFrame(dict(a=[1, 2, 3])) | ||
| with self.assertRaises(AttributeError): | ||
| with self.assertRaises(TypeError): | ||
| package4.newdf = df | ||
@@ -530,2 +544,1 @@ | ||
| command.load('foo/package:v:1.0.0') | ||
+22
-1
@@ -7,2 +7,3 @@ """ | ||
| import shutil | ||
| from stat import S_IWUSR | ||
| import tempfile | ||
@@ -41,2 +42,12 @@ import unittest | ||
| def try_require(string): | ||
| """return true iff specified require string resolves properly; | ||
| for use with @pytest.mark.skipif""" | ||
| import pkg_resources | ||
| try: | ||
| pkg_resources.require(string) | ||
| except pkg_resources.ResolutionError as ex: | ||
| print(ex) | ||
| return False | ||
| return True | ||
@@ -55,4 +66,14 @@ class BasicQuiltTestCase(unittest.TestCase): | ||
| os.chdir(self._old_dir) | ||
| shutil.rmtree(self._test_dir) | ||
| def _onerror(func, path, exc_info): | ||
| """ | ||
| Handle read-only files on Windows | ||
| """ | ||
| if not os.access(path, os.W_OK): | ||
| os.chmod(path, S_IWUSR) | ||
| func(path) | ||
| else: | ||
| raise | ||
| shutil.rmtree(self._test_dir, onerror=_onerror) | ||
| class QuiltTestCase(BasicQuiltTestCase): | ||
@@ -59,0 +80,0 @@ """ |
@@ -15,3 +15,3 @@ # -*- coding: utf-8 -*- | ||
| import re | ||
| from shutil import rmtree, copy | ||
| from shutil import rmtree, copyfile | ||
| import socket | ||
@@ -1416,3 +1416,3 @@ import stat | ||
| else: | ||
| copy(node(), str(dest)) | ||
| copyfile(node(), str(dest)) | ||
| elif isinstance(node._node, TableNode): | ||
@@ -1527,5 +1527,4 @@ ext = node._node.metadata['q_ext'] | ||
| subpath = pathlib.PureWindowsPath(*info.subpath) | ||
| # TODO: Change this over to `node['item/subitem']` notation once implemented | ||
| for name in info.subpath: | ||
| node = getattr(node, name) | ||
| node = node._get(name) | ||
| else: | ||
@@ -1532,0 +1531,0 @@ subpath = pathlib.PureWindowsPath() |
@@ -21,2 +21,3 @@ """ | ||
| DEFAULT_TEAM = 'Quilt' | ||
| ELLIPSIS = u'\u2026' | ||
@@ -23,0 +24,0 @@ # reserved words in build.yml |
@@ -216,2 +216,3 @@ """ | ||
| def upload_fragments(store, obj_urls, obj_sizes, reupload=False): | ||
@@ -228,8 +229,4 @@ assert len(obj_urls) == len(obj_sizes) | ||
| headers = { | ||
| 'Content-Encoding': 'gzip' | ||
| } | ||
| print("Uploading %d fragments (%d bytes)..." % (total, total_bytes)) | ||
| print("Uploading %d fragments (%d bytes before compression)..." % (total, total_bytes)) | ||
| with tqdm(total=total_bytes, unit='B', unit_scale=True) as progress: | ||
@@ -247,27 +244,9 @@ def _worker_thread(): | ||
| if reupload or not s3_session.head(obj_urls['head']).ok: | ||
| # Create a temporary gzip'ed file. | ||
| with tempfile.TemporaryFile() as temp_file: | ||
| with open(store.object_path(obj_hash), 'rb') as input_file: | ||
| with gzip.GzipFile(fileobj=temp_file, mode='wb', | ||
| compresslevel=ZLIB_LEVEL) as gzip_file: | ||
| copyfileobj(input_file, gzip_file, CHUNK_SIZE) | ||
| compressed_size = temp_file.tell() | ||
| temp_file.seek(0) | ||
| # Workaround for non-local variables in Python 2.7 | ||
| class Context: | ||
| compressed_read = 0 | ||
| original_last_update = 0 | ||
| def _progress_cb(count): | ||
| Context.compressed_read += count | ||
| original_read = Context.compressed_read * original_size // compressed_size | ||
| with lock: | ||
| progress.update(original_read - Context.original_last_update) | ||
| Context.original_last_update = original_read | ||
| with FileWithReadProgress(temp_file, _progress_cb) as fd: | ||
| url = obj_urls['put'] | ||
| response = s3_session.put(url, data=fd, headers=headers) | ||
| response.raise_for_status() | ||
| with FileWithReadProgress(store.object_path(obj_hash), progress.update) as fd: | ||
| url = obj_urls['put'] | ||
| # Work around a `requests` bug: it treats size 0 as "unknown" and | ||
| # uses chunked encoding - which S3 doesn't support. | ||
| data = fd if original_size > 0 else b'' | ||
| response = s3_session.put(url, data=data) | ||
| response.raise_for_status() | ||
| else: | ||
@@ -274,0 +253,0 @@ with lock: |
+19
-6
@@ -7,2 +7,3 @@ """ | ||
| from shutil import copyfile, move, rmtree | ||
| from stat import S_IRUSR, S_IRGRP, S_IROTH, S_IWUSR | ||
| import uuid | ||
@@ -337,2 +338,3 @@ | ||
| if os.path.exists(path): | ||
| os.chmod(path, S_IWUSR) | ||
| os.remove(path) | ||
@@ -408,3 +410,3 @@ return remove_objs | ||
| objhash = digest_file(path) | ||
| move(path, self.object_path(objhash)) | ||
| self._move_to_store(path, objhash) | ||
| hashes.append(objhash) | ||
@@ -414,3 +416,3 @@ rmtree(storepath) | ||
| filehash = digest_file(storepath) | ||
| move(storepath, self.object_path(filehash)) | ||
| self._move_to_store(storepath, filehash) | ||
| hashes = [filehash] | ||
@@ -433,4 +435,3 @@ | ||
| filehash = digest_file(srcfile) | ||
| objpath = self.object_path(filehash) | ||
| if not os.path.exists(objpath): | ||
| if not os.path.exists(self.object_path(filehash)): | ||
| # Copy the file to a temporary location first, then move, to make sure we don't end up with | ||
@@ -440,3 +441,3 @@ # truncated contents if the build gets interrupted. | ||
| copyfile(srcfile, tmppath) | ||
| move(tmppath, objpath) | ||
| self._move_to_store(tmppath, filehash) | ||
@@ -473,3 +474,15 @@ return filehash | ||
| metahash = digest_file(path) | ||
| move(path, self.object_path(metahash)) | ||
| self._move_to_store(path, metahash) | ||
| return metahash | ||
| def _move_to_store(self, srcpath, objhash): | ||
| """ | ||
| Make the object read-only and move it to the store. | ||
| """ | ||
| destpath = self.object_path(objhash) | ||
| if os.path.exists(destpath): | ||
| # Windows: delete any existing object at the destination. | ||
| os.chmod(destpath, S_IWUSR) | ||
| os.remove(destpath) | ||
| os.chmod(srcpath, S_IRUSR | S_IRGRP | S_IROTH) # Make read-only | ||
| move(srcpath, destpath) |
+19
-4
@@ -19,3 +19,3 @@ from setuptools import setup, find_packages | ||
| name="quilt", | ||
| version="2.9.6", | ||
| version="2.9.7", | ||
| packages=find_packages(), | ||
@@ -37,3 +37,3 @@ description='Quilt is a data package manager', | ||
| url='https://github.com/quiltdata/quilt', | ||
| download_url='https://github.com/quiltdata/quilt/releases/tag/2.9.6', | ||
| download_url='https://github.com/quiltdata/quilt/releases/tag/2.9.7', | ||
| keywords='quilt quiltdata shareable data dataframe package platform pandas', | ||
@@ -47,3 +47,3 @@ install_requires=[ | ||
| 'pathlib2; python_version<"3.6"', # stdlib backport | ||
| 'pyarrow>=0.4.0,<0.8.0', # TODO(dima): Make unit tests work with 0.8.*. | ||
| 'pyarrow>=0.4.0,<0.8.0', # TODO(dima): Make unit tests work with 0.8+ | ||
| 'pyyaml>=3.12', | ||
@@ -55,4 +55,15 @@ 'requests>=2.12.4', | ||
| ], | ||
| # Install with: pip install -e ./[img,tests,...] | ||
| extras_require={ | ||
| # Use: pip install --editable ./[tests] | ||
| # See quilt.asa.img module | ||
| 'img': [ | ||
| 'matplotlib>=2.2.2', | ||
| 'Pillow>=5.1.0' | ||
| ], | ||
| # See quilt.asa.pytorch module | ||
| 'pytorch': [ | ||
| # May not install on Linux, Windows; See https://pytorch.org/ | ||
| 'torch>=0.4.0', | ||
| ], | ||
| # For dev testing | ||
| 'tests': [ | ||
@@ -62,3 +73,7 @@ 'funcsigs; python_version<"3.4"', # stdlib backport | ||
| 'pytest', | ||
| 'pytest-cov', | ||
| 'responses>=0.7.0', | ||
| ], | ||
| 'torchvision': [ | ||
| 'torchvision>=0.2.1' | ||
| ] | ||
@@ -65,0 +80,0 @@ }, |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
385320
4.69%48
11.63%8474
5.12%