quilt
Advanced tools
+101
| # Copyright 2018, Quilt Data Inc. | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Convenience functions for displaying images in Jupyter notebooks. | ||
| `pip install quilt[img]` | ||
| Or, in development: | ||
| `pip install -e ./[img]` | ||
| """ | ||
| from math import ceil, floor, sqrt | ||
| from six import string_types | ||
| import matplotlib.image as mpimg | ||
| import matplotlib.pyplot as plt | ||
| from quilt.tools.const import ELLIPSIS | ||
| from quilt.nodes import DataNode, GroupNode | ||
| from quilt.tools.build import splitext_no_dot | ||
def plot(figsize=None, formats=None, limit=100, titlelen=10, **kwargs):
    """Display an image [in a Jupyter Notebook] from a Quilt fragment path.
    Intended for use with `%matplotlib inline`.

    Convenience method that loops over subplots that call
    `plt.imshow(image.imread(FRAG_PATH))`.

    Keyword arguments
    * figsize=None # None means auto, else provide (HEIGHT_INCHES, WIDTH_INCHES)
    * formats=None # List of extensions as strings ['jpg', 'png', ...]
    * limit=100 # maximum number of images to display
    * titlelen=10 # max number of characters in subplot title
    * **kwargs - all remaining kwargs are passed to plt.subplots;
      see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.subplots.html

    Returns a callable `_plot(node, paths)` suitable for Quilt's `asa=`
    mechanism: given a package node and its fragment paths, it renders the
    matching image fragments in a grid of subplots.
    """
    # pylint: disable=protected-access
    def _plot(node, paths):
        # Lower-case the requested extensions once so the filter below is
        # case-insensitive; None means "accept any extension".
        lower_formats = set((x.lower() for x in formats)) if formats is not None else None
        def node_filter(frag, meta):
            # Keep only string fragment paths whose file extension matches
            # the requested formats (if any were given).
            filepath = meta.get('_system', {}).get('filepath', None)
            # don't try to read DataFrames as images
            if isinstance(frag, string_types) and filepath:
                _, ext = splitext_no_dot(filepath)
                if lower_formats is None or ext.lower() in lower_formats:
                    return True
            return False
        # assume DataNode has one path; doesn't work with multi-fragment images
        display = [('', paths[0], node._meta)]
        # for GroupNodes, display all DataNode children
        if isinstance(node, GroupNode):
            datanodes = [(x, y) for (x, y) in node._items() if isinstance(y, DataNode)]
            display = [(x, y._data(), y._meta) for (x, y) in datanodes]
        # sort by name so iteration is reproducible (and unit tests pass)
        display = sorted(display, key=lambda rec: rec[0])
        display = [x for x in display if node_filter(x[1], x[2])]
        # Truncate to `limit` entries, telling the user how many were dropped.
        if len(display) > limit:
            print('Displaying {} of {} images{}'.format(limit, len(display), ELLIPSIS))
            display = display[:limit]
        # display can be empty e.g. if no DataNode children
        if not display:
            print('No images to display.')
            return
        # cast to int to avoid downstream complaints of
        # 'float' object cannot be interpreted as an index
        floatlen = float(len(display))  # prevent integer division in 2.7
        # Grid layout: at most 8 columns, enough rows to hold every image.
        cols = min(int(floor(sqrt(floatlen))), 8)
        rows = int(ceil(floatlen/cols))
        plt.tight_layout()
        plt.subplots(
            rows,
            cols,
            figsize=(cols*2, rows*2) if not figsize else figsize,
            **kwargs)
        # Iterate over every grid cell; cells beyond len(display) stay blank
        # (axes are still created so their frames can be turned off).
        for i in range(rows*cols):
            axes = plt.subplot(rows, cols, i + 1)  # subplots start at 1, not 0
            axes.axis('off')
            if i < len(display):
                (name, frag, meta) = display[i]
                # Truncate long names so titles don't overlap neighbors.
                plt.title(name[:titlelen] + ELLIPSIS if len(name) > titlelen else name)
                filepath = meta.get('_system', {}).get('filepath', None)
                _, ext = splitext_no_dot(filepath)
                try:
                    bits = mpimg.imread(frag, format=ext)
                    plt.imshow(bits)
                # Mac throws OSError, Linux IOError if file not recognizable
                except (IOError, OSError) as err:
                    print('{}: {}'.format(name, str(err)))
                    continue
    return _plot
| # Copyright 2018, Quilt Data Inc. | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Present Quilt packages as PyTorch Datasets | ||
| `pip install quilt[pytorch]` | ||
| Or, in development: | ||
| `pip install -e ./[pytorch]` | ||
| """ | ||
| from torch.utils.data import Dataset | ||
| from quilt.nodes import GroupNode | ||
def dataset(
        node_parser,
        include=lambda x: True,
        input_transform=None,
        target_transform=None):
    """Convert immediate children of a GroupNode into a torch.data.Dataset

    Keyword arguments
    * node_parser=callable that converts a DataNode to a Dataset item
    * include=lambda x: True
        lambda(quilt.nodes.GroupNode) => {True, False}
        intended to filter nodes based on metadata
    * input_transform=None; optional callable that takes the item as its argument
    * target_transform=None; optional callable that takes the item as its argument;
        implementation may make its own copy of item to avoid side effects

    Dataset.__getitem__ returns the following tuple
        item = node_parser(node)
        (input_transform(item), target_transform(item))
    Or, if no _transform functions are provided:
        (item, item)

    Returns a callable `_dataset(node, paths)` suitable for Quilt's `asa=`
    mechanism.
    """
    # Fixed docstring: it previously documented `output_transform`, a keyword
    # that does not exist; the actual parameter is `target_transform`.
    def _dataset(node, paths):  # pylint: disable=unused-argument
        # `paths` is part of the asa= callback signature but is unused here;
        # DatasetFromGroupNode reads fragments from the nodes themselves.
        return DatasetFromGroupNode(
            node,
            node_parser=node_parser,
            include=include,
            input_transform=input_transform,
            target_transform=target_transform)
    return _dataset
# pylint: disable=too-few-public-methods
# reason: this interface is baked by torch
class DatasetFromGroupNode(Dataset):
    """Present immediate children of a GroupNode as a torch.dataset"""
    def __init__(
            self,
            group,
            include,
            node_parser,
            input_transform,
            target_transform):
        """
        Arguments
        * group: GroupNode whose immediate children become dataset items
        * include: callable(node) -> bool used to filter children
        * node_parser: callable(node) -> item
        * input_transform, target_transform: optional callables applied to
          the parsed item in __getitem__
        """
        super(DatasetFromGroupNode, self).__init__()
        # Bug fix: the original passed the offending value as a second
        # TypeError argument instead of formatting it into the message.
        if not isinstance(group, GroupNode):
            raise TypeError('Expected group to be GroupNode, got {}'.format(group))
        if not callable(include):
            raise TypeError('Expected include to be callable, got {}'.format(include))
        self.nodes = [x for x in group if include(x)]
        self.node_parser = node_parser
        self.input_transform = input_transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return (input, target) for the node at `index`.

        Both halves start from the same parsed item; each optional transform
        is applied independently to its half.
        """
        item = self.node_parser(self.nodes[index])
        target = item
        if self.input_transform:
            item = self.input_transform(item)
        if self.target_transform:
            target = self.target_transform(target)
        return item, target

    def __len__(self):
        """Number of children that passed the `include` filter."""
        return len(self.nodes)
"""test class against quilt.asa.plot"""
import os
import numpy as np
import pytest
from quilt.tools import command
from .utils import QuiltTestCase, try_require
# Skip the whole module unless the optional [img] extras resolve;
# allow_module_level is required to call pytest.skip at import time.
if not try_require('quilt[img]'):
    # pylint: disable=unexpected-keyword-arg
    pytest.skip(
        "only test if [img] extras installed",
        allow_module_level=True)
# pylint: disable=no-self-use
class ImportTest(QuiltTestCase):
    """Integration tests for quilt.asa.img.plot rendering."""
    # the following two lines must happen first: select a non-interactive
    # backend so headless unit tests (no display server) don't barf
    import matplotlib as mpl
    mpl.use('Agg')

    def test_asa_plot(self):
        """plot() should raise no exceptions on any node shape."""
        from quilt.asa.img import plot
        mydir = os.path.dirname(__file__)
        # consistency fix: drop the redundant './' prefix so the path is
        # built the same way as in the other tests below
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/imgtest', build_path)
        pkg = command.load('foo/imgtest')
        # expect no exceptions on root
        pkg(asa=plot())
        # pylint: disable=no-member
        # expect no exceptions on GroupNode with only DF children
        pkg.dataframes(asa=plot())
        # expect no exceptions on GroupNode with mixed children
        pkg.mixed(asa=plot())
        # expect no exceptions on dir of images
        pkg.mixed.img(asa=plot())
        pkg.mixed.img(asa=plot(formats=['jpg', 'png']))
        # assert images != filtered, 'Expected only .jpg and .png images'
        # expect no exceptions on single images
        pkg.mixed.img.sf(asa=plot())
        pkg.mixed.img.portal(asa=plot())

    def _are_similar(self, ima, imb, error=0.01):
        """predicate to see if images differ by less than
        the given error; uses mean squared error; see also
        https://www.pyimagesearch.com/2014/09/15/python-compare-two-images/
        ima, imb: PIL.Image instances (anything np.array() accepts)
        """
        ima_ = np.array(ima).astype('float')
        imb_ = np.array(imb).astype('float')
        assert ima_.shape == imb_.shape, 'ima and imb must have same shape'
        # generalization: check only the first two (spatial) dimensions so
        # grayscale (2-D) images work as well as multi-channel (3-D) ones;
        # the original unpacked (x, y, _) and required exactly 3 dimensions
        for shape in (ima_.shape, imb_.shape):
            assert shape[0] > 0 and shape[1] > 0, \
                'unexpected image dimension: {}'.format(shape[:2])
        # sum of normalized channel differences squared
        error_ = np.sum(((ima_ - imb_)/255) ** 2)
        # normalize by total number of pixels; shapes are equal per the
        # assert above, so use ima_ consistently (the original mixed
        # ima_.shape[0] with imb_.shape[1])
        error_ /= float(ima_.shape[0] * ima_.shape[1])
        return error_ < error

    def test_asa_plot_output(self):
        """rendered grid should match the checked-in reference image"""
        from PIL import Image
        from matplotlib import pyplot as plt
        from quilt.asa.img import plot
        mydir = os.path.dirname(__file__)
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/imgtest', build_path)
        pkg = command.load('foo/imgtest')
        outfile = os.path.join('.', 'temp-plot.png')
        # pylint: disable=no-member
        pkg.mixed.img(asa=plot(figsize=(10, 10)))
        # size * dpi = 1000 x 1000 pixels
        plt.savefig(outfile, dpi=100, format='png', transparent=False)
        ref_path = os.path.join(mydir, 'data', 'ref-asa-plot.png')
        ref_img = Image.open(ref_path)
        tst_img = Image.open(outfile)
        assert self._are_similar(ref_img, tst_img), \
            'render differs from reference: {}'.format(ref_img)

    def test_asa_plot_formats_output(self):
        """rendered grid with a formats= filter should match its reference"""
        from PIL import Image
        from matplotlib import pyplot as plt
        from quilt.asa.img import plot
        mydir = os.path.dirname(__file__)
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/imgtest', build_path)
        pkg = command.load('foo/imgtest')
        outfile = os.path.join('.', 'temp-formats-plot.png')
        # pylint: disable=no-member
        pkg.mixed.img(asa=plot(figsize=(10, 10), formats=['png']))
        # size * dpi = 1000 x 1000 pixels
        plt.savefig(outfile, dpi=100, format='png', transparent=False)
        ref_path = os.path.join(mydir, 'data', 'ref-asa-formats.png')
        ref_img = Image.open(ref_path)
        tst_img = Image.open(outfile)
        assert self._are_similar(ref_img, tst_img), \
            'render differs from reference: {}'.format(ref_img)
"""test class against quilt.asa.torch"""
import os
import pytest
from six import string_types
from quilt.tools import command
from quilt.nodes import DataNode
from .utils import QuiltTestCase, try_require
# Skip the whole module unless all optional extras resolve (torch and
# torchvision may be unavailable on some platforms); allow_module_level is
# required to call pytest.skip at import time.
if not try_require('quilt[img,pytorch,torchvision]'):
    # pylint: disable=unexpected-keyword-arg
    pytest.skip("only test if [img,pytorch,torchvision] extras installed",
                allow_module_level=True)
# pylint: disable=no-self-use
class ImportTest(QuiltTestCase):
    """Integration test for the quilt.asa.pytorch Dataset adapter."""
    def test_asa_pytorch(self):
        """test asa.torch interface by converting a GroupNode with asa="""
        from torchvision.transforms import Compose, CenterCrop, ToTensor, Resize
        from torch.utils.data import Dataset
        from PIL import Image
        from torch import Tensor
        from quilt.asa.pytorch import dataset
        # pylint: disable=missing-docstring
        # helper functions to simulate real pytorch dataset usage
        def calculate_valid_crop_size(crop_size, upscale_factor):
            # largest size <= crop_size that is a multiple of upscale_factor
            return crop_size - (crop_size % upscale_factor)

        def node_parser(node):
            # extract the luma (Y) channel from the image at the node's path
            path = node()
            if isinstance(path, string_types):
                img = Image.open(path).convert('YCbCr')
                chan, _, _ = img.split()
                return chan
            else:
                raise TypeError('Expected string path to an image fragment')

        def input_transform(crop_size, upscale_factor):
            return Compose([
                CenterCrop(crop_size),
                Resize(crop_size // upscale_factor),
                ToTensor(),
            ])

        def target_transform(crop_size):
            def _inner(img):
                # copy so the target transform can't mutate the shared item
                img_ = img.copy()
                return Compose([
                    CenterCrop(crop_size),
                    ToTensor(),
                ])(img_)
            return _inner

        # pylint: disable=protected-access
        def is_image(node):
            """file extension introspection on Quilt nodes"""
            if isinstance(node, DataNode):
                filepath = node._meta.get('_system', {}).get('filepath')
                if filepath:
                    return any(
                        filepath.endswith(extension)
                        for extension in [".png", ".jpg", ".jpeg"])
            # explicit False instead of implicit None for non-image nodes
            return False
        # end helper functions

        mydir = os.path.dirname(__file__)
        build_path = os.path.join(mydir, 'build_img.yml')
        command.build('foo/torchtest', build_path)
        pkg = command.load('foo/torchtest')

        upscale_factor = 3
        crop_size = calculate_valid_crop_size(256, upscale_factor)
        # pylint: disable=no-member
        my_dataset = pkg.mixed.img(asa=dataset(
            include=is_image,
            node_parser=node_parser,
            input_transform=input_transform(crop_size, upscale_factor),
            target_transform=target_transform(crop_size)
        ))
        # fix: the message used type(Dataset) (the metaclass) instead of the
        # Dataset class itself
        assert isinstance(my_dataset, Dataset), \
            'expected type {}, got {}'.format(Dataset, type(my_dataset))
        # idiom: len()/indexing instead of calling dunders directly
        assert len(my_dataset) == 2, \
            'expected two images in mixed.img, got {}'.format(len(my_dataset))
        for i in range(len(my_dataset)):
            tens = my_dataset[i]
            assert all(isinstance(x, Tensor) for x in tens), \
                'Expected all torch.Tensors in tuple, got {}'.format(tens)
+2
-2
| Metadata-Version: 1.1 | ||
| Name: quilt | ||
| Version: 2.9.6 | ||
| Version: 2.9.7 | ||
| Summary: Quilt is a data package manager | ||
@@ -9,3 +9,3 @@ Home-page: https://github.com/quiltdata/quilt | ||
| License: LICENSE | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.6 | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.7 | ||
| Description-Content-Type: UNKNOWN | ||
@@ -12,0 +12,0 @@ Description: |
| Metadata-Version: 1.1 | ||
| Name: quilt | ||
| Version: 2.9.6 | ||
| Version: 2.9.7 | ||
| Summary: Quilt is a data package manager | ||
@@ -9,3 +9,3 @@ Home-page: https://github.com/quiltdata/quilt | ||
| License: LICENSE | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.6 | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.7 | ||
| Description-Content-Type: UNKNOWN | ||
@@ -12,0 +12,0 @@ Description: |
@@ -18,4 +18,12 @@ appdirs>=1.4.0 | ||
| [img] | ||
| matplotlib>=2.2.2 | ||
| Pillow>=5.1.0 | ||
| [pytorch] | ||
| torch>=0.4.0 | ||
| [tests] | ||
| pytest | ||
| pytest-cov | ||
| responses>=0.7.0 | ||
@@ -28,1 +36,4 @@ | ||
| funcsigs | ||
| [torchvision] | ||
| torchvision>=0.2.1 |
@@ -14,5 +14,10 @@ README.md | ||
| quilt.egg-info/top_level.txt | ||
| quilt/asa/__init__.py | ||
| quilt/asa/img.py | ||
| quilt/asa/pytorch.py | ||
| quilt/test/__init__.py | ||
| quilt/test/gen_data.py | ||
| quilt/test/integration.py | ||
| quilt/test/test_asa_plot.py | ||
| quilt/test/test_asa_torch.py | ||
| quilt/test/test_build.py | ||
@@ -19,0 +24,0 @@ quilt/test/test_checks.py |
+1
-1
@@ -66,3 +66,3 @@ """ | ||
| child = _from_core_node(package, core_child) | ||
| setattr(node, name, child) | ||
| node[name] = child | ||
@@ -69,0 +69,0 @@ return node |
+69
-17
@@ -8,3 +8,3 @@ """ | ||
| import pandas as pd | ||
| from six import iteritems, string_types | ||
| from six import iteritems, itervalues, string_types | ||
@@ -88,8 +88,62 @@ from .tools import core | ||
| """ | ||
| def __init__(self, meta): | ||
| super(GroupNode, self).__init__(meta) | ||
| self._children = {} | ||
| def __getattr__(self, name): | ||
| if name.startswith('_'): | ||
| return super(GroupNode, self).__getattr__(name) | ||
| else: | ||
| try: | ||
| return self[name] | ||
| except KeyError: | ||
| raise AttributeError | ||
| def __setattr__(self, name, value): | ||
| if name.startswith('_') or isinstance(value, Node): | ||
| if name.startswith('_'): | ||
| super(GroupNode, self).__setattr__(name, value) | ||
| else: | ||
| raise AttributeError("{val} is not a valid package node".format(val=value)) | ||
| self[name] = value | ||
| def __delattr__(self, name): | ||
| if name.startswith('_'): | ||
| super(GroupNode, self).__detattr__(name, value) | ||
| else: | ||
| try: | ||
| del self[name] | ||
| except KeyError: | ||
| raise AttributeError | ||
| def __dir__(self): | ||
| attrs = set() | ||
| try: | ||
| attrs.update(super(GroupNode, self).__dir__()) | ||
| except AttributeError: | ||
| # Fallback for PY2 | ||
| attrs.update(dir(type(self))) | ||
| attrs.update(self.__dict__) | ||
| attrs.update(self._children) | ||
| return sorted(attrs) | ||
| def __getitem__(self, name): | ||
| return self._children[name] | ||
| def __setitem__(self, name, value): | ||
| if not isinstance(value, Node): | ||
| raise TypeError("{val} is not a valid package node".format(val=value)) | ||
| self._children[name] = value | ||
| def __delitem__(self, name): | ||
| del self._children[name] | ||
| def __contains__(self, name): | ||
| return name in self._children | ||
| def _get(self, name, default=None): | ||
| return self._children.get(name, default) | ||
| def __len__(self): | ||
| return len(self._children) | ||
| def __repr__(self): | ||
@@ -104,8 +158,6 @@ pinfo = super(GroupNode, self).__repr__() | ||
| def __iter__(self): | ||
| for _, child in self._items(): | ||
| yield child | ||
| return itervalues(self._children) | ||
| def _items(self): | ||
| return ((name, child) for name, child in iteritems(self.__dict__) | ||
| if not name.startswith('_')) | ||
| return self._children.items() | ||
@@ -116,3 +168,3 @@ def _data_keys(self): | ||
| """ | ||
| return [name for name, child in self._items() if not isinstance(child, GroupNode)] | ||
| return [name for name, child in iteritems(self._children) if not isinstance(child, GroupNode)] | ||
@@ -123,13 +175,13 @@ def _group_keys(self): | ||
| """ | ||
| return [name for name, child in self._items() if isinstance(child, GroupNode)] | ||
| return [name for name, child in iteritems(self._children) if isinstance(child, GroupNode)] | ||
| def _keys(self): | ||
| """ | ||
| keys directly accessible on this object via getattr or . | ||
| keys directly accessible on this object via [] | ||
| """ | ||
| return [name for name in self.__dict__ if not name.startswith('_')] | ||
| return self._children.keys() | ||
| def _add_group(self, groupname): | ||
| child = GroupNode({}) | ||
| setattr(self, groupname, child) | ||
| self[groupname] = child | ||
@@ -266,6 +318,6 @@ def _data(self, asa=None): | ||
| for key in path[:-1]: | ||
| child = getattr(node, key, None) | ||
| child = node._get(key) | ||
| if not isinstance(child, GroupNode): | ||
| child = GroupNode({}) | ||
| setattr(node, key, child) | ||
| node[key] = child | ||
@@ -276,3 +328,3 @@ node = child | ||
| data_node = DataNode(self._package, core_node, value, metadata) | ||
| setattr(node, key, data_node) | ||
| node[key] = data_node | ||
@@ -299,3 +351,3 @@ def _filter(self, lambda_or_dict): | ||
| if filtered_child is not None: | ||
| setattr(filtered, child_name, filtered_child) | ||
| filtered[child_name] = filtered_child | ||
@@ -306,3 +358,3 @@ # Return the group if: | ||
| # 3) It's the package itself. | ||
| if matched or next(filtered._items(), None) or node == self: | ||
| if matched or len(filtered) or node == self: | ||
| return filtered | ||
@@ -309,0 +361,0 @@ else: |
@@ -38,2 +38,3 @@ """ | ||
| def test_build_from_cache(self): | ||
@@ -40,0 +41,0 @@ """ |
| """ | ||
| Tests for magic imports. | ||
| """ | ||
| import os | ||
| import platform | ||
| import time | ||
| import numpy as np | ||
| import pandas as pd | ||
| import pytest | ||
| from six import string_types | ||
| from quilt.nodes import GroupNode, DataNode | ||
| from quilt.tools import command | ||
| from quilt.nodes import DataNode, GroupNode | ||
| from quilt.tools.const import PACKAGE_DIR_NAME | ||
@@ -18,2 +20,3 @@ from quilt.tools.package import Package | ||
| # pylint: disable=protected-access | ||
| class ImportTest(QuiltTestCase): | ||
@@ -41,2 +44,5 @@ def test_imports(self): | ||
| assert package['dataframes'] == dataframes | ||
| assert package['README'] == README | ||
| assert set(dataframes._keys()) == {'csv', 'nulls'} | ||
@@ -46,4 +52,7 @@ assert set(dataframes._group_keys()) == set() | ||
| assert len(package) == 2 | ||
| assert len(list(package)) == 2 | ||
| assert 'dataframes' in dir(package) | ||
| for item in package: | ||
@@ -64,2 +73,7 @@ assert isinstance(item, (GroupNode, DataNode)) | ||
| # Store data is read-only | ||
| with self.assertRaises(IOError): | ||
| with open(README(), 'w'): | ||
| pass | ||
| # Bad attributes of imported packages | ||
@@ -364,3 +378,3 @@ | ||
| df = pd.DataFrame(dict(a=[1, 2, 3])) | ||
| with self.assertRaises(AttributeError): | ||
| with self.assertRaises(TypeError): | ||
| package4.newdf = df | ||
@@ -388,3 +402,3 @@ | ||
| # current spec requires that build() *not* update the in-memory module tree. | ||
| newpath1 = getattr(module, newfilename)() | ||
| newpath1 = module[newfilename]() | ||
| assert newpath1 == newfilename | ||
@@ -397,3 +411,3 @@ | ||
| assert reloaded_module is not module | ||
| newpath2 = getattr(reloaded_module, newfilename)() | ||
| newpath2 = reloaded_module[newfilename]() | ||
| assert 'myfile' not in newpath2 | ||
@@ -419,3 +433,3 @@ | ||
| assert getattr(package6, newfilename1)() == newfilename2 | ||
| assert package6[newfilename1]() == newfilename2 | ||
@@ -447,3 +461,3 @@ def test_team_non_team_imports(self): | ||
| df = pd.DataFrame(dict(a=[1, 2, 3])) | ||
| with self.assertRaises(AttributeError): | ||
| with self.assertRaises(TypeError): | ||
| package4.newdf = df | ||
@@ -530,2 +544,1 @@ | ||
| command.load('foo/package:v:1.0.0') | ||
+22
-1
@@ -7,2 +7,3 @@ """ | ||
| import shutil | ||
| from stat import S_IWUSR | ||
| import tempfile | ||
@@ -41,2 +42,12 @@ import unittest | ||
| def try_require(string): | ||
| """return true iff specified require string resolves properly; | ||
| for use with @pytest.mark.skipif""" | ||
| import pkg_resources | ||
| try: | ||
| pkg_resources.require(string) | ||
| except pkg_resources.ResolutionError as ex: | ||
| print(ex) | ||
| return False | ||
| return True | ||
@@ -55,4 +66,14 @@ class BasicQuiltTestCase(unittest.TestCase): | ||
| os.chdir(self._old_dir) | ||
| shutil.rmtree(self._test_dir) | ||
| def _onerror(func, path, exc_info): | ||
| """ | ||
| Handle read-only files on Windows | ||
| """ | ||
| if not os.access(path, os.W_OK): | ||
| os.chmod(path, S_IWUSR) | ||
| func(path) | ||
| else: | ||
| raise | ||
| shutil.rmtree(self._test_dir, onerror=_onerror) | ||
| class QuiltTestCase(BasicQuiltTestCase): | ||
@@ -59,0 +80,0 @@ """ |
@@ -15,3 +15,3 @@ # -*- coding: utf-8 -*- | ||
| import re | ||
| from shutil import rmtree, copy | ||
| from shutil import rmtree, copyfile | ||
| import socket | ||
@@ -1416,3 +1416,3 @@ import stat | ||
| else: | ||
| copy(node(), str(dest)) | ||
| copyfile(node(), str(dest)) | ||
| elif isinstance(node._node, TableNode): | ||
@@ -1527,5 +1527,4 @@ ext = node._node.metadata['q_ext'] | ||
| subpath = pathlib.PureWindowsPath(*info.subpath) | ||
| # TODO: Change this over to `node['item/subitem']` notation once implemented | ||
| for name in info.subpath: | ||
| node = getattr(node, name) | ||
| node = node._get(name) | ||
| else: | ||
@@ -1532,0 +1531,0 @@ subpath = pathlib.PureWindowsPath() |
@@ -21,2 +21,3 @@ """ | ||
| DEFAULT_TEAM = 'Quilt' | ||
| ELLIPSIS = u'\u2026' | ||
@@ -23,0 +24,0 @@ # reserved words in build.yml |
@@ -216,2 +216,3 @@ """ | ||
| def upload_fragments(store, obj_urls, obj_sizes, reupload=False): | ||
@@ -228,8 +229,4 @@ assert len(obj_urls) == len(obj_sizes) | ||
| headers = { | ||
| 'Content-Encoding': 'gzip' | ||
| } | ||
| print("Uploading %d fragments (%d bytes)..." % (total, total_bytes)) | ||
| print("Uploading %d fragments (%d bytes before compression)..." % (total, total_bytes)) | ||
| with tqdm(total=total_bytes, unit='B', unit_scale=True) as progress: | ||
@@ -247,27 +244,9 @@ def _worker_thread(): | ||
| if reupload or not s3_session.head(obj_urls['head']).ok: | ||
| # Create a temporary gzip'ed file. | ||
| with tempfile.TemporaryFile() as temp_file: | ||
| with open(store.object_path(obj_hash), 'rb') as input_file: | ||
| with gzip.GzipFile(fileobj=temp_file, mode='wb', | ||
| compresslevel=ZLIB_LEVEL) as gzip_file: | ||
| copyfileobj(input_file, gzip_file, CHUNK_SIZE) | ||
| compressed_size = temp_file.tell() | ||
| temp_file.seek(0) | ||
| # Workaround for non-local variables in Python 2.7 | ||
| class Context: | ||
| compressed_read = 0 | ||
| original_last_update = 0 | ||
| def _progress_cb(count): | ||
| Context.compressed_read += count | ||
| original_read = Context.compressed_read * original_size // compressed_size | ||
| with lock: | ||
| progress.update(original_read - Context.original_last_update) | ||
| Context.original_last_update = original_read | ||
| with FileWithReadProgress(temp_file, _progress_cb) as fd: | ||
| url = obj_urls['put'] | ||
| response = s3_session.put(url, data=fd, headers=headers) | ||
| response.raise_for_status() | ||
| with FileWithReadProgress(store.object_path(obj_hash), progress.update) as fd: | ||
| url = obj_urls['put'] | ||
| # Work around a `requests` bug: it treats size 0 as "unknown" and | ||
| # uses chunked encoding - which S3 doesn't support. | ||
| data = fd if original_size > 0 else b'' | ||
| response = s3_session.put(url, data=data) | ||
| response.raise_for_status() | ||
| else: | ||
@@ -274,0 +253,0 @@ with lock: |
+19
-6
@@ -7,2 +7,3 @@ """ | ||
| from shutil import copyfile, move, rmtree | ||
| from stat import S_IRUSR, S_IRGRP, S_IROTH, S_IWUSR | ||
| import uuid | ||
@@ -337,2 +338,3 @@ | ||
| if os.path.exists(path): | ||
| os.chmod(path, S_IWUSR) | ||
| os.remove(path) | ||
@@ -408,3 +410,3 @@ return remove_objs | ||
| objhash = digest_file(path) | ||
| move(path, self.object_path(objhash)) | ||
| self._move_to_store(path, objhash) | ||
| hashes.append(objhash) | ||
@@ -414,3 +416,3 @@ rmtree(storepath) | ||
| filehash = digest_file(storepath) | ||
| move(storepath, self.object_path(filehash)) | ||
| self._move_to_store(storepath, filehash) | ||
| hashes = [filehash] | ||
@@ -433,4 +435,3 @@ | ||
| filehash = digest_file(srcfile) | ||
| objpath = self.object_path(filehash) | ||
| if not os.path.exists(objpath): | ||
| if not os.path.exists(self.object_path(filehash)): | ||
| # Copy the file to a temporary location first, then move, to make sure we don't end up with | ||
@@ -440,3 +441,3 @@ # truncated contents if the build gets interrupted. | ||
| copyfile(srcfile, tmppath) | ||
| move(tmppath, objpath) | ||
| self._move_to_store(tmppath, filehash) | ||
@@ -473,3 +474,15 @@ return filehash | ||
| metahash = digest_file(path) | ||
| move(path, self.object_path(metahash)) | ||
| self._move_to_store(path, metahash) | ||
| return metahash | ||
| def _move_to_store(self, srcpath, objhash): | ||
| """ | ||
| Make the object read-only and move it to the store. | ||
| """ | ||
| destpath = self.object_path(objhash) | ||
| if os.path.exists(destpath): | ||
| # Windows: delete any existing object at the destination. | ||
| os.chmod(destpath, S_IWUSR) | ||
| os.remove(destpath) | ||
| os.chmod(srcpath, S_IRUSR | S_IRGRP | S_IROTH) # Make read-only | ||
| move(srcpath, destpath) |
+19
-4
@@ -19,3 +19,3 @@ from setuptools import setup, find_packages | ||
| name="quilt", | ||
| version="2.9.6", | ||
| version="2.9.7", | ||
| packages=find_packages(), | ||
@@ -37,3 +37,3 @@ description='Quilt is a data package manager', | ||
| url='https://github.com/quiltdata/quilt', | ||
| download_url='https://github.com/quiltdata/quilt/releases/tag/2.9.6', | ||
| download_url='https://github.com/quiltdata/quilt/releases/tag/2.9.7', | ||
| keywords='quilt quiltdata shareable data dataframe package platform pandas', | ||
@@ -47,3 +47,3 @@ install_requires=[ | ||
| 'pathlib2; python_version<"3.6"', # stdlib backport | ||
| 'pyarrow>=0.4.0,<0.8.0', # TODO(dima): Make unit tests work with 0.8.*. | ||
| 'pyarrow>=0.4.0,<0.8.0', # TODO(dima): Make unit tests work with 0.8+ | ||
| 'pyyaml>=3.12', | ||
@@ -55,4 +55,15 @@ 'requests>=2.12.4', | ||
| ], | ||
| # Install with: pip install -e ./[img,tests,...] | ||
| extras_require={ | ||
| # Use: pip install --editable ./[tests] | ||
| # See quilt.asa.img module | ||
| 'img': [ | ||
| 'matplotlib>=2.2.2', | ||
| 'Pillow>=5.1.0' | ||
| ], | ||
| # See quilt.asa.pytorch module | ||
| 'pytorch': [ | ||
| # May not install on Linux, Windows; See https://pytorch.org/ | ||
| 'torch>=0.4.0', | ||
| ], | ||
| # For dev testing | ||
| 'tests': [ | ||
@@ -62,3 +73,7 @@ 'funcsigs; python_version<"3.4"', # stdlib backport | ||
| 'pytest', | ||
| 'pytest-cov', | ||
| 'responses>=0.7.0', | ||
| ], | ||
| 'torchvision': [ | ||
| 'torchvision>=0.2.1' | ||
| ] | ||
@@ -65,0 +80,0 @@ }, |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
385320
4.69%48
11.63%8474
5.12%