quilt
Advanced tools
+2
-2
| Metadata-Version: 1.1 | ||
| Name: quilt | ||
| Version: 2.9.7 | ||
| Version: 2.9.8 | ||
| Summary: Quilt is a data package manager | ||
@@ -9,3 +9,3 @@ Home-page: https://github.com/quiltdata/quilt | ||
| License: LICENSE | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.7 | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.8 | ||
| Description-Content-Type: UNKNOWN | ||
@@ -12,0 +12,0 @@ Description: |
| Metadata-Version: 1.1 | ||
| Name: quilt | ||
| Version: 2.9.7 | ||
| Version: 2.9.8 | ||
| Summary: Quilt is a data package manager | ||
@@ -9,3 +9,3 @@ Home-page: https://github.com/quiltdata/quilt | ||
| License: LICENSE | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.7 | ||
| Download-URL: https://github.com/quiltdata/quilt/releases/tag/2.9.8 | ||
| Description-Content-Type: UNKNOWN | ||
@@ -12,0 +12,0 @@ Description: |
@@ -5,3 +5,3 @@ appdirs>=1.4.0 | ||
| pandas>=0.19.2 | ||
| pyarrow<0.8.0,>=0.4.0 | ||
| pyarrow>=0.9.0 | ||
| pyyaml>=3.12 | ||
@@ -8,0 +8,0 @@ requests>=2.12.4 |
+6
-2
@@ -20,3 +20,3 @@ """ | ||
| from .tools import core | ||
| from .tools.const import SYSTEM_METADATA | ||
| from .tools.const import SYSTEM_METADATA, TargetType | ||
| from .tools.store import PackageStore | ||
@@ -55,4 +55,8 @@ | ||
| 'transform': core_node.metadata.get('q_ext'), | ||
| 'target': | ||
| TargetType.PANDAS.value | ||
| if isinstance(core_node, core.TableNode) | ||
| else core_node.metadata.get('q_target', TargetType.FILE.value), | ||
| } | ||
| node = DataNode(package, core_node, None, metadata) | ||
| node = DataNode(package, core_node.hashes, None, metadata) | ||
| else: | ||
@@ -59,0 +63,0 @@ if isinstance(core_node, core.RootNode): |
+23
-20
@@ -7,7 +7,7 @@ """ | ||
| import numpy as np | ||
| import pandas as pd | ||
| from six import iteritems, itervalues, string_types | ||
| from .tools import core | ||
| from .tools.const import SYSTEM_METADATA | ||
| from .tools.const import SYSTEM_METADATA, TargetType | ||
| from .tools.util import is_nodename | ||
@@ -44,7 +44,7 @@ | ||
| """ | ||
| def __init__(self, package, node, data, meta): | ||
| def __init__(self, package, hashes, data, meta): | ||
| super(DataNode, self).__init__(meta) | ||
| self._package = package | ||
| self._node = node | ||
| self._hashes = hashes | ||
| self.__cached_data = data | ||
@@ -56,2 +56,5 @@ | ||
def _target(self):
    """
    Return the `TargetType` of this node.

    The raw value is read from the 'target' entry of the node's system
    metadata (`self._meta[SYSTEM_METADATA]`) and converted to a
    `TargetType` member by value.
    """
    system_meta = self._meta[SYSTEM_METADATA]
    raw_target = system_meta['target']
    return TargetType(raw_target)
| def _data(self, asa=None): | ||
@@ -63,3 +66,3 @@ """ | ||
| if asa is not None: | ||
| if self._package is None or not self._node.hashes: | ||
| if self._package is None or self._hashes is None: | ||
| msg = ( | ||
@@ -71,3 +74,3 @@ "Can only use asa functions with built dataframes." | ||
| store = self._package.get_store() | ||
| return asa(self, [store.object_path(obj) for obj in self._node.hashes]) | ||
| return asa(self, [store.object_path(obj) for obj in self._hashes]) | ||
| else: | ||
@@ -77,8 +80,8 @@ if self.__cached_data is None: | ||
| store = self._package.get_store() | ||
| if isinstance(self._node, core.TableNode): | ||
| self.__cached_data = store.load_dataframe(self._node.hashes) | ||
| elif isinstance(self._node, core.FileNode): | ||
| self.__cached_data = store.get_file(self._node.hashes) | ||
| if self._target() == TargetType.PANDAS: | ||
| self.__cached_data = store.load_dataframe(self._hashes) | ||
| elif self._target() == TargetType.NUMPY: | ||
| self.__cached_data = store.load_numpy(self._hashes) | ||
| else: | ||
| assert False | ||
| self.__cached_data = store.get_file(self._hashes) | ||
| return self.__cached_data | ||
@@ -100,3 +103,3 @@ | ||
| if name.startswith('_'): | ||
| return super(GroupNode, self).__getattr__(name) | ||
| raise AttributeError | ||
| else: | ||
@@ -203,5 +206,5 @@ try: | ||
| else: | ||
| if not isinstance(node._node, core.TableNode): | ||
| if node._target() != TargetType.PANDAS: | ||
| alldfs = False | ||
| if node._node is None or not node._node.hashes: | ||
| if node._package is None or node._hashes is None: | ||
| msg = "Can only merge built dataframes. Build this package and try again." | ||
@@ -214,3 +217,3 @@ raise NotImplementedError(msg) | ||
| raise NotImplementedError("Can only merge dataframes from the same store") | ||
| hash_list += node._node.hashes | ||
| hash_list += node._hashes | ||
@@ -299,4 +302,5 @@ if asa is None: | ||
| if isinstance(value, pd.DataFrame): | ||
| metadata = {} | ||
| core_node = core.TableNode(hashes=[], format=core.PackageFormat.default.value) | ||
| metadata = {SYSTEM_METADATA: {'target': TargetType.PANDAS.value}} | ||
| elif isinstance(value, np.ndarray): | ||
| metadata = {SYSTEM_METADATA: {'target': TargetType.NUMPY.value}} | ||
| elif isinstance(value, string_types + (bytes,)): | ||
@@ -309,7 +313,6 @@ # bytes -> string for consistency when retrieving metadata | ||
| metadata = {SYSTEM_METADATA: {'filepath': value, 'transform': 'id'}} | ||
| core_node = core.FileNode(hashes=[]) | ||
| if build_dir: | ||
| value = os.path.join(build_dir, value) | ||
| else: | ||
| accepted_types = tuple(set((pd.DataFrame, bytes) + string_types)) | ||
| accepted_types = tuple(set((pd.DataFrame, np.ndarray, bytes) + string_types)) | ||
| raise TypeError("Bad value type: Expected instance of any type {!r}, but received type {!r}" | ||
@@ -332,3 +335,3 @@ .format(accepted_types, type(value)), repr(value)[0:100]) | ||
| key = path[-1] | ||
| data_node = DataNode(self._package, core_node, value, metadata) | ||
| data_node = DataNode(self._package, None, value, metadata) | ||
| node[key] = data_node | ||
@@ -335,0 +338,0 @@ |
+60
-26
@@ -7,3 +7,3 @@ """ | ||
| import pytest | ||
| from numpy import dtype | ||
| import numpy as np | ||
| import pandas.api.types as ptypes | ||
@@ -31,3 +31,3 @@ from pandas.core.frame import DataFrame | ||
| path = os.path.join(mydir, './build_large.yml') | ||
| build.build_package(None, 'test_parquet', PACKAGE, path) | ||
| build.build_package(None, 'test_parquet', PACKAGE, [], path) | ||
| # TODO load DFs based on contents of .yml file at PATH | ||
@@ -51,3 +51,3 @@ # not hardcoded vals (this will require loading modules from variable | ||
| # Build once to populate cache | ||
| build.build_package(None, 'test_cache', PACKAGE, path) | ||
| build.build_package(None, 'test_cache', PACKAGE, [], path) | ||
@@ -60,3 +60,3 @@ # Verify cache contents | ||
| # Build again using the cache | ||
| build.build_package(None, 'test_cache', PACKAGE, path) | ||
| build.build_package(None, 'test_cache', PACKAGE, [], path) | ||
@@ -116,3 +116,3 @@ # TODO load DFs based on contents of .yml file at PATH | ||
| with pytest.raises(build.BuildException): | ||
| build.build_package(None, 'test_bad_transform', PACKAGE, str(path)) | ||
| build.build_package(None, 'test_bad_transform', PACKAGE, [], str(path)) | ||
@@ -124,3 +124,3 @@ def test_build_bad_file(self): | ||
| with pytest.raises(build.BuildException): | ||
| build.build_package(None, 'test_bad_file', PACKAGE, str(path)) | ||
| build.build_package(None, 'test_bad_file', PACKAGE, [], str(path)) | ||
@@ -133,3 +133,3 @@ def test_build_empty(self): | ||
| path = os.path.join(mydir, './build_empty.yml') | ||
| build.build_package(None, 'empty', 'pkg', path) | ||
| build.build_package(None, 'empty', 'pkg', [], path) | ||
@@ -142,3 +142,3 @@ from quilt.data.empty import pkg | ||
| path = os.path.join(mydir, './build_reserved.yml') | ||
| build.build_package(None, 'reserved', 'pkg', path) | ||
| build.build_package(None, 'reserved', 'pkg', [], path) | ||
| from quilt.data.reserved import pkg | ||
@@ -157,3 +157,3 @@ assert pkg.file, 'Expected package' | ||
| path = os.path.join(mydir, './build_group_args.yml') | ||
| build.build_package(None, 'groups', 'pkg', path) | ||
| build.build_package(None, 'groups', 'pkg', [], path) | ||
@@ -175,3 +175,3 @@ from quilt.data.groups import pkg | ||
| 'Expected `transform: tsv` to be inferred from file extension' | ||
| assert pkg.group_b.tsv()['Date0'].dtype == dtype('<M8[ns]'), \ | ||
| assert pkg.group_b.tsv()['Date0'].dtype == np.dtype('<M8[ns]'), \ | ||
| 'Expected Date0 column to parse as date' | ||
@@ -201,3 +201,3 @@ assert pkg.group_b.subgroup.tsv().shape == (1, 3), \ | ||
| assert os.path.exists(buildfilepath) | ||
| build.build_package(None, 'test_generated', 'generated', buildfilepath) | ||
| build.build_package(None, 'test_generated', 'generated', [], buildfilepath) | ||
| os.remove(buildfilepath) | ||
@@ -212,3 +212,3 @@ from quilt.data.test_generated.generated import bad, foo, nuts, README | ||
| path = os.path.join(mydir, './build_failover.yml') | ||
| build.build_package(None, 'test_failover', PACKAGE, path) | ||
| build.build_package(None, 'test_failover', PACKAGE, [], path) | ||
| from quilt.data.test_failover.groot import bad | ||
@@ -240,4 +240,4 @@ | ||
| 'dir_ext': {}, | ||
| # Weird characters replaced with a single "_" | ||
| 'a_b_c': {'file': 'a%%b___c'}, | ||
| # Weird characters replaced with "_" | ||
| 'a__b___c': {'file': 'a%%b___c'}, | ||
| # Prepend "n" to files that start with a number | ||
@@ -284,3 +284,3 @@ 'n1': {'file': '1'}, | ||
| with assertRaisesRegex(self, build.BuildException, r'Bad yaml syntax.*build_bad_syntax\.yml'): | ||
| build.build_package(None, 'test_syntax_error', PACKAGE, path) | ||
| build.build_package(None, 'test_syntax_error', PACKAGE, [], path) | ||
@@ -295,3 +295,3 @@ def test_build_no_contents_node(self): | ||
| with assertRaisesRegex(self, build.BuildException, r'Error in build_no_contents_node.yml'): | ||
| build.build_package(None, 'no_contents', PACKAGE, path) | ||
| build.build_package(None, 'no_contents', PACKAGE, [], path) | ||
@@ -307,3 +307,3 @@ def test_build_checks_yaml_syntax_error(self): # pylint: disable=C0103 | ||
| with assertRaisesRegex(self, build.BuildException, r'Bad yaml syntax.*checks_bad_syntax\.yml'): | ||
| build.build_package(None, 'test_syntax_error', PACKAGE, path, checks_path=checks_path) | ||
| build.build_package(None, 'test_syntax_error', PACKAGE, [], path, checks_path=checks_path) | ||
@@ -355,4 +355,4 @@ def test_build_glob_naming_conflict(self): | ||
| # simple checks to ensure matching contents for item notation | ||
| assert node.foo._node == package['foo'] | ||
| assert node.subnode.nuts._node == package['subnode/nuts'] | ||
| assert package['foo'] | ||
| assert package['subnode/nuts'] | ||
@@ -433,3 +433,3 @@ ## Negative tests | ||
| with assertRaisesRegex(self, build.BuildException, r'Package.*not found'): | ||
| build.build_package_from_contents(None, 'test', 'compose2', str(mydir), missing_dep_build) | ||
| build.build_package_from_contents(None, 'test', 'compose2', [], str(mydir), missing_dep_build) | ||
@@ -451,3 +451,3 @@ def test_compose_subpackage_not_found(self): | ||
| with assertRaisesRegex(self, build.BuildException, r'Package.*has no subpackage.*'): | ||
| build.build_package_from_contents(None, 'test', 'compose', str(mydir), missing_dep_build) | ||
| build.build_package_from_contents(None, 'test', 'compose', [], str(mydir), missing_dep_build) | ||
@@ -466,3 +466,3 @@ def test_included_package_is_group_node(self): | ||
| } | ||
| build.build_package_from_contents(None, 'test', 'compose3', str(mydir), build_compose_contents) | ||
| build.build_package_from_contents(None, 'test', 'compose3', [], str(mydir), build_compose_contents) | ||
| from quilt.data.test import compose3 | ||
@@ -482,3 +482,3 @@ | ||
| } | ||
| build.build_package_from_contents(None, 'test', 'compose_root', str(mydir), build_compose_contents) | ||
| build.build_package_from_contents(None, 'test', 'compose_root', [], str(mydir), build_compose_contents) | ||
| from quilt.data.test import compose_root, simple | ||
@@ -508,3 +508,3 @@ | ||
| build.build_package_from_contents(None, 'test', 'pkg_node', str(mydir), build_compose_contents) | ||
| build.build_package_from_contents(None, 'test', 'pkg_node', [], str(mydir), build_compose_contents) | ||
@@ -536,3 +536,3 @@ from quilt.data.test import pkg_node | ||
| with self.assertRaises(build.BuildException): | ||
| build.build_package_from_contents(None, 'test', 'shouldfail', str(mydir), bad_build_contents) | ||
| build.build_package_from_contents(None, 'test', 'shouldfail', [], str(mydir), bad_build_contents) | ||
@@ -553,3 +553,3 @@ def test_parquet_source_file(self): | ||
| } | ||
| build.build_package_from_contents(None, 'test', 'fromparquet', '.', build_contents) | ||
| build.build_package_from_contents(None, 'test', 'fromparquet', [], '.', build_contents) | ||
| pkg = command.load('test/fromparquet') | ||
@@ -559,1 +559,35 @@ assert df.equals(pkg.df()) # pylint:disable=E1101 | ||
| #TODO: Add test for checks on a parquet-sourced dataframe | ||
def test_subpackage(self):
    """
    Build nodes underneath an existing package and load them back.

    Covers an empty group, a DataFrame, a numpy array, a plain file and a
    subpackage built from a build file, then checks that invalid targets
    raise `command.CommandException`.
    """
    test_dir = pathlib.Path(os.path.dirname(__file__))
    build_path = str(test_dir / 'build_simple.yml')
    frame = DataFrame(dict(a=[1, 2, 3]))
    array = np.array([4, 5, 6])

    # The top-level package has to exist before anything is built under it.
    command.build('test/foo')

    command.build('test/foo/empty')
    command.build('test/foo/df', frame)
    command.build('test/foo/arr', array)
    command.build('test/foo/file', build_path)  # Adds as a plain file
    command.build('test/foo/stuff', build_path, build_file=True)  # Builds a subpackage

    loaded = command.load('test/foo')

    assert len(loaded.empty) == 0
    assert loaded.df().equals(frame)
    assert np.array_equal(loaded.arr(), array)
    assert loaded.file
    assert loaded.stuff.foo

    # Cannot build a package out of a data node.
    for data_value in (frame, array):
        with self.assertRaises(command.CommandException):
            command.build('test/foo', data_value)

    # Cannot build a subpackage if the package does not exist.
    with self.assertRaises(command.CommandException):
        command.build('test/non_existant/blah')

    with self.assertRaises(command.CommandException):
        command.build('test/non_existant/foo', frame)
@@ -37,3 +37,3 @@ """ | ||
| build.build_package_from_contents( | ||
| None, 'foox', 'barx', mydir, self.build_data, self.checks_contents, dry_run=True) | ||
| None, 'foox', 'barx', [], mydir, self.build_data, self.checks_contents, dry_run=True) | ||
@@ -123,5 +123,5 @@ def build_fail(self, check, regexp=None, nodename='foo'): | ||
| with assertRaisesRegex(self, IOError, 'doesnt_exist.yml'): | ||
| build.build_package(None, 'foox', 'barx', build_fn, checks_fn, dry_run=True) | ||
| build.build_package(None, 'foox', 'barx', [], build_fn, checks_fn, dry_run=True) | ||
| else: | ||
| build.build_package(None, 'foox', 'barx', build_fn, checks_fn, dry_run=True) | ||
| build.build_package(None, 'foox', 'barx', [], build_fn, checks_fn, dry_run=True) | ||
| run_build("build_simple_checks.yml", "checks_simple.yml") | ||
@@ -128,0 +128,0 @@ run_build("doesnt_exist.yml", "checks_simple.yml", True) |
@@ -252,2 +252,13 @@ """ | ||
| # Add some ndarrays | ||
| arr1 = np.array([ | ||
| [[1], [2], [3]], | ||
| [[4], [5], [6]] | ||
| ]) | ||
| arr2 = np.random.rand(30, 40, 50) | ||
| package1._set(['new', 'array1'], arr1) | ||
| package1._set(['new', 'array2'], arr2) | ||
| assert package1.new.array1._data() is arr1 | ||
| assert package1.new.array2._data() is arr2 | ||
| # Add a new file | ||
@@ -291,2 +302,7 @@ file_path = os.path.join(mydir, 'data/foo.csv') | ||
| new_arr1 = package3.new.array1._data() | ||
| new_arr2 = package3.new.array2._data() | ||
| assert new_arr1[1][2][0] == 6 | ||
| assert new_arr2.shape == (30, 40, 50) | ||
| new_file = package3.new.file._data() | ||
@@ -293,0 +309,0 @@ assert isinstance(new_file, string_types) |
+10
-5
@@ -410,3 +410,3 @@ """ | ||
| def build_package(team, username, package, yaml_path, checks_path=None, dry_run=False, env='default'): | ||
| def build_package(team, username, package, subpath, yaml_path, checks_path=None, dry_run=False, env='default'): | ||
| """ | ||
@@ -445,6 +445,6 @@ Builds a package from a given Yaml file and installs it locally. | ||
| checks_contents = None | ||
| build_package_from_contents(team, username, package, os.path.dirname(yaml_path), build_data, | ||
| build_package_from_contents(team, username, package, subpath, os.path.dirname(yaml_path), build_data, | ||
| checks_contents=checks_contents, dry_run=dry_run, env=env) | ||
| def build_package_from_contents(team, username, package, build_dir, build_data, | ||
| def build_package_from_contents(team, username, package, subpath, build_dir, build_data, | ||
| checks_contents=None, dry_run=False, env='default'): | ||
@@ -460,4 +460,9 @@ contents = build_data.get('contents', {}) | ||
| store = PackageStore() | ||
| newpackage = store.create_package(team, username, package, dry_run=dry_run) | ||
| _build_node(build_dir, newpackage, [], contents, | ||
| if subpath: | ||
| newpackage = store.get_package(team, username, package) | ||
| if not newpackage: | ||
| raise BuildException("Package does not exist") | ||
| else: | ||
| newpackage = store.create_package(team, username, package, dry_run=dry_run) | ||
| _build_node(build_dir, newpackage, subpath, contents, | ||
| checks_contents=checks_contents, dry_run=dry_run, env=env) | ||
@@ -464,0 +469,0 @@ |
+96
-65
@@ -24,2 +24,3 @@ # -*- coding: utf-8 -*- | ||
| import numpy as np | ||
| from packaging.version import Version | ||
@@ -37,3 +38,3 @@ import pandas as pd | ||
| from .const import DEFAULT_BUILDFILE, DTIMEF, QuiltException, SYSTEM_METADATA, TargetType | ||
| from .core import (hash_contents, find_object_hashes, TableNode, FileNode, GroupNode, | ||
| from .core import (hash_contents, find_object_hashes, GroupNode, RootNode, | ||
| decode_node, encode_node, LATEST_TAG) | ||
@@ -484,3 +485,3 @@ from .data_transfer import download_fragments, upload_fragments | ||
| def build(package, path=None, dry_run=False, env='default', force=False): | ||
| def build(package, path=None, dry_run=False, env='default', force=False, build_file=False): | ||
| """ | ||
@@ -493,3 +494,3 @@ Compile a Quilt data package, either from a build file or an existing package node. | ||
| # TODO: rename 'path' param to 'target'? It can be a PackageNode as well. | ||
| team, _, _ = parse_package(package) | ||
| team, _, _, subpath = parse_package(package, allow_subpath=True) | ||
| _check_team_id(team) | ||
@@ -505,5 +506,11 @@ logged_in_team = _find_logged_in_team() | ||
| return | ||
| # Backward compatibility: if there's no subpath, we're building a top-level package, | ||
| # so treat `path` as a build file, not as a data node. | ||
| if not subpath: | ||
| build_file = True | ||
| package_hash = hashlib.md5(package.encode('utf-8')).hexdigest() | ||
| try: | ||
| _build_internal(package, path, dry_run, env) | ||
| _build_internal(package, path, dry_run, env, build_file) | ||
| except Exception as ex: | ||
@@ -514,5 +521,5 @@ _log(team, type='build', package=package_hash, dry_run=dry_run, env=env, error=str(ex)) | ||
| def _build_internal(package, path, dry_run, env): | ||
| def _build_internal(package, path, dry_run, env, build_file): | ||
| # we may have a path, git URL, PackageNode, or None | ||
| if isinstance(path, string_types): | ||
| if build_file and isinstance(path, string_types): | ||
| # is this a git url? | ||
@@ -538,18 +545,11 @@ is_git_url = GIT_URL_RE.match(path) | ||
| build_from_node(package, path) | ||
| elif isinstance(path, string_types + (pd.DataFrame, np.ndarray)): | ||
| assert not dry_run # TODO? | ||
| build_from_node(package, nodes.DataNode(None, None, path, {})) | ||
| elif path is None: | ||
| assert not dry_run # TODO? | ||
| _build_empty(package) | ||
| build_from_node(package, nodes.GroupNode({})) | ||
| else: | ||
| raise ValueError("Expected a PackageNode, path or git URL, but got %r" % path) | ||
| def _build_empty(package): | ||
| """ | ||
| Create an empty package for convenient editing of de novo packages | ||
| """ | ||
| team, owner, pkg = parse_package(package) | ||
| store = PackageStore() | ||
| new = store.create_package(team, owner, pkg) | ||
| new.save_contents() | ||
| def build_from_node(package, node): | ||
@@ -559,8 +559,15 @@ """ | ||
| """ | ||
| team, owner, pkg = parse_package(package) | ||
| team, owner, pkg, subpath = parse_package(package, allow_subpath=True) | ||
| _check_team_id(team) | ||
| store = PackageStore() | ||
| package_obj = store.create_package(team, owner, pkg) | ||
| if subpath: | ||
| package_obj = store.get_package(team, owner, pkg) | ||
| if not package_obj: | ||
| raise CommandException("Package does not exist") | ||
| else: | ||
| package_obj = store.create_package(team, owner, pkg) | ||
| if not isinstance(node, nodes.GroupNode): | ||
| raise CommandException("Top-level node must be a group") | ||
| def _process_node(node, path=[]): | ||
| def _process_node(node, path): | ||
| if not isinstance(node._meta, dict): | ||
@@ -588,2 +595,4 @@ raise CommandException( | ||
| package_obj.save_df(data, path, TargetType.PANDAS, filepath, transform, meta) | ||
| elif isinstance(data, np.ndarray): | ||
| package_obj.save_numpy(data, path, TargetType.NUMPY, filepath, transform, meta) | ||
| elif isinstance(data, string_types): | ||
@@ -597,3 +606,3 @@ package_obj.save_file(data, path, TargetType.FILE, filepath, transform, meta) | ||
| try: | ||
| _process_node(node) | ||
| _process_node(node, subpath) | ||
| except StoreException as ex: | ||
@@ -609,3 +618,3 @@ raise CommandException("Failed to build the package: %s" % ex) | ||
| """ | ||
| team, owner, pkg = parse_package(package) | ||
| team, owner, pkg, subpath = parse_package(package, allow_subpath=True) | ||
@@ -624,8 +633,8 @@ if not os.path.exists(path): | ||
| contents = generate_contents(path, outfilename) | ||
| build_package_from_contents(team, owner, pkg, path, contents, dry_run=dry_run, env=env) | ||
| build_package_from_contents(team, owner, pkg, subpath, path, contents, dry_run=dry_run, env=env) | ||
| else: | ||
| build_package(team, owner, pkg, path, dry_run=dry_run, env=env) | ||
| build_package(team, owner, pkg, subpath, path, dry_run=dry_run, env=env) | ||
| if not dry_run: | ||
| print("Built %s%s/%s successfully." % (team + ':' if team else '', owner, pkg)) | ||
| print("Built %s successfully." % package) | ||
| except BuildException as ex: | ||
@@ -661,3 +670,3 @@ raise CommandException("Failed to build the package: %s" % ex) | ||
| """ | ||
| team, owner, pkg = parse_package(package) | ||
| team, owner, pkg, subpath = parse_package(package, allow_subpath=True) | ||
| _check_team_id(team) | ||
@@ -671,3 +680,10 @@ session = _get_session(team) | ||
| pkghash = pkgobj.get_hash() | ||
| contents = pkgobj.get_contents() | ||
| for component in subpath: | ||
| try: | ||
| contents = contents.children[component] | ||
| except (AttributeError, KeyError): | ||
| raise CommandException("Invalid subpath: %r" % component) | ||
| def _push_package(dry_run=False, sizes=dict()): | ||
@@ -678,3 +694,3 @@ data = json.dumps(dict( | ||
| is_team=is_team, | ||
| contents=pkgobj.get_contents(), | ||
| contents=contents, | ||
| description="", # TODO | ||
@@ -686,14 +702,28 @@ sizes=sizes | ||
| return session.put( | ||
| "{url}/api/package/{owner}/{pkg}/{hash}".format( | ||
| url=get_registry_url(team), | ||
| owner=owner, | ||
| pkg=pkg, | ||
| hash=pkghash | ||
| ), | ||
| data=compressed_data, | ||
| headers={ | ||
| 'Content-Encoding': 'gzip' | ||
| } | ||
| ) | ||
| if subpath: | ||
| return session.post( | ||
| "{url}/api/package_update/{owner}/{pkg}/{subpath}".format( | ||
| url=get_registry_url(team), | ||
| owner=owner, | ||
| pkg=pkg, | ||
| subpath='/'.join(subpath) | ||
| ), | ||
| data=compressed_data, | ||
| headers={ | ||
| 'Content-Encoding': 'gzip' | ||
| } | ||
| ) | ||
| else: | ||
| return session.put( | ||
| "{url}/api/package/{owner}/{pkg}/{hash}".format( | ||
| url=get_registry_url(team), | ||
| owner=owner, | ||
| pkg=pkg, | ||
| hash=pkghash | ||
| ), | ||
| data=compressed_data, | ||
| headers={ | ||
| 'Content-Encoding': 'gzip' | ||
| } | ||
| ) | ||
@@ -704,3 +734,3 @@ print("Fetching upload URLs from the registry...") | ||
| assert set(obj_urls) == set(find_object_hashes(pkgobj.get_contents())) | ||
| assert set(obj_urls) == set(find_object_hashes(contents)) | ||
@@ -721,14 +751,16 @@ store = pkgobj.get_store() | ||
| print("Updating the 'latest' tag...") | ||
| session.put( | ||
| "{url}/api/tag/{owner}/{pkg}/{tag}".format( | ||
| url=get_registry_url(team), | ||
| owner=owner, | ||
| pkg=pkg, | ||
| tag=LATEST_TAG | ||
| ), | ||
| data=json.dumps(dict( | ||
| hash=pkghash | ||
| )) | ||
| ) | ||
| if not subpath: | ||
| # Update the latest tag. | ||
| print("Updating the 'latest' tag...") | ||
| session.put( | ||
| "{url}/api/tag/{owner}/{pkg}/{tag}".format( | ||
| url=get_registry_url(team), | ||
| owner=owner, | ||
| pkg=pkg, | ||
| tag=LATEST_TAG | ||
| ), | ||
| data=json.dumps(dict( | ||
| hash=pkghash | ||
| )) | ||
| ) | ||
@@ -997,3 +1029,3 @@ print("Push complete. %s is live:\n%s" % (package, package_url)) | ||
| else: | ||
| hashes.update(obj._node.hashes) # May be empty for nodes created locally | ||
| hashes.update(obj._hashes or []) # May be empty for nodes created locally | ||
@@ -1153,3 +1185,3 @@ missing_hashes = {obj_hash for obj_hash in hashes if not os.path.exists(store.object_path(obj_hash))} | ||
| _print_children(children, child_prefix, path + name) | ||
| elif isinstance(node, TableNode): | ||
| elif node.metadata['q_target'] == TargetType.PANDAS.value: | ||
| df = store.load_dataframe(node.hashes) | ||
@@ -1159,6 +1191,4 @@ assert isinstance(df, pd.DataFrame) | ||
| print(prefix + name_prefix + ": " + info) | ||
| elif isinstance(node, FileNode): | ||
| else: | ||
| print(prefix + name_prefix + name) | ||
| else: | ||
| assert False, "node=%s type=%s" % (node, type(node)) | ||
@@ -1382,3 +1412,3 @@ print(pkgobj.get_path()) | ||
| # If filepath is not present, generate fake path based on node parentage. | ||
| filepath = node._meta.get(SYSTEM_METADATA, {}).get('filepath') | ||
| filepath = node._meta[SYSTEM_METADATA]['filepath'] | ||
| if filepath: | ||
@@ -1393,5 +1423,5 @@ dest = pathlib.PureWindowsPath(filepath) # PureWindowsPath handles all win/lin/osx separators | ||
| # When exporting TableNodes, excel files are to be converted to csv. | ||
| # When exporting dataframes, excel files are to be converted to csv. | ||
| # check also occurs in export_node(), but done here prevents filename conflicts | ||
| if isinstance(node._node, TableNode): | ||
| if node._target() == TargetType.PANDAS: | ||
| if dest.suffix != '.csv': | ||
@@ -1434,3 +1464,3 @@ # avoid name collisions from files with same name but different source, | ||
| dest.parent.mkdir(parents=True, exist_ok=True) | ||
| if isinstance(node._node, FileNode): | ||
| if node._target() == TargetType.FILE: | ||
| if use_symlinks is True: | ||
@@ -1440,4 +1470,3 @@ fs_link(node(), dest) | ||
| copyfile(node(), str(dest)) | ||
| elif isinstance(node._node, TableNode): | ||
| ext = node._node.metadata['q_ext'] | ||
| elif node._target() == TargetType.PANDAS: | ||
| df = node() | ||
@@ -1449,2 +1478,4 @@ # 100 decimal places of pi will allow you to draw a circle the size of the known | ||
| df.to_csv(str(dest), index=False, float_format='%r') | ||
| else: | ||
| assert False | ||
@@ -1495,3 +1526,3 @@ def resolve_dirpath(dirpath): | ||
| * export-time mapping -- user maps two files to the same name | ||
| * coded builds -- user creates two FileNodes with the same path | ||
| * coded builds -- user creates two files with the same path | ||
| * re-rooting absolute paths -- user entered absolute paths, which are re-rooted to the export dir | ||
@@ -1514,3 +1545,3 @@ * build-time duplication -- user enters the same file path twice under different nodes | ||
| continue # not a conflict.. | ||
| if isinstance(src._node, FileNode) and src() == results[dest](): | ||
| if src._target() == TargetType.FILE and src() == results[dest](): | ||
| continue # not a conflict (same src filename, same dest).. | ||
@@ -1517,0 +1548,0 @@ # ..add other conditions that prevent this from being a conflict here.. |
@@ -11,2 +11,3 @@ """ | ||
| PANDAS = 'pandas' | ||
| NUMPY = 'numpy' | ||
| FILE = 'file' | ||
@@ -13,0 +14,0 @@ GROUP = 'group' |
@@ -118,3 +118,3 @@ """ | ||
| build_p.add_argument("path", type=str, help="Path to source directory or YAML file") | ||
| build_p.set_defaults(func=command.build) | ||
| build_p.set_defaults(func=command.build, build_file=True) | ||
@@ -121,0 +121,0 @@ # quilt check |
+15
-19
@@ -7,4 +7,3 @@ import json | ||
| from .core import (decode_node, encode_node, hash_contents, | ||
| FileNode, GroupNode, TableNode, | ||
| PackageFormat) | ||
| FileNode, GroupNode) | ||
| from .util import is_nodename | ||
@@ -143,2 +142,10 @@ | ||
def save_numpy(self, ndarray, node_path, target, source_path, transform, custom_meta):
    """
    Save a Numpy array to the store and add it to the package contents.

    The array is written through `self._store.save_numpy` and the custom
    metadata through `self._store.save_metadata`; the two resulting hashes
    are then handed to `self._add_to_contents` together with the remaining
    arguments, which are passed along unchanged.
    """
    array_hash = self._store.save_numpy(ndarray)
    meta_hash = self._store.save_metadata(custom_meta)
    object_hashes = [array_hash]  # a numpy node is stored as a single object
    self._add_to_contents(node_path, object_hashes, target, source_path, transform, meta_hash)
| def save_file(self, srcfile, node_path, target, source_path, transform, custom_meta): | ||
@@ -226,26 +233,15 @@ """ | ||
| metadata = dict( | ||
| q_ext=transform, | ||
| q_path=source_path, | ||
| q_target=target.value | ||
| ) | ||
| if target is TargetType.GROUP: | ||
| node = GroupNode(dict()) | ||
| elif target is TargetType.PANDAS: | ||
| node = TableNode( | ||
| hashes=hashes, | ||
| format=PackageFormat.default.value, | ||
| metadata=metadata, | ||
| metadata_hash=user_meta_hash | ||
| ) | ||
| elif target is TargetType.FILE: | ||
| else: | ||
| node = FileNode( | ||
| hashes=hashes, | ||
| metadata=metadata, | ||
| metadata=dict( | ||
| q_ext=transform, | ||
| q_path=source_path, | ||
| q_target=target.value | ||
| ), | ||
| metadata_hash=user_meta_hash | ||
| ) | ||
| else: | ||
| assert False, "Unhandled TargetType {tt}".format(tt=target) | ||
| ptr.children[node_path[-1]] = node |
+21
-1
@@ -11,6 +11,7 @@ """ | ||
| from enum import Enum | ||
| import numpy as np | ||
| import pandas as pd | ||
| from .const import DEFAULT_TEAM, PACKAGE_DIR_NAME, QuiltException, SYSTEM_METADATA | ||
| from .core import FileNode, RootNode, TableNode, find_object_hashes | ||
| from .core import FileNode, RootNode, find_object_hashes | ||
| from .hashing import digest_file | ||
@@ -419,2 +420,21 @@ from .package import Package, PackageException | ||
def load_numpy(self, hash_list):
    """
    Loads a numpy array.

    `hash_list` must contain exactly one hash (asserted). The list is passed
    through `self._check_hashes` before the object file is opened. The array
    is read with `allow_pickle=False`, so files containing pickled object
    arrays are rejected by numpy instead of being unpickled.
    """
    assert len(hash_list) == 1
    self._check_hashes(hash_list)
    array_path = self.object_path(hash_list[0])
    with open(array_path, 'rb') as array_file:
        loaded = np.load(array_file, allow_pickle=False)
    return loaded
def save_numpy(self, nparray):
    """
    Save a numpy array as a store object and return its hash.

    The array is first written (with `allow_pickle=False`) to a path obtained
    from `self.temporary_object_path` using a random UUID as the name. The
    written file is then hashed with `digest_file` and handed to
    `self._move_to_store` under that hash.
    """
    temp_name = str(uuid.uuid4())
    temp_path = self.temporary_object_path(temp_name)
    with open(temp_path, 'wb') as out_file:
        np.save(out_file, nparray, allow_pickle=False)

    # The hash is computed from the file that was just written.
    object_hash = digest_file(temp_path)
    self._move_to_store(temp_path, object_hash)
    return object_hash
| def get_file(self, hash_list): | ||
@@ -421,0 +441,0 @@ """ |
@@ -198,4 +198,4 @@ """ | ||
| Examples: | ||
| >>> to_identifier('#if') -> '_if' | ||
| >>> to_identifier('global') -> 'global_' | ||
| >>> to_identifier('Alice\'s Restaurant') -> 'Alice_s_Restaurant' | ||
| >>> to_identifier('#if') -> 'if' -> QuiltException | ||
| >>> to_identifier('9foo') -> 'n9foo' | ||
@@ -208,3 +208,3 @@ | ||
| # Not really useful to expose as a CONSTANT, and python will compile and cache | ||
| result = re.sub(r'[^0-9a-zA-Z]+', '_', string) | ||
| result = re.sub(r'[^0-9a-zA-Z_]', '_', string) | ||
@@ -211,0 +211,0 @@ # compatibility with older behavior and tests, doesn't hurt anyways -- "_" is a |
+3
-3
@@ -19,3 +19,3 @@ from setuptools import setup, find_packages | ||
| name="quilt", | ||
| version="2.9.7", | ||
| version="2.9.8", | ||
| packages=find_packages(), | ||
@@ -37,3 +37,3 @@ description='Quilt is a data package manager', | ||
| url='https://github.com/quiltdata/quilt', | ||
| download_url='https://github.com/quiltdata/quilt/releases/tag/2.9.7', | ||
| download_url='https://github.com/quiltdata/quilt/releases/tag/2.9.8', | ||
| keywords='quilt quiltdata shareable data dataframe package platform pandas', | ||
@@ -47,3 +47,3 @@ install_requires=[ | ||
| 'pathlib2; python_version<"3.6"', # stdlib backport | ||
| 'pyarrow>=0.4.0,<0.8.0', # TODO(dima): Make unit tests work with 0.8+ | ||
| 'pyarrow>=0.9.0', | ||
| 'pyyaml>=3.12', | ||
@@ -50,0 +50,0 @@ 'requests>=2.12.4', |
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
389647
1.12%8567
1.1%