New Research: Supply Chain Attack on Axios Pulls Malicious Dependency from npm.Details →
Socket
Book a DemoSign in
Socket

rdbtools

Package Overview
Dependencies
Maintainers
2
Versions
12
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

rdbtools - pypi Package Compare versions

Comparing version
0.1.6
to
0.1.7
rdbtools/cli/report.html.template

Sorry, the diff of this file is not supported yet

+5
-5
Metadata-Version: 1.1
Name: rdbtools
Version: 0.1.6
Version: 0.1.7
Summary: Utilities to convert Redis RDB files to JSON or SQL formats
Home-page: https://github.com/sripathikrishnan/redis-rdb-tools
Author: Sripathi Krishnan
Author-email: Sripathi.Krishnan@gmail.com
Author: Sripathi Krishnan, Redis Labs
Author-email: oss@redislabs.com
License: MIT
Download-URL: http://cloud.github.com/downloads/andymccurdy/redis-py/redis-0.1.6.tar.gz
Download-URL: https://github.com/sripathikrishnan/redis-rdb-tools/archive/rdbtools-0.1.7.tar.gz
Description:

@@ -25,3 +25,3 @@ Parse Redis dump.rdb files, Analyze Memory, and Export Data to JSON

Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console

@@ -28,0 +28,0 @@ Classifier: Intended Audience :: Developers

Metadata-Version: 1.1
Name: rdbtools
Version: 0.1.6
Version: 0.1.7
Summary: Utilities to convert Redis RDB files to JSON or SQL formats
Home-page: https://github.com/sripathikrishnan/redis-rdb-tools
Author: Sripathi Krishnan
Author-email: Sripathi.Krishnan@gmail.com
Author: Sripathi Krishnan, Redis Labs
Author-email: oss@redislabs.com
License: MIT
Download-URL: http://cloud.github.com/downloads/andymccurdy/redis-py/redis-0.1.6.tar.gz
Download-URL: https://github.com/sripathikrishnan/redis-rdb-tools/archive/rdbtools-0.1.7.tar.gz
Description:

@@ -25,3 +25,3 @@ Parse Redis dump.rdb files, Analyze Memory, and Export Data to JSON

Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console

@@ -28,0 +28,0 @@ Classifier: Intended Audience :: Developers

@@ -10,3 +10,2 @@ CHANGES

rdbtools/parser.py
rdbtools/sharder.py
rdbtools.egg-info/PKG-INFO

@@ -20,2 +19,3 @@ rdbtools.egg-info/SOURCES.txt

rdbtools/cli/redis_memory_for_key.py
rdbtools/cli/redis_profiler.py
rdbtools/cli/redis_profiler.py
rdbtools/cli/report.html.template

@@ -5,3 +5,3 @@ from rdbtools.parser import RdbCallback, RdbParser, DebugCallback

__version__ = '0.1.6'
__version__ = '0.1.7'
VERSION = tuple(map(int, __version__.split('.')))

@@ -8,0 +8,0 @@

@@ -169,11 +169,12 @@ import calendar

def start_list(self, key, length, expiry, info):
self._start_key(key, length)
def start_list(self, key, expiry, info):
self._start_key(key, 0)
self._out.write('%s:[' % encode_key(key))
def rpush(self, key, value) :
self._elements_in_key += 1
self._write_comma()
self._out.write('%s' % encode_value(value))
def end_list(self, key):
def end_list(self, key, info):
self._end_key(key)

@@ -239,3 +240,3 @@ self._out.write(']')

def start_list(self, key, length, expiry, info):
def start_list(self, key, expiry, info):
self._index = 0

@@ -248,3 +249,3 @@

def end_list(self, key):
def end_list(self, key, info):
pass

@@ -339,3 +340,3 @@

def start_list(self, key, length, expiry, info):
def start_list(self, key, expiry, info):
self.pre_expiry(key, expiry)

@@ -346,3 +347,3 @@

def end_list(self, key):
def end_list(self, key, info):
self.post_expiry(key)

@@ -349,0 +350,0 @@

@@ -22,3 +22,3 @@ #!/usr/bin/env python

%prog user:13423
%prog -h localhost -p 6379 user:13423
%prog -s localhost -p 6379 user:13423
"""

@@ -25,0 +25,0 @@

from collections import namedtuple
import random
import json
import bisect
from distutils.version import StrictVersion

@@ -41,2 +43,4 @@ from rdbtools.parser import RdbCallback

self.add_scatter('string_memory_by_length', record.bytes, record.size)
elif record.type == 'dict':
pass
else:

@@ -85,3 +89,3 @@ raise Exception('Invalid data type %s' % record.type)

'''
def __init__(self, stream, architecture):
def __init__(self, stream, architecture, redis_version='3.2'):
self._stream = stream

@@ -93,7 +97,17 @@ self._dbnum = 0

self._len_largest_element = 0
self._db_keys = 0
self._db_expires = 0
self._aux_used_mem = None
self._aux_redis_ver = None
self._aux_redis_bits = None
self._redis_version = StrictVersion(redis_version)
self._total_internal_frag = 0
if architecture == 64 or architecture == '64':
self._pointer_size = 8
self._long_size = 8
self._architecture = 64
elif architecture == 32 or architecture == '32':
self._pointer_size = 4
self._long_size = 4
self._architecture = 32

@@ -103,11 +117,26 @@ def start_rdb(self):

def aux_field(self, key, value):
#print('aux: %s %s' % (key, value))
if key == 'used-mem':
self._aux_used_mem = int(value)
if key == 'redis-ver':
self._aux_redis_ver = value
if key == 'redis-bits':
self._aux_redis_bits = int(value)
def start_database(self, db_number):
self._dbnum = db_number
self._db_keys = 0
self._db_expires = 0
def end_database(self, db_number):
pass
record = MemoryRecord(self._dbnum, "dict", None, self.hashtable_overhead(self._db_keys), None, None, None)
self._stream.next_record(record)
record = MemoryRecord(self._dbnum, "dict", None, self.hashtable_overhead(self._db_expires), None, None, None)
self._stream.next_record(record)
def end_rdb(self):
#print('internal fragmentation: %s' % self._total_internal_frag)
pass
def set(self, key, value, expiry, info):

@@ -175,4 +204,6 @@ self._current_encoding = info['encoding']

def start_list(self, key, length, expiry, info):
self._current_length = length
def start_list(self, key, expiry, info):
self._current_length = 0
self._list_items_size = 0
self._list_items_zipped_size = 0
self._current_encoding = info['encoding']

@@ -183,21 +214,52 @@ size = self.sizeof_string(key)

size += self.key_expiry_overhead(expiry)
if 'sizeof_value' in info:
size += info['sizeof_value']
elif 'encoding' in info and info['encoding'] == 'linkedlist':
size += self.linkedlist_overhead()
# ignore the encoding in the rdb, and predict the encoding that will be used at the target redis version
if self._redis_version >= StrictVersion('3.2'):
# default configuration of redis 3.2
self._current_encoding = "quicklist"
self._list_max_ziplist_size = 8192 # default is -2 which means 8k
self._list_compress_depth = 0 # currently we only support no compression which is the default
self._cur_zips = 1
self._cur_zip_size = 0
else:
raise Exception('start_list', 'Could not find encoding or sizeof_value in info object %s' % info)
# default configuration of redis 2.8 -> 3.0
self._current_encoding = "ziplist"
self._list_max_ziplist_entries = 512
self._list_max_ziplist_value = 64
self._current_size = size
def rpush(self, key, value) :
if(element_length(value) > self._len_largest_element) :
def rpush(self, key, value):
self._current_length += 1
size = self.sizeof_string(value) if type(value) != int else 4
if(element_length(value) > self._len_largest_element):
self._len_largest_element = element_length(value)
if self._current_encoding == 'linkedlist':
self._current_size += self.sizeof_string(value)
self._current_size += self.linkedlist_entry_overhead()
self._current_size += self.robj_overhead()
def end_list(self, key):
if self._current_encoding == "ziplist":
self._list_items_zipped_size += self.ziplist_entry_overhead(value)
if self._current_length > self._list_max_ziplist_entries or size > self._list_max_ziplist_value:
self._current_encoding = "linkedlist"
elif self._current_encoding == "quicklist":
if self._cur_zip_size + size > self._list_max_ziplist_size:
self._cur_zip_size = size
self._cur_zips += 1
else:
self._cur_zip_size += size
self._list_items_zipped_size += self.ziplist_entry_overhead(value)
self._list_items_size += size # not to be used in case of ziplist or quicklist
def end_list(self, key, info):
if self._current_encoding == 'quicklist':
self._current_size += self.quicklist_overhead(self._cur_zips)
self._current_size += self.ziplist_header_overhead() * self._cur_zips
self._current_size += self._list_items_zipped_size
elif self._current_encoding == 'ziplist':
self._current_size += self.ziplist_header_overhead()
self._current_size += self._list_items_zipped_size
else: # linkedlist
self._current_size += self.linkedlist_entry_overhead() * self._current_length
self._current_size += self.linkedlist_overhead()
self._current_size += self.robj_overhead() * self._current_length
self._current_size += self._list_items_size
record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)

@@ -239,2 +301,3 @@ self._stream.next_record(record)

def end_key(self):
self._db_keys += 1
self._current_encoding = None

@@ -245,9 +308,3 @@ self._current_size = 0

def sizeof_string(self, string):
# See struct sdshdr over here https://github.com/antirez/redis/blob/unstable/src/sds.h
# int len : 4 bytes
# int free : 4 bytes
# char buf[] : size will be the length of the string
# 1 extra byte is used to store the null character at the end of the string
# Redis internally stores integers as a long
# Integers less than REDIS_SHARED_INTEGERS are stored in a shared memory pool
# https://github.com/antirez/redis/blob/unstable/src/sds.h
try:

@@ -261,3 +318,14 @@ num = int(string)

pass
return len(string) + 8 + 1 + self.malloc_overhead()
l = len(string)
if self._redis_version < StrictVersion('3.2'):
return self.malloc_overhead(l + 8 + 1)
if l < 2**5:
return self.malloc_overhead(l + 1 + 1)
if l < 2**8:
return self.malloc_overhead(l + 1 + 2 + 1)
if l < 2**16:
return self.malloc_overhead(l + 1 + 4 + 1)
if l < 2**32:
return self.malloc_overhead(l + 1 + 8 + 1)
return self.malloc_overhead(l + 1 + 16 + 1)

@@ -273,2 +341,3 @@ def top_level_object_overhead(self):

return 0
self._db_expires += 1
# Key expiry is stored in a hashtable, so we have to pay for the cost of a hashtable entry

@@ -281,4 +350,3 @@ # The timestamp itself is stored as an int64, which is a 8 bytes

# See the structures dict and dictht
# 2 * (3 unsigned longs + 1 pointer) + 2 ints + 2 pointers
# = 56 + 4 * sizeof_pointer()
# 2 * (3 unsigned longs + 1 pointer) + int + long + 2 pointers
#

@@ -288,10 +356,11 @@ # Additionally, see **table in dictht

# When the hashtable is rehashing, another instance of **table is created
# We are assuming 0.5 percent probability of rehashing, and so multiply
# Due to the possibility of rehashing during loading, we calculate the worst
# case in which both tables are allocated, and so multiply
# the size of **table by 1.5
return 56 + 4*self.sizeof_pointer() + self.next_power(size)*self.sizeof_pointer()*1.5
return 4 + 7*self.sizeof_long() + 4*self.sizeof_pointer() + self.next_power(size)*self.sizeof_pointer()*1.5
def hashtable_entry_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/dict.h
# Each dictEntry has 3 pointers
return 3*self.sizeof_pointer()
# Each dictEntry has 2 pointers + int64
return 2*self.sizeof_pointer() + 8

@@ -301,4 +370,9 @@ def linkedlist_overhead(self):

# A list has 5 pointers + an unsigned long
return 8 + 5*self.sizeof_pointer()
return self.sizeof_long() + 5*self.sizeof_pointer()
def quicklist_overhead(self, zip_count):
quicklist = 2*self.sizeof_pointer()+self.sizeof_long()+2*4
quickitem = 4*self.sizeof_pointer()+self.sizeof_long()+2*4
return quicklist + zip_count*quickitem
def linkedlist_entry_overhead(self):

@@ -308,3 +382,36 @@ # See https://github.com/antirez/redis/blob/unstable/src/adlist.h

return 3*self.sizeof_pointer()
def ziplist_header_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/ziplist.c
# <zlbytes><zltail><zllen><entry><entry><zlend>
return 4 + 4 + 2 + 1
def ziplist_entry_overhead(self, value):
# See https://github.com/antirez/redis/blob/unstable/src/ziplist.c
if type(value) == int:
header = 1
if value < 12:
size = 0
elif value < 2**8:
size = 1
elif value < 2**16:
size = 2
elif value < 2**24:
size = 3
elif value < 2**32:
size = 4
else:
size = 8
else:
size = len(value)
if size <= 63:
header = 1
elif size <= 16383:
header = 2
else:
header = 5
# add len again for prev_len of the next record
prev_len = 1 if size < 254 else 5
return prev_len + header + size
def skiplist_overhead(self, size):

@@ -319,4 +426,6 @@ return 2*self.sizeof_pointer() + self.hashtable_overhead(size) + (2*self.sizeof_pointer() + 16)

def malloc_overhead(self):
return self.size_t()
def malloc_overhead(self, size):
alloc = get_jemalloc_allocation(size)
self._total_internal_frag += alloc - size
return alloc

@@ -329,2 +438,5 @@ def size_t(self):

def sizeof_long(self):
return self._long_size
def next_power(self, size):

@@ -347,3 +459,2 @@ power = 1

def element_length(element):

@@ -356,2 +467,39 @@ if isinstance(element, int):

return len(element)
# size classes from jemalloc 4.0.4 using LG_QUANTUM=3
jemalloc_size_classes = [
8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640, 768, 896, 1024,
1280, 1536, 1792, 2048, 2560, 3072, 3584, 4096, 5120, 6144, 7168, 8192, 10240, 12288, 14336, 16384, 20480, 24576,
28672, 32768, 40960, 49152, 57344, 65536, 81920, 98304, 114688,131072, 163840, 196608, 229376, 262144, 327680,
393216, 458752, 524288, 655360, 786432, 917504, 1048576, 1310720, 1572864, 1835008, 2097152, 2621440, 3145728,
3670016, 4194304, 5242880, 6291456, 7340032, 8388608, 10485760, 12582912, 14680064, 16777216, 20971520, 25165824,
29360128, 33554432, 41943040, 50331648, 58720256, 67108864, 83886080, 100663296, 117440512, 134217728, 167772160,
201326592, 234881024, 268435456, 335544320, 402653184, 469762048, 536870912, 671088640, 805306368, 939524096,
1073741824, 1342177280, 1610612736, 1879048192, 2147483648, 2684354560, 3221225472, 3758096384, 4294967296,
5368709120, 6442450944, 7516192768, 8589934592, 10737418240, 12884901888, 15032385536, 17179869184, 21474836480,
25769803776, 30064771072, 34359738368, 42949672960, 51539607552, 60129542144, 68719476736, 85899345920,
103079215104, 120259084288, 137438953472, 171798691840, 206158430208, 240518168576, 274877906944, 343597383680,
412316860416, 481036337152, 549755813888, 687194767360, 824633720832, 962072674304, 1099511627776,1374389534720,
1649267441664, 1924145348608, 2199023255552, 2748779069440, 3298534883328, 3848290697216, 4398046511104,
5497558138880, 6597069766656, 7696581394432, 8796093022208, 10995116277760, 13194139533312, 15393162788864,
17592186044416, 21990232555520, 26388279066624, 30786325577728, 35184372088832, 43980465111040, 52776558133248,
61572651155456, 70368744177664, 87960930222080, 105553116266496, 123145302310912, 140737488355328, 175921860444160,
211106232532992, 246290604621824, 281474976710656, 351843720888320, 422212465065984, 492581209243648,
562949953421312, 703687441776640, 844424930131968, 985162418487296, 1125899906842624, 1407374883553280,
1688849860263936, 1970324836974592, 2251799813685248, 2814749767106560, 3377699720527872, 3940649673949184,
4503599627370496, 5629499534213120, 6755399441055744, 7881299347898368, 9007199254740992, 11258999068426240,
13510798882111488, 15762598695796736, 18014398509481984, 22517998136852480, 27021597764222976,31525197391593472,
36028797018963968, 45035996273704960, 54043195528445952, 63050394783186944, 72057594037927936, 90071992547409920,
108086391056891904, 126100789566373888, 144115188075855872, 180143985094819840, 216172782113783808,
252201579132747776, 288230376151711744, 360287970189639680, 432345564227567616, 504403158265495552,
576460752303423488, 720575940379279360, 864691128455135232, 1008806316530991104, 1152921504606846976,
1441151880758558720, 1729382256910270464, 2017612633061982208, 2305843009213693952, 2882303761517117440,
3458764513820540928, 4035225266123964416, 4611686018427387904, 5764607523034234880, 6917529027641081856,
8070450532247928832, 9223372036854775808, 11529215046068469760, 13835058055282163712, 16140901064495857664
] # TODO: use a different table depending on the redis-version used
def get_jemalloc_allocation(size):
idx = bisect.bisect_left(jemalloc_size_classes, size)
alloc = jemalloc_size_classes[idx] if idx < len(jemalloc_size_classes) else size
return alloc

@@ -17,2 +17,4 @@ import struct

REDIS_RDB_OPCODE_AUX = 250
REDIS_RDB_OPCODE_RESIZEDB = 251
REDIS_RDB_OPCODE_EXPIRETIME_MS = 252

@@ -33,2 +35,3 @@ REDIS_RDB_OPCODE_EXPIRETIME = 253

REDIS_RDB_TYPE_HASH_ZIPLIST = 13
REDIS_RDB_TYPE_LIST_QUICKLIST = 14

@@ -42,5 +45,5 @@ REDIS_RDB_ENC_INT8 = 0

0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash",
9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash"}
9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash", 14 : "list"}
class RdbCallback:
class RdbCallback(object):
"""

@@ -57,3 +60,12 @@ A Callback to handle events as the Redis dump file is parsed.

pass
def aux_field(self, key, value):
""""
Called in the beginning of the RDB with various meta data fields such as:
redis-ver, redis-bits, ctime, used-mem
exists since redis 3.2 (RDB v7)
"""
pass
def start_database(self, db_number):

@@ -70,3 +82,11 @@ """

pass
def db_size(self, db_size, expires_size):
"""
Called per database before the keys, with the key count in the main dictionary and the total volatile key count
exists since redis 3.2 (RDB v7)
"""
pass
def set(self, key, value, expiry, info):

@@ -154,3 +174,3 @@ """

def start_list(self, key, length, expiry, info):
def start_list(self, key, expiry, info):
"""

@@ -160,3 +180,2 @@ Callback to handle the start of a list

`key` is the redis key for this list
`length` is the number of elements in this list
`expiry` is a `datetime` object. None means the object does not expire

@@ -173,3 +192,3 @@ `info` is a dictionary containing additional information about this object.

def rpush(self, key, value) :
def rpush(self, key, value):
"""

@@ -186,3 +205,3 @@ Callback to insert a new value into this list

def end_list(self, key):
def end_list(self, key, info):
"""

@@ -192,3 +211,4 @@ Called when there are no more elements in this list

`key` the redis key for this list
`info` is a dictionary containing additional information about this object that wasn't known in start_list.
"""

@@ -276,2 +296,3 @@ pass

self.init_filter(filters)
self._rdb_version = 0

@@ -283,7 +304,10 @@ def parse(self, filename):

"""
with open(filename, "rb") as f:
self.parse_fd(open(filename, "rb"))
def parse_fd(self, fd):
with fd as f:
self.verify_magic_string(f.read(5))
self.verify_version(f.read(4))
self._callback.start_rdb()
is_first_database = True

@@ -294,3 +318,3 @@ db_number = 0

data_type = read_unsigned_char(f)
if data_type == REDIS_RDB_OPCODE_EXPIRETIME_MS :

@@ -302,3 +326,3 @@ self._expiry = to_datetime(read_unsigned_long(f) * 1000)

data_type = read_unsigned_char(f)
if data_type == REDIS_RDB_OPCODE_SELECTDB :

@@ -311,6 +335,22 @@ if not is_first_database :

continue
if data_type == REDIS_RDB_OPCODE_AUX:
aux_key = self.read_string(f)
aux_val = self.read_string(f)
ret = self._callback.aux_field(aux_key, aux_val)
if ret:
break # TODO: make all callbacks return abort flag
continue
if data_type == REDIS_RDB_OPCODE_RESIZEDB:
db_size = self.read_length(f)
expire_size = self.read_length(f)
self._callback.db_size(db_size, expire_size)
continue
if data_type == REDIS_RDB_OPCODE_EOF :
self._callback.end_database(db_number)
self._callback.end_rdb()
if self._rdb_version >= 5:
f.read(8)
break

@@ -364,2 +404,4 @@

val = self.lzf_decompress(f.read(clen), l)
else:
raise Exception('read_string', "Invalid string encoding %s"%(length))
else :

@@ -382,7 +424,7 @@ val = f.read(length)

length = self.read_length(f)
self._callback.start_list(self._key, length, self._expiry, info={'encoding':'linkedlist' })
self._callback.start_list(self._key, self._expiry, info={'encoding':'linkedlist' })
for count in xrange(0, length) :
val = self.read_string(f)
self._callback.rpush(self._key, val)
self._callback.end_list(self._key)
self._callback.end_list(self._key, info={'encoding':'linkedlist' })
elif enc_type == REDIS_RDB_TYPE_SET :

@@ -427,2 +469,4 @@ # A redis list is just a sequence of strings

self.read_hash_from_ziplist(f)
elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
self.read_list_from_quicklist(f)
else :

@@ -478,4 +522,6 @@ raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))

skip_strings = 1
elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
skip_strings = self.read_length(f)
else :
raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
for x in xrange(0, skip_strings):

@@ -493,7 +539,7 @@ self.skip_string(f)

if encoding == 8 :
entry = read_unsigned_long(buff)
entry = read_signed_long(buff)
elif encoding == 4 :
entry = read_unsigned_int(buff)
entry = read_signed_int(buff)
elif encoding == 2 :
entry = read_unsigned_short(buff)
entry = read_signed_short(buff)
else :

@@ -510,3 +556,3 @@ raise Exception('read_intset', 'Invalid encoding %d for key %s' % (encoding, self._key))

num_entries = read_unsigned_short(buff)
self._callback.start_list(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
self._callback.start_list(self._key, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
for x in xrange(0, num_entries) :

@@ -518,4 +564,22 @@ val = self.read_ziplist_entry(buff)

raise Exception('read_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
self._callback.end_list(self._key)
self._callback.end_list(self._key, info={'encoding':'ziplist'})
def read_list_from_quicklist(self, f):
count = self.read_length(f)
total_size = 0
self._callback.start_list(self._key, self._expiry, info={'encoding': 'quicklist', 'zips': count})
for i in xrange(0, count):
raw_string = self.read_string(f)
total_size += len(raw_string)
buff = StringIO(raw_string)
zlbytes = read_unsigned_int(buff)
tail_offset = read_unsigned_int(buff)
num_entries = read_unsigned_short(buff)
for x in xrange(0, num_entries):
self._callback.rpush(self._key, self.read_ziplist_entry(buff))
zlist_end = read_unsigned_char(buff)
if zlist_end != 255:
raise Exception('read_quicklist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
self._callback.end_list(self._key, info={'encoding': 'quicklist', 'zips': count, 'sizeof_value': total_size})
def read_zset_from_ziplist(self, f) :

@@ -633,4 +697,5 @@ raw_string = self.read_string(f)

version = int(version_str)
if version < 1 or version > 6 :
if version < 1 or version > 7:
raise Exception('verify_version', 'Invalid RDB version number %d' % version)
self._rdb_version = version

@@ -776,4 +841,10 @@ def init_filter(self, filters):

def aux_field(self, key, value):
print('aux:[%s:%s]' % (key, value))
def start_database(self, db_number):
print('{')
def db_size(self, db_size, expires_size):
print('db_size: %s, expires_size %s' % (db_size, expires_size))

@@ -802,3 +873,3 @@ def set(self, key, value, expiry):

def start_list(self, key, length, expiry):
def start_list(self, key, expiry, info):
print('"%s" : [' % str(key))

@@ -809,3 +880,3 @@

def end_list(self, key):
def end_list(self, key, info):
print(']')

@@ -812,0 +883,0 @@

@@ -26,7 +26,7 @@ #!/usr/bin/env python

'url': 'https://github.com/sripathikrishnan/redis-rdb-tools',
'download_url' : 'http://cloud.github.com/downloads/andymccurdy/redis-py/redis-%s.tar.gz' % __version__,
'author' : 'Sripathi Krishnan',
'download_url': 'https://github.com/sripathikrishnan/redis-rdb-tools/archive/rdbtools-%s.tar.gz' % __version__,
'author': 'Sripathi Krishnan, Redis Labs',
'author_email' : 'Sripathi.Krishnan@gmail.com',
'maintainer' : 'Sripathi Krishnan',
'maintainer_email' : 'Sripathi.Krishnan@gmail.com',
'maintainer': 'Sripathi Krishnan, Redis Labs',
'maintainer_email': 'oss@redislabs.com',
'keywords' : ['Redis', 'RDB', 'Export', 'Dump', 'Memory Profiler'],

@@ -44,3 +44,3 @@ 'license' : 'MIT',

'classifiers' : [
'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',

@@ -47,0 +47,0 @@ 'Intended Audience :: Developers',

import struct
import io
import sys
import datetime
import re
try :
from StringIO import StringIO
except ImportError:
from io import StringIO
# Length-encoding discriminators (top two bits of the first length byte).
REDIS_RDB_6BITLEN = 0
REDIS_RDB_14BITLEN = 1
REDIS_RDB_32BITLEN = 2
REDIS_RDB_ENCVAL = 3

# Stream-level opcodes that appear in the position of a type byte.
REDIS_RDB_OPCODE_EXPIRETIME_MS = 252
REDIS_RDB_OPCODE_EXPIRETIME = 253
REDIS_RDB_OPCODE_SELECTDB = 254
REDIS_RDB_OPCODE_EOF = 255

# Value type bytes for the object that follows a key.
REDIS_RDB_TYPE_STRING = 0
REDIS_RDB_TYPE_LIST = 1
REDIS_RDB_TYPE_SET = 2
REDIS_RDB_TYPE_ZSET = 3
REDIS_RDB_TYPE_HASH = 4
REDIS_RDB_TYPE_HASH_ZIPMAP = 9
REDIS_RDB_TYPE_LIST_ZIPLIST = 10
REDIS_RDB_TYPE_SET_INTSET = 11
REDIS_RDB_TYPE_ZSET_ZIPLIST = 12
REDIS_RDB_TYPE_HASH_ZIPLIST = 13

# Special string encodings, used when the ENCVAL discriminator is set.
REDIS_RDB_ENC_INT8 = 0
REDIS_RDB_ENC_INT16 = 1
REDIS_RDB_ENC_INT32 = 2
REDIS_RDB_ENC_LZF = 3

# Maps an RDB type byte to the logical redis data type it represents.
DATA_TYPE_MAPPING = {
    0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash",
    9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash"}
class RdbSplitter :
"""
Split a given RDB file into multiple RDB Files
"""
def __init__(self) :
    """No construction-time state; parsing state is set up during split()."""
    pass
def split(self, filename):
    """
    Parse a redis rdb dump file, and call methods in the
    callback object during the parsing operation.

    Walks the RDB stream record by record: an optional expiry opcode,
    then either a control opcode (SELECTDB / EOF) or a key+value pair,
    which is parsed or skipped depending on the active filters.

    NOTE(review): `matches_filter`, `verify_magic_string`, `verify_version`
    and the `_callback` used by `read_object` are not defined in this class
    as shown — presumably shared with the main parser; confirm.
    """
    with open(filename, "rb") as f:
        self.verify_magic_string(f.read(5))
        self.verify_version(f.read(4))
        db_number = 0
        while True :
            self._expiry = None
            data_type = read_unsigned_char(f)
            # Expiry opcodes precede the real type byte: convert the raw
            # timestamp to a datetime, then re-read the actual type.
            if data_type == REDIS_RDB_OPCODE_EXPIRETIME_MS :
                self._expiry = to_datetime(read_unsigned_long(f) * 1000)
                data_type = read_unsigned_char(f)
            elif data_type == REDIS_RDB_OPCODE_EXPIRETIME :
                self._expiry = to_datetime(read_unsigned_int(f) * 1000000)
                data_type = read_unsigned_char(f)
            if data_type == REDIS_RDB_OPCODE_SELECTDB :
                db_number = self.read_length(f)
                continue
            if data_type == REDIS_RDB_OPCODE_EOF :
                break
            if self.matches_filter(db_number) :
                self._key = self.read_string(f)
                # Key-level filter decides whether to fully parse or skip.
                if self.matches_filter(db_number, self._key, data_type):
                    self.read_object(f, data_type)
                else:
                    self.skip_object(f, data_type)
            else :
                self.skip_key_and_object(f, data_type)
def read_length_with_encoding(self, f):
    """Decode one length prefix from the stream.

    Returns (length, is_encoded); is_encoded is True when the value is a
    special-encoding marker (integer/LZF) rather than a byte count.
    """
    first_byte = read_unsigned_char(f)
    discriminator = (first_byte & 0xC0) >> 6
    if discriminator == REDIS_RDB_ENCVAL:
        # Remaining 6 bits identify the special encoding.
        return (first_byte & 0x3F, True)
    if discriminator == REDIS_RDB_6BITLEN:
        return (first_byte & 0x3F, False)
    if discriminator == REDIS_RDB_14BITLEN:
        second_byte = read_unsigned_char(f)
        return (((first_byte & 0x3F) << 8) | second_byte, False)
    # Otherwise the length is a 32-bit big-endian value in the next 4 bytes.
    return (ntohl(f), False)
def read_length(self, f):
    """Return just the decoded length, discarding the encoding flag."""
    length, _is_encoded = self.read_length_with_encoding(f)
    return length
def read_string(self, f):
    """Read one string object from the stream.

    Returns the raw bytes for plain and LZF-compressed strings, an int
    for the integer encodings, and None for an unknown special encoding
    (same fall-through as before).
    """
    length, is_encoded = self.read_length_with_encoding(f)
    if not is_encoded:
        # Plain string: the prefix is an exact byte count.
        return f.read(length)
    if length == REDIS_RDB_ENC_INT8:
        return read_signed_char(f)
    if length == REDIS_RDB_ENC_INT16:
        return read_signed_short(f)
    if length == REDIS_RDB_ENC_INT32:
        return read_signed_int(f)
    if length == REDIS_RDB_ENC_LZF:
        compressed_len = self.read_length(f)
        uncompressed_len = self.read_length(f)
        return self.lzf_decompress(f.read(compressed_len), uncompressed_len)
    return None
# Read an object for the stream
# f is the redis file
# enc_type is the type of object
def read_object(self, f, enc_type) :
    """Read one value of type `enc_type` from `f` and replay it through the
    callback (set / start_list / rpush / ... events).

    Raises Exception when `enc_type` is not a recognized RDB type byte.
    """
    if enc_type == REDIS_RDB_TYPE_STRING :
        val = self.read_string(f)
        self._callback.set(self._key, val, self._expiry, info={'encoding':'string'})
    elif enc_type == REDIS_RDB_TYPE_LIST :
        # A redis list is just a sequence of strings
        # We successively read strings from the stream and create a list from it
        # The lists are in order i.e. the first string is the head,
        # and the last string is the tail of the list
        length = self.read_length(f)
        self._callback.start_list(self._key, length, self._expiry, info={'encoding':'linkedlist' })
        for count in xrange(0, length) :
            val = self.read_string(f)
            self._callback.rpush(self._key, val)
        self._callback.end_list(self._key)
    elif enc_type == REDIS_RDB_TYPE_SET :
        # A redis set is just a sequence of strings
        # We successively read strings from the stream and create a set from it
        # Note that the order of strings is non-deterministic
        length = self.read_length(f)
        self._callback.start_set(self._key, length, self._expiry, info={'encoding':'hashtable'})
        for count in xrange(0, length) :
            val = self.read_string(f)
            self._callback.sadd(self._key, val)
        self._callback.end_set(self._key)
    elif enc_type == REDIS_RDB_TYPE_ZSET :
        length = self.read_length(f)
        self._callback.start_sorted_set(self._key, length, self._expiry, info={'encoding':'skiplist'})
        for count in xrange(0, length) :
            val = self.read_string(f)
            # Score is stored as a 1-byte length followed by its ASCII text.
            dbl_length = read_unsigned_char(f)
            score = f.read(dbl_length)
            # NOTE(review): under Python 3 f.read returns bytes, so this str
            # check would never fire — confirm the intended Python version.
            if isinstance(score, str):
                score = float(score)
            self._callback.zadd(self._key, score, val)
        self._callback.end_sorted_set(self._key)
    elif enc_type == REDIS_RDB_TYPE_HASH :
        length = self.read_length(f)
        self._callback.start_hash(self._key, length, self._expiry, info={'encoding':'hashtable'})
        for count in xrange(0, length) :
            field = self.read_string(f)
            value = self.read_string(f)
            self._callback.hset(self._key, field, value)
        self._callback.end_hash(self._key)
    # The remaining types store the whole structure as one string; each is
    # delegated to a dedicated reader.
    elif enc_type == REDIS_RDB_TYPE_HASH_ZIPMAP :
        self.read_zipmap(f)
    elif enc_type == REDIS_RDB_TYPE_LIST_ZIPLIST :
        self.read_ziplist(f)
    elif enc_type == REDIS_RDB_TYPE_SET_INTSET :
        self.read_intset(f)
    elif enc_type == REDIS_RDB_TYPE_ZSET_ZIPLIST :
        self.read_zset_from_ziplist(f)
    elif enc_type == REDIS_RDB_TYPE_HASH_ZIPLIST :
        self.read_hash_from_ziplist(f)
    else :
        raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
def skip_key_and_object(self, f, data_type):
    """Skip both the key string and its value payload (filtered-out entry)."""
    self.skip_string(f)
    self.skip_object(f, data_type)
def skip_string(self, f):
    """Advance the stream past one serialized string without decoding it."""
    length, is_encoded = self.read_length_with_encoding(f)
    if not is_encoded:
        # Plain string: the length prefix is an exact byte count.
        skip(f, length)
        return
    if length == REDIS_RDB_ENC_LZF:
        compressed_len = self.read_length(f)
        self.read_length(f)  # uncompressed length; irrelevant when skipping
        skip(f, compressed_len)
    else:
        # Integer encodings occupy 1, 2 or 4 bytes; any other special
        # encoding skips nothing (matching the previous behavior).
        widths = {REDIS_RDB_ENC_INT8: 1, REDIS_RDB_ENC_INT16: 2, REDIS_RDB_ENC_INT32: 4}
        skip(f, widths.get(length, 0))
def skip_object(self, f, enc_type):
    """Skip an object's serialized payload without decoding it.

    `enc_type` is the RDB type byte already consumed by the caller.  For
    aggregate types the element count is read from the stream; the
    zipmap/ziplist/intset family stores the whole structure as a single
    string, so exactly one string is skipped.

    Raises Exception when `enc_type` is not a known RDB type.
    """
    if enc_type == REDIS_RDB_TYPE_STRING :
        skip_strings = 1
    elif enc_type in (REDIS_RDB_TYPE_LIST, REDIS_RDB_TYPE_SET):
        skip_strings = self.read_length(f)
    elif enc_type in (REDIS_RDB_TYPE_ZSET, REDIS_RDB_TYPE_HASH):
        # Each entry is a member/value (or member/score) pair: two strings.
        skip_strings = self.read_length(f) * 2
    elif enc_type in (REDIS_RDB_TYPE_HASH_ZIPMAP,
                      REDIS_RDB_TYPE_LIST_ZIPLIST,
                      REDIS_RDB_TYPE_SET_INTSET,
                      REDIS_RDB_TYPE_ZSET_ZIPLIST,
                      REDIS_RDB_TYPE_HASH_ZIPLIST):
        skip_strings = 1
    else :
        # Fix: the error was mislabeled 'read_object'; tag it with the actual
        # method name (matches the equivalent upstream fix to the parser).
        raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
    for x in xrange(0, skip_strings):
        self.skip_string(f)
def read_intset(self, f):
    """Parse an intset-encoded set and emit its members via the callback.

    The intset is stored as one string: a 4-byte element width (2, 4 or 8),
    a 4-byte element count, then the packed integers.  Intset members are
    signed, so they must be decoded with the signed readers — the previous
    unsigned reads mis-decoded negative members (same fix as applied to the
    main parser in 0.1.7).

    Raises Exception on an unrecognized element width.
    """
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    encoding = read_unsigned_int(buff)      # bytes per element
    num_entries = read_unsigned_int(buff)
    self._callback.start_set(self._key, num_entries, self._expiry, info={'encoding':'intset', 'sizeof_value':len(raw_string)})
    for x in xrange(0, num_entries) :
        if encoding == 8 :
            entry = read_signed_long(buff)
        elif encoding == 4 :
            entry = read_signed_int(buff)
        elif encoding == 2 :
            entry = read_signed_short(buff)
        else :
            raise Exception('read_intset', 'Invalid encoding %d for key %s' % (encoding, self._key))
        self._callback.sadd(self._key, entry)
    self._callback.end_set(self._key)
def read_ziplist(self, f):
    """Parse a ziplist-encoded list and emit each entry via rpush."""
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    read_unsigned_int(buff)   # zlbytes (total size) - not needed
    read_unsigned_int(buff)   # offset of the tail entry - not needed
    num_entries = read_unsigned_short(buff)
    self._callback.start_list(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
    for _ in range(num_entries):
        self._callback.rpush(self._key, self.read_ziplist_entry(buff))
    zlist_end = read_unsigned_char(buff)
    # The list must terminate with the 0xFF end marker.
    if zlist_end != 255:
        raise Exception('read_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
    self._callback.end_list(self._key)
def read_zset_from_ziplist(self, f):
    """Parse a ziplist-encoded sorted set; entries alternate member, score."""
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    read_unsigned_int(buff)   # zlbytes (total size) - not needed
    read_unsigned_int(buff)   # offset of the tail entry - not needed
    num_entries = read_unsigned_short(buff)
    if num_entries % 2:
        raise Exception('read_zset_from_ziplist', "Expected even number of elements, but found %d for key %s" % (num_entries, self._key))
    # Two ziplist entries per sorted-set member.
    num_entries = num_entries // 2
    self._callback.start_sorted_set(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
    for _ in range(num_entries):
        member = self.read_ziplist_entry(buff)
        score = self.read_ziplist_entry(buff)
        # Scores may be stored as strings inside the ziplist.
        if isinstance(score, str):
            score = float(score)
        self._callback.zadd(self._key, score, member)
    zlist_end = read_unsigned_char(buff)
    if zlist_end != 255:
        raise Exception('read_zset_from_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
    self._callback.end_sorted_set(self._key)
def read_hash_from_ziplist(self, f):
    """Parse a ziplist-encoded hash; entries alternate field, value."""
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    read_unsigned_int(buff)   # zlbytes (total size) - not needed
    read_unsigned_int(buff)   # offset of the tail entry - not needed
    num_entries = read_unsigned_short(buff)
    if num_entries % 2:
        raise Exception('read_hash_from_ziplist', "Expected even number of elements, but found %d for key %s" % (num_entries, self._key))
    # Two ziplist entries per hash field.
    num_entries = num_entries // 2
    self._callback.start_hash(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
    for _ in range(num_entries):
        field = self.read_ziplist_entry(buff)
        value = self.read_ziplist_entry(buff)
        self._callback.hset(self._key, field, value)
    zlist_end = read_unsigned_char(buff)
    if zlist_end != 255:
        raise Exception('read_hash_from_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
    self._callback.end_hash(self._key)
def read_ziplist_entry(self, f):
    """Decode a single ziplist entry.

    Returns either a raw string or an integer, depending on the entry
    header: the top two header bits select one of three string-length
    encodings, everything else is one of the fixed-width integer forms.
    """
    prev_length = read_unsigned_char(f)
    if prev_length == 254:
        # Long previous-entry lengths use a 4-byte encoding.
        prev_length = read_unsigned_int(f)
    header = read_unsigned_char(f)
    top_bits = header >> 6
    if top_bits == 0:
        # 6-bit length embedded in the header.
        return f.read(header & 0x3F)
    if top_bits == 1:
        # 14-bit length: low 6 header bits plus one extra byte.
        str_length = ((header & 0x3F) << 8) | read_unsigned_char(f)
        return f.read(str_length)
    if top_bits == 2:
        # 32-bit length follows the header.
        return f.read(read_unsigned_int(f))
    nibble = header >> 4
    if nibble == 12:
        return read_signed_short(f)
    if nibble == 13:
        return read_signed_int(f)
    if nibble == 14:
        return read_signed_long(f)
    if header == 240:
        return read_24bit_signed_number(f)
    if header == 254:
        return read_signed_char(f)
    if 241 <= header <= 253:
        # Tiny integers 0..12 are embedded directly in the header.
        return header - 241
    raise Exception('read_ziplist_entry', 'Invalid entry_header %d for key %s' % (header, self._key))
def read_zipmap(self, f):
    """Parse a zipmap-encoded hash and emit field/value pairs via hset.

    Entries are alternating length-prefixed keys and values; each value
    is preceded by a one-byte count of trailing "free" bytes that must
    be skipped after reading the value.
    """
    raw_string = self.read_string(f)
    buff = io.BytesIO(bytearray(raw_string))
    num_entries = read_unsigned_char(buff)
    self._callback.start_hash(self._key, num_entries, self._expiry, info={'encoding':'zipmap', 'sizeof_value':len(raw_string)})
    while True :
        next_length = self.read_zipmap_next_length(buff)
        if next_length is None :
            # End-of-zipmap marker reached.
            break
        key = buff.read(next_length)
        next_length = self.read_zipmap_next_length(buff)
        if next_length is None :
            # A key must always be followed by a value.
            # (Fixed typo in message: was 'Unexepcted'.)
            raise Exception('read_zip_map', 'Unexpected end of zip map for key %s' % self._key)
        free = read_unsigned_char(buff)
        value = buff.read(next_length)
        try:
            # Values that look numeric are emitted as ints.
            value = int(value)
        except ValueError:
            pass
        skip(buff, free)
        self._callback.hset(self._key, key, value)
    self._callback.end_hash(self._key)
def read_zipmap_next_length(self, f):
    """Return the length of the next zipmap element, or None at the end.

    A leading byte below 254 is the length itself; 254 means the real
    length follows as a 4-byte integer; 255 marks the end of the zipmap.
    """
    marker = read_unsigned_char(f)
    if marker == 254:
        return read_unsigned_int(f)
    if marker > 254:
        return None
    return marker
def verify_magic_string(self, magic_string):
    """Raise unless the file begins with the literal 'REDIS' marker."""
    if magic_string == 'REDIS':
        return
    raise Exception('verify_magic_string', 'Invalid File Format')
def verify_version(self, version_str):
    """Parse the RDB version and raise if outside the supported 1-6 range."""
    version = int(version_str)
    if not (1 <= version <= 6):
        raise Exception('verify_version', 'Invalid RDB version number %d' % version)
def init_filter(self, filters):
    """Normalise the user-supplied *filters* dict into self._filters.

    Resulting keys:
      dbs   - None (meaning all databases) or a sequence of db numbers
      keys  - compiled regular expression that key names must match
      types - sequence of logical type names to include
    """
    filters = filters or {}
    self._filters = {}
    if 'dbs' not in filters:
        self._filters['dbs'] = None
    elif isinstance(filters['dbs'], int):
        self._filters['dbs'] = (filters['dbs'],)
    elif isinstance(filters['dbs'], list):
        self._filters['dbs'] = [int(x) for x in filters['dbs']]
    else:
        raise Exception('init_filter', 'invalid value for dbs in filter %s' % filters['dbs'])
    # An absent or empty key pattern matches everything.
    key_pattern = filters.get('keys') or ".*"
    self._filters['keys'] = re.compile(key_pattern)
    if 'types' not in filters:
        self._filters['types'] = ('set', 'hash', 'sortedset', 'string', 'list')
    elif isinstance(filters['types'], str):
        self._filters['types'] = (filters['types'],)
    elif isinstance(filters['types'], list):
        self._filters['types'] = [str(x) for x in filters['types']]
    else:
        raise Exception('init_filter', 'invalid value for types in filter %s' % filters['types'])
def matches_filter(self, db_number, key=None, data_type=None):
    """Return True when the db/key/type combination passes the filters
    configured by init_filter; omitted arguments are not checked."""
    allowed_dbs = self._filters['dbs']
    if allowed_dbs and db_number not in allowed_dbs:
        return False
    if key and not self._filters['keys'].match(str(key)):
        return False
    if data_type is not None and self.get_logical_type(data_type) not in self._filters['types']:
        return False
    return True
def get_logical_type(self, data_type):
    """Translate a raw RDB data-type code into its logical type name."""
    return DATA_TYPE_MAPPING[data_type]
def lzf_decompress(self, compressed, expected_length):
    """Decompress an LZF-encoded chunk and return the raw bytes.

    The input is a sequence of chunks, each introduced by a control
    byte: values < 32 mean "copy the next ctrl+1 input bytes verbatim";
    anything else encodes a back-reference (length and distance) into
    the output produced so far.

    Raises if the decompressed size differs from *expected_length*.
    """
    in_stream = bytearray(compressed)
    in_len = len(in_stream)
    in_index = 0
    out_stream = bytearray()
    out_index = 0
    while in_index < in_len :
        ctrl = in_stream[in_index]
        if not isinstance(ctrl, int) :
            raise Exception('lzf_decompress', 'ctrl should be a number %s for key %s' % (str(ctrl), self._key))
        in_index = in_index + 1
        if ctrl < 32 :
            # Literal run: copy the next ctrl+1 bytes unchanged.
            for x in range(0, ctrl + 1) :
                out_stream.append(in_stream[in_index])
                in_index = in_index + 1
                out_index = out_index + 1
        else :
            # Back-reference: top 3 bits are the length (7 means an
            # extra length byte follows); the remaining 5 bits plus the
            # next input byte give the distance back into the output.
            length = ctrl >> 5
            if length == 7 :
                length = length + in_stream[in_index]
                in_index = in_index + 1
            ref = out_index - ((ctrl & 0x1f) << 8) - in_stream[in_index] - 1
            in_index = in_index + 1
            # Copy byte-by-byte so overlapping references (run-length
            # style) expand correctly.
            for x in range(0, length + 2) :
                out_stream.append(out_stream[ref])
                ref = ref + 1
                out_index = out_index + 1
    if len(out_stream) != expected_length :
        raise Exception('lzf_decompress', 'Expected lengths do not match %d != %d for key %s' % (len(out_stream), expected_length, self._key))
    # bytes() (== str on Python 2) instead of str(): str(bytearray)
    # returns the repr on Python 3 instead of the raw data.
    return bytes(out_stream)
def skip(f, free):
    """Advance *f* past *free* bytes, discarding them; no-op when *free*
    is zero or falsy."""
    if free:
        f.read(free)
def ntohl(f):
    """Read a 4-byte unsigned integer from *f* and return it with its
    byte order reversed."""
    host_value = read_unsigned_int(f)
    swapped = ((host_value & 0x000000ff) << 24) \
            | ((host_value & 0x0000ff00) << 8) \
            | ((host_value >> 8) & 0x0000ff00) \
            | ((host_value >> 24) & 0x000000ff)
    return swapped
def to_datetime(usecs_since_epoch):
    """Convert microseconds since the Unix epoch to a naive UTC datetime.

    Uses floor division: under Python 3, true division would leave the
    fractional second inside ``seconds_since_epoch`` and the microsecond
    remainder would then be added a second time. ``//`` is identical to
    ``/`` for the integer inputs this receives under Python 2.
    """
    seconds_since_epoch = usecs_since_epoch // 1000000
    useconds = usecs_since_epoch % 1000000
    dt = datetime.datetime.utcfromtimestamp(seconds_since_epoch)
    delta = datetime.timedelta(microseconds = useconds)
    return dt + delta
def read_signed_char(f):
    """Read 1 byte as a signed 8-bit integer."""
    (value,) = struct.unpack('b', f.read(1))
    return value
def read_unsigned_char(f):
    """Read 1 byte as an unsigned 8-bit integer."""
    (value,) = struct.unpack('B', f.read(1))
    return value
def read_signed_short(f):
    """Read 2 bytes as a signed 16-bit integer (native byte order)."""
    (value,) = struct.unpack('h', f.read(2))
    return value
def read_unsigned_short(f):
    """Read 2 bytes as an unsigned 16-bit integer (native byte order)."""
    (value,) = struct.unpack('H', f.read(2))
    return value
def read_signed_int(f):
    """Read 4 bytes as a signed 32-bit integer (native byte order)."""
    (value,) = struct.unpack('i', f.read(4))
    return value
def read_unsigned_int(f):
    """Read 4 bytes as an unsigned 32-bit integer (native byte order)."""
    (value,) = struct.unpack('I', f.read(4))
    return value
def read_24bit_signed_number(f):
    """Read 3 bytes as a signed 24-bit integer.

    A pad byte is prepended so the value can be unpacked as a native
    32-bit integer; the arithmetic right-shift then drops the pad byte
    and sign-extends the result.
    """
    # b'0' (== '0' on Python 2) keeps the concatenation valid on
    # Python 3, where f.read() yields bytes, not str.
    s = b'0' + f.read(3)
    num = struct.unpack('i', s)[0]
    return num >> 8
def read_signed_long(f):
    """Read 8 bytes as a signed 64-bit integer (native byte order)."""
    (value,) = struct.unpack('q', f.read(8))
    return value
def read_unsigned_long(f):
    """Read 8 bytes as an unsigned 64-bit integer (native byte order)."""
    (value,) = struct.unpack('Q', f.read(8))
    return value
def string_as_hexcode(string):
    """Print the hex code of every element of *string*.

    Handles both byte sequences (elements are already ints) and
    character strings (elements converted via ord).
    """
    for element in string:
        code = element if isinstance(element, int) else ord(element)
        print(hex(code))