New Research: Supply Chain Attack on Axios Pulls Malicious Dependency from npm.Details →
Socket
Book a DemoSign in
Socket

rdbtools

Package Overview
Dependencies
Maintainers
2
Versions
12
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

rdbtools - pypi Package Compare versions

Comparing version
0.1.6
to
0.1.7
rdbtools/cli/report.html.template

Sorry, the diff of this file is not supported yet

+5
-5
Metadata-Version: 1.1
Name: rdbtools
Version: 0.1.6
Version: 0.1.7
Summary: Utilities to convert Redis RDB files to JSON or SQL formats
Home-page: https://github.com/sripathikrishnan/redis-rdb-tools
Author: Sripathi Krishnan
Author-email: Sripathi.Krishnan@gmail.com
Author: Sripathi Krishnan, Redis Labs
Author-email: oss@redislabs.com
License: MIT
Download-URL: http://cloud.github.com/downloads/andymccurdy/redis-py/redis-0.1.6.tar.gz
Download-URL: https://github.com/sripathikrishnan/redis-rdb-tools/archive/rdbtools-0.1.7.tar.gz
Description:

@@ -25,3 +25,3 @@ Parse Redis dump.rdb files, Analyze Memory, and Export Data to JSON

Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console

@@ -28,0 +28,0 @@ Classifier: Intended Audience :: Developers

Metadata-Version: 1.1
Name: rdbtools
Version: 0.1.6
Version: 0.1.7
Summary: Utilities to convert Redis RDB files to JSON or SQL formats
Home-page: https://github.com/sripathikrishnan/redis-rdb-tools
Author: Sripathi Krishnan
Author-email: Sripathi.Krishnan@gmail.com
Author: Sripathi Krishnan, Redis Labs
Author-email: oss@redislabs.com
License: MIT
Download-URL: http://cloud.github.com/downloads/andymccurdy/redis-py/redis-0.1.6.tar.gz
Download-URL: https://github.com/sripathikrishnan/redis-rdb-tools/archive/rdbtools-0.1.7.tar.gz
Description:

@@ -25,3 +25,3 @@ Parse Redis dump.rdb files, Analyze Memory, and Export Data to JSON

Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console

@@ -28,0 +28,0 @@ Classifier: Intended Audience :: Developers

@@ -10,3 +10,2 @@ CHANGES

rdbtools/parser.py
rdbtools/sharder.py
rdbtools.egg-info/PKG-INFO

@@ -20,2 +19,3 @@ rdbtools.egg-info/SOURCES.txt

rdbtools/cli/redis_memory_for_key.py
rdbtools/cli/redis_profiler.py
rdbtools/cli/redis_profiler.py
rdbtools/cli/report.html.template

@@ -5,3 +5,3 @@ from rdbtools.parser import RdbCallback, RdbParser, DebugCallback

__version__ = '0.1.6'
__version__ = '0.1.7'
VERSION = tuple(map(int, __version__.split('.')))

@@ -8,0 +8,0 @@

@@ -169,11 +169,12 @@ import calendar

def start_list(self, key, length, expiry, info):
self._start_key(key, length)
def start_list(self, key, expiry, info):
self._start_key(key, 0)
self._out.write('%s:[' % encode_key(key))
def rpush(self, key, value) :
self._elements_in_key += 1
self._write_comma()
self._out.write('%s' % encode_value(value))
def end_list(self, key):
def end_list(self, key, info):
self._end_key(key)

@@ -239,3 +240,3 @@ self._out.write(']')

def start_list(self, key, length, expiry, info):
def start_list(self, key, expiry, info):
self._index = 0

@@ -248,3 +249,3 @@

def end_list(self, key):
def end_list(self, key, info):
pass

@@ -339,3 +340,3 @@

def start_list(self, key, length, expiry, info):
def start_list(self, key, expiry, info):
self.pre_expiry(key, expiry)

@@ -346,3 +347,3 @@

def end_list(self, key):
def end_list(self, key, info):
self.post_expiry(key)

@@ -349,0 +350,0 @@

@@ -22,3 +22,3 @@ #!/usr/bin/env python

%prog user:13423
%prog -h localhost -p 6379 user:13423
%prog -s localhost -p 6379 user:13423
"""

@@ -25,0 +25,0 @@

from collections import namedtuple
import random
import json
import bisect
from distutils.version import StrictVersion

@@ -41,2 +43,4 @@ from rdbtools.parser import RdbCallback

self.add_scatter('string_memory_by_length', record.bytes, record.size)
elif record.type == 'dict':
pass
else:

@@ -85,3 +89,3 @@ raise Exception('Invalid data type %s' % record.type)

'''
def __init__(self, stream, architecture):
def __init__(self, stream, architecture, redis_version='3.2'):
self._stream = stream

@@ -93,7 +97,17 @@ self._dbnum = 0

self._len_largest_element = 0
self._db_keys = 0
self._db_expires = 0
self._aux_used_mem = None
self._aux_redis_ver = None
self._aux_redis_bits = None
self._redis_version = StrictVersion(redis_version)
self._total_internal_frag = 0
if architecture == 64 or architecture == '64':
self._pointer_size = 8
self._long_size = 8
self._architecture = 64
elif architecture == 32 or architecture == '32':
self._pointer_size = 4
self._long_size = 4
self._architecture = 32

@@ -103,11 +117,26 @@ def start_rdb(self):

def aux_field(self, key, value):
#print('aux: %s %s' % (key, value))
if key == 'used-mem':
self._aux_used_mem = int(value)
if key == 'redis-ver':
self._aux_redis_ver = value
if key == 'redis-bits':
self._aux_redis_bits = int(value)
def start_database(self, db_number):
self._dbnum = db_number
self._db_keys = 0
self._db_expires = 0
def end_database(self, db_number):
pass
record = MemoryRecord(self._dbnum, "dict", None, self.hashtable_overhead(self._db_keys), None, None, None)
self._stream.next_record(record)
record = MemoryRecord(self._dbnum, "dict", None, self.hashtable_overhead(self._db_expires), None, None, None)
self._stream.next_record(record)
def end_rdb(self):
#print('internal fragmentation: %s' % self._total_internal_frag)
pass
def set(self, key, value, expiry, info):

@@ -175,4 +204,6 @@ self._current_encoding = info['encoding']

def start_list(self, key, length, expiry, info):
self._current_length = length
def start_list(self, key, expiry, info):
self._current_length = 0
self._list_items_size = 0
self._list_items_zipped_size = 0
self._current_encoding = info['encoding']

@@ -183,21 +214,52 @@ size = self.sizeof_string(key)

size += self.key_expiry_overhead(expiry)
if 'sizeof_value' in info:
size += info['sizeof_value']
elif 'encoding' in info and info['encoding'] == 'linkedlist':
size += self.linkedlist_overhead()
# ignore the encoding in the rdb, and predict the encoding that will be used at the target redis version
if self._redis_version >= StrictVersion('3.2'):
# default configuration of redis 3.2
self._current_encoding = "quicklist"
self._list_max_ziplist_size = 8192 # default is -2 which means 8k
self._list_compress_depth = 0 # currently we only support no compression which is the default
self._cur_zips = 1
self._cur_zip_size = 0
else:
raise Exception('start_list', 'Could not find encoding or sizeof_value in info object %s' % info)
# default configuration of redis 2.8 -> 3.0
self._current_encoding = "ziplist"
self._list_max_ziplist_entries = 512
self._list_max_ziplist_value = 64
self._current_size = size
def rpush(self, key, value) :
if(element_length(value) > self._len_largest_element) :
def rpush(self, key, value):
self._current_length += 1
size = self.sizeof_string(value) if type(value) != int else 4
if(element_length(value) > self._len_largest_element):
self._len_largest_element = element_length(value)
if self._current_encoding == 'linkedlist':
self._current_size += self.sizeof_string(value)
self._current_size += self.linkedlist_entry_overhead()
self._current_size += self.robj_overhead()
def end_list(self, key):
if self._current_encoding == "ziplist":
self._list_items_zipped_size += self.ziplist_entry_overhead(value)
if self._current_length > self._list_max_ziplist_entries or size > self._list_max_ziplist_value:
self._current_encoding = "linkedlist"
elif self._current_encoding == "quicklist":
if self._cur_zip_size + size > self._list_max_ziplist_size:
self._cur_zip_size = size
self._cur_zips += 1
else:
self._cur_zip_size += size
self._list_items_zipped_size += self.ziplist_entry_overhead(value)
self._list_items_size += size # not to be used in case of ziplist or quicklist
def end_list(self, key, info):
if self._current_encoding == 'quicklist':
self._current_size += self.quicklist_overhead(self._cur_zips)
self._current_size += self.ziplist_header_overhead() * self._cur_zips
self._current_size += self._list_items_zipped_size
elif self._current_encoding == 'ziplist':
self._current_size += self.ziplist_header_overhead()
self._current_size += self._list_items_zipped_size
else: # linkedlist
self._current_size += self.linkedlist_entry_overhead() * self._current_length
self._current_size += self.linkedlist_overhead()
self._current_size += self.robj_overhead() * self._current_length
self._current_size += self._list_items_size
record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)

@@ -239,2 +301,3 @@ self._stream.next_record(record)

def end_key(self):
self._db_keys += 1
self._current_encoding = None

@@ -245,9 +308,3 @@ self._current_size = 0

def sizeof_string(self, string):
# See struct sdshdr over here https://github.com/antirez/redis/blob/unstable/src/sds.h
# int len : 4 bytes
# int free : 4 bytes
# char buf[] : size will be the length of the string
# 1 extra byte is used to store the null character at the end of the string
# Redis internally stores integers as a long
# Integers less than REDIS_SHARED_INTEGERS are stored in a shared memory pool
# https://github.com/antirez/redis/blob/unstable/src/sds.h
try:

@@ -261,3 +318,14 @@ num = int(string)

pass
return len(string) + 8 + 1 + self.malloc_overhead()
l = len(string)
if self._redis_version < StrictVersion('3.2'):
return self.malloc_overhead(l + 8 + 1)
if l < 2**5:
return self.malloc_overhead(l + 1 + 1)
if l < 2**8:
return self.malloc_overhead(l + 1 + 2 + 1)
if l < 2**16:
return self.malloc_overhead(l + 1 + 4 + 1)
if l < 2**32:
return self.malloc_overhead(l + 1 + 8 + 1)
return self.malloc_overhead(l + 1 + 16 + 1)

@@ -273,2 +341,3 @@ def top_level_object_overhead(self):

return 0
self._db_expires += 1
# Key expiry is stored in a hashtable, so we have to pay for the cost of a hashtable entry

@@ -281,4 +350,3 @@ # The timestamp itself is stored as an int64, which is a 8 bytes

# See the structures dict and dictht
# 2 * (3 unsigned longs + 1 pointer) + 2 ints + 2 pointers
# = 56 + 4 * sizeof_pointer()
# 2 * (3 unsigned longs + 1 pointer) + int + long + 2 pointers
#

@@ -288,10 +356,11 @@ # Additionally, see **table in dictht

# When the hashtable is rehashing, another instance of **table is created
# We are assuming 0.5 percent probability of rehashing, and so multiply
# Due to the possibility of rehashing during loading, we calculate the worst
# case in which both tables are allocated, and so multiply
# the size of **table by 1.5
return 56 + 4*self.sizeof_pointer() + self.next_power(size)*self.sizeof_pointer()*1.5
return 4 + 7*self.sizeof_long() + 4*self.sizeof_pointer() + self.next_power(size)*self.sizeof_pointer()*1.5
def hashtable_entry_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/dict.h
# Each dictEntry has 3 pointers
return 3*self.sizeof_pointer()
# Each dictEntry has 2 pointers + int64
return 2*self.sizeof_pointer() + 8

@@ -301,4 +370,9 @@ def linkedlist_overhead(self):

# A list has 5 pointers + an unsigned long
return 8 + 5*self.sizeof_pointer()
return self.sizeof_long() + 5*self.sizeof_pointer()
def quicklist_overhead(self, zip_count):
quicklist = 2*self.sizeof_pointer()+self.sizeof_long()+2*4
quickitem = 4*self.sizeof_pointer()+self.sizeof_long()+2*4
return quicklist + zip_count*quickitem
def linkedlist_entry_overhead(self):

@@ -308,3 +382,36 @@ # See https://github.com/antirez/redis/blob/unstable/src/adlist.h

return 3*self.sizeof_pointer()
def ziplist_header_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/ziplist.c
# <zlbytes><zltail><zllen><entry><entry><zlend>
return 4 + 4 + 2 + 1
def ziplist_entry_overhead(self, value):
# See https://github.com/antirez/redis/blob/unstable/src/ziplist.c
if type(value) == int:
header = 1
if value < 12:
size = 0
elif value < 2**8:
size = 1
elif value < 2**16:
size = 2
elif value < 2**24:
size = 3
elif value < 2**32:
size = 4
else:
size = 8
else:
size = len(value)
if size <= 63:
header = 1
elif size <= 16383:
header = 2
else:
header = 5
# add len again for prev_len of the next record
prev_len = 1 if size < 254 else 5
return prev_len + header + size
def skiplist_overhead(self, size):

@@ -319,4 +426,6 @@ return 2*self.sizeof_pointer() + self.hashtable_overhead(size) + (2*self.sizeof_pointer() + 16)

def malloc_overhead(self):
return self.size_t()
def malloc_overhead(self, size):
alloc = get_jemalloc_allocation(size)
self._total_internal_frag += alloc - size
return alloc

@@ -329,2 +438,5 @@ def size_t(self):

def sizeof_long(self):
return self._long_size
def next_power(self, size):

@@ -347,3 +459,2 @@ power = 1

def element_length(element):

@@ -356,2 +467,39 @@ if isinstance(element, int):

return len(element)
# size classes from jemalloc 4.0.4 using LG_QUANTUM=3
jemalloc_size_classes = [
8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640, 768, 896, 1024,
1280, 1536, 1792, 2048, 2560, 3072, 3584, 4096, 5120, 6144, 7168, 8192, 10240, 12288, 14336, 16384, 20480, 24576,
28672, 32768, 40960, 49152, 57344, 65536, 81920, 98304, 114688,131072, 163840, 196608, 229376, 262144, 327680,
393216, 458752, 524288, 655360, 786432, 917504, 1048576, 1310720, 1572864, 1835008, 2097152, 2621440, 3145728,
3670016, 4194304, 5242880, 6291456, 7340032, 8388608, 10485760, 12582912, 14680064, 16777216, 20971520, 25165824,
29360128, 33554432, 41943040, 50331648, 58720256, 67108864, 83886080, 100663296, 117440512, 134217728, 167772160,
201326592, 234881024, 268435456, 335544320, 402653184, 469762048, 536870912, 671088640, 805306368, 939524096,
1073741824, 1342177280, 1610612736, 1879048192, 2147483648, 2684354560, 3221225472, 3758096384, 4294967296,
5368709120, 6442450944, 7516192768, 8589934592, 10737418240, 12884901888, 15032385536, 17179869184, 21474836480,
25769803776, 30064771072, 34359738368, 42949672960, 51539607552, 60129542144, 68719476736, 85899345920,
103079215104, 120259084288, 137438953472, 171798691840, 206158430208, 240518168576, 274877906944, 343597383680,
412316860416, 481036337152, 549755813888, 687194767360, 824633720832, 962072674304, 1099511627776,1374389534720,
1649267441664, 1924145348608, 2199023255552, 2748779069440, 3298534883328, 3848290697216, 4398046511104,
5497558138880, 6597069766656, 7696581394432, 8796093022208, 10995116277760, 13194139533312, 15393162788864,
17592186044416, 21990232555520, 26388279066624, 30786325577728, 35184372088832, 43980465111040, 52776558133248,
61572651155456, 70368744177664, 87960930222080, 105553116266496, 123145302310912, 140737488355328, 175921860444160,
211106232532992, 246290604621824, 281474976710656, 351843720888320, 422212465065984, 492581209243648,
562949953421312, 703687441776640, 844424930131968, 985162418487296, 1125899906842624, 1407374883553280,
1688849860263936, 1970324836974592, 2251799813685248, 2814749767106560, 3377699720527872, 3940649673949184,
4503599627370496, 5629499534213120, 6755399441055744, 7881299347898368, 9007199254740992, 11258999068426240,
13510798882111488, 15762598695796736, 18014398509481984, 22517998136852480, 27021597764222976,31525197391593472,
36028797018963968, 45035996273704960, 54043195528445952, 63050394783186944, 72057594037927936, 90071992547409920,
108086391056891904, 126100789566373888, 144115188075855872, 180143985094819840, 216172782113783808,
252201579132747776, 288230376151711744, 360287970189639680, 432345564227567616, 504403158265495552,
576460752303423488, 720575940379279360, 864691128455135232, 1008806316530991104, 1152921504606846976,
1441151880758558720, 1729382256910270464, 2017612633061982208, 2305843009213693952, 2882303761517117440,
3458764513820540928, 4035225266123964416, 4611686018427387904, 5764607523034234880, 6917529027641081856,
8070450532247928832, 9223372036854775808, 11529215046068469760, 13835058055282163712, 16140901064495857664
] # TODO: use a different table depending on the redis-version used
def get_jemalloc_allocation(size):
idx = bisect.bisect_left(jemalloc_size_classes, size)
alloc = jemalloc_size_classes[idx] if idx < len(jemalloc_size_classes) else size
return alloc

@@ -17,2 +17,4 @@ import struct

REDIS_RDB_OPCODE_AUX = 250
REDIS_RDB_OPCODE_RESIZEDB = 251
REDIS_RDB_OPCODE_EXPIRETIME_MS = 252

@@ -33,2 +35,3 @@ REDIS_RDB_OPCODE_EXPIRETIME = 253

REDIS_RDB_TYPE_HASH_ZIPLIST = 13
REDIS_RDB_TYPE_LIST_QUICKLIST = 14

@@ -42,5 +45,5 @@ REDIS_RDB_ENC_INT8 = 0

0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash",
9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash"}
9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash", 14 : "list"}
class RdbCallback:
class RdbCallback(object):
"""

@@ -57,3 +60,12 @@ A Callback to handle events as the Redis dump file is parsed.

pass
def aux_field(self, key, value):
""""
Called in the beginning of the RDB with various meta data fields such as:
redis-ver, redis-bits, ctime, used-mem
exists since redis 3.2 (RDB v7)
"""
pass
def start_database(self, db_number):

@@ -70,3 +82,11 @@ """

pass
def db_size(self, db_size, expires_size):
"""
Called per database before the keys, with the key count in the main dictionary and the total volatile key count
exists since redis 3.2 (RDB v7)
"""
pass
def set(self, key, value, expiry, info):

@@ -154,3 +174,3 @@ """

def start_list(self, key, length, expiry, info):
def start_list(self, key, expiry, info):
"""

@@ -160,3 +180,2 @@ Callback to handle the start of a list

`key` is the redis key for this list
`length` is the number of elements in this list
`expiry` is a `datetime` object. None means the object does not expire

@@ -173,3 +192,3 @@ `info` is a dictionary containing additional information about this object.

def rpush(self, key, value) :
def rpush(self, key, value):
"""

@@ -186,3 +205,3 @@ Callback to insert a new value into this list

def end_list(self, key):
def end_list(self, key, info):
"""

@@ -192,3 +211,4 @@ Called when there are no more elements in this list

`key` the redis key for this list
`info` is a dictionary containing additional information about this object that wasn't known in start_list.
"""

@@ -276,2 +296,3 @@ pass

self.init_filter(filters)
self._rdb_version = 0

@@ -283,7 +304,10 @@ def parse(self, filename):

"""
with open(filename, "rb") as f:
self.parse_fd(open(filename, "rb"))
def parse_fd(self, fd):
with fd as f:
self.verify_magic_string(f.read(5))
self.verify_version(f.read(4))
self._callback.start_rdb()
is_first_database = True

@@ -294,3 +318,3 @@ db_number = 0

data_type = read_unsigned_char(f)
if data_type == REDIS_RDB_OPCODE_EXPIRETIME_MS :

@@ -302,3 +326,3 @@ self._expiry = to_datetime(read_unsigned_long(f) * 1000)

data_type = read_unsigned_char(f)
if data_type == REDIS_RDB_OPCODE_SELECTDB :

@@ -311,6 +335,22 @@ if not is_first_database :

continue
if data_type == REDIS_RDB_OPCODE_AUX:
aux_key = self.read_string(f)
aux_val = self.read_string(f)
ret = self._callback.aux_field(aux_key, aux_val)
if ret:
break # TODO: make all callbacks return abort flag
continue
if data_type == REDIS_RDB_OPCODE_RESIZEDB:
db_size = self.read_length(f)
expire_size = self.read_length(f)
self._callback.db_size(db_size, expire_size)
continue
if data_type == REDIS_RDB_OPCODE_EOF :
self._callback.end_database(db_number)
self._callback.end_rdb()
if self._rdb_version >= 5:
f.read(8)
break

@@ -364,2 +404,4 @@

val = self.lzf_decompress(f.read(clen), l)
else:
raise Exception('read_string', "Invalid string encoding %s"%(length))
else :

@@ -382,7 +424,7 @@ val = f.read(length)

length = self.read_length(f)
self._callback.start_list(self._key, length, self._expiry, info={'encoding':'linkedlist' })
self._callback.start_list(self._key, self._expiry, info={'encoding':'linkedlist' })
for count in xrange(0, length) :
val = self.read_string(f)
self._callback.rpush(self._key, val)
self._callback.end_list(self._key)
self._callback.end_list(self._key, info={'encoding':'linkedlist' })
elif enc_type == REDIS_RDB_TYPE_SET :

@@ -427,2 +469,4 @@ # A redis list is just a sequence of strings

self.read_hash_from_ziplist(f)
elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
self.read_list_from_quicklist(f)
else :

@@ -478,4 +522,6 @@ raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))

skip_strings = 1
elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
skip_strings = self.read_length(f)
else :
raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
for x in xrange(0, skip_strings):

@@ -493,7 +539,7 @@ self.skip_string(f)

if encoding == 8 :
entry = read_unsigned_long(buff)
entry = read_signed_long(buff)
elif encoding == 4 :
entry = read_unsigned_int(buff)
entry = read_signed_int(buff)
elif encoding == 2 :
entry = read_unsigned_short(buff)
entry = read_signed_short(buff)
else :

@@ -510,3 +556,3 @@ raise Exception('read_intset', 'Invalid encoding %d for key %s' % (encoding, self._key))

num_entries = read_unsigned_short(buff)
self._callback.start_list(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
self._callback.start_list(self._key, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
for x in xrange(0, num_entries) :

@@ -518,4 +564,22 @@ val = self.read_ziplist_entry(buff)

raise Exception('read_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
self._callback.end_list(self._key)
self._callback.end_list(self._key, info={'encoding':'ziplist'})
def read_list_from_quicklist(self, f):
count = self.read_length(f)
total_size = 0
self._callback.start_list(self._key, self._expiry, info={'encoding': 'quicklist', 'zips': count})
for i in xrange(0, count):
raw_string = self.read_string(f)
total_size += len(raw_string)
buff = StringIO(raw_string)
zlbytes = read_unsigned_int(buff)
tail_offset = read_unsigned_int(buff)
num_entries = read_unsigned_short(buff)
for x in xrange(0, num_entries):
self._callback.rpush(self._key, self.read_ziplist_entry(buff))
zlist_end = read_unsigned_char(buff)
if zlist_end != 255:
raise Exception('read_quicklist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
self._callback.end_list(self._key, info={'encoding': 'quicklist', 'zips': count, 'sizeof_value': total_size})
def read_zset_from_ziplist(self, f) :

@@ -633,4 +697,5 @@ raw_string = self.read_string(f)

version = int(version_str)
if version < 1 or version > 6 :
if version < 1 or version > 7:
raise Exception('verify_version', 'Invalid RDB version number %d' % version)
self._rdb_version = version

@@ -776,4 +841,10 @@ def init_filter(self, filters):

def aux_field(self, key, value):
print('aux:[%s:%s]' % (key, value))
def start_database(self, db_number):
print('{')
def db_size(self, db_size, expires_size):
print('db_size: %s, expires_size %s' % (db_size, expires_size))

@@ -802,3 +873,3 @@ def set(self, key, value, expiry):

def start_list(self, key, length, expiry):
def start_list(self, key, expiry, info):
print('"%s" : [' % str(key))

@@ -809,3 +880,3 @@

def end_list(self, key):
def end_list(self, key, info):
print(']')

@@ -812,0 +883,0 @@

@@ -26,7 +26,7 @@ #!/usr/bin/env python

'url': 'https://github.com/sripathikrishnan/redis-rdb-tools',
'download_url' : 'http://cloud.github.com/downloads/andymccurdy/redis-py/redis-%s.tar.gz' % __version__,
'author' : 'Sripathi Krishnan',
'download_url': 'https://github.com/sripathikrishnan/redis-rdb-tools/archive/rdbtools-%s.tar.gz' % __version__,
'author': 'Sripathi Krishnan, Redis Labs',
'author_email' : 'Sripathi.Krishnan@gmail.com',
'maintainer' : 'Sripathi Krishnan',
'maintainer_email' : 'Sripathi.Krishnan@gmail.com',
'maintainer': 'Sripathi Krishnan, Redis Labs',
'maintainer_email': 'oss@redislabs.com',
'keywords' : ['Redis', 'RDB', 'Export', 'Dump', 'Memory Profiler'],

@@ -44,3 +44,3 @@ 'license' : 'MIT',

'classifiers' : [
'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',

@@ -47,0 +47,0 @@ 'Intended Audience :: Developers',

import struct
import io
import sys
import datetime
import re
try :
from StringIO import StringIO
except ImportError:
from io import StringIO
# Length-encoding discriminators (top two bits of the first length byte).
REDIS_RDB_6BITLEN = 0
REDIS_RDB_14BITLEN = 1
REDIS_RDB_32BITLEN = 2
REDIS_RDB_ENCVAL = 3

# Stream-level opcodes that appear in the position of a type byte.
REDIS_RDB_OPCODE_EXPIRETIME_MS = 252
REDIS_RDB_OPCODE_EXPIRETIME = 253
REDIS_RDB_OPCODE_SELECTDB = 254
REDIS_RDB_OPCODE_EOF = 255

# Value type bytes for the object that follows a key.
REDIS_RDB_TYPE_STRING = 0
REDIS_RDB_TYPE_LIST = 1
REDIS_RDB_TYPE_SET = 2
REDIS_RDB_TYPE_ZSET = 3
REDIS_RDB_TYPE_HASH = 4
REDIS_RDB_TYPE_HASH_ZIPMAP = 9
REDIS_RDB_TYPE_LIST_ZIPLIST = 10
REDIS_RDB_TYPE_SET_INTSET = 11
REDIS_RDB_TYPE_ZSET_ZIPLIST = 12
REDIS_RDB_TYPE_HASH_ZIPLIST = 13

# Special string encodings, used when the ENCVAL discriminator is set.
REDIS_RDB_ENC_INT8 = 0
REDIS_RDB_ENC_INT16 = 1
REDIS_RDB_ENC_INT32 = 2
REDIS_RDB_ENC_LZF = 3

# Maps an RDB type byte to the logical redis data type it represents.
DATA_TYPE_MAPPING = {
    0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash",
    9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash"}
class RdbSplitter :
"""
Split a given RDB file into multiple RDB Files
"""
def __init__(self) :
    """No construction-time state; parsing state is set up during split()."""
    pass
def split(self, filename):
    """
    Parse a redis rdb dump file, and call methods in the
    callback object during the parsing operation.

    Walks the RDB stream record by record: an optional expiry opcode,
    then either a control opcode (SELECTDB / EOF) or a key+value pair,
    which is parsed or skipped depending on the active filters.

    NOTE(review): `matches_filter`, `verify_magic_string`, `verify_version`
    and the `_callback` used by `read_object` are not defined in this class
    as shown — presumably shared with the main parser; confirm.
    """
    with open(filename, "rb") as f:
        self.verify_magic_string(f.read(5))
        self.verify_version(f.read(4))
        db_number = 0
        while True :
            self._expiry = None
            data_type = read_unsigned_char(f)
            # Expiry opcodes precede the real type byte: convert the raw
            # timestamp to a datetime, then re-read the actual type.
            if data_type == REDIS_RDB_OPCODE_EXPIRETIME_MS :
                self._expiry = to_datetime(read_unsigned_long(f) * 1000)
                data_type = read_unsigned_char(f)
            elif data_type == REDIS_RDB_OPCODE_EXPIRETIME :
                self._expiry = to_datetime(read_unsigned_int(f) * 1000000)
                data_type = read_unsigned_char(f)
            if data_type == REDIS_RDB_OPCODE_SELECTDB :
                db_number = self.read_length(f)
                continue
            if data_type == REDIS_RDB_OPCODE_EOF :
                break
            if self.matches_filter(db_number) :
                self._key = self.read_string(f)
                # Key-level filter decides whether to fully parse or skip.
                if self.matches_filter(db_number, self._key, data_type):
                    self.read_object(f, data_type)
                else:
                    self.skip_object(f, data_type)
            else :
                self.skip_key_and_object(f, data_type)
def read_length_with_encoding(self, f):
    """Decode one length prefix from the stream.

    Returns (length, is_encoded); is_encoded is True when the value is a
    special-encoding marker (integer/LZF) rather than a byte count.
    """
    first_byte = read_unsigned_char(f)
    discriminator = (first_byte & 0xC0) >> 6
    if discriminator == REDIS_RDB_ENCVAL:
        # Remaining 6 bits identify the special encoding.
        return (first_byte & 0x3F, True)
    if discriminator == REDIS_RDB_6BITLEN:
        return (first_byte & 0x3F, False)
    if discriminator == REDIS_RDB_14BITLEN:
        second_byte = read_unsigned_char(f)
        return (((first_byte & 0x3F) << 8) | second_byte, False)
    # Otherwise the length is a 32-bit big-endian value in the next 4 bytes.
    return (ntohl(f), False)
def read_length(self, f):
    """Return just the decoded length, discarding the encoding flag."""
    length, _is_encoded = self.read_length_with_encoding(f)
    return length
def read_string(self, f):
    """Read one string object from the stream.

    Returns the raw bytes for plain and LZF-compressed strings, an int
    for the integer encodings, and None for an unknown special encoding
    (same fall-through as before).
    """
    length, is_encoded = self.read_length_with_encoding(f)
    if not is_encoded:
        # Plain string: the prefix is an exact byte count.
        return f.read(length)
    if length == REDIS_RDB_ENC_INT8:
        return read_signed_char(f)
    if length == REDIS_RDB_ENC_INT16:
        return read_signed_short(f)
    if length == REDIS_RDB_ENC_INT32:
        return read_signed_int(f)
    if length == REDIS_RDB_ENC_LZF:
        compressed_len = self.read_length(f)
        uncompressed_len = self.read_length(f)
        return self.lzf_decompress(f.read(compressed_len), uncompressed_len)
    return None
# Read an object for the stream
# f is the redis file
# enc_type is the type of object
def read_object(self, f, enc_type) :
    """Read one value of type `enc_type` from `f` and replay it through the
    callback (set / start_list / rpush / ... events).

    Raises Exception when `enc_type` is not a recognized RDB type byte.
    """
    if enc_type == REDIS_RDB_TYPE_STRING :
        val = self.read_string(f)
        self._callback.set(self._key, val, self._expiry, info={'encoding':'string'})
    elif enc_type == REDIS_RDB_TYPE_LIST :
        # A redis list is just a sequence of strings
        # We successively read strings from the stream and create a list from it
        # The lists are in order i.e. the first string is the head,
        # and the last string is the tail of the list
        length = self.read_length(f)
        self._callback.start_list(self._key, length, self._expiry, info={'encoding':'linkedlist' })
        for count in xrange(0, length) :
            val = self.read_string(f)
            self._callback.rpush(self._key, val)
        self._callback.end_list(self._key)
    elif enc_type == REDIS_RDB_TYPE_SET :
        # A redis set is just a sequence of strings
        # We successively read strings from the stream and create a set from it
        # Note that the order of strings is non-deterministic
        length = self.read_length(f)
        self._callback.start_set(self._key, length, self._expiry, info={'encoding':'hashtable'})
        for count in xrange(0, length) :
            val = self.read_string(f)
            self._callback.sadd(self._key, val)
        self._callback.end_set(self._key)
    elif enc_type == REDIS_RDB_TYPE_ZSET :
        length = self.read_length(f)
        self._callback.start_sorted_set(self._key, length, self._expiry, info={'encoding':'skiplist'})
        for count in xrange(0, length) :
            val = self.read_string(f)
            # Score is stored as a 1-byte length followed by its ASCII text.
            dbl_length = read_unsigned_char(f)
            score = f.read(dbl_length)
            # NOTE(review): under Python 3 f.read returns bytes, so this str
            # check would never fire — confirm the intended Python version.
            if isinstance(score, str):
                score = float(score)
            self._callback.zadd(self._key, score, val)
        self._callback.end_sorted_set(self._key)
    elif enc_type == REDIS_RDB_TYPE_HASH :
        length = self.read_length(f)
        self._callback.start_hash(self._key, length, self._expiry, info={'encoding':'hashtable'})
        for count in xrange(0, length) :
            field = self.read_string(f)
            value = self.read_string(f)
            self._callback.hset(self._key, field, value)
        self._callback.end_hash(self._key)
    # The remaining types store the whole structure as one string; each is
    # delegated to a dedicated reader.
    elif enc_type == REDIS_RDB_TYPE_HASH_ZIPMAP :
        self.read_zipmap(f)
    elif enc_type == REDIS_RDB_TYPE_LIST_ZIPLIST :
        self.read_ziplist(f)
    elif enc_type == REDIS_RDB_TYPE_SET_INTSET :
        self.read_intset(f)
    elif enc_type == REDIS_RDB_TYPE_ZSET_ZIPLIST :
        self.read_zset_from_ziplist(f)
    elif enc_type == REDIS_RDB_TYPE_HASH_ZIPLIST :
        self.read_hash_from_ziplist(f)
    else :
        raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
def skip_key_and_object(self, f, data_type):
    """Skip both the key string and its value payload (filtered-out entry)."""
    self.skip_string(f)
    self.skip_object(f, data_type)
def skip_string(self, f):
    """Advance the stream past one serialized string without decoding it."""
    length, is_encoded = self.read_length_with_encoding(f)
    if not is_encoded:
        # Plain string: the length prefix is an exact byte count.
        skip(f, length)
        return
    if length == REDIS_RDB_ENC_LZF:
        compressed_len = self.read_length(f)
        self.read_length(f)  # uncompressed length; irrelevant when skipping
        skip(f, compressed_len)
    else:
        # Integer encodings occupy 1, 2 or 4 bytes; any other special
        # encoding skips nothing (matching the previous behavior).
        widths = {REDIS_RDB_ENC_INT8: 1, REDIS_RDB_ENC_INT16: 2, REDIS_RDB_ENC_INT32: 4}
        skip(f, widths.get(length, 0))
def skip_object(self, f, enc_type):
    """Skip an object's serialized payload without decoding it.

    `enc_type` is the RDB type byte already consumed by the caller.  For
    aggregate types the element count is read from the stream; the
    zipmap/ziplist/intset family stores the whole structure as a single
    string, so exactly one string is skipped.

    Raises Exception when `enc_type` is not a known RDB type.
    """
    if enc_type == REDIS_RDB_TYPE_STRING :
        skip_strings = 1
    elif enc_type in (REDIS_RDB_TYPE_LIST, REDIS_RDB_TYPE_SET):
        skip_strings = self.read_length(f)
    elif enc_type in (REDIS_RDB_TYPE_ZSET, REDIS_RDB_TYPE_HASH):
        # Each entry is a member/value (or member/score) pair: two strings.
        skip_strings = self.read_length(f) * 2
    elif enc_type in (REDIS_RDB_TYPE_HASH_ZIPMAP,
                      REDIS_RDB_TYPE_LIST_ZIPLIST,
                      REDIS_RDB_TYPE_SET_INTSET,
                      REDIS_RDB_TYPE_ZSET_ZIPLIST,
                      REDIS_RDB_TYPE_HASH_ZIPLIST):
        skip_strings = 1
    else :
        # Fix: the error was mislabeled 'read_object'; tag it with the actual
        # method name (matches the equivalent upstream fix to the parser).
        raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
    for x in xrange(0, skip_strings):
        self.skip_string(f)
def read_intset(self, f):
    """Parse an intset-encoded set and emit its members via the callback.

    The intset is stored as one string: a 4-byte element width (2, 4 or 8),
    a 4-byte element count, then the packed integers.  Intset members are
    signed, so they must be decoded with the signed readers — the previous
    unsigned reads mis-decoded negative members (same fix as applied to the
    main parser in 0.1.7).

    Raises Exception on an unrecognized element width.
    """
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    encoding = read_unsigned_int(buff)      # bytes per element
    num_entries = read_unsigned_int(buff)
    self._callback.start_set(self._key, num_entries, self._expiry, info={'encoding':'intset', 'sizeof_value':len(raw_string)})
    for x in xrange(0, num_entries) :
        if encoding == 8 :
            entry = read_signed_long(buff)
        elif encoding == 4 :
            entry = read_signed_int(buff)
        elif encoding == 2 :
            entry = read_signed_short(buff)
        else :
            raise Exception('read_intset', 'Invalid encoding %d for key %s' % (encoding, self._key))
        self._callback.sadd(self._key, entry)
    self._callback.end_set(self._key)
def read_ziplist(self, f):
    """Parse a ziplist-encoded list and emit each entry via rpush."""
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    read_unsigned_int(buff)   # zlbytes (total size) - not needed
    read_unsigned_int(buff)   # offset of the tail entry - not needed
    num_entries = read_unsigned_short(buff)
    self._callback.start_list(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
    for _ in range(num_entries):
        self._callback.rpush(self._key, self.read_ziplist_entry(buff))
    zlist_end = read_unsigned_char(buff)
    # The list must terminate with the 0xFF end marker.
    if zlist_end != 255:
        raise Exception('read_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
    self._callback.end_list(self._key)
def read_zset_from_ziplist(self, f):
    """Parse a ziplist-encoded sorted set; entries alternate member, score."""
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    read_unsigned_int(buff)   # zlbytes (total size) - not needed
    read_unsigned_int(buff)   # offset of the tail entry - not needed
    num_entries = read_unsigned_short(buff)
    if num_entries % 2:
        raise Exception('read_zset_from_ziplist', "Expected even number of elements, but found %d for key %s" % (num_entries, self._key))
    # Two ziplist entries per sorted-set member.
    num_entries = num_entries // 2
    self._callback.start_sorted_set(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
    for _ in range(num_entries):
        member = self.read_ziplist_entry(buff)
        score = self.read_ziplist_entry(buff)
        # Scores may be stored as strings inside the ziplist.
        if isinstance(score, str):
            score = float(score)
        self._callback.zadd(self._key, score, member)
    zlist_end = read_unsigned_char(buff)
    if zlist_end != 255:
        raise Exception('read_zset_from_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
    self._callback.end_sorted_set(self._key)
def read_hash_from_ziplist(self, f):
    """Parse a ziplist-encoded hash; entries alternate field, value."""
    raw_string = self.read_string(f)
    buff = StringIO(raw_string)
    read_unsigned_int(buff)   # zlbytes (total size) - not needed
    read_unsigned_int(buff)   # offset of the tail entry - not needed
    num_entries = read_unsigned_short(buff)
    if num_entries % 2:
        raise Exception('read_hash_from_ziplist', "Expected even number of elements, but found %d for key %s" % (num_entries, self._key))
    # Two ziplist entries per hash field.
    num_entries = num_entries // 2
    self._callback.start_hash(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
    for _ in range(num_entries):
        field = self.read_ziplist_entry(buff)
        value = self.read_ziplist_entry(buff)
        self._callback.hset(self._key, field, value)
    zlist_end = read_unsigned_char(buff)
    if zlist_end != 255:
        raise Exception('read_hash_from_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
    self._callback.end_hash(self._key)
def read_ziplist_entry(self, f):
    """Decode a single ziplist entry.

    Returns either a raw string or an integer, depending on the entry
    header: the top two header bits select one of three string-length
    encodings, everything else is one of the fixed-width integer forms.
    """
    prev_length = read_unsigned_char(f)
    if prev_length == 254:
        # Long previous-entry lengths use a 4-byte encoding.
        prev_length = read_unsigned_int(f)
    header = read_unsigned_char(f)
    top_bits = header >> 6
    if top_bits == 0:
        # 6-bit length embedded in the header.
        return f.read(header & 0x3F)
    if top_bits == 1:
        # 14-bit length: low 6 header bits plus one extra byte.
        str_length = ((header & 0x3F) << 8) | read_unsigned_char(f)
        return f.read(str_length)
    if top_bits == 2:
        # 32-bit length follows the header.
        return f.read(read_unsigned_int(f))
    nibble = header >> 4
    if nibble == 12:
        return read_signed_short(f)
    if nibble == 13:
        return read_signed_int(f)
    if nibble == 14:
        return read_signed_long(f)
    if header == 240:
        return read_24bit_signed_number(f)
    if header == 254:
        return read_signed_char(f)
    if 241 <= header <= 253:
        # Tiny integers 0..12 are embedded directly in the header.
        return header - 241
    raise Exception('read_ziplist_entry', 'Invalid entry_header %d for key %s' % (header, self._key))
def read_zipmap(self, f):
    """Parse a zipmap-encoded hash and emit field/value pairs via hset.

    Entries are alternating length-prefixed keys and values; each value
    is preceded by a one-byte count of trailing "free" bytes that must
    be skipped after reading the value.
    """
    raw_string = self.read_string(f)
    buff = io.BytesIO(bytearray(raw_string))
    num_entries = read_unsigned_char(buff)
    self._callback.start_hash(self._key, num_entries, self._expiry, info={'encoding':'zipmap', 'sizeof_value':len(raw_string)})
    while True :
        next_length = self.read_zipmap_next_length(buff)
        if next_length is None :
            # End-of-zipmap marker reached.
            break
        key = buff.read(next_length)
        next_length = self.read_zipmap_next_length(buff)
        if next_length is None :
            # A key must always be followed by a value.
            # (Fixed typo in message: was 'Unexepcted'.)
            raise Exception('read_zip_map', 'Unexpected end of zip map for key %s' % self._key)
        free = read_unsigned_char(buff)
        value = buff.read(next_length)
        try:
            # Values that look numeric are emitted as ints.
            value = int(value)
        except ValueError:
            pass
        skip(buff, free)
        self._callback.hset(self._key, key, value)
    self._callback.end_hash(self._key)
def read_zipmap_next_length(self, f):
    """Return the length of the next zipmap element, or None at the end.

    A leading byte below 254 is the length itself; 254 means the real
    length follows as a 4-byte integer; 255 marks the end of the zipmap.
    """
    marker = read_unsigned_char(f)
    if marker == 254:
        return read_unsigned_int(f)
    if marker > 254:
        return None
    return marker
def verify_magic_string(self, magic_string):
    """Raise unless the file begins with the literal 'REDIS' marker."""
    if magic_string == 'REDIS':
        return
    raise Exception('verify_magic_string', 'Invalid File Format')
def verify_version(self, version_str):
    """Parse the RDB version and raise if outside the supported 1-6 range."""
    version = int(version_str)
    if not (1 <= version <= 6):
        raise Exception('verify_version', 'Invalid RDB version number %d' % version)
def init_filter(self, filters):
    """Normalise the user-supplied *filters* dict into self._filters.

    Resulting keys:
      dbs   - None (meaning all databases) or a sequence of db numbers
      keys  - compiled regular expression that key names must match
      types - sequence of logical type names to include
    """
    filters = filters or {}
    self._filters = {}
    if 'dbs' not in filters:
        self._filters['dbs'] = None
    elif isinstance(filters['dbs'], int):
        self._filters['dbs'] = (filters['dbs'],)
    elif isinstance(filters['dbs'], list):
        self._filters['dbs'] = [int(x) for x in filters['dbs']]
    else:
        raise Exception('init_filter', 'invalid value for dbs in filter %s' % filters['dbs'])
    # An absent or empty key pattern matches everything.
    key_pattern = filters.get('keys') or ".*"
    self._filters['keys'] = re.compile(key_pattern)
    if 'types' not in filters:
        self._filters['types'] = ('set', 'hash', 'sortedset', 'string', 'list')
    elif isinstance(filters['types'], str):
        self._filters['types'] = (filters['types'],)
    elif isinstance(filters['types'], list):
        self._filters['types'] = [str(x) for x in filters['types']]
    else:
        raise Exception('init_filter', 'invalid value for types in filter %s' % filters['types'])
def matches_filter(self, db_number, key=None, data_type=None):
    """Return True when the db/key/type combination passes the filters
    configured by init_filter; omitted arguments are not checked."""
    allowed_dbs = self._filters['dbs']
    if allowed_dbs and db_number not in allowed_dbs:
        return False
    if key and not self._filters['keys'].match(str(key)):
        return False
    if data_type is not None and self.get_logical_type(data_type) not in self._filters['types']:
        return False
    return True
def get_logical_type(self, data_type):
    """Translate a raw RDB data-type code into its logical type name."""
    return DATA_TYPE_MAPPING[data_type]
def lzf_decompress(self, compressed, expected_length):
    """Decompress an LZF-encoded chunk and return the raw bytes.

    The input is a sequence of chunks, each introduced by a control
    byte: values < 32 mean "copy the next ctrl+1 input bytes verbatim";
    anything else encodes a back-reference (length and distance) into
    the output produced so far.

    Raises if the decompressed size differs from *expected_length*.
    """
    in_stream = bytearray(compressed)
    in_len = len(in_stream)
    in_index = 0
    out_stream = bytearray()
    out_index = 0
    while in_index < in_len :
        ctrl = in_stream[in_index]
        if not isinstance(ctrl, int) :
            raise Exception('lzf_decompress', 'ctrl should be a number %s for key %s' % (str(ctrl), self._key))
        in_index = in_index + 1
        if ctrl < 32 :
            # Literal run: copy the next ctrl+1 bytes unchanged.
            for x in range(0, ctrl + 1) :
                out_stream.append(in_stream[in_index])
                in_index = in_index + 1
                out_index = out_index + 1
        else :
            # Back-reference: top 3 bits are the length (7 means an
            # extra length byte follows); the remaining 5 bits plus the
            # next input byte give the distance back into the output.
            length = ctrl >> 5
            if length == 7 :
                length = length + in_stream[in_index]
                in_index = in_index + 1
            ref = out_index - ((ctrl & 0x1f) << 8) - in_stream[in_index] - 1
            in_index = in_index + 1
            # Copy byte-by-byte so overlapping references (run-length
            # style) expand correctly.
            for x in range(0, length + 2) :
                out_stream.append(out_stream[ref])
                ref = ref + 1
                out_index = out_index + 1
    if len(out_stream) != expected_length :
        raise Exception('lzf_decompress', 'Expected lengths do not match %d != %d for key %s' % (len(out_stream), expected_length, self._key))
    # bytes() (== str on Python 2) instead of str(): str(bytearray)
    # returns the repr on Python 3 instead of the raw data.
    return bytes(out_stream)
def skip(f, free):
    """Advance *f* past *free* bytes, discarding them; no-op when *free*
    is zero or falsy."""
    if free:
        f.read(free)
def ntohl(f):
    """Read a 4-byte unsigned integer from *f* and return it with its
    byte order reversed."""
    host_value = read_unsigned_int(f)
    swapped = ((host_value & 0x000000ff) << 24) \
            | ((host_value & 0x0000ff00) << 8) \
            | ((host_value >> 8) & 0x0000ff00) \
            | ((host_value >> 24) & 0x000000ff)
    return swapped
def to_datetime(usecs_since_epoch):
    """Convert microseconds since the Unix epoch to a naive UTC datetime.

    Uses floor division: under Python 3, true division would leave the
    fractional second inside ``seconds_since_epoch`` and the microsecond
    remainder would then be added a second time. ``//`` is identical to
    ``/`` for the integer inputs this receives under Python 2.
    """
    seconds_since_epoch = usecs_since_epoch // 1000000
    useconds = usecs_since_epoch % 1000000
    dt = datetime.datetime.utcfromtimestamp(seconds_since_epoch)
    delta = datetime.timedelta(microseconds = useconds)
    return dt + delta
def read_signed_char(f):
    """Read 1 byte as a signed 8-bit integer."""
    (value,) = struct.unpack('b', f.read(1))
    return value
def read_unsigned_char(f):
    """Read 1 byte as an unsigned 8-bit integer."""
    (value,) = struct.unpack('B', f.read(1))
    return value
def read_signed_short(f):
    """Read 2 bytes as a signed 16-bit integer (native byte order)."""
    (value,) = struct.unpack('h', f.read(2))
    return value
def read_unsigned_short(f):
    """Read 2 bytes as an unsigned 16-bit integer (native byte order)."""
    (value,) = struct.unpack('H', f.read(2))
    return value
def read_signed_int(f):
    """Read 4 bytes as a signed 32-bit integer (native byte order)."""
    (value,) = struct.unpack('i', f.read(4))
    return value
def read_unsigned_int(f):
    """Read 4 bytes as an unsigned 32-bit integer (native byte order)."""
    (value,) = struct.unpack('I', f.read(4))
    return value
def read_24bit_signed_number(f):
    """Read 3 bytes as a signed 24-bit integer.

    A pad byte is prepended so the value can be unpacked as a native
    32-bit integer; the arithmetic right-shift then drops the pad byte
    and sign-extends the result.
    """
    # b'0' (== '0' on Python 2) keeps the concatenation valid on
    # Python 3, where f.read() yields bytes, not str.
    s = b'0' + f.read(3)
    num = struct.unpack('i', s)[0]
    return num >> 8
def read_signed_long(f):
    """Read 8 bytes as a signed 64-bit integer (native byte order)."""
    (value,) = struct.unpack('q', f.read(8))
    return value
def read_unsigned_long(f):
    """Read 8 bytes as an unsigned 64-bit integer (native byte order)."""
    (value,) = struct.unpack('Q', f.read(8))
    return value
def string_as_hexcode(string):
    """Print the hex code of every element of *string*.

    Handles both byte sequences (elements are already ints) and
    character strings (elements converted via ord).
    """
    for element in string:
        code = element if isinstance(element, int) else ord(element)
        print(hex(code))