CafeDB
A lightweight, human-readable JSON database for Python with zero dependencies and powerful querying capabilities.

Features
- Zero Configuration - Just point to a JSON/CDB file and start working
- Human-Readable - All data stored in plain JSON format
- Advanced Querying - Rich operator support including wildcards, regex, ranges, and logical operators ($or support)
- Thread-Safe - Built-in locking for concurrent operations
- Atomic Writes - Crash-safe operations with temporary files
- Automatic Backups - Optional backup creation on each write
- Transaction Support - Rollback capability for batch operations
- Detailed Statistics - Get insights about your data with extended field statistics
- No Dependencies - Pure Python with standard library only
Installation
Option 1: pip install
pip install cafedb
Option 2: Copy the module
Simply copy cafedb.py to your project directory:
from cafedb import CafeDB
Option 3: Install from source
git clone https://github.com/Crystallinecore/cafedb.git
cd cafedb
pip install -e .
Quick Start
from cafedb import CafeDB
db = CafeDB("mydata.cdb", verbose=True)
db.create_table("users")
db.insert("users", {
"name": "Alice Johnson",
"age": 28,
"email": "alice@example.com",
"city": "Paris"
})
db.insert_many("users", [
{"name": "Bob Smith", "age": 34, "email": "bob@example.com"},
{"name": "Carol White", "age": 22, "email": "carol@example.com"}
])
results = db.select("users", {"age": {"$gte": 25}})
top_users = db.select("users", order_by="age", reverse=True, limit=5)
db.update("users",
{"name": "Alice Johnson"},
{"city": "London"}
)
db.delete("users", {"age": {"$lt": 25}})
Query Operators
CafeDB supports powerful query operators for filtering data:
Comparison Operators
db.select("users", {"age": 28})
db.select("users", {"age": {"$eq": 28}})
db.select("users", {"age": {"$ne": 28}})
db.select("users", {"age": {"$gt": 25}})
db.select("users", {"age": {"$gte": 25}})
db.select("users", {"age": {"$lt": 30}})
db.select("users", {"age": {"$lte": 30}})
db.select("users", {"age": {"$between": [25, 35]}})
Membership Operators
db.select("users", {"city": {"$in": ["Paris", "London", "Berlin"]}})
db.select("users", {"city": {"$nin": ["Paris", "London"]}})
String Operators
db.select("users", {"name": "A*"})
db.select("users", {"email": "*@gmail.com"})
db.select("users", {"bio": {"$contains": "python"}})
db.select("users", {"name": {"$startswith": "Ali"}})
db.select("users", {"email": {"$endswith": ".com"}})
db.select("users", {"email": {"$regex": r"^[a-z]+@gmail\.com$"}})
db.select("users", {"name": {"$like": "J*son"}})
db.select("users", {"phone": {"$exists": True}})
Logical Operators
db.select("users", {
"age": {"$gte": 25},
"city": "Paris",
"email": "*@gmail.com"
})
db.select("users", {
"$or": [
{"city": "Paris"},
{"city": "London"}
]
})
db.select("users", {
"age": {"$gte": 25},
"$or": [
{"city": "Paris"},
{"score": {"$gte": 85}}
]
})
Advanced Features
Sorting and Pagination
results = db.select("users", order_by="age")
results = db.select("users", order_by="score", reverse=True)
results = db.select("users", limit=10, offset=20)
results = db.select(
"users",
{"age": {"$gte": 25}},
order_by="score",
reverse=True,
limit=5,
offset=0
)
Field Projection
results = db.select("users", fields=["name", "email"])
Custom Filter Functions
results = db.select("users", lambda row: row["age"] > 25 and "@gmail.com" in row["email"])
db.update(
"users",
{"age": {"$gte": 30}},
lambda row: {**row, "category": "senior", "discount": row["age"] * 0.01}
)
Transactions
try:
with db.transaction():
db.insert("users", user1)
db.update("users", {"name": "Alice"}, {"status": "active"})
db.delete("users", {"status": "inactive"})
except Exception as e:
print(f"Transaction failed: {e}")
Batch Operations
users = [
{"name": "User1", "age": 25},
{"name": "User2", "age": 30},
{"name": "User3", "age": 35}
]
count = db.insert_many("users", users)
print(f"Inserted {count} rows")
count = db.update(
"users",
{"age": {"$gte": 30}},
{"category": "senior"}
)
print(f"Updated {count} rows")
count = db.delete("users", {"age": {"$lt": 18}})
print(f"Deleted {count} rows")
Utility Methods
Table Management
tables = db.list_tables()
if db.exists_table("users"):
print("Users table exists")
count = db.clear_table("users")
print(f"Cleared {count} rows")
db.drop_table("users")
Statistics and Information
total = db.count("users")
filtered = db.count("users", {"age": {"$gte": 25}})
stats = db.stats("users")
print(f"Total rows: {stats['total_rows']}")
print(f"Size: {stats['size_kb']} KB")
print(f"Fields: {stats['fields']}")
info = db.info()
print(f"Path: {info['path']}")
print(f"Tables: {info['table_count']}")
print(f"Total rows: {info['total_rows']}")
print(f"Created: {info['created']}")
print(f"Last modified: {info['last_modified']}")
Configuration Options
db = CafeDB("data.json", verbose=True)
db = CafeDB("data.json", backup=False)
db = CafeDB("data.json", verbose=True, backup=True)
Error Handling
CafeDB uses custom exceptions for better error handling:
from cafedb import (
CafeDB,
CafeDBError,
TableNotFoundError,
TableExistsError,
QueryError
)
try:
db.create_table("users")
except TableExistsError:
print("Table already exists")
try:
db.select("nonexistent")
except TableNotFoundError as e:
print(f"Error: {e}")
try:
db.select("users", {"age": {"$invalid": 25}})
except QueryError as e:
print(f"Invalid query: {e}")
Data Structure
CafeDB stores data in a simple JSON format:
{
"_meta": {
"tables": ["users", "products"],
"created": "2025-01-15T10:30:00",
"last_modified": "2025-01-15T11:45:00",
"version": "1.0.0"
},
"users": [
{
"name": "Alice Johnson",
"age": 28,
"email": "alice@example.com",
"_inserted_at": "2025-01-15T10:35:00",
"_updated_at": "2025-01-15T11:00:00"
}
]
}
Best Practices
- Use batch operations -
insert_many() is more efficient than multiple insert() calls
- Enable backups for production - Keep
backup=True for important data
- Use transactions for related changes - Group operations that should succeed or fail together
- Use field projection - Only select the fields you need with the
fields parameter
- Handle exceptions - Always catch
TableNotFoundError, QueryError, etc.
- Keep tables under 10,000 rows - For optimal performance
Limitations
- Not suitable for very large datasets (>100MB)
- No SQL-like JOIN operations (use application-level logic)
- No built-in indexing (all queries are full table scans)
- Single-file storage (one JSON file per database)
- Queries work on top-level fields only (no nested field queries)
Use Cases
Perfect for:
- Small to medium-sized datasets
- Configuration storage
- Rapid prototyping
- Testing and development
- Embedded applications
- Data that needs to be human-readable
- Applications where simplicity is more important than performance
API Reference
Core Methods
create_table(table_name: str) - Create a new table
drop_table(table_name: str) - Delete a table
insert(table_name: str, row: dict) - Insert a single row
insert_many(table_name: str, rows: List[dict]) - Batch insert rows
select(table_name: str, filters=None, fields=None, limit=None, offset=0, order_by=None, reverse=False) - Query rows
update(table_name: str, filters, updater) - Update matching rows
delete(table_name: str, filters) - Delete matching rows
count(table_name: str, filters=None) - Count matching rows
list_tables() - List all tables
exists_table(table_name: str) - Check if table exists
stats(table_name: str) - Get table statistics
clear_table(table_name: str) - Remove all rows from table
info() - Get database information
Context Managers
transaction() - Execute operations with rollback support
Complete Example
from cafedb import CafeDB
db = CafeDB("app.json", verbose=True)
if not db.exists_table("users"):
db.create_table("users")
db.insert_many("users", [
{"username": "alice", "email": "alice@example.com", "age": 28, "role": "admin"},
{"username": "bob", "email": "bob@example.com", "age": 34, "role": "user"},
{"username": "carol", "email": "carol@example.com", "age": 22, "role": "user"}
])
admins = db.select("users", {"role": "admin"})
results = db.select("users", {
"age": {"$between": [25, 35]},
"$or": [
{"role": "admin"},
{"role": "moderator"}
]
})
db.update(
"users",
{"username": "alice"},
{"email": "alice.new@example.com"}
)
db.update(
"users",
{"age": {"$gte": 30}},
{"role": "senior"}
)
stats = db.stats("users")
print(f"Total users: {stats['total_rows']}")
print(f"Database size: {stats['size_kb']} KB")
for field, info in stats['fields'].items():
print(f"{field}: {info['unique_count']} unique, {info['present_percentage']}% present")
try:
with db.transaction():
db.insert("users", {"username": "dave", "age": 40})
db.update("users", {"role": "user"}, {"verified": True})
except Exception as e:
print(f"Transaction failed: {e}")
Contributing
Contributions are welcome! Feel free to:
- Report bugs
- Suggest features
- Submit pull requests
- Improve documentation
License
MIT License - Free to use and modify for any purpose.
FAQ
Q: Can CafeDB handle millions of records?
A: CafeDB is optimized for small to medium datasets (< 100K records). For larger datasets, consider a traditional database.
Q: Is CafeDB thread-safe?
A: Yes, CafeDB uses threading.RLock() for thread-safe operations.
Q: Can I use CafeDB in production?
A: CafeDB is great for prototypes, small applications, and internal tools. For mission-critical production systems with high load, use PostgreSQL, MongoDB, etc.
Q: How do I backup my database?
A: CafeDB creates automatic backups (if enabled). You can also manually copy the JSON file.
Q: What about JOIN operations?
A: CafeDB doesn't support JOINs. Query multiple tables and combine results in your application code.
â CafeDB - Simple, powerful, human-readable database for Python.