ArangoDB-DGL Adapter
The ArangoDB-DGL Adapter exports Graphs from ArangoDB, the multi-model database for graph & beyond, into Deep Graph Library (DGL), a Python package for graph neural networks, and vice versa.
Note: The ArangoDB-DGL Adapter currently only supports the use of PyTorch as the DGL backend. Support for MXNet and TensorFlow will be added in the future.
About DGL
The Deep Graph Library (DGL) is an easy-to-use, high-performance and scalable Python package for deep learning on graphs. DGL is framework agnostic, meaning that if a deep graph model is a component of an end-to-end application, the rest of the logic can be implemented in any major framework, such as PyTorch, Apache MXNet or TensorFlow.
Installation
Latest Release
pip install adbdgl-adapter
Current State
pip install git+https://github.com/arangoml/dgl-adapter.git
Quickstart
Also available as an ArangoDB Lunch & Learn session: Graph & Beyond Course #2.8
import dgl
import torch
import pandas
from arango import ArangoClient
from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller
from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder
# Connect to ArangoDB. No host, database name or credentials are passed here,
# so whatever defaults python-arango applies are used.
db = ArangoClient().db()
# Adapter instance that the examples below reuse.
adbdgl_adapter = ADBDGL_Adapter(db)
# Toy heterogeneous DGL graph used throughout the examples. Each key is a
# (source node type, edge type, destination node type) triple and each value
# is a (source node ids, destination node ids) pair of tensors.
fake_hetero = dgl.heterograph({
("user", "follows", "user"): (torch.tensor([0, 1]), torch.tensor([1, 2])),
("user", "follows", "topic"): (torch.tensor([1, 1]), torch.tensor([1, 2])),
("user", "plays", "game"): (torch.tensor([0, 3]), torch.tensor([3, 4])),
})
# Node data: one scalar feature and one label per "user" node (4 values each),
# and a 2-element feature row per "game" node (5 rows).
fake_hetero.nodes["user"].data["features"] = torch.tensor([21, 44, 16, 25])
fake_hetero.nodes["user"].data["label"] = torch.tensor([1, 2, 0, 1])
fake_hetero.nodes["game"].data["features"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])
# Edge data: a 2-element feature row for each of the two "plays" edges.
fake_hetero.edges[("user", "plays", "game")].data["features"] = torch.tensor([[6, 1], [1000, 0]])
DGL to ArangoDB
# Simplest form: export the DGL graph to ArangoDB under the name "FakeHetero"
# without a metagraph.
adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero)
# Optional metagraph describing how DGL node/edge data should be written to
# ArangoDB, keyed by DGL node type and by DGL edge-type triple.
# NOTE(review): label_tensor_to_2_column_dataframe is defined *after* this
# dict in this document; when running the snippets as one script, define the
# function first or this assignment raises NameError.
metagraph = {
"nodeTypes": {
"user": {
# A string value: presumably the ArangoDB attribute name to store the
# "features" tensor under (i.e. a rename) — confirm against adapter docs.
"features": "user_age",
# A callable value: user-defined handling of the "label" tensor; the
# function receives the tensor and a DataFrame and returns a DataFrame.
"label": label_tensor_to_2_column_dataframe,
},
# A set of strings: presumably keeps the DGL data key as the ArangoDB
# attribute name unchanged — confirm against adapter docs.
"game": {"features"}
},
"edgeTypes": {
("user", "plays", "game"): {
# A list of strings: presumably one ArangoDB attribute name per column
# of the 2-column "features" tensor — confirm against adapter docs.
"features": ["hours_played", "is_satisfied_with_game"]
},
},
}
def label_tensor_to_2_column_dataframe(dgl_tensor: torch.Tensor, adb_df: pandas.DataFrame) -> pandas.DataFrame:
    """Turn the 'user' label tensor into two ArangoDB attributes.

    A user-defined function that writes the raw numeric label to a
    ``label_num`` column and its human-readable class name to a
    ``label_str`` column. The DataFrame is modified in place and returned.

    :param dgl_tensor: The DGL Tensor containing the label data
    :type dgl_tensor: torch.Tensor
    :param adb_df: The ArangoDB DataFrame to populate, whose
        size is preset to the length of **dgl_tensor**.
    :type adb_df: pandas.DataFrame
    :return: The populated ArangoDB DataFrame
    :rtype: pandas.DataFrame
    """
    class_names = {0: "Class A", 1: "Class B", 2: "Class C"}

    numeric_labels = dgl_tensor.tolist()
    adb_df["label_num"] = numeric_labels

    # Derive the string column from the stored numeric column so both
    # columns share the DataFrame's index.
    stored_labels = adb_df["label_num"]
    adb_df["label_str"] = stored_labels.map(class_names)

    return adb_df
# Same export, now driven by the metagraph.
# NOTE(review): explicit_metagraph presumably decides whether node/edge types
# and attributes that are NOT listed in the metagraph are still transferred
# (False) or left out (True) — confirm against the adapter documentation.
adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero, metagraph, explicit_metagraph=False)
adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero, metagraph, explicit_metagraph=True)
class Custom_ADBDGL_Controller(ADBDGL_Controller):
    """Example controller that tags every DGL node and edge with an extra key."""

    def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict:
        """Hook run on a DGL node before it is inserted into its designated ArangoDB collection.

        :param dgl_node: The DGL node object; modified in place.
        :param node_type: The DGL Node Type of the node (not used by this example).
        :return: The same DGL Node object, with ``"foo"`` set to ``"bar"``.
        """
        dgl_node.update({"foo": "bar"})
        return dgl_node

    def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict:
        """Hook run on a DGL edge before it is inserted into its designated ArangoDB collection.

        :param dgl_edge: The DGL edge object; modified in place.
        :param edge_type: The Edge Type of the DGL edge, formatted as
            (from_collection, edge_collection, to_collection). Not used
            by this example.
        :return: The same DGL Edge object, with ``"bar"`` set to ``"foo"``.
        """
        dgl_edge.update({"bar": "foo"})
        return dgl_edge
# Build a second adapter, passing a Custom_ADBDGL_Controller instance as the
# second argument, and export the same graph with it. Presumably the adapter
# then calls the controller's _prepare_dgl_node / _prepare_dgl_edge hooks for
# each document — confirm against the adapter documentation.
adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb("FakeHetero", fake_hetero)
ArangoDB to DGL
# Start from a clean state: remove the "FakeHetero" graph together with its
# collections (ignoring the case where it does not exist), then re-create it
# from the DGL graph so there is something to read back.
db.delete_graph("FakeHetero", drop_collections=True, ignore_missing=True)
adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero)
# Option 1: import by ArangoDB graph name.
dgl_g = adbdgl_adapter.arangodb_graph_to_dgl("FakeHetero")
# Option 2: import by naming the vertex and edge collections explicitly.
dgl_g = adbdgl_adapter.arangodb_collections_to_dgl("FakeHetero", v_cols={"user", "game"}, e_cols={"plays"})
# Option 3: import with a metagraph keyed by ArangoDB collection name.
# NOTE(review): the exact meaning of each value form below is not shown in
# this document — confirm against the adapter documentation.
metagraph_v1 = {
"vertexCollections": {
# Set of names: presumably copies these attributes under the same name.
"user": {"features", "label"},
# Dict: presumably maps DGL data key -> ArangoDB attribute name.
"game": {"dgl_game_features": "features"},
# Empty: the collection is listed, but no attributes are requested.
"topic": {},
},
"edgeCollections": {
"plays": {"dgl_plays_features": "features"},
"follows": {}
},
}
dgl_g = adbdgl_adapter.arangodb_to_dgl("FakeHetero", metagraph_v1)
# Metagraph that uses encoder objects from adbdgl_adapter.encoders.
# NOTE(review): this example targets a graph named "imdb" with "Movies",
# "Users" and "Ratings" collections; nothing in this document creates that
# graph, so it must already exist in the database for this call to work.
metagraph_v2 = {
"vertexCollections": {
"Movies": {
# Presumably builds the "features" tensor from the listed attributes,
# each passed through its encoder — confirm against adapter docs.
"features": {
"Action": IdentityEncoder(dtype=torch.long),
"Drama": IdentityEncoder(dtype=torch.long),
},
# Presumably uses the "Comedy" attribute as the "label" data.
"label": "Comedy",
},
"Users": {
"features": {
"Gender": CategoricalEncoder(),
"Age": IdentityEncoder(dtype=torch.long),
}
},
},
# Presumably exposes the "Rating" attribute as the "weight" edge data.
"edgeCollections": {"Ratings": {"weight": "Rating"}},
}
dgl_g = adbdgl_adapter.arangodb_to_dgl("imdb", metagraph_v2)
# Metagraph whose values are callables: each one receives a pandas DataFrame
# and returns a torch.Tensor (see the lambdas and the UDF signatures).
# NOTE(review): udf_user_features and udf_game_features are defined *after*
# this dict in this document; when running the snippets as one script, define
# them first or this assignment raises NameError.
metagraph_v3 = {
"vertexCollections": {
"user": {
"features": udf_user_features,
"label": lambda df: torch.tensor(df["label"].to_list()),
},
"game": {"features": udf_game_features},
},
"edgeCollections": {
"plays": {"features": (lambda df: torch.tensor(df["features"].to_list()))},
},
}
def udf_user_features(user_df: pandas.DataFrame) -> torch.Tensor:
    """Build a tensor from the ``features`` column of the user DataFrame.

    :param user_df: DataFrame that contains a ``features`` column.
    :return: A tensor created from that column's values.
    """
    feature_rows = user_df["features"].to_list()
    return torch.tensor(feature_rows)
def udf_game_features(game_df: pandas.DataFrame) -> torch.Tensor:
    """Build a tensor from the ``features`` column of the game DataFrame.

    :param game_df: DataFrame that contains a ``features`` column.
    :return: A tensor created from that column's values.
    """
    feature_rows = game_df["features"].to_list()
    return torch.tensor(feature_rows)
# Import "FakeHetero" into DGL using the callable-based metagraph.
dgl_g = adbdgl_adapter.arangodb_to_dgl("FakeHetero", metagraph_v3)
Development & Testing
Prerequisite: arangorestore
git clone https://github.com/arangoml/dgl-adapter.git
cd dgl-adapter
- (create virtual environment of choice)
pip install -e .[dev]
- (create an ArangoDB instance with method of choice)
pytest --url <> --dbName <> --username <> --password <>
Note: A pytest parameter can be omitted if the endpoint is using its default value:
def pytest_addoption(parser):
    """Register the connection options for the test suite on ``parser``.

    Each option stores its value and falls back to the default listed
    below when it is omitted from the command line.

    :param parser: Object exposing ``addoption``, as passed in by pytest.
    """
    option_defaults = (
        ("--url", "http://localhost:8529"),
        ("--dbName", "_system"),
        ("--username", "root"),
        ("--password", ""),
    )
    for flag, fallback in option_defaults:
        parser.addoption(flag, action="store", default=fallback)