# examples.berkeleydb_example — rdflib 7.1.4 documentation (original) (raw)

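"""BerkeleyDB in use as a persistent Graph store.

Example 1: simple actions (create a store, add and count triples, close the
store, then re-open it and count the triples again)

Example 2: larger data (load multiple vocabularies downloaded from GitHub
into a BerkeleyDB-backed graph)
"""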

import os
import tempfile

from rdflib import ConjunctiveGraph, Literal, Namespace
from rdflib.plugins.stores.berkeleydb import has_bsddb
from rdflib.store import NO_STORE, VALID_STORE

def example_1():
    """Creates a ConjunctiveGraph and performs some BerkeleyDB tasks with it

    The store is created in a fresh temporary location, two triples are added
    and committed, the graph is printed as Turtle, and the store is closed and
    re-opened to show that the triples persisted. The temporary location is
    removed afterwards, even if one of the steps fails.
    """
    # Keep the database in a not-yet-existing subfolder of a private temporary
    # directory: the path is guaranteed to be free (so the first open reports
    # NO_STORE), and everything is removed when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "berkeleydb_example")

        # Declare we are using a BerkeleyDB Store
        graph = ConjunctiveGraph("BerkeleyDB")

        # Open previously created store, or create it if it doesn't exist yet
        # (always doesn't exist in this example as using temp file location)
        rt = graph.open(path, create=False)

        if rt == NO_STORE:
            # There is no underlying BerkeleyDB infrastructure, so create it
            print("Creating new DB")
            graph.open(path, create=True)
        else:
            print("Using existing DB")
            assert rt == VALID_STORE, "The underlying store is corrupt"

        try:
            print("Triples in graph before add:", len(graph))
            print("(will always be 0 when using temp file for DB)")

            # Now we'll add some triples to the graph & commit the changes
            EG = Namespace("http://example.net/test/")  # noqa: N806
            graph.bind("eg", EG)

            graph.add((EG["pic:1"], EG.name, Literal("Jane & Bob")))
            graph.add((EG["pic:2"], EG.name, Literal("Squirrel in Tree")))

            graph.commit()

            print("Triples in graph after add:", len(graph))
            print("(should be 2)")

            # display the graph in Turtle
            print(graph.serialize())
        finally:
            # close when done, otherwise BerkeleyDB will leak lock entries.
            graph.close()

        # reopen the graph
        graph = ConjunctiveGraph("BerkeleyDB")
        graph.open(path, create=False)
        try:
            print("Triples still in graph:", len(graph))
            print("(should still be 2)")
        finally:
            graph.close()

    # Leaving the ``with`` block cleans up the temp folder, removing the
    # BerkeleyDB database files.

def example_2():
    """Loads a number of SKOS vocabularies from GitHub into a BerkeleyDB-backed graph stored in the local folder 'gsq_vocabs'

    Should print out the number of triples after each load, e.g.:
        177
        248
        289
        379
        421
        628
        764
        813
        965
        1381
        9666
        9719
        ...

    The database folder is not deleted afterwards; the store is only closed.

    Returns:
        ``None`` when loading completes. If the request for the repository
        listing fails with an ``HTTPError``, nothing is loaded and the tuple
        ``(status_code, error_message, None)`` is returned instead.
    """
    import base64
    import json
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen

    g = ConjunctiveGraph("BerkeleyDB")
    # Folder name matches the docstring and the GSQ ("Geological Survey of
    # Queensland") source; the previous literal "gsg_vocabs" was a typo.
    g.open("gsq_vocabs", create=True)

    try:
        # gsq_vocabs = "https://api.github.com/repos/geological-survey-of-queensland/vocabularies/git/trees/master"
        gsq_vocabs = "https://api.github.com/repos/geological-survey-of-queensland/vocabularies/git/trees/cd7244d39337c1f4ef164b1cf1ea1f540a7277db"
        try:
            res = urlopen(Request(gsq_vocabs, headers={"Accept": "application/json"}))
        except HTTPError as e:
            return e.code, str(e), None

        # Close the HTTP response as soon as its body has been read
        with res:
            data = res.read()
            encoding = res.info().get_content_charset("utf-8")
        j = json.loads(data.decode(encoding))

        for v in j["tree"]:
            # process the element in GitHub result if it's a Turtle file
            if v["path"].endswith(".ttl"):
                # for each file, call it by URL, decode it and parse it into the graph
                with urlopen(v["url"]) as r:
                    content = json.loads(r.read().decode())["content"]
                g.parse(data=base64.b64decode(content).decode(), format="turtle")
                print(len(g))

        print("loading complete")
    finally:
        # close when done, otherwise BerkeleyDB will leak lock entries.
        g.close()

if __name__ == "__main__":
    if has_bsddb:
        # Only run the examples if BerkeleyDB is available
        example_1()
        example_2()