Triples from the default graph in RDF documents do not go into the default_context of Dataset or ConjunctiveGraph · Issue #2404 · RDFLib/rdflib (original) (raw)

I expect triples from the default graph of an RDF document to be loaded into the default context (default graph) of a Dataset or ConjunctiveGraph, but they are not.

Given my expectation, this test should pass:

import logging from typing import Type, Union

import pytest from pytest_check import check

from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, ConjunctiveGraph, Dataset, Graph from rdflib.term import BNode, URIRef

# TriG document used as the test fixture. It holds three triples in the
# default graph (one inside an anonymous `{ ... }` block, two written bare)
# and one triple in each of the named graphs egdo:g0 and egdo:g1.
#
# The namespace IRI in the @prefix declaration must be wrapped in angle
# brackets; without them the document is not valid TriG and cannot be parsed.
RDF_DOCUMENT = """
@prefix egdo: <http://example.org/> .

{
    egdo:gds0 egdo:note "This is a triple inside the default graph." .
}

egdo:gds0 egdo:note "This is an additional note on a triple which is in the default graph." .

egdo:gds1 egdo:note "This is a triple inside the default graph." .

egdo:g0 {
    egdo:g0s0 egdo:note "This is a triple inside a graph named http://example.org/g0." .
}

egdo:g1 {
    egdo:g1s0 egdo:note "This is a triple inside a graph named http://example.org/g1." .
}
"""

@pytest.mark.parametrize(
    ["container_type"],
    [
        (Dataset,),
        (ConjunctiveGraph,),
    ],
)
def test_issue(container_type: Union[Type[Dataset], Type[ConjunctiveGraph]]) -> None:
    """Check that default-graph triples of a TriG document land in ``default_context``.

    The test creates an empty container of ``container_type``, parses
    ``RDF_DOCUMENT`` into it, and then checks that:

    * exactly two contexts other than the default context exist,
    * those contexts are identified by ``http://example.org/g0`` and
      ``http://example.org/g1``,
    * the default context holds the three default-graph triples.

    Each expectation is wrapped in ``with check:`` so that a failing
    expectation is recorded without preventing the remaining ones from running.

    :param container_type: The class to instantiate, either ``Dataset`` or
        ``ConjunctiveGraph``.
    """
    logging.debug("container_type = %s", container_type)
    container = container_type()

    if container_type is Dataset:
        # An empty dataset has 1 default graph and no named graphs, so 1 graph
        # in total.
        with check:
            assert 1 == sum(1 for _ in container.contexts())
        with check:
            assert DATASET_DEFAULT_GRAPH_ID == next(
                (context.identifier for context in container.contexts()), None
            )
        with check:
            assert container.default_context == next(container.contexts(), None)
    else:
        with check:
            assert isinstance(container.default_context.identifier, BNode)

    # Load an RDF document with triples in three graphs into the container.
    container.parse(data=RDF_DOCUMENT, format="trig")

    context_identifiers = {context.identifier for context in container.contexts()}

    logging.info("context_identifiers = %s", context_identifiers)
    logging.info(
        "container.default_context.triples(...) = %s",
        set(container.default_context.triples((None, None, None))),
    )

    all_contexts = set(container.contexts())
    # Materialize the identifiers before logging: passing a generator
    # expression would only log "<generator object ...>".
    logging.info(
        "all_contexts = %s", {context.identifier for context in all_contexts}
    )

    non_default_contexts = all_contexts - {container.default_context}
    logging.info(
        "non_default_graphs = %s",
        {context.identifier for context in non_default_contexts},
    )
    with check:
        # There should now be two graphs in the container that are not the
        # default graph.
        assert 2 == len(non_default_contexts)

    with check:
        # The identifiers of the non-default graphs should be the ones from
        # the document. The default graph identifier must not be listed here:
        # the default context was removed from the set above, and a third
        # expected identifier would contradict the two-graph check.
        assert {
            URIRef("http://example.org/g0"),
            URIRef("http://example.org/g1"),
        } == {context.identifier for context in non_default_contexts}

    with check:
        # The default graph should have 3 triples.
        assert 3 == len(container.default_context)

However, when I run it against the main branch, it fails for both Dataset and ConjunctiveGraph:

$ poetry run pytest --log-level DEBUG '--tb=native' tmp/test_issue.py ============================================================================ test session starts ============================================================================ platform linux -- Python 3.11.3, pytest-7.3.1, pluggy-1.0.0 rootdir: /home/iwana/sw/d/github.com/iafork/rdflib.change_c configfile: pyproject.toml plugins: cov-4.0.0, check-2.1.4 collected 2 items

tmp/test_issue.py FF [100%]

================================================================================= FAILURES ================================================================================== ____________________________________________________________________________ test_issue[Dataset] ____________________________________________________________________________ FAILURE: assert 2 == 3

FAILURE: assert {rdflib.term....lib:default')} == {rdflib.term....mple.org/g1')} Extra items in the left set: rdflib.term.URIRef('urn:x-rdflib:default') Extra items in the right set: rdflib.term.BNode('N9153ac77fb8243faac7831b44ec24a62') Use -v to get more diff FAILURE: assert 3 == 0 + where 0 = len(<Graph identifier=urn:x-rdflib:default (<class 'rdflib.graph.Graph'>)>) + where <Graph identifier=urn:x-rdflib:default (<class 'rdflib.graph.Graph'>)> = <Graph identifier=N93bafe80e9974e39bd283121bc421bfc (<class 'rdflib.graph.Dataset'>)>.default_context

Failed Checks: 3 ----------------------------------------------------------------------------- Captured log call ----------------------------------------------------------------------------- 2023-05-23T00:06:06.530 DEBUG root test_issue.py:39:test_issue container_type = <class 'rdflib.graph.Dataset'> 2023-05-23T00:06:06.533 INFO root test_issue.py:60:test_issue context_identifiers = {rdflib.term.BNode('N9153ac77fb8243faac7831b44ec24a62'), rdflib.term.URIRef('urn:x-rdflib:default'), rdflib.term.URIRef('http://example.org/g1'), rdflib.term.URIRef('http://example.org/g0')} 2023-05-23T00:06:06.533 INFO root test_issue.py:61:test_issue container.default_context.triples(...) = set() 2023-05-23T00:06:06.533 INFO root test_issue.py:67:test_issue all_contexts = <generator object test_issue.. at 0x7f2f5ddb64d0> 2023-05-23T00:06:06.533 INFO root test_issue.py:72:test_issue non_default_graphs = <generator object test_issue.. at 0x7f2f5ddb6670> _______________________________________________________________________ test_issue[ConjunctiveGraph] ________________________________________________________________________ FAILURE: assert 2 == 3

FAILURE: assert {rdflib.term....lib:default')} == {rdflib.term....mple.org/g1')} Extra items in the left set: rdflib.term.URIRef('urn:x-rdflib:default') Extra items in the right set: rdflib.term.BNode('N30a66c45d1ed43ae9a77ad56db6701a2') Use -v to get more diff FAILURE: assert 3 == 0 + where 0 = len(<Graph identifier=Nab52a9aa38b8413bb5d93ea446c39f47 (<class 'rdflib.graph.Graph'>)>) + where <Graph identifier=Nab52a9aa38b8413bb5d93ea446c39f47 (<class 'rdflib.graph.Graph'>)> = <Graph identifier=N53736eed43af4da9a0b81453e5dace35 (<class 'rdflib.graph.ConjunctiveGraph'>)>.default_context

Failed Checks: 3 ----------------------------------------------------------------------------- Captured log call ----------------------------------------------------------------------------- 2023-05-23T00:06:06.548 DEBUG root test_issue.py:39:test_issue container_type = <class 'rdflib.graph.ConjunctiveGraph'> 2023-05-23T00:06:06.548 INFO root test_issue.py:60:test_issue context_identifiers = {rdflib.term.BNode('N30a66c45d1ed43ae9a77ad56db6701a2'), rdflib.term.URIRef('http://example.org/g1'), rdflib.term.URIRef('http://example.org/g0')} 2023-05-23T00:06:06.548 INFO root test_issue.py:61:test_issue container.default_context.triples(...) = set() 2023-05-23T00:06:06.548 INFO root test_issue.py:67:test_issue all_contexts = <generator object test_issue.. at 0x7f2f5ddb6670> 2023-05-23T00:06:06.548 INFO root test_issue.py:72:test_issue non_default_graphs = <generator object test_issue.. at 0x7f2f5ddb6a80> ========================================================================== short test summary info ========================================================================== FAILED tmp/test_issue.py::test_issue[Dataset] FAILED tmp/test_issue.py::test_issue[ConjunctiveGraph] ============================================================================= 2 failed in 0.06s =============================================================================