Fix prov ns by nicholascar · Pull Request #1318 · RDFLib/rdflib (original) (raw)

For reference, here is the exact test I ran against it:

def test_prov(self) -> None:
    """Compare the terms exposed by ``PROV`` with those found in ``prov.ttl``.

    The Turtle file ``data/prov.ttl`` (resolved relative to
    ``SCRIPT_PATH.parent``) is parsed and queried for every IRI in the
    ``http://www.w3.org/ns/prov#`` namespace that occurs as a subject,
    together with its optional ``rdfs:isDefinedBy`` value.

    Two subset relations are then asserted:

    * every (term, ontology) pair defined by ``http://www.w3.org/ns/prov-o#``
      is among the pairs built from ``dir(PROV)``;
    * every pair built from ``dir(PROV)`` is among the pairs found in the
      ontology file.
    """
    data_file = SCRIPT_PATH.parent / "data" / "prov.ttl"
    prov_graph = Graph()
    prov_graph.parse(source=(data_file), format="turtle")

    # NOTE(review): the query uses the ``rdfs:`` prefix without declaring it;
    # presumably it is resolved from the namespaces bound on the graph --
    # confirm.
    sparql = r"""
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        SELECT DISTINCT ?term ?ontology WHERE {
            ?term ?p [].
            # MINUS { ?term rdf:type owl:AnnotationProperty }
            FILTER isIRI(?term).
            FILTER strStarts(str(?term), "http://www.w3.org/ns/prov#").
            FILTER (str(?term) != "http://www.w3.org/ns/prov#")
            OPTIONAL { ?term rdfs:isDefinedBy ?ontology }
        }
        """
    # Map each term to its ontology; when a term appears in several result
    # rows, the value from the last row wins.
    ontology_by_term = {
        result["term"]: result["ontology"] for result in prov_graph.query(sparql)
    }
    owl_term_set = set(ontology_by_term.items())

    # Pairs whose ontology is PROV-O, i.e. terms matching:
    #   [] rdfs:isDefinedBy  <http://www.w3.org/ns/prov-o#>
    provo_uri = URIRef("http://www.w3.org/ns/prov-o#")
    provo_term_set = {pair for pair in owl_term_set if pair[1] == provo_uri}

    # Pairs built from the names listed by dir(PROV); the ontology is None
    # for any term that the query did not return.
    rdflib_term_set = {
        (PROV[name], ontology_by_term.get(PROV[name])) for name in dir(PROV)
    }

    self.assertLessEqual(provo_term_set, rdflib_term_set)
    self.assertGreaterEqual(owl_term_set, rdflib_term_set)

I wouldn't mind making a PR for it, but the test is rather slow and it does not necessarily make sense to run it every time. It could perhaps be made conditional on an environment variable like TEST_PEDANTIC or TEST_SLOW, so that the test runs only if either of those is set.