Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct datatypes for string expressions #1636

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
47feb71
STRLEN für UTF 8 angepasst
DuDaAG Oct 24, 2024
4877e8f
Test
DuDaAG Oct 24, 2024
aefd889
test rückgäning
DuDaAG Oct 24, 2024
b2cb8c6
find pull-request
DuDaAG Oct 25, 2024
96b1959
Fix test
DuDaAG Oct 25, 2024
b7806b8
Update src/engine/sparqlExpressions/StringExpressions.cpp
DuDaAG Oct 31, 2024
a83d74b
Update src/engine/sparqlExpressions/StringExpressions.cpp
DuDaAG Oct 31, 2024
e73d1ab
Format
Nov 1, 2024
0ed79df
new LiteralOrIriValueGetter
Nov 13, 2024
2666b78
Merge branch 'my-branch' into master
DuDaAG Nov 13, 2024
27fff04
Merge pull request #3 from DuDaAG/master
DuDaAG Nov 13, 2024
7078f60
idToLiteralAndIri with specifiactions
Nov 22, 2024
2581f4c
some fixes
Nov 22, 2024
6d7a2b2
Add Test IdToLiteralOrIri and some formatting
Nov 24, 2024
5948dcb
formatting
Nov 24, 2024
52ef1f5
Correction for sonar
Nov 24, 2024
f15bf94
SubStr improvements
Nov 29, 2024
774d52b
fix
Nov 29, 2024
d4b49c0
little changes
Nov 30, 2024
72aaa00
Feedback implemented
Dec 7, 2024
25000a9
format
Dec 7, 2024
617c3b7
New position codespell-ignore
Dec 7, 2024
f631ec2
delete codespell-ignore
Dec 7, 2024
889e9dd
UTF8 handling in subStr
Dec 7, 2024
0c41603
format
Dec 7, 2024
1b7e1b4
Add runtime error
Dec 12, 2024
344560a
syntax
Dec 12, 2024
67c747a
fix
Dec 12, 2024
313bba4
T
Dec 12, 2024
be80b09
add exceptions
Dec 12, 2024
2adaa30
nix
Dec 14, 2024
39ca3cb
Merge branch 'master' into Correct-Datatypes-for-StringExpressions
joka921 Dec 18, 2024
7455f29
idToLiteral without Iri
Jan 6, 2025
ac95531
Merge branch 'ad-freiburg:master' into Correct-Datatypes-for-StringEx…
DuDaAG Jan 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 22 additions & 24 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,8 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) {
}

// _____________________________________________________________________________
std::optional<LiteralOrIri>
ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue(
std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteralForEncodedValue(
Id id, bool onlyReturnLiteralsWithXsdString) {
if (onlyReturnLiteralsWithXsdString) {
return std::nullopt;
Expand All @@ -361,7 +361,8 @@ ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue(
return std::nullopt;
}

return LiteralOrIri::literalWithoutQuotes(optionalStringAndType->first);
return ad_utility::triple_component::Literal::literalWithoutQuotes(
optionalStringAndType->first);
}

// _____________________________________________________________________________
Expand All @@ -372,28 +373,25 @@ bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(
}

// _____________________________________________________________________________
std::optional<LiteralOrIri> ExportQueryExecutionTrees::handleIriOrLiteral(
LiteralOrIri word, bool onlyReturnLiterals,
bool onlyReturnLiteralsWithXsdString) {
std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::handleIriOrLiteral(
LiteralOrIri word, bool onlyReturnLiteralsWithXsdString) {
if (!word.isLiteral()) {
if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) {
AD_THROW("The input is an IRI, but only literals are allowed.");
return std::nullopt;
}
return word;
AD_THROW("The input is an IRI, but only literals are allowed.");
return std::nullopt;
}

if (onlyReturnLiteralsWithXsdString) {
if (isPlainLiteralOrLiteralWithXsdString(word)) {
return word;
return word.getLiteral();
}
return std::nullopt;
}

if (word.hasDatatype() && !isPlainLiteralOrLiteralWithXsdString(word)) {
word.getLiteral().removeDatatype();
}
return word;
return word.getLiteral();
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -463,9 +461,10 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,

// _____________________________________________________________________________
template <bool onlyReturnLiterals>
std::optional<LiteralOrIri> ExportQueryExecutionTrees::idToLiteralOrIri(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString) {
std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteral(const Index& index, Id id,
const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString) {
using enum Datatype;
auto datatype = id.getDatatype();

Expand All @@ -477,19 +476,18 @@ std::optional<LiteralOrIri> ExportQueryExecutionTrees::idToLiteralOrIri(

switch (datatype) {
case WordVocabIndex:
return LiteralOrIri::literalWithoutQuotes(
return ad_utility::triple_component::Literal::literalWithoutQuotes(
index.indexToString(id.getWordVocabIndex()));
case VocabIndex:
case LocalVocabIndex:
return handleIriOrLiteral(
getLiteralOrIriFromVocabIndex(index, id, localVocab),
onlyReturnLiterals, onlyReturnLiteralsWithXsdString);
onlyReturnLiteralsWithXsdString);
case TextRecordIndex:
AD_THROW("TextRecordIndex case is not implemented.");
return std::nullopt;
default:
return idToLiteralOrIriForEncodedValue(id,
onlyReturnLiteralsWithXsdString);
return idToLiteralForEncodedValue(id, onlyReturnLiteralsWithXsdString);
}
}

Expand All @@ -515,14 +513,14 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
std::identity&& escapeFunction);

// ___________________________________________________________________________
template std::optional<LiteralOrIri>
ExportQueryExecutionTrees::idToLiteralOrIri<false>(
template std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteral<false>(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString);

// ___________________________________________________________________________
template std::optional<LiteralOrIri>
ExportQueryExecutionTrees::idToLiteralOrIri<true>(
template std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteral<true>(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString);

Expand Down
12 changes: 6 additions & 6 deletions src/engine/ExportQueryExecutionTrees.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class ExportQueryExecutionTrees {
// return 'std::nullopt'. These semantics are useful for the string
// expressions in StringExpressions.cpp.
template <bool returnOnlyLiterals = false>
static std::optional<LiteralOrIri> idToLiteralOrIri(
static std::optional<ad_utility::triple_component::Literal> idToLiteral(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inside the `ExportQueryExecutionTrees` class you can add a
`using Literal = ad_utility::triple_component::Literal;`.
Then you can consistently write `Literal` in most of the places
(maybe also repeat the `using` inside the .cpp file; then it even works in the return types etc.).

const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString = false);

Expand All @@ -88,14 +88,14 @@ class ExportQueryExecutionTrees {
// If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`.
// If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from
// literals (e.g. the integer `42` is converted to the plain literal `"42"`).
static std::optional<LiteralOrIri> idToLiteralOrIriForEncodedValue(
Id id, bool onlyReturnLiteralsWithXsdString = false);
static std::optional<ad_utility::triple_component::Literal>
idToLiteralForEncodedValue(Id id,
bool onlyReturnLiteralsWithXsdString = false);

// A helper function for the `idToLiteral` function. Checks and processes
// a LiteralOrIri based on the given parameters.
static std::optional<LiteralOrIri> handleIriOrLiteral(
LiteralOrIri word, bool onlyReturnLiterals,
bool onlyReturnLiteralsWithXsdString);
static std::optional<ad_utility::triple_component::Literal>
handleIriOrLiteral(LiteralOrIri word, bool onlyReturnLiteralsWithXsdString);

// Checks if a LiteralOrIri is either a plain literal (without datatype)
// or a literal with the `xsd:string` datatype.
Expand Down
22 changes: 11 additions & 11 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,26 +91,26 @@ std::optional<std::string> StringValueGetter::operator()(
}

// ____________________________________________________________________________
std::optional<LiteralOrIri> LiteralOrIriValueGetter::operator()(
Id id, const EvaluationContext* context) const {
return ExportQueryExecutionTrees::idToLiteralOrIri(context->_qec.getIndex(),
id, context->_localVocab);
std::optional<ad_utility::triple_component::Literal>
LiteralValueGetter::operator()(Id id, const EvaluationContext* context) const {
return ExportQueryExecutionTrees::idToLiteral(context->_qec.getIndex(), id,
context->_localVocab);
}

// ____________________________________________________________________________
std::optional<LiteralOrIri>
LiteralOrIriValueGetterWithXsdStringFilter::operator()(
std::optional<ad_utility::triple_component::Literal>
LiteralValueGetterWithXsdStringFilter::operator()(
Id id, const EvaluationContext* context) const {
return ExportQueryExecutionTrees::idToLiteralOrIri(
context->_qec.getIndex(), id, context->_localVocab, true);
return ExportQueryExecutionTrees::idToLiteral(context->_qec.getIndex(), id,
context->_localVocab, true);
}

// ____________________________________________________________________________
std::optional<LiteralOrIri>
LiteralOrIriValueGetterWithXsdStringFilter::operator()(
std::optional<ad_utility::triple_component::Literal>
LiteralValueGetterWithXsdStringFilter::operator()(
const LiteralOrIri& s, const EvaluationContext*) const {
if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) {
return s;
return s.getLiteral();
}
AD_THROW("Input is not a plain string or xsd:string.");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a debug output.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should definitely be removed.

return std::nullopt;
Expand Down
28 changes: 14 additions & 14 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,29 +143,29 @@ struct StringValueGetter : Mixin<StringValueGetter> {

// This class can be used as the `ValueGetter` argument of Expression
// templates. It produces a `Literal`.
struct LiteralOrIriValueGetter : Mixin<LiteralOrIriValueGetter> {
using Mixin<LiteralOrIriValueGetter>::operator();
struct LiteralValueGetter : Mixin<LiteralValueGetter> {
using Mixin<LiteralValueGetter>::operator();

std::optional<LiteralOrIri> operator()(ValueId,
const EvaluationContext*) const;
std::optional<ad_utility::triple_component::Literal> operator()(
ValueId, const EvaluationContext*) const;

std::optional<LiteralOrIri> operator()(const LiteralOrIri& s,
const EvaluationContext*) const {
return s;
std::optional<ad_utility::triple_component::Literal> operator()(
const LiteralOrIri& s, const EvaluationContext*) const {
return s.getLiteral();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This most definitely doesn't work; you have to turn IRIs into Literals here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something you can (and probably should, as this is the core of your project) add:
dedicated unit tests for the ValueGetters.
(Add and register a new ValueGetterTest.cpp test file and test all cases for the value getters there.)

}
};

// Same as above but only literals with 'xsd:string' datatype or no datatype are
// returned.
struct LiteralOrIriValueGetterWithXsdStringFilter
: Mixin<LiteralOrIriValueGetterWithXsdStringFilter> {
using Mixin<LiteralOrIriValueGetterWithXsdStringFilter>::operator();
struct LiteralValueGetterWithXsdStringFilter
: Mixin<LiteralValueGetterWithXsdStringFilter> {
using Mixin<LiteralValueGetterWithXsdStringFilter>::operator();

std::optional<LiteralOrIri> operator()(ValueId,
const EvaluationContext*) const;
std::optional<ad_utility::triple_component::Literal> operator()(
ValueId, const EvaluationContext*) const;

std::optional<LiteralOrIri> operator()(const LiteralOrIri& s,
const EvaluationContext*) const;
std::optional<ad_utility::triple_component::Literal> operator()(
const LiteralOrIri& s, const EvaluationContext*) const;
};

// Value getter for `isBlank`.
Expand Down
104 changes: 51 additions & 53 deletions src/engine/sparqlExpressions/StringExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,49 @@ class StringExpressionImpl : public SparqlExpression {
}
};

// Counterpart of the `StringExpressionImpl` above for functions whose first
// argument is a literal: that argument is obtained via `LiteralValueGetter` or
// `LiteralValueGetterWithXsdStringFilter`, the remaining `N - 1` arguments via
// the `AdditionalNonStringValueGetters`. Which of the two literal getters is
// used is decided once, at construction time, by checking whether the first
// child is a `STR()` expression.
template <size_t N, typename Function,
          typename... AdditionalNonStringValueGetters>
class LiteralExpressionImpl : public SparqlExpression {
 private:
  // Chosen when the first child is a `STR()` expression.
  using ExpressionWithStr = NARY<
      N, FV<Function, LiteralValueGetter, AdditionalNonStringValueGetters...>>;
  // Chosen for every other first child.
  using ExpressionWithoutStr =
      NARY<N, FV<Function, LiteralValueGetterWithXsdStringFilter,
                 AdditionalNonStringValueGetters...>>;

  // The expression that does the actual work; all virtual calls are forwarded
  // to it.
  SparqlExpression::Ptr impl_;

 public:
  // `child` is the first (literal) argument, `children` are the remaining
  // `N - 1` arguments. `child` must not be null.
  explicit LiteralExpressionImpl(
      SparqlExpression::Ptr child,
      std::same_as<SparqlExpression::Ptr> auto... children)
      requires(sizeof...(children) + 1 == N) {
    AD_CORRECTNESS_CHECK(child != nullptr);

    // Not wrapped in `STR()`: pass the child through unchanged.
    if (!child->isStrExpression()) {
      impl_ = std::make_unique<ExpressionWithoutStr>(std::move(child),
                                                     std::move(children)...);
      return;
    }

    // Wrapped in `STR()`: drop the wrapper and use its single argument as the
    // first child of the implementation.
    auto strArguments = std::move(*child).moveChildrenOut();
    AD_CORRECTNESS_CHECK(strArguments.size() == 1);
    impl_ = std::make_unique<ExpressionWithStr>(std::move(strArguments.at(0)),
                                                std::move(children)...);
  }

  // Evaluation is delegated to the wrapped implementation.
  ExpressionResult evaluate(EvaluationContext* context) const override {
    return impl_->evaluate(context);
  }

  // The cache key is the one of the wrapped implementation.
  std::string getCacheKey(const VariableToColumnMap& varColMap) const override {
    return impl_->getCacheKey(varColMap);
  }

 private:
  // The children are those of the wrapped implementation.
  std::span<SparqlExpression::Ptr> childrenImpl() override {
    return impl_->children();
  }
};

// Lift a `Function` that takes one or multiple `std::string`s (possibly via
// references) and returns an `Id` or `std::string` to a function that takes the
// same number of `std::optional<std::string>` and returns `Id` or
Expand Down Expand Up @@ -201,8 +244,9 @@ class SubstrImpl {
};

public:
IdOrLiteralOrIri operator()(std::optional<LiteralOrIri> s, NumericValue start,
NumericValue length) const {
IdOrLiteralOrIri operator()(
std::optional<ad_utility::triple_component::Literal> s,
NumericValue start, NumericValue length) const {
if (!s.has_value() || std::holds_alternative<NotNumeric>(start) ||
std::holds_alternative<NotNumeric>(length)) {
return Id::makeUndefined();
Expand Down Expand Up @@ -243,60 +287,14 @@ class SubstrImpl {
std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt);
std::size_t byteLength = endByteOffset - startByteOffset;

s.value().getLiteral().setSubstr(startByteOffset, byteLength);
return std::move(s.value());
}
};

// Implementation of the `SUBSTR` SPARQL function. It dynamically
// selects the appropriate value getter for the first argument based on whether
// it is a `STR()` expression (using
// `LiteralOrIriValueGetterWithXsdStringFilter`) or another type (using
// `LiteralOrIriValueGetter`).
class SubstrExpressionImpl : public SparqlExpression {
private:
using ExpressionWithStr =
NARY<3, FV<SubstrImpl, LiteralOrIriValueGetterWithXsdStringFilter,
NumericValueGetter, NumericValueGetter>>;
using ExpressionWithoutStr =
NARY<3, FV<SubstrImpl, LiteralOrIriValueGetter, NumericValueGetter,
NumericValueGetter>>;

SparqlExpression::Ptr impl_;

public:
explicit SubstrExpressionImpl(
SparqlExpression::Ptr child,
std::same_as<SparqlExpression::Ptr> auto... children)
requires(sizeof...(children) + 1 == 3) {
AD_CORRECTNESS_CHECK(child != nullptr);

if (child->isStrExpression()) {
auto childrenOfStr = std::move(*child).moveChildrenOut();
AD_CORRECTNESS_CHECK(childrenOfStr.size() == 1);
impl_ = std::make_unique<ExpressionWithStr>(
std::move(childrenOfStr.at(0)), std::move(children)...);
} else {
impl_ = std::make_unique<ExpressionWithoutStr>(std::move(child),
std::move(children)...);
}
}

ExpressionResult evaluate(EvaluationContext* context) const override {
return impl_->evaluate(context);
}

std::string getCacheKey(const VariableToColumnMap& varColMap) const override {
return impl_->getCacheKey(varColMap);
}

private:
std::span<SparqlExpression::Ptr> childrenImpl() override {
return impl_->children();
s.value().setSubstr(startByteOffset, byteLength);
return std::move(LiteralOrIri(s.value()));
}
};

using SubstrExpression = SubstrExpressionImpl;
using SubstrExpression =
LiteralExpressionImpl<3, SubstrImpl, NumericValueGetter,
NumericValueGetter>;

// STRSTARTS
[[maybe_unused]] auto strStartsImpl = [](std::string_view text,
Expand Down
4 changes: 2 additions & 2 deletions test/ExportQueryExecutionTreesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1649,10 +1649,10 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) {
auto callIdToLiteralOrIri = [&](Id id, bool onlyLiterals,
bool onlyLiteralsWithXsdString = false) {
if (onlyLiterals) {
return ExportQueryExecutionTrees::idToLiteralOrIri<true>(
return ExportQueryExecutionTrees::idToLiteral<true>(
qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString);
} else {
return ExportQueryExecutionTrees::idToLiteralOrIri<false>(
return ExportQueryExecutionTrees::idToLiteral<false>(
qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString);
}
};
Expand Down
Loading