Skip to content

Commit

Permalink
GH-5148 add tests and extend corruption handling to more parts of the…
Browse files Browse the repository at this point in the history
… code
  • Loading branch information
hmottestad committed Oct 23, 2024
1 parent cad4af9 commit 196cf9d
Show file tree
Hide file tree
Showing 7 changed files with 596 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,18 @@ public Statement getNextElement() throws SailException {
}

int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX);
Resource subj = (Resource) valueStore.getValue(subjID);
Resource subj = valueStore.getResource(subjID);

int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX);
IRI pred = (IRI) valueStore.getValue(predID);
IRI pred = valueStore.getIRI(predID);

int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX);
Value obj = valueStore.getValue(objID);

Resource context = null;
int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX);
if (contextID != 0) {
context = (Resource) valueStore.getValue(contextID);
context = valueStore.getResource(contextID);
}

return valueStore.createStatement(subj, pred, obj, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,17 @@
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeResource;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* File-based indexed storage and retrieval of RDF values. ValueStore maps RDF values to integer IDs and vice-versa.
Expand All @@ -50,9 +55,7 @@
@InternalUseOnly
public class ValueStore extends SimpleValueFactory {

/*-----------*
* Constants *
*-----------*/
private static final Logger logger = LoggerFactory.getLogger(ValueStore.class);

/**
* The default value cache size.
Expand Down Expand Up @@ -127,7 +130,8 @@ public class ValueStore extends SimpleValueFactory {
/**
* Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store.
*/
private final boolean softFailOnCorruptData;
public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true"
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));;

/*--------------*
* Constructors *
Expand All @@ -153,14 +157,6 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value

setNewRevision();

/*
* Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be
* enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The
* default behavior is that ValueStore will fail hard with a SailException, whereas softFaileOnCorruptData set
* to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read.
*/
this.softFailOnCorruptData = "true"
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));
}

/*---------*
Expand Down Expand Up @@ -195,6 +191,7 @@ public Lock getReadLock() throws InterruptedException {
* @throws IOException If an I/O error occurred.
*/
public NativeValue getValue(int id) throws IOException {

// Check value cache
Integer cacheID = id;
NativeValue resultValue = valueCache.get(cacheID);
Expand All @@ -206,12 +203,55 @@ public NativeValue getValue(int id) throws IOException {
if (data != null) {
resultValue = data2value(id, data);

// Store value in cache
valueCache.put(cacheID, resultValue);
if (!(resultValue instanceof CorruptValue)) {
// Store value in cache
valueCache.put(cacheID, resultValue);
}
}
}

return resultValue;

}

/**
 * Looks up the value stored under the given ID and hands it back typed as a {@link Resource}.
 * <p>
 * If {@link #SOFT_FAIL_ON_CORRUPT_DATA} is enabled and the looked-up value is a {@link CorruptValue} that is not
 * itself a {@link Resource}, it is re-wrapped in a {@link CorruptIRIOrBNode} built from the same revision, ID and
 * raw data. In every other case the result of {@link #getValue(int)} is returned as-is.
 *
 * @param id A value ID.
 * @return The Resource for the ID, or <var>null</var> if no such value could be found.
 * @throws IOException If an I/O error occurred.
 */
public <T extends NativeValue & Resource> T getResource(int id) throws IOException {
	NativeValue value = getValue(id);

	// Only corrupt, non-Resource values are replaced, and only in soft-fail mode.
	boolean wrapAsCorruptResource = SOFT_FAIL_ON_CORRUPT_DATA
			&& value instanceof CorruptValue
			&& !(value instanceof Resource);

	if (wrapAsCorruptResource) {
		return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) value).getData());
	}

	return (T) value;
}

/**
 * Gets the IRI for the specified ID.
 * <p>
 * If {@link #SOFT_FAIL_ON_CORRUPT_DATA} is enabled and the looked-up value is a {@link CorruptValue} that is not
 * itself an {@link IRI}, it is re-wrapped in a {@link CorruptIRIOrBNode} (which implements {@link IRI}) built from
 * the same revision, ID and raw data. In every other case the result of {@link #getValue(int)} is returned as-is.
 *
 * @param id A value ID.
 * @return The IRI for the ID, or <var>null</var> if no such value could be found.
 * @throws IOException If an I/O error occurred.
 */
public <T extends NativeValue & IRI> T getIRI(int id) throws IOException {
	NativeValue resultValue = getValue(id);

	// The guard must test against IRI (this method's return bound), not Resource: a corrupt value that happens to
	// be a Resource but not an IRI would otherwise bypass the wrapper and fail the caller's cast to IRI.
	if (!(resultValue instanceof IRI) && SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
		return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
	}

	return (T) resultValue;
}

/**
Expand Down Expand Up @@ -542,7 +582,8 @@ private boolean isNamespaceData(byte[] data) {

private NativeValue data2value(int id, byte[] data) throws IOException {
if (data.length == 0) {
if (softFailOnCorruptData) {
if (SOFT_FAIL_ON_CORRUPT_DATA) {
logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
return new CorruptValue(revision, id, data);
}
throw new SailException("Empty data array for value with id " + id);
Expand All @@ -555,52 +596,69 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
case LITERAL_VALUE:
return data2literal(id, data);
default:
if (softFailOnCorruptData) {
if (SOFT_FAIL_ON_CORRUPT_DATA) {
logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id);
return new CorruptValue(revision, id, data);
}
throw new SailException("Invalid type " + data[0] + " for value with id " + id);
}
}

private NativeIRI data2uri(int id, byte[] data) throws IOException {
int nsID = ByteArrayUtil.getInt(data, 1);
String namespace = getNamespace(nsID);
private <T extends IRI & NativeValue> T data2uri(int id, byte[] data) throws IOException {
try {
int nsID = ByteArrayUtil.getInt(data, 1);
String namespace = getNamespace(nsID);

String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);

String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
return (T) new NativeIRI(revision, namespace, localName, id);
} catch (Throwable e) {
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
return (T) new CorruptIRI(revision, id, data);
}
throw e;
}

return new NativeIRI(revision, namespace, localName, id);
}

private NativeBNode data2bnode(int id, byte[] data) {
String nodeID = new String(data, 1, data.length - 1, StandardCharsets.UTF_8);
return new NativeBNode(revision, nodeID, id);
}

private NativeLiteral data2literal(int id, byte[] data) throws IOException {
// Get datatype
int datatypeID = ByteArrayUtil.getInt(data, 1);
IRI datatype = null;
if (datatypeID != NativeValue.UNKNOWN_ID) {
datatype = (IRI) getValue(datatypeID);
}
private <T extends NativeValue & Literal> T data2literal(int id, byte[] data) throws IOException {
try {
// Get datatype
int datatypeID = ByteArrayUtil.getInt(data, 1);
IRI datatype = null;
if (datatypeID != NativeValue.UNKNOWN_ID) {
datatype = (IRI) getValue(datatypeID);
}

// Get language tag
String lang = null;
int langLength = data[5];
if (langLength > 0) {
lang = new String(data, 6, langLength, StandardCharsets.UTF_8);
}
// Get language tag
String lang = null;
int langLength = data[5];
if (langLength > 0) {
lang = new String(data, 6, langLength, StandardCharsets.UTF_8);
}

// Get label
String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8);
// Get label
String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8);

if (lang != null) {
return new NativeLiteral(revision, label, lang, id);
} else if (datatype != null) {
return new NativeLiteral(revision, label, datatype, id);
} else {
return new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
if (lang != null) {
return (T) new NativeLiteral(revision, label, lang, id);
} else if (datatype != null) {
return (T) new NativeLiteral(revision, label, datatype, id);
} else {
return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
}
} catch (Throwable e) {
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
return (T) new CorruptLiteral(revision, id, data);
}
throw e;
}

}

private String data2namespace(byte[] data) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

package org.eclipse.rdf4j.sail.nativerdf.model;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
 * CorruptIRI is used in place of an IRI when a NativeValue cannot be read from the ValueStore and soft failure is
 * enabled (see {@code ValueStore.SOFT_FAIL_ON_CORRUPT_DATA}, controlled by the system property
 * {@code org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData}).
 * <p>
 * The namespace and local name are fixed placeholders; only the internal ID distinguishes one instance from another.
 *
 * @author Håvard M. Ottestad
 */
public class CorruptIRI extends CorruptValue implements IRI {

	private static final long serialVersionUID = -6995615243794525852L;

	/**
	 * @param revision   the value store revision the corrupt entry belongs to
	 * @param internalID the ID under which the corrupt entry is stored
	 * @param data       the raw bytes that could not be decoded
	 */
	public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) {
		super(revision, internalID, data);
	}

	/**
	 * @return a synthetic label made of a fixed prefix followed by the internal ID
	 */
	@Override
	public String stringValue() {
		return "CorruptIRI_with_ID_" + getInternalID();
	}

	/**
	 * @return the fixed placeholder {@code "CORRUPT"}
	 */
	@Override
	public String getNamespace() {
		return "CORRUPT";
	}

	/**
	 * @return the fixed placeholder {@code "CORRUPT"}
	 */
	@Override
	public String getLocalName() {
		return "CORRUPT";
	}

	/**
	 * Two CorruptIRI instances that both carry a known internal ID and come from equal value store revisions are
	 * compared by internal ID. Every other combination is delegated to {@link CorruptValue#equals(Object)}.
	 */
	// NOTE(review): hashCode() is not overridden here; confirm the inherited one is consistent with this equals().
	@Override
	public boolean equals(Object o) {
		if (this == o) {
			return true;
		}

		if (o instanceof CorruptIRI && getInternalID() != NativeValue.UNKNOWN_ID) {
			CorruptIRI otherCorruptValue = (CorruptIRI) o;

			if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID
					&& getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) {
				// CorruptValue is from the same revision of the same native store with both IDs set
				return getInternalID() == otherCorruptValue.getInternalID();
			}
		}

		return super.equals(o);
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

package org.eclipse.rdf4j.sail.nativerdf.model;

import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
 * CorruptIRIOrBNode is used in place of a resource (IRI or blank node) when a NativeValue cannot be read from the
 * ValueStore and soft failure is enabled (see {@code ValueStore.SOFT_FAIL_ON_CORRUPT_DATA}, controlled by the system
 * property {@code org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData}).
 * <p>
 * It implements both {@link IRI} and {@link BNode}, so it can be returned wherever either kind of resource is
 * expected. Namespace, local name and blank node ID are fixed placeholders.
 *
 * @author Håvard M. Ottestad
 */
public class CorruptIRIOrBNode extends CorruptValue implements IRI, BNode {

	private static final long serialVersionUID = 3709784393454516043L;

	/**
	 * @param revision   the value store revision the corrupt entry belongs to
	 * @param internalID the ID under which the corrupt entry is stored
	 * @param data       the raw bytes that could not be decoded
	 */
	public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] data) {
		super(revision, internalID, data);
	}

	/**
	 * @return a synthetic label made of a fixed prefix followed by the internal ID
	 */
	// NOTE(review): the prefix says "CorruptIRI" although this class is CorruptIRIOrBNode; it looks copied from
	// CorruptIRI. Left unchanged because callers may already depend on the exact string; confirm before renaming.
	@Override
	public String stringValue() {
		return "CorruptIRI_with_ID_" + getInternalID();
	}

	/**
	 * @return the fixed placeholder {@code "CORRUPT"}
	 */
	@Override
	public String getNamespace() {
		return "CORRUPT";
	}

	/**
	 * @return the fixed placeholder {@code "CORRUPT"}
	 */
	@Override
	public String getLocalName() {
		return "CORRUPT";
	}

	/**
	 * @return always the empty string; no blank node identifier is available for a corrupt value
	 */
	@Override
	public String getID() {
		return "";
	}

	/**
	 * Two CorruptIRIOrBNode instances that both carry a known internal ID and come from equal value store revisions
	 * are compared by internal ID. Every other combination is delegated to {@link CorruptValue#equals(Object)}.
	 */
	// NOTE(review): hashCode() is not overridden here; confirm the inherited one is consistent with this equals().
	@Override
	public boolean equals(Object o) {
		if (this == o) {
			return true;
		}

		if (o instanceof CorruptIRIOrBNode && getInternalID() != NativeValue.UNKNOWN_ID) {
			CorruptIRIOrBNode otherCorruptValue = (CorruptIRIOrBNode) o;

			if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID
					&& getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) {
				// CorruptValue is from the same revision of the same native store with both IDs set
				return getInternalID() == otherCorruptValue.getInternalID();
			}
		}

		return super.equals(o);
	}

}
Loading

0 comments on commit 196cf9d

Please sign in to comment.