Skip to content

Commit

Permalink
apacheGH-1873: Error if quads in the inputs but output is formatted t…
Browse files Browse the repository at this point in the history
…riples.
  • Loading branch information
afs committed May 21, 2023
1 parent a8cdfa9 commit 087f55b
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,25 @@

package org.apache.jena.riot.system;

import org.apache.jena.atlas.logging.Log;
import org.apache.jena.sparql.core.Quad;
import org.slf4j.Logger;

/**
* {@link StreamRDF} that expects triples not quads.
* Issues a warning when the first quad is seen.
* Runs an action the first time a quad is seen.
* Quads that are the default graph or no graph are redirected to {@link StreamRDF#triple}.
*/
public class StreamTriplesOnly extends StreamRDFWrapper {

public static StreamRDF warnIfQuads(Logger log, StreamRDF stream) {
return new StreamTriplesOnly(log, stream);
public static StreamRDF actionIfQuads(StreamRDF stream, Runnable action) {
return new StreamTriplesOnly(stream, action);
}

private boolean seenQuads = false;
private final Logger log;
private final Runnable action;

private StreamTriplesOnly(Logger logger, StreamRDF sink) {
private StreamTriplesOnly(StreamRDF sink, Runnable action) {
super(sink) ;
this.log = logger;
this.action = action;
}

@Override
Expand All @@ -48,7 +46,7 @@ public void quad(Quad quad) {
return;
}
if ( ! seenQuads ) {
Log.warn(log, "Quads in triples output - quads ignored");
action.run();
seenQuads = true;
}
}
Expand Down
17 changes: 13 additions & 4 deletions jena-cmds/src/main/java/arq/cmdline/ModLangOutput.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class ModLangOutput extends ModBase
protected ArgDecl argPretty = new ArgDecl(ArgDecl.HasValue, "formatted", "pretty", "fmt") ;
protected ArgDecl argStream = new ArgDecl(ArgDecl.HasValue, "stream") ;
protected ArgDecl argCompress = new ArgDecl(ArgDecl.NoValue, "compress") ;
private boolean compressedOutput = false ;
private boolean compressedOutput = false ;
private RDFFormat streamOutput = null ;
private RDFFormat formattedOutput = null ;

Expand All @@ -51,9 +51,18 @@ public void registerWith(CmdGeneral cmdLine) {

@Override
public void processArgs(CmdArgModule cmdLine) {

// [QT] ** check only one of argPretty, argStream, argOutput **

{
    // The three output selectors are mutually exclusive: count how many were
    // supplied on the command line and reject the invocation if more than one.
    int x = 0;
    if ( cmdLine.contains(argPretty) )
        x++;
    if ( cmdLine.contains(argStream) )
        x++;
    if ( cmdLine.contains(argOutput) )
        x++;

    if ( x >= 2 )
        throw new CmdException("Multiple output choices given: Use one of --stream, --output and --formatted");
}
if ( cmdLine.contains(argPretty) ) {
String langName = cmdLine.getValue(argPretty) ;
Lang lang = RDFLanguages.nameToLang(langName) ;
Expand Down
80 changes: 64 additions & 16 deletions jena-cmds/src/main/java/riotcmd/CmdLangParse.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,42 @@ protected String getSummary() {
protected List<ParseRecord> outcomes = new ArrayList<>();

protected OutputStream outputWrite = System.out;
protected StreamRDF outputStream = null;
protected StreamRDF parserOutputStream = null;
protected String parserBaseIRI = null;
protected boolean passRelativeURIs = false;
protected String writerBaseIRI = null;

/**
 * Process the command line modules and arguments.
 * <p>
 * Records strict mode and performs an early consistency check: if the inputs
 * are known to be in a quads syntax while the requested output is triples-only,
 * a warning is logged. The same condition is checked again during parsing,
 * which also covers inputs whose syntax cannot be determined here.
 */
@Override
protected void processModulesAndArgs() {
    cmdStrictMode = super.contains(strictDecl);

    // True if any input is known to be quads.
    // False if unknown (stdin, no --syntax) or if no input is quads.
    // The check will also be done during parsing.
    boolean someQuadsInput = false;

    Lang declaredLang = modLangParse.getLang();
    if ( declaredLang != null ) {
        // An explicitly declared syntax decides the matter.
        someQuadsInput = RDFLanguages.isQuads(declaredLang);
    } else {
        // No declared syntax: guess from the file names.
        // With no file names the input is stdin and may be triples or quads.
        for ( String fn : super.getPositional() ) {
            Lang lang = RDFLanguages.filenameToLang(fn);
            // lang is checked for null in case the file name is not recognized.
            if ( lang != null && RDFLanguages.isQuads(lang) ) {
                someQuadsInput = true;
                break;
            }
        }
    }

    if ( someQuadsInput && ! isQuadsOutput() )
        Log.warn(SysRIOT.getLogger(), "Quads syntax in the input files but triple output requested.");
}

protected interface PostParseHandler {
Expand Down Expand Up @@ -164,19 +192,30 @@ protected void exec() {
}
}

outputStream = null;
parserOutputStream = null;
PostParseHandler postParse = null;

outputStream = createStreamSink();
if ( outputStream == null ) {
parserOutputStream = createStreamSink();
if ( parserOutputStream == null ) {
Pair<StreamRDF, PostParseHandler> p = createAccumulateSink();
outputStream = p.getLeft();
parserOutputStream = p.getLeft();
postParse = p.getRight();
}

if ( ! isQuadsOutput() ) {
// Only pass through triples.
outputStream = StreamTriplesOnly.warnIfQuads(SysRIOT.getLogger(), outputStream);
final StreamRDF dest = parserOutputStream;
if ( isStreamingOutput() ) {
Runnable action = () -> {
// dest may be significantly buffered over the top of the output stream.
// The log message does not necessarily come out in the right place - it may be early.
IO.flush(outputWrite);
Log.warn(SysRIOT.getLogger(), "Quads in triples output - quads ignored.");
};
parserOutputStream = StreamTriplesOnly.actionIfQuads(parserOutputStream, action);
} else {
// Not streaming - code can issue error before formatting.
}
}

try {
Expand Down Expand Up @@ -332,19 +371,20 @@ protected ParseRecord parseRIOT(RDFParserBuilder builder, String filename) {
if ( labelsAsGiven )
builder.labelToNode(LabelToNode.createUseLabelAsGiven());

StreamRDF s = outputStream;
// Build parser output additions.
StreamRDF s = parserOutputStream;
if ( setup != null )
s = RDFSFactory.streamRDFS(s, setup);
StreamRDFCounting sink = StreamRDFLib.count(s);
StreamRDFCounting parserOut = StreamRDFLib.count(s);
s = null;

boolean successful = true;

modTime.startTimer();
RDFParser parser = builder.build();
try {
sink.start();
parser.parse(sink);
parserOut.start();
parser.parse(parserOut);
successful = true;
} catch (RiotNotFoundException ex) {
errHandler.error(ex.getMessage(), -1, -1);
Expand All @@ -354,9 +394,9 @@ protected ParseRecord parseRIOT(RDFParserBuilder builder, String filename) {
} catch (IRIException ex) {
successful = false;
}
sink.finish();
parserOut.finish();
long x = modTime.endTimer();
ParseRecord outcome = new ParseRecord(filename, successful, x, sink.countTriples(), sink.countQuads(), errHandler);
ParseRecord outcome = new ParseRecord(filename, successful, x, parserOut.countTriples(), parserOut.countQuads(), errHandler);
return outcome;
}

Expand All @@ -383,8 +423,10 @@ protected Pair<StreamRDF, PostParseHandler> createAccumulateSink() {
builder.source(dsg);
else {
// Should only see triples - this is a consistency check.
if ( dsg.size() > 0 )
Log.warn(SysRIOT.getLogger(), "Quads seen when only triples expected - quads ignored");
if ( dsg.size() > 0 ) {
Log.error(SysRIOT.getLogger(), "Quads seen in input but output is triples only.");
throw new CmdException();
}
builder.source(dsg.getDefaultGraph());
}

Expand All @@ -401,12 +443,18 @@ protected boolean isQuadsOutput() {
RDFFormat fmt = modLangOutput.getOutputStreamFormat();
if ( fmt == null)
fmt = modLangOutput.getOutputFormatted();
if ( fmt != null && RDFLanguages.isTriples(fmt.getLang()) )
// RDFLanguages.isTriples means the language can be used in a triples context
// hence the test is "not quads".
if ( fmt != null && ! RDFLanguages.isQuads(fmt.getLang()) )
return false;
else
return true;
}

/**
 * Whether output is written in streaming mode.
 *
 * @return {@code true} if the output module reports a stream output format,
 *         {@code false} if that format is {@code null}.
 */
protected boolean isStreamingOutput() {
    RDFFormat streamFormat = modLangOutput.getOutputStreamFormat();
    return streamFormat != null;
}

protected Tokenizer makeTokenizer(InputStream in) {
Tokenizer tokenizer = TokenizerText.create().source(in).build();
return tokenizer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,18 +166,20 @@ public ElementGroup getClause() {
*/
private static void testTriple(TriplePath t) {
// verify Triple is valid
boolean validSubject = t.getSubject().isURI() || t.getSubject().isBlank() || t.getSubject().isVariable()
|| t.getSubject().equals(Node.ANY);
// Subject validity must be decided from the subject node only: URI, blank node,
// triple term (isNodeTriple), variable, or the ANY wildcard.
boolean validSubject =
    t.getSubject().isURI() || t.getSubject().isBlank() || t.getSubject().isNodeTriple()
    || t.getSubject().isVariable() || t.getSubject().equals(Node.ANY);
boolean validPredicate;

if (t.isTriple()) {
validPredicate = t.getPredicate().isURI() || t.getPredicate().isVariable()
|| t.getPredicate().equals(Node.ANY);
validPredicate = t.getPredicate().isURI()
|| t.getPredicate().isVariable() || t.getPredicate().equals(Node.ANY);
} else {
validPredicate = t.getPath() != null;
}

boolean validObject = t.getObject().isURI() || t.getObject().isLiteral() || t.getObject().isBlank()
boolean validObject =
t.getObject().isURI() || t.getObject().isLiteral() || t.getObject().isBlank() || t.getObject().isNodeTriple()
|| t.getObject().isVariable() || t.getObject().equals(Node.ANY);

if (!validSubject || !validPredicate || !validObject) {
Expand Down

0 comments on commit 087f55b

Please sign in to comment.