Skip to content

Commit

Permalink
Merge pull request #533 from hbz/514-import
Browse files Browse the repository at this point in the history
Create import files
  • Loading branch information
fsteeg authored Jan 21, 2020
2 parents 0784348 + ba013ad commit f04dddb
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 36 deletions.
77 changes: 52 additions & 25 deletions app/Import700n.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

/* Copyright 2019 Fabian Steeg, hbz. Licensed under the GPLv2 */

import static play.test.Helpers.running;
import static play.test.Helpers.testServer;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
Expand All @@ -18,6 +21,7 @@

import com.fasterxml.jackson.databind.JsonNode;

import controllers.nwbib.Lobid;
import play.libs.Json;

/**
Expand All @@ -42,32 +46,55 @@ public class Import700n {
* @param args Optional, the input (jsonl) and the output (txt) file names
*/
public static void main(String[] args) {
// NOTE(review): this span looks like a diff view that shows the removed and
// the added body of main side by side, without +/- markers. The first
// part below appears to be the pre-change body; confirm against the real file.
if (args.length == 2) {
dataIn = new File(args[0]);
dataOut = new File(args[1]);
}
try (Scanner scanner = new Scanner(dataIn, StandardCharsets.UTF_8.name());
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(dataOut), StandardCharsets.UTF_8))) {
while (scanner.hasNextLine()) {
JsonNode record = Json.parse(scanner.nextLine());
Stream<String> subjects = Streams.concat(//
processSpatial(record), processSubject(record));
String resultLine = String.format("%s\t%s", //
record.get("hbzId").asText(),
subjects.collect(Collectors.joining(", "))//
// https://github.com/hbz/lobid-resources/issues/1018
.replaceAll("spatial#N05", "spatial#N04"));
resultLine = resultLine//
.replace("spatial#N04$$0", "Westfalen$$0")
.replace("Siebengebirge$$0https://nwbib.de/spatial#Q4236",
"Siebengebirge$$0https://nwbib.de/spatial#N23");
System.out.println(resultLine);
writer.write(resultLine + "\n");
// NOTE(review): from here on the lines appear to belong to the post-change
// body: the whole import is executed while a Play test server on port 3333
// is running.
running(testServer(3333), () -> {
// With exactly two arguments, they replace the input and output files;
// otherwise dataIn and dataOut keep whatever values they already hold.
if (args.length == 2) {
dataIn = new File(args[0]);
dataOut = new File(args[1]);
}
// NOTE(review): the following catch block looks like a leftover of the
// removed body in the diff rendering; verify.
} catch (IOException e) {
e.printStackTrace();
}
// Input is read and output is written as UTF-8; both resources are closed
// by the try-with-resources statement.
try (Scanner scanner = new Scanner(dataIn, StandardCharsets.UTF_8.name());
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(dataOut), StandardCharsets.UTF_8))) {
// Each input line is parsed as one JSON record and converted to one
// result line, which is echoed to stdout and appended to the output file.
while (scanner.hasNextLine()) {
JsonNode record = Json.parse(scanner.nextLine());
String resultLine = processLobidResource(record);
// Alternative processing, to be swapped in manually for the line above:
// String resultLine = processNwbibSnapshot(record);
System.out.println(resultLine);
writer.write(resultLine + "\n");
}
} catch (IOException e) {
// I/O failures are only reported on stderr; the method then returns.
e.printStackTrace();
}
});
}

/**
 * Alternative to {@link #processLobidResource(JsonNode)}: only the "hbzId" of
 * the given record is used, the resource for that ID is fetched via
 * {@code Lobid.getResource}, and the fetched resource is processed instead.
 * An additional "Bistum Münster" entry is inserted as the first subject, see
 * https://github.com/hbz/nwbib/issues/516
 *
 * @param record The JSON record, its "hbzId" field is read
 * @return The result line for the fetched resource, with the extra entry
 */
@SuppressWarnings("unused")
private static String processNwbibSnapshot(JsonNode record) {
	final String hbzId = record.get("hbzId").asText();
	final String lobidLine = processLobidResource(Lobid.getResource(hbzId));
	// Insert the extra entry right after each tab (the ID/subjects separator)
	final String withBistum = lobidLine.replace("\t",
			"\t\"Bistum Münster$$0https://nwbib.de/spatial#Q769380\", ");
	// If nothing follows the inserted entry, drop its trailing separator
	return withBistum.replaceAll(", $", "");
}

/**
 * Creates the tab-separated result line for a single record: the value of its
 * "hbzId" field, a tab, and the spatial and subject entries joined with ", ".
 * Some fixed replacements are applied to the entries afterwards.
 *
 * @param record The JSON record to process
 * @return The result line (without a trailing line break)
 */
private static String processLobidResource(JsonNode record) {
	final Stream<String> entries =
			Streams.concat(processSpatial(record), processSubject(record));
	final String hbzId = record.get("hbzId").asText();
	// See https://github.com/hbz/lobid-resources/issues/1018
	final String joined = entries.collect(Collectors.joining(", "))
			.replaceAll("spatial#N05", "spatial#N04");
	final String rawLine = hbzId + "\t" + joined;
	// Fixed corrections, applied in this order on the complete line
	final String westfalen =
			rawLine.replace("spatial#N04$$0", "Westfalen$$0");
	final String siebengebirge = westfalen.replace(
			"Siebengebirge$$0https://nwbib.de/spatial#Q4236",
			"Siebengebirge$$0https://nwbib.de/spatial#N23");
	return siebengebirge.replace(
			"Kleinere geistliche Territorien im Rheinland$$0https://nwbib.de/spatial#N52",
			"Kleinere Territorien im Rheinland$$0https://nwbib.de/spatial#N54");
}

private static Stream<String> processSubject(JsonNode record) {
Expand Down
Loading

0 comments on commit f04dddb

Please sign in to comment.