Merge pull request #14 from AKSW/dev
added ceur-ws link for Meyer 2024 sparql, added some braces
lpmeyer authored Jan 3, 2025
2 parents 4b47fbf + ab07b1d commit c7ab788
1 changed file: aksw.bib (26 additions, 20 deletions)
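The recurring change in the hunks below is BibTeX brace protection: many bibliography styles lowercase most title words, so acronyms such as RDF, SPARQL, or LLM are wrapped in an extra pair of braces to keep their capitalization. A minimal sketch with a hypothetical entry (not taken from aksw.bib):

  BibTeX ignores free text outside entries, so this line acts as a comment.
  @InProceedings{BraceProtectionSketch,
    title = {How Well Do {LLMs} Speak {SPARQL}?},
    year  = {2024},
  }

With the inner braces a lowercasing style keeps "LLMs" and "SPARQL" as written; without them it may render "llms" and "sparql".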
@@ -12421,10 +12421,10 @@ @InProceedings{Dziwis2022OntoflowUserFriendly

@InProceedings{Frey2023BenchmarkingAbilitiesLarge,
author = {Frey, Johannes and Meyer, Lars-Peter and Arndt, Natanael and Brei, Felix and Bulert, Kirill},
- booktitle = {Proceedings of Workshop Deep Learning for Knowledge Graphs (DL4KG) @ ISWC23},
- title = {Benchmarking the Abilities of Large Language Models for RDF Knowledge Graph Creation and Comprehension: How Well Do LLMs Speak Turtle?},
+ booktitle = {Proceedings of Workshop Deep Learning for Knowledge Graphs ({DL4KG}) @ {ISWC23}},
+ title = {Benchmarking the Abilities of Large Language Models for {RDF} Knowledge Graph Creation and Comprehension: How Well Do {LLMs} Speak Turtle?},
year = {2023},
- series = {CEUR Workshop Proceedings},
+ series = {{CEUR} Workshop Proceedings},
volume = {3559},
abstract = {Large Language Models (LLMs) are advancing at a rapid pace, with significant improvements at natural language processing and coding tasks. Yet, their ability to work with formal languages representing data, specifically within the realm of knowledge graph engineering, remains under-investigated. To evaluate the proficiency of various LLMs, we created a set of five tasks that probe their ability to parse, understand, analyze, and create knowledge graphs serialized in Turtle syntax. These tasks, each embodying distinct degrees of complexity and being able to scale with the size of the problem, have been integrated into our automated evaluation system, the LLM-KG-Bench. The evaluation encompassed four commercially available LLMs - GPT-3.5, GPT-4, Claude 1.3, and Claude 2.0, as well as two freely accessible offline models, GPT4All Vicuna and GPT4All Falcon 13B. This analysis offers an in-depth understanding of the strengths and shortcomings of LLMs in relation to their application within RDF knowledge graph engineering workflows utilizing Turtle representation. While our findings show that the latest commercial models outperform their forerunners in terms of proficiency with the Turtle language, they also reveal an apparent weakness. These models fall short when it comes to adhering strictly to the output formatting constraints, a crucial requirement in this context.},
comment = {Code: https://github.com/AKSW/LLM-KG-Bench
@@ -12437,11 +12437,12 @@ @InProceedings{Frey2023BenchmarkingAbilitiesLarge

@InProceedings{Meyer2023DevelopingScalableBenchmark,
author = {Meyer, Lars-Peter and Frey, Johannes and Junghanns, Kurt and Brei, Felix and Bulert, Kirill and Gründer-Fahrer, Sabine and Martin, Michael},
- booktitle = {Proceedings of Poster Track of Semantics 2023},
+ booktitle = {Proceedings of Poster Track of {SEMANTiCS} 2023},
title = {Developing a Scalable Benchmark for Assessing Large Language Models in Knowledge Graph Engineering},
year = {2023},
editor = {Neha Keshan and Sebastian Neumaier and Anna Lisa Gentile and Sahar Vahdati},
- series = {CEUR Workshop Proceedings},
+ pages = {16--20},
+ series = {{CEUR} Workshop Proceedings},
volume = {3526},
abstract = {As the field of Large Language Models (LLMs) evolves at an accelerated pace, the critical need to assess and monitor their performance emerges. We introduce a benchmarking framework focused on knowledge graph engineering (KGE) accompanied by three challenges addressing syntax and error correction, facts extraction and dataset generation. We show that while being a useful tool, LLMs are yet unfit to assist in knowledge graph generation with zero-shot prompting. Consequently, our LLM-KG-Bench framework provides automatic evaluation and storage of LLM responses as well as statistical data and visualization tools to support tracking of prompt engineering and model performance.},
comment = {Code: https://github.com/AKSW/LLM-KG-Bench
@@ -12454,8 +12455,8 @@ @InProceedings{Meyer2023DevelopingScalableBenchmark

@InProceedings{Meyer2023LLMassistedKnowledge,
author = {Meyer, Lars-Peter and Stadler, Claus and Frey, Johannes and Radtke, Norman and Junghanns, Kurt and Meissner, Roy and Dziwis, Gordian and Bulert, Kirill and Martin, Michael},
- booktitle = {First Working Conference on Artificial Intelligence Development for a Resilient and Sustainable Tomorrow (AITomorrow) 2023},
- title = {LLM-assisted Knowledge Graph Engineering: Experiments with ChatGPT},
+ booktitle = {First Working Conference on Artificial Intelligence Development for a Resilient and Sustainable Tomorrow ({AITomorrow}) 2023},
+ title = {{LLM}-assisted Knowledge Graph Engineering: Experiments with {ChatGPT}},
year = {2024},
address = {Wiesbaden},
editor = {Christian Zinke-Wehlmann and Julia Friedrich},
@@ -12527,11 +12528,11 @@ @InProceedings{icsc2023natuke

@InProceedings{Brei2024Leveragingsmalllanguage,
author = {Brei, Felix and Frey, Johannes and Meyer, Lars-Peter},
- booktitle = {Proceedings of the Third International Workshop on Linked Data-driven Resilience Research 2024 (D2R2'24), colocated with ESWC 2024},
- title = {Leveraging small language models for Text2SPARQLtasks to improve the resilience of AI assistance},
+ booktitle = {Proceedings of the Third International Workshop on Linked Data-driven Resilience Research 2024 ({D2R2}'24), colocated with {ESWC} 2024},
+ title = {Leveraging small language models for {Text2SPARQL} tasks to improve the resilience of {AI} assistance},
year = {2024},
editor = {Julia Holze and Sebastian Tramp and Michael Martin and Sören Auer and Ricardo Usbeck and Nenad Krdzavac},
- series = {CEUR Workshop Proceedings},
+ series = {{CEUR} Workshop Proceedings},
volume = {3707},
abstract = {In this work we will show that language models with less than one billion parameters can be used to translate natural language to SPARQL queries after fine-tuning. Using three different datasets ranging from academic to real world, we identify prerequisites that the training data must fulfill in order for the training to be successful. The goal is to empower users of semantic web technology to use AI assistance with affordable commodity hardware, making them more resilient against external factors},
doi = {10.48550/arXiv.2405.17076},
@@ -12542,8 +12543,8 @@ @InProceedings{Brei2024Leveragingsmalllanguage

@InProceedings{Frey2024AssessingEvolutionLLM,
author = {Johannes Frey and Lars-Peter Meyer and Felix Brei and Sabine Gründer-Fahrer and Michael Martin},
- booktitle = {Proceedings of Special Track Large Language Models for Knowledge Engineering at Extended Semantic Web Conference 2024 (ESWC24)},
- title = {Assessing the Evolution of LLM capabilities for Knowledge Graph Engineering in 2023},
+ booktitle = {Proceedings of Special Track Large Language Models for Knowledge Engineering at Extended Semantic Web Conference 2024 ({ESWC24})},
+ title = {Assessing the Evolution of {LLM} capabilities for Knowledge Graph Engineering in 2023},
year = {2024},
abstract = {In this study, we evaluate the evolution of LLM capabilities w.r.t. the RDF Turtle and SPARQL language as foundational skills to assist with various KGE tasks. We measure the LLM response quality using 6 LLM-KG-Bench tasks for a total of 15 LLM versions available over the course of 2023, covering 5 different “major version” LLM classes (GPT3.5 Turbo, GPT4, Claude-1.x, Claude-2.x, and Claude-instant-1.x).},
keywords = {group_aksw sys:relevantFor:infai es frey lpmeyer martin},
@@ -12564,14 +12565,19 @@ @InProceedings{Kilic2024TowardsRegionalPublic
}

@InProceedings{Meyer2024AssessingSparqlCapabilititesLLM,
- author = {Lars-Peter Meyer and Johannes Frey and Felix Brei and Natanael Arndt},
- title = {Assessing SPARQL capabilities of Large Language Models},
- year = {2024},
- abstract = {The integration of Large Language Models (LLMs) with Knowledge Graphs (KGs) offers significant synergistic potential for knowledge-driven applications. One possible integration is the interpretation and generation of formal languages, such as those used in the Semantic Web, with SPARQL being a core technology for accessing KGs. In this paper, we focus on measuring out-of-the box capabilities of LLMs to work with SPARQL and more specifically with SPARQL SELECT queries applying a quantitative approach. We implemented various benchmarking tasks in the LLM-KG-Bench framework for automated execution and evaluation with several LLMs. The tasks assess capabilities along the dimensions of syntax, semantic read, semantic create, and the role of knowledge graph prompt inclusion. With this new benchmarking tasks, we evaluated a selection of GPT, Gemini, and Claude models. Our findings indicate that working with SPARQL SELECT queries is still challenging for LLMs and heavily depends on the specific LLM as well as the complexity of the task. While fixing basic syntax errors seems to pose no problems for the best of the current LLMs evaluated, creating semantically correct SPARQL SELECT queries is difficult in several cases.},
- comment = {to appear in Proceedings of Workshop NLP4KGC, colocated with SEMANTICS 2024, at https://ceur-ws.org},
- doi = {10.48550/ARXIV.2409.05925},
- keywords = {group_aksw sys:relevantFor:infai es lpmeyer brei frey arndt},
- url = {https://arxiv.org/pdf/2409.05925},
+ author = {Lars-Peter Meyer and Johannes Frey and Felix Brei and Natanael Arndt},
+ booktitle = {Proceedings of the 3rd International Workshop on Natural Language Processing for Knowledge Graph Creation co-located with 20th International Conference on Semantic Systems ({SEMANTiCS} 2024)},
+ title = {Assessing {SPARQL} capabilities of Large Language Models},
+ year = {2024},
+ editor = {Edlira Vakaj and Sima Iranmanesh and Rizou Stamartina and Nandana Mihindukulasooriya and Sanju Tiwari and Fernando Ortiz-Rodríguez and Ryan Mcgranaghan},
+ pages = {35-53},
+ series = {{CEUR} Workshop Proceedings},
+ volume = {3874},
+ abstract = {The integration of Large Language Models (LLMs) with Knowledge Graphs (KGs) offers significant synergistic potential for knowledge-driven applications. One possible integration is the interpretation and generation of formal languages, such as those used in the Semantic Web, with SPARQL being a core technology for accessing KGs. In this paper, we focus on measuring out-of-the box capabilities of LLMs to work with SPARQL and more specifically with SPARQL SELECT queries applying a quantitative approach. We implemented various benchmarking tasks in the LLM-KG-Bench framework for automated execution and evaluation with several LLMs. The tasks assess capabilities along the dimensions of syntax, semantic read, semantic create, and the role of knowledge graph prompt inclusion. With this new benchmarking tasks, we evaluated a selection of GPT, Gemini, and Claude models. Our findings indicate that working with SPARQL SELECT queries is still challenging for LLMs and heavily depends on the specific LLM as well as the complexity of the task. While fixing basic syntax errors seems to pose no problems for the best of the current LLMs evaluated, creating semantically correct SPARQL SELECT queries is difficult in several cases.},
+ doi = {10.48550/ARXIV.2409.05925},
+ issn = {1613-0073},
+ keywords = {group_aksw sys:relevantFor:infai es lpmeyer brei frey arndt},
+ url = {https://ceur-ws.org/Vol-3874/paper3.pdf},
}

@InProceedings{gastinger2024dynamic,