Skip to content

Commit

Permalink
modernized code for GameFinder and HtmlParser
Browse files Browse the repository at this point in the history
  • Loading branch information
HerrKnarz committed Mar 21, 2023
1 parent fe51c13 commit 118deb6
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.5.0" />
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
<PackageReference Include="xunit" Version="2.4.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
Expand Down
49 changes: 29 additions & 20 deletions Metadata/WikipediaMetadata/GameFinder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,29 @@ namespace WikipediaMetadata
internal class GameFinder
{
private readonly PluginSettings _settings;
private string _wikiNameVideoGame;
private string _wikiName;
private string _wikiStart;

public GameFinder(PluginSettings settings)
{
_settings = settings;
}

private void PrepareSearchTerms(string searchTerm)
/// <summary>
/// Prepares different strings for search functions
/// </summary>
/// <param name="searchTerm">Term to search for</param>
/// <returns> nameVideoGame = search term with " (video game)" appended, normalized the same way as compareName
/// compareName = search term in lower case without special characters and spaces, to compare with results
/// startName = the first five characters of compareName (or all of it, if shorter) to order by those.</returns>
private (string nameVideoGame, string compareName, string startName) PrepareSearchTerms(string searchTerm)
{
_wikiNameVideoGame = (searchTerm + " (video game)").RemoveSpecialChars().ToLower().Replace(" ", "");
_wikiName = searchTerm.RemoveSpecialChars().ToLower().Replace(" ", "");
_wikiStart = _wikiName.Substring(0, (_wikiName.Length > 5) ? 5 : _wikiName.Length);
string compareName = searchTerm.RemoveSpecialChars().ToLower().Replace(" ", "");

return
(
$"{searchTerm} (video game)".RemoveSpecialChars().ToLower().Replace(" ", ""),
compareName,
new string(compareName.Take(5).ToArray())
);
}

/// <summary>
Expand All @@ -37,7 +46,7 @@ public Page FindGame(string gameName)
{
Page foundPage = null;

PrepareSearchTerms(gameName);
(string nameVideoGame, string compareName, _) = PrepareSearchTerms(gameName);

string searchName = gameName.RemoveEditionSuffix();

Expand All @@ -53,11 +62,13 @@ public Page FindGame(string gameName)
// page title, so we try that first, before searching the name itself. Only if we get a 100% match, we'll
// use the page in background mode. The description also needs to have the words "video game" in it to
// avoid cases like "Doom", where a completely wrong page would be returned.
List<Page> foundPages = searchResult.Pages.Where(p => p.Description != null && p.Description.ToLower().Contains("video game")).ToList();

foundPage =
searchResult.Pages.Where(p => p.Description != null && p.Description.ToLower().Contains("video game") && p.KeyMatch == _wikiNameVideoGame).FirstOrDefault() ??
searchResult.Pages.Where(p => p.Description != null && p.Description.ToLower().Contains("video game") && p.KeyMatch == searchNameVideoGame).FirstOrDefault() ??
searchResult.Pages.Where(p => p.Description != null && p.Description.ToLower().Contains("video game") && p.KeyMatch == _wikiName).FirstOrDefault() ??
searchResult.Pages.Where(p => p.Description != null && p.Description.ToLower().Contains("video game") && p.KeyMatch == searchName).FirstOrDefault();
foundPages.Where(p => p.KeyMatch == nameVideoGame).FirstOrDefault() ??
foundPages.Where(p => p.KeyMatch == searchNameVideoGame).FirstOrDefault() ??
foundPages.Where(p => p.KeyMatch == compareName).FirstOrDefault() ??
foundPages.Where(p => p.KeyMatch == searchName).FirstOrDefault();
}
return foundPage;
}
Expand All @@ -69,7 +80,7 @@ public Page FindGame(string gameName)
/// <returns>List of found results</returns>
public List<GenericItemOption> GetSearchResults(string searchTerm)
{
PrepareSearchTerms(searchTerm);
(string nameVideoGame, string compareName, string startName) = PrepareSearchTerms(searchTerm);

// We search for the game name on Wikipedia
WikipediaSearchResult searchResult = ApiCaller.GetSearchResults(searchTerm);
Expand All @@ -81,16 +92,14 @@ public List<GenericItemOption> GetSearchResults(string searchTerm)
// titles starting with the game name, then by titles starting with the first five characters of the game
// name and lastly by the page title itself.
return searchResult.Pages.Select(WikipediaItemOption.FromWikipediaSearchResult)
.OrderByDescending(o => o.Name.RemoveSpecialChars().ToLower().Replace(" ", "").StartsWith(_wikiNameVideoGame))
.ThenByDescending(o => o.Name.RemoveSpecialChars().ToLower().Replace(" ", "").StartsWith(_wikiStart))
.ThenByDescending(o => o.Name.RemoveSpecialChars().ToLower().Replace(" ", "").Contains(_wikiName))
.OrderByDescending(o => o.Name.RemoveSpecialChars().ToLower().Replace(" ", "").StartsWith(nameVideoGame))
.ThenByDescending(o => o.Name.RemoveSpecialChars().ToLower().Replace(" ", "").StartsWith(startName))
.ThenByDescending(o => o.Name.RemoveSpecialChars().ToLower().Replace(" ", "").Contains(compareName))
.ThenByDescending(o => o.Description != null && o.Description.Contains("video game"))
.ToList<GenericItemOption>();
}
else
{
return searchResult.Pages.Select(WikipediaItemOption.FromWikipediaSearchResult).ToList<GenericItemOption>();
}

return searchResult.Pages.Select(WikipediaItemOption.FromWikipediaSearchResult).ToList<GenericItemOption>();
}
}
}
26 changes: 9 additions & 17 deletions Metadata/WikipediaMetadata/HtmlParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,10 @@ public HtmlParser(string gameKey, PluginSettings settings)
// build a simple new html string.
string apiUrl = string.Format(Resources.PageHtmlUrl, gameKey.UrlEncode());

HtmlWeb web = new HtmlWeb();
HtmlDocument doc = web.Load(apiUrl);
HtmlDocument doc = new HtmlWeb().Load(apiUrl);

// We go through all sections, because those typically contain the text sections of the page.
HtmlNodeCollection topLevelSections = doc.DocumentNode.SelectNodes("//body/section");

foreach (HtmlNode topLevelSection in topLevelSections)
foreach (HtmlNode topLevelSection in doc.DocumentNode.SelectNodes("//body/section"))
{
// First we check if the current section is the "external links" block by fetching its heading.
HtmlNode linkNode = topLevelSection.SelectSingleNode("./h2");
Expand All @@ -56,9 +53,7 @@ public HtmlParser(string gameKey, PluginSettings settings)
}

// Now we fetch all allowed second level nodes.
List<HtmlNode> secondLevelNodes = topLevelSection.ChildNodes.Where(c => Resources.AllowedSecondLevelNodes.Contains(c.Name)).ToList();

foreach (HtmlNode secondLevelNode in secondLevelNodes)
foreach (HtmlNode secondLevelNode in topLevelSection.ChildNodes.Where(c => Resources.AllowedSecondLevelNodes.Contains(c.Name)))
{
// If the heading is one of the unwanted sections, we completely omit the section.
if (secondLevelNode.Name == "h2" && unwantedParagraphs.Contains(secondLevelNode.InnerText.ToLower().Trim()))
Expand All @@ -68,17 +63,13 @@ public HtmlParser(string gameKey, PluginSettings settings)
else if (secondLevelNode.Name == "section")
{
// We now look for third level nodes and add those to the description.
List<HtmlNode> thirdLevelNodes = secondLevelNode.ChildNodes.Where(c => Resources.AllowedThirdLevelNodes.Contains(c.Name)).ToList();

foreach (HtmlNode thirdLevelNode in thirdLevelNodes)
foreach (HtmlNode thirdLevelNode in secondLevelNode.ChildNodes.Where(c => Resources.AllowedThirdLevelNodes.Contains(c.Name)))
{
if (thirdLevelNode.Name == "section")
{
// We now look for fourth level nodes and add those to the description. Since further levels are
// very rarely used, we don't consider those for now.
List<HtmlNode> fourthLevelNodes = thirdLevelNode.ChildNodes.Where(c => Resources.AllowedFourthLevelNodes.Contains(c.Name)).ToList();

foreach (HtmlNode fourthLevelNode in fourthLevelNodes)
foreach (HtmlNode fourthLevelNode in thirdLevelNode.ChildNodes.Where(c => Resources.AllowedFourthLevelNodes.Contains(c.Name)))
{
AddSectionToDescription(fourthLevelNode);
}
Expand Down Expand Up @@ -131,7 +122,7 @@ private HtmlNode RemoveAnnotationMarks(HtmlNode text)
{
HtmlNodeCollection supNodes = text.SelectNodes("./sup");

if (supNodes != null && supNodes.Count > 0)
if (supNodes?.Any() ?? false)
{
foreach (HtmlNode annotation in supNodes)
{
Expand Down Expand Up @@ -183,7 +174,8 @@ private void GetExternalLinks(HtmlNode node)
if (linkList != null)
{
HtmlNodeCollection listItems = linkList.SelectNodes("./li");
if (listItems != null && listItems.Count > 0)

if (listItems?.Any() ?? false)
{
foreach (HtmlNode listItem in listItems)
{
Expand Down Expand Up @@ -262,7 +254,7 @@ private HtmlNode RemoveUnwantedTags(HtmlNode htmlNode, string[] acceptableTags)
{
if (node.Name == "a")
{
if (node.Attributes != null && node.Attributes.Any())
if (node.Attributes?.Any() ?? false)
{
foreach (HtmlAttribute attribute in node.Attributes.ToList())
{
Expand Down
3 changes: 3 additions & 0 deletions Metadata/WikipediaMetadata/WikipediaMetadata.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,9 @@
<PackageReference Include="PlayniteSDK">
<Version>6.8.0</Version>
</PackageReference>
<PackageReference Include="System.ValueTuple">
<Version>4.5.0</Version>
</PackageReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

0 comments on commit 118deb6

Please sign in to comment.