From 39a38b5f0808373b0ba64a23f2c97c6dde5044cc Mon Sep 17 00:00:00 2001 From: "Artemov Ivan (ZOXEXIVO)" Date: Tue, 5 Nov 2024 00:28:56 +0300 Subject: [PATCH] Update packages --- src/Backend/Geen.Core/Geen.Core.csproj | 2 +- .../Geen.Core/Services/Text/ContentService.cs | 121 +++++++++--------- src/Backend/Geen.Data/Geen.Data.csproj | 8 +- src/Backend/Geen.Jobs/Geen.Jobs.csproj | 2 +- src/Backend/Geen.Web/Geen.Web.csproj | 6 +- 5 files changed, 66 insertions(+), 73 deletions(-) diff --git a/src/Backend/Geen.Core/Geen.Core.csproj b/src/Backend/Geen.Core/Geen.Core.csproj index b44aff0..4d01dff 100644 --- a/src/Backend/Geen.Core/Geen.Core.csproj +++ b/src/Backend/Geen.Core/Geen.Core.csproj @@ -3,6 +3,6 @@ net8.0 - + \ No newline at end of file diff --git a/src/Backend/Geen.Core/Services/Text/ContentService.cs b/src/Backend/Geen.Core/Services/Text/ContentService.cs index 52d9093..21bf5a7 100644 --- a/src/Backend/Geen.Core/Services/Text/ContentService.cs +++ b/src/Backend/Geen.Core/Services/Text/ContentService.cs @@ -1,12 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Text; using System.Text.RegularExpressions; using Geen.Core.Domains.Mentions; using Geen.Core.Models.Content; using Geen.Core.Services.Interfaces; -using Geen.Core.Services.Text.Extensions; namespace Geen.Core.Services.Text; @@ -16,7 +14,7 @@ public interface IContentService string GenerateBasicTitle(MentionModel mention); } -public class ContentService : IContentService +public partial class ContentService : IContentService { private readonly IClubCacheRepository _clubCacheRepository; private readonly IPlayerCacheRepository _playerCacheRepository; @@ -30,126 +28,121 @@ public ContentService(IPlayerCacheRepository playerCacheRepository, public ContentProcessingResult Process(string text, ContentContext context) { - var cleanedText = text.Trim().AsText(); + var cleanedText = text.AsSpan().Trim().ToString(); - var builder = new StringBuilder(cleanedText); - - var words = cleanedText - .Split(new[] { ' ', ',', '.', '(', ')', '\n', '-', '/', '\\', '<', '>' }, - StringSplitOptions.RemoveEmptyEntries) + var words = cleanedText.Split(' ', ',', '.', '(', ')', '\n', '-', '/', '\\', '<', '>') .Where(x => x.Length > 3); var result = new ContentProcessingResult(); var replacedWords = new HashSet(); + var playerReplacements = new Dictionary(); + var clubReplacements = new Dictionary(); + foreach (var word in words) { if (replacedWords.Contains(word)) continue; - var loweredWord = word.ToLower(); + var loweredWord = word.ToString().ToLowerInvariant(); var playerInfo = _playerCacheRepository.GetPlayerUrl(loweredWord, context); if (!string.IsNullOrWhiteSpace(playerInfo.Url)) { - builder.Replace(word, $"{word}"); + playerReplacements[word.ToString()] = $"{word}"; result.PlayerIds.Add(playerInfo.Id); - - replacedWords.Add(word); + replacedWords.Add(word.ToString()); } var clubInfo = _clubCacheRepository.GetClubUrl(loweredWord, context); if (!string.IsNullOrWhiteSpace(clubInfo.Url)) { - builder.Replace(word, $"{word}"); + clubReplacements[word.ToString()] = $"{word}"; result.ClubIds.Add(clubInfo.Id); - - replacedWords.Add(word); + replacedWords.Add(word.ToString()); } } - result.Text = builder.ToString(); + var pattern = string.Join("|", playerReplacements.Keys.Concat(clubReplacements.Keys).Select(Regex.Escape)); + result.Text = Regex.Replace(cleanedText, pattern, match => + { + if (playerReplacements.TryGetValue(match.Value, out var playerReplacement)) + return playerReplacement; + if (clubReplacements.TryGetValue(match.Value, out var clubReplacement)) + return clubReplacement; + return match.Value; + }); return result; } - public string GenerateBasicTitle(MentionModel mention) - { - if (mention.Player != null) return GenerateTitleForPlayer(mention); - - if (mention.Club != null) return GenerateTitleForClub(mention); + public string GenerateBasicTitle(MentionModel mention) => + mention.Player != null ? GenerateTitleForPlayer(mention) : + mention.Club != null ? GenerateTitleForClub(mention) : string.Empty; - return string.Empty; - } + private string GenerateTitleForPlayer(MentionModel playerMention) => ExtractPhrase(playerMention.Text); - private string GenerateTitleForPlayer(MentionModel playerMention) - { - return ExtractPhraze(playerMention.Text); - } + private string GenerateTitleForClub(MentionModel clubMention) => ExtractPhrase(clubMention.Text); - private string GenerateTitleForClub(MentionModel clubMention) - { - return ExtractPhraze(clubMention.Text); - } - - private string ExtractPhraze(string text) + private string ExtractPhrase(string text) { var cleanText = StripTextFromTags(text); + var wordSplittedText = cleanText.Split(' ', StringSplitOptions.RemoveEmptyEntries); - var wordSplittedText = cleanText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); - - if (wordSplittedText.Length > 9) - return ClearTailFromNonAplha(string.Join(" ", wordSplittedText.Take(9))); - - if (char.IsLower(cleanText.First())) - return MakeFirstLetterUppercase(cleanText); - - return ClearTailFromNonAplha(cleanText); + return wordSplittedText.Length > 9 ? ClearTailFromNonAlpha(string.Concat(wordSplittedText.Take(9))) : + char.IsLower(cleanText[0]) ? char.ToUpperInvariant(cleanText[0]) + cleanText[1..] : ClearTailFromNonAlpha(cleanText); } - private static string MakeFirstLetterUppercase(string text) + private static string ClearTailFromNonAlpha(string text) { - return text.First().ToString().ToUpper() + text.Substring(1); - } + var span = text.AsSpan(); + var lastAlphaIndex = span.Length - 1; - private static string ClearTailFromNonAplha(string text) - { - var lastAlphaIndex = text.Length - 1; - - while (lastAlphaIndex >= 1 && !char.IsLetterOrDigit(text[lastAlphaIndex])) + while (lastAlphaIndex >= 1 && !char.IsLetterOrDigit(span[lastAlphaIndex])) lastAlphaIndex--; - return text.Substring(0, lastAlphaIndex + 1); + return span[..(lastAlphaIndex + 1)].ToString(); } + private static readonly Regex TagRegex = MyTagRegex(); + private static readonly Regex WhitespaceRegex = MyWhitespaceRegex(); + private static readonly Regex NewlineRegex = MyNewlineRegex(); + private static readonly Regex MultiNewlineRegex = MyMultiNewlineRegex(); + private string StripTextFromTags(string text) { if (string.IsNullOrWhiteSpace(text)) return string.Empty; - var s = Regex.Replace(text, @"<(.|\n)*?>", string.Empty); - - s = s.Replace(" ", " "); + var stripped = TagRegex.Replace(text, string.Empty).Replace(" ", " "); + stripped = WhitespaceRegex.Replace(stripped, " "); + stripped = NewlineRegex.Replace(stripped, " "); + stripped = MultiNewlineRegex.Replace(stripped, " "); - s = Regex.Replace(s, @"\s+", " "); - s = Regex.Replace(s, @"\r\n", " "); - s = Regex.Replace(s, @"\n+", " "); - - return s; + return stripped; } + + [GeneratedRegex(@"<(.|\n)*?>", RegexOptions.Compiled)] + private static partial Regex MyTagRegex(); + [GeneratedRegex(@"\s+", RegexOptions.Compiled)] + private static partial Regex MyWhitespaceRegex(); + [GeneratedRegex(@"\r\n", RegexOptions.Compiled)] + private static partial Regex MyNewlineRegex(); + [GeneratedRegex(@"\n+", RegexOptions.Compiled)] + private static partial Regex MyMultiNewlineRegex(); } public class ContentProcessingResult { public ContentProcessingResult() { - PlayerIds = new SortedSet(); - ClubIds = new SortedSet(); + PlayerIds = new HashSet(); + ClubIds = new HashSet(); } public string Text { get; set; } - public SortedSet PlayerIds { get; set; } - public SortedSet ClubIds { get; set; } + public HashSet PlayerIds { get; set; } + public HashSet ClubIds { get; set; } } \ No newline at end of file diff --git a/src/Backend/Geen.Data/Geen.Data.csproj b/src/Backend/Geen.Data/Geen.Data.csproj index 23ef86f..4359c4c 100644 --- a/src/Backend/Geen.Data/Geen.Data.csproj +++ b/src/Backend/Geen.Data/Geen.Data.csproj @@ -4,11 +4,11 @@ - + - - - + + + diff --git a/src/Backend/Geen.Jobs/Geen.Jobs.csproj b/src/Backend/Geen.Jobs/Geen.Jobs.csproj index 1930c9a..afa1829 100644 --- a/src/Backend/Geen.Jobs/Geen.Jobs.csproj +++ b/src/Backend/Geen.Jobs/Geen.Jobs.csproj @@ -13,7 +13,7 @@ - + diff --git a/src/Backend/Geen.Web/Geen.Web.csproj b/src/Backend/Geen.Web/Geen.Web.csproj index ac45c42..e08e438 100644 --- a/src/Backend/Geen.Web/Geen.Web.csproj +++ b/src/Backend/Geen.Web/Geen.Web.csproj @@ -3,9 +3,9 @@ net8.0 - - - + + +