search optimizations

This commit is contained in:
sbwalker 2024-07-17 13:57:47 -04:00
parent ada8809ec0
commit 71e472f330
9 changed files with 52 additions and 86 deletions

View File

@ -42,7 +42,7 @@
Toolbar="Bottom" Toolbar="Bottom"
Parameters="@($"q={_keywords}")"> Parameters="@($"q={_keywords}")">
<Row> <Row>
<div class="search-item"> <div class="search-item mb-2">
<h4 class="mb-1"><a href="@context.Url">@context.Title</a></h4> <h4 class="mb-1"><a href="@context.Url">@context.Title</a></h4>
<p class="mb-0 text-muted">@((MarkupString)context.Snippet)</p> <p class="mb-0 text-muted">@((MarkupString)context.Snippet)</p>
</div> </div>
@ -61,6 +61,7 @@
</div> </div>
</div> </div>
</div> </div>
@code { @code {
public override string RenderMode => RenderModes.Static; public override string RenderMode => RenderModes.Static;

View File

@ -1,23 +0,0 @@
using System.Collections.Generic;
using Oqtane.Documentation;
using Oqtane.Models;
using Oqtane.Shared;
namespace Oqtane.Modules.Admin.SearchResults
{
    /// <summary>
    /// Module registration for the "Search Results" admin module.
    /// </summary>
    [PrivateApi("Mark this as private, since it's not very useful in the public docs")]
    public class ModuleInfo : IModule
    {
        /// <summary>
        /// Gets the module definition. A new <see cref="ModuleDefinition"/> instance is
        /// built on every access; it carries the module name, description, version,
        /// category and a single stylesheet resource ("~/Module.css").
        /// </summary>
        public ModuleDefinition ModuleDefinition
        {
            get
            {
                return new ModuleDefinition
                {
                    Name = "Search Results",
                    Description = "Display Search Results",
                    Version = Constants.Version,
                    Categories = "Admin",
                    Resources = new List<Resource>
                    {
                        new Resource { ResourceType = ResourceType.Stylesheet, Url = "~/Module.css" }
                    }
                };
            }
        }
    }
}

View File

@ -58,7 +58,6 @@ namespace Oqtane.Infrastructure
var currentTime = DateTime.UtcNow; var currentTime = DateTime.UtcNow;
var lastIndexedOn = Convert.ToDateTime(siteSettings.GetValue(SearchLastIndexedOnSetting, DateTime.MinValue.ToString())); var lastIndexedOn = Convert.ToDateTime(siteSettings.GetValue(SearchLastIndexedOnSetting, DateTime.MinValue.ToString()));
log += $"Index Date: {lastIndexedOn}<br />";
var ignorePaths = siteSettings.GetValue(SearchIgnorePathsSetting, "").Split(','); var ignorePaths = siteSettings.GetValue(SearchIgnorePathsSetting, "").Split(',');
var ignoreEntities = siteSettings.GetValue(SearchIgnoreEntitiesSetting, "").Split(','); var ignoreEntities = siteSettings.GetValue(SearchIgnoreEntitiesSetting, "").Split(',');

View File

@ -129,6 +129,36 @@ namespace Oqtane.Providers
return Task.CompletedTask; return Task.CompletedTask;
} }
/// <summary>
/// Overwrites the textual fields of a search content item with their cleaned form.
/// </summary>
/// <param name="searchContent">
/// The item to clean in place. Its Title, Description, Body and AdditionalContent
/// are each passed through <c>GetCleanContent</c> and reassigned.
/// </param>
private void CleanSearchContent(SearchContent searchContent)
{
    searchContent.Title = GetCleanContent(searchContent.Title);
    searchContent.Description = GetCleanContent(searchContent.Description);
    searchContent.Body = GetCleanContent(searchContent.Body);
    searchContent.AdditionalContent = GetCleanContent(searchContent.AdditionalContent);
}
/// <summary>
/// Reduces a piece of (possibly HTML) content to its visible text.
/// </summary>
/// <param name="content">Raw content; may be null, empty or whitespace.</param>
/// <returns>
/// An empty string when <paramref name="content"/> is null or whitespace; otherwise
/// the inner text of every non-blank text node that is not a direct child of a
/// <c>script</c> or <c>style</c> element, joined with single spaces.
/// </returns>
private string GetCleanContent(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return string.Empty;
    }

    // Entities are decoded before parsing, so encoded markup is parsed as markup.
    var document = new HtmlDocument();
    document.LoadHtml(WebUtility.HtmlDecode(content));

    var fragments = new List<string>();
    foreach (var node in document.DocumentNode.Descendants())
    {
        if (node.NodeType != HtmlNodeType.Text)
        {
            continue;
        }

        // Text directly inside script/style elements is code, not readable content.
        var parentName = node.ParentNode.Name;
        if (parentName == "script" || parentName == "style")
        {
            continue;
        }

        if (string.IsNullOrEmpty(node.InnerText.Trim()))
        {
            continue;
        }

        fragments.Add(node.InnerText);
    }

    return string.Join(" ", fragments);
}
private void AnalyzeSearchContent(SearchContent searchContent, Dictionary<string, string> siteSettings) private void AnalyzeSearchContent(SearchContent searchContent, Dictionary<string, string> siteSettings)
{ {
var ignoreWords = IgnoreWords.Split(','); var ignoreWords = IgnoreWords.Split(',');
@ -180,14 +210,15 @@ namespace Oqtane.Providers
private static Dictionary<string, int> GetWords(string content, string[] ignoreWords, int minimumWordLength) private static Dictionary<string, int> GetWords(string content, string[] ignoreWords, int minimumWordLength)
{ {
content = FormatText(content); content = FormatContent(content);
var words = new Dictionary<string, int>(); var words = new Dictionary<string, int>();
if (!string.IsNullOrEmpty(content)) if (!string.IsNullOrEmpty(content))
{ {
foreach (var word in content.Split(' ')) foreach (var term in content.Split(' '))
{ {
var word = term.ToLower().Trim();
if (word.Length >= minimumWordLength && !ignoreWords.Contains(word)) if (word.Length >= minimumWordLength && !ignoreWords.Contains(word))
{ {
if (!words.ContainsKey(word)) if (!words.ContainsKey(word))
@ -205,48 +236,16 @@ namespace Oqtane.Providers
return words; return words;
} }
private static string FormatText(string text) private static string FormatContent(string text)
{ {
text = HtmlEntity.DeEntitize(text); text = HtmlEntity.DeEntitize(text);
foreach (var punctuation in ".?!,;:-_()[]{}'\"/\\".ToCharArray()) foreach (var punctuation in ".?!,;:_()[]{}'\"/\\".ToCharArray())
{ {
text = text.Replace(punctuation, ' '); text = text.Replace(punctuation, ' ');
} }
text = text.Replace(" ", " ").ToLower().Trim();
return text; return text;
} }
/// <summary>
/// Overwrites the textual fields of a search content item with their cleaned form.
/// </summary>
/// <param name="searchContent">
/// The item to clean in place. Its Title, Description, Body and AdditionalContent
/// are each passed through <c>GetCleanContent</c> and reassigned.
/// </param>
private void CleanSearchContent(SearchContent searchContent)
{
    searchContent.Title = GetCleanContent(searchContent.Title);
    searchContent.Description = GetCleanContent(searchContent.Description);
    searchContent.Body = GetCleanContent(searchContent.Body);
    searchContent.AdditionalContent = GetCleanContent(searchContent.AdditionalContent);
}
/// <summary>
/// Reduces a piece of (possibly HTML) content to its visible text.
/// </summary>
/// <param name="content">Raw content; may be null, empty or whitespace.</param>
/// <returns>
/// An empty string when <paramref name="content"/> is null or whitespace; otherwise
/// the inner text of every non-blank text node that is not a direct child of a
/// <c>script</c> or <c>style</c> element, joined with single spaces.
/// </returns>
private string GetCleanContent(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return string.Empty;
    }

    // Entities are decoded before parsing, so encoded markup is parsed as markup.
    var document = new HtmlDocument();
    document.LoadHtml(WebUtility.HtmlDecode(content));

    var fragments = new List<string>();
    foreach (var node in document.DocumentNode.Descendants())
    {
        if (node.NodeType != HtmlNodeType.Text)
        {
            continue;
        }

        // Text directly inside script/style elements is code, not readable content.
        var parentName = node.ParentNode.Name;
        if (parentName == "script" || parentName == "style")
        {
            continue;
        }

        if (string.IsNullOrEmpty(node.InnerText.Trim()))
        {
            continue;
        }

        fragments.Add(node.InnerText);
    }

    return string.Join(" ", fragments);
}
public Task ResetIndex() public Task ResetIndex()
{ {
_searchContentRepository.DeleteAllSearchContent(); _searchContentRepository.DeleteAllSearchContent();

View File

@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Threading.Tasks; using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Query;
using Oqtane.Models; using Oqtane.Models;
using Oqtane.Shared; using Oqtane.Shared;
@ -26,9 +27,14 @@ namespace Oqtane.Repository
.ThenInclude(w => w.SearchWord) .ThenInclude(w => w.SearchWord)
.Where(i => i.SiteId == searchQuery.SiteId); .Where(i => i.SiteId == searchQuery.SiteId);
if (searchQuery.EntityNames != null && searchQuery.EntityNames.Any()) if (!string.IsNullOrEmpty(searchQuery.IncludeEntities))
{ {
searchContents = searchContents.Where(i => searchQuery.EntityNames.Contains(i.EntityName)); searchContents = searchContents.Where(i => searchQuery.IncludeEntities.Split(',', StringSplitOptions.RemoveEmptyEntries).Contains(i.EntityName));
}
if (!string.IsNullOrEmpty(searchQuery.ExcludeEntities))
{
searchContents = searchContents.Where(i => !searchQuery.ExcludeEntities.Split(',', StringSplitOptions.RemoveEmptyEntries).Contains(i.EntityName));
} }
if (searchQuery.From != DateTime.MinValue) if (searchQuery.From != DateTime.MinValue)

View File

@ -1,3 +0,0 @@
/* Show a pointer cursor over labels inside pagination and dropdown-menu
   list items within the search result container. */
.search-result-container ul.pagination li label,
.search-result-container ul.dropdown-menu li label {
    cursor: pointer;
}

View File

@ -1,7 +1,4 @@
using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using Oqtane.Models; using Oqtane.Models;

View File

@ -12,7 +12,9 @@ namespace Oqtane.Models
public string Keywords { get; set; } public string Keywords { get; set; }
public List<string> EntityNames { get; set; } = new List<string>(); public string IncludeEntities { get; set; } = ""; // comma delimited entities to include
public string ExcludeEntities { get; set; } = ""; // comma delimited entities to exclude
public DateTime From { get; set; } public DateTime From { get; set; }

View File

@ -4,13 +4,6 @@ namespace Oqtane.Shared
{ {
public sealed class SearchUtils public sealed class SearchUtils
{ {
private static readonly List<string> _systemPages;
static SearchUtils()
{
_systemPages = new List<string> { "login", "register", "profile", "404", "search" };
}
public static List<string> GetKeywords(string keywords) public static List<string> GetKeywords(string keywords)
{ {
var keywordsList = new List<string>(); var keywordsList = new List<string>();
@ -27,10 +20,5 @@ namespace Oqtane.Shared
return keywordsList; return keywordsList;
} }
public static bool IsSystemPage(Models.Page page)
{
return page.Path.Contains("admin") || _systemPages.Contains(page.Path);
}
} }
} }