search optimizations
This commit is contained in:
@ -129,6 +129,36 @@ namespace Oqtane.Providers
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
/// <summary>
/// Overwrites each text field of the given search content with the result of
/// passing its current value through <see cref="GetCleanContent"/>.
/// </summary>
/// <param name="searchContent">The search content whose text fields are rewritten in place.</param>
private void CleanSearchContent(SearchContent searchContent)
{
    // Heading fields.
    searchContent.Title = GetCleanContent(searchContent.Title);
    searchContent.Description = GetCleanContent(searchContent.Description);

    // Main text and any supplementary text.
    searchContent.Body = GetCleanContent(searchContent.Body);
    searchContent.AdditionalContent = GetCleanContent(searchContent.AdditionalContent);
}
|
||||
|
||||
/// <summary>
/// Produces a plain-text version of the supplied content: the input is
/// HTML-decoded, parsed as an HTML document, and the non-blank text nodes
/// that are not direct children of <c>script</c> or <c>style</c> elements
/// are joined with single spaces.
/// </summary>
/// <param name="content">The raw content; may be null, empty or whitespace.</param>
/// <returns>
/// The joined text, or <see cref="string.Empty"/> when the input is null,
/// empty or whitespace only.
/// </returns>
private string GetCleanContent(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return string.Empty;
    }

    // Decode first, then let the HTML parser see the decoded markup.
    var document = new HtmlDocument();
    document.LoadHtml(WebUtility.HtmlDecode(content));

    // Keep text nodes only, skip script/style bodies, drop blank fragments.
    var fragments =
        from node in document.DocumentNode.Descendants()
        where node.NodeType == HtmlNodeType.Text
        where node.ParentNode.Name != "script"
        where node.ParentNode.Name != "style"
        where node.InnerText.Trim().Length > 0
        select node.InnerText;

    return string.Join(" ", fragments);
}
|
||||
|
||||
private void AnalyzeSearchContent(SearchContent searchContent, Dictionary<string, string> siteSettings)
|
||||
{
|
||||
var ignoreWords = IgnoreWords.Split(',');
|
||||
@ -180,14 +210,15 @@ namespace Oqtane.Providers
|
||||
|
||||
private static Dictionary<string, int> GetWords(string content, string[] ignoreWords, int minimumWordLength)
|
||||
{
|
||||
content = FormatText(content);
|
||||
content = FormatContent(content);
|
||||
|
||||
var words = new Dictionary<string, int>();
|
||||
|
||||
if (!string.IsNullOrEmpty(content))
|
||||
{
|
||||
foreach (var word in content.Split(' '))
|
||||
foreach (var term in content.Split(' '))
|
||||
{
|
||||
var word = term.ToLower().Trim();
|
||||
if (word.Length >= minimumWordLength && !ignoreWords.Contains(word))
|
||||
{
|
||||
if (!words.ContainsKey(word))
|
||||
@ -205,48 +236,16 @@ namespace Oqtane.Providers
|
||||
return words;
|
||||
}
|
||||
|
||||
/// <summary>
/// Normalizes content before it is split into words: HTML entities are
/// resolved, the listed punctuation characters are replaced by spaces, runs
/// of spaces are collapsed to one, and the result is lower-cased and trimmed.
/// </summary>
/// <param name="text">The content to normalize; may be null or empty.</param>
/// <returns>
/// The normalized text, or <see cref="string.Empty"/> when
/// <paramref name="text"/> is null or empty.
/// </returns>
private static string FormatContent(string text)
{
    // Nothing to normalize; this also keeps the Replace calls below from
    // being invoked on a null string.
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    text = HtmlEntity.DeEntitize(text);

    // The hyphen is not in this list, so it is left in place
    // (presumably to keep hyphenated terms together - confirm with callers).
    foreach (var punctuation in ".?!,;:_()[]{}'\"/\\")
    {
        text = text.Replace(punctuation, ' ');
    }

    // Replacing punctuation can leave several consecutive spaces. Splitting
    // on spaces and discarding the empty entries collapses every run to a
    // single space, whatever its length.
    var terms = text.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);

    // ToLower (rather than an invariant variant) matches the casing call
    // applied to individual terms in GetWords.
    return string.Join(" ", terms).ToLower().Trim();
}
|
||||
|
||||
/// <summary>
/// Overwrites each text field of the given search content with the result of
/// passing its current value through <see cref="GetCleanContent"/>.
/// </summary>
/// <param name="searchContent">The search content whose text fields are rewritten in place.</param>
private void CleanSearchContent(SearchContent searchContent)
{
    // Heading fields.
    searchContent.Title = GetCleanContent(searchContent.Title);
    searchContent.Description = GetCleanContent(searchContent.Description);

    // Main text and any supplementary text.
    searchContent.Body = GetCleanContent(searchContent.Body);
    searchContent.AdditionalContent = GetCleanContent(searchContent.AdditionalContent);
}
|
||||
|
||||
/// <summary>
/// Produces a plain-text version of the supplied content: the input is
/// HTML-decoded, parsed as an HTML document, and the non-blank text nodes
/// that are not direct children of <c>script</c> or <c>style</c> elements
/// are joined with single spaces.
/// </summary>
/// <param name="content">The raw content; may be null, empty or whitespace.</param>
/// <returns>
/// The joined text, or <see cref="string.Empty"/> when the input is null,
/// empty or whitespace only.
/// </returns>
private string GetCleanContent(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return string.Empty;
    }

    // Decode first, then let the HTML parser see the decoded markup.
    var document = new HtmlDocument();
    document.LoadHtml(WebUtility.HtmlDecode(content));

    // Keep text nodes only, skip script/style bodies, drop blank fragments.
    var fragments =
        from node in document.DocumentNode.Descendants()
        where node.NodeType == HtmlNodeType.Text
        where node.ParentNode.Name != "script"
        where node.ParentNode.Name != "style"
        where node.InnerText.Trim().Length > 0
        select node.InnerText;

    return string.Join(" ", fragments);
}
|
||||
|
||||
public Task ResetIndex()
|
||||
{
|
||||
_searchContentRepository.DeleteAllSearchContent();
|
||||
|
Reference in New Issue
Block a user