return the words count to calculate the ranking.
This commit is contained in:
@ -18,10 +18,6 @@ namespace Oqtane.Providers
|
|||||||
{
|
{
|
||||||
private readonly ISearchContentRepository _searchContentRepository;
|
private readonly ISearchContentRepository _searchContentRepository;
|
||||||
|
|
||||||
private const float TitleBoost = 100f;
|
|
||||||
private const float DescriptionBoost = 10f;
|
|
||||||
private const float BodyBoost = 10f;
|
|
||||||
private const float AdditionalContentBoost = 5f;
|
|
||||||
private const string IgnoreWords = "the,be,to,of,and,a,i,in,that,have,it,for,not,on,with,he,as,you,do,at,this,but,his,by,from,they,we,say,her,she,or,an,will,my,one,all,would,there,their,what,so,up,out,if,about,who,get,which,go,me,when,make,can,like,time,no,just,him,know,take,people,into,year,your,good,some,could,them,see,other,than,then,now,look,only,come,its,over,think,also,back,after,use,two,how,our,work,first,well,way,even,new,want,because,any,these,give,day,most,us";
|
private const string IgnoreWords = "the,be,to,of,and,a,i,in,that,have,it,for,not,on,with,he,as,you,do,at,this,but,his,by,from,they,we,say,her,she,or,an,will,my,one,all,would,there,their,what,so,up,out,if,about,who,get,which,go,me,when,make,can,like,time,no,just,him,know,take,people,into,year,your,good,some,could,them,see,other,than,then,now,look,only,come,its,over,think,also,back,after,use,two,how,our,work,first,well,way,even,new,want,because,any,these,give,day,most,us";
|
||||||
private const int WordMinLength = 3;
|
private const int WordMinLength = 3;
|
||||||
public string Name => Constants.DefaultSearchProviderName;
|
public string Name => Constants.DefaultSearchProviderName;
|
||||||
@ -55,6 +51,9 @@ namespace Oqtane.Providers
|
|||||||
//remove exist document
|
//remove exist document
|
||||||
_searchContentRepository.DeleteSearchContent(searchContent.EntityName, searchContent.EntityId);
|
_searchContentRepository.DeleteSearchContent(searchContent.EntityName, searchContent.EntityId);
|
||||||
|
|
||||||
|
//clean the search content to remove html tags
|
||||||
|
CleanSearchContent(searchContent);
|
||||||
|
|
||||||
_searchContentRepository.AddSearchContent(searchContent);
|
_searchContentRepository.AddSearchContent(searchContent);
|
||||||
|
|
||||||
//save the index words
|
//save the index words
|
||||||
@ -152,10 +151,7 @@ namespace Oqtane.Providers
|
|||||||
var score = 0f;
|
var score = 0f;
|
||||||
foreach (var keyword in SearchUtils.GetKeywordsList(searchQuery.Keywords))
|
foreach (var keyword in SearchUtils.GetKeywordsList(searchQuery.Keywords))
|
||||||
{
|
{
|
||||||
score += Regex.Matches(searchContent.Title, keyword, RegexOptions.IgnoreCase).Count * TitleBoost;
|
score += searchContent.Words.Where(i => i.WordSource.Word.StartsWith(keyword)).Sum(i => i.Count);
|
||||||
score += Regex.Matches(searchContent.Description, keyword, RegexOptions.IgnoreCase).Count * DescriptionBoost;
|
|
||||||
score += Regex.Matches(searchContent.Body, keyword, RegexOptions.IgnoreCase).Count * BodyBoost;
|
|
||||||
score += Regex.Matches(searchContent.AdditionalContent, keyword, RegexOptions.IgnoreCase).Count * AdditionalContentBoost;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return score / 100;
|
return score / 100;
|
||||||
@ -241,37 +237,24 @@ namespace Oqtane.Providers
|
|||||||
|
|
||||||
private static Dictionary<string, int> GetWords(string content, int minLength)
|
private static Dictionary<string, int> GetWords(string content, int minLength)
|
||||||
{
|
{
|
||||||
content = WebUtility.HtmlDecode(content);
|
content = FormatText(content);
|
||||||
|
|
||||||
var words = new Dictionary<string, int>();
|
var words = new Dictionary<string, int>();
|
||||||
var ignoreWords = IgnoreWords.Split(',');
|
var ignoreWords = IgnoreWords.Split(',');
|
||||||
|
|
||||||
var page = new HtmlDocument();
|
if (!string.IsNullOrEmpty(content))
|
||||||
page.LoadHtml(content);
|
|
||||||
|
|
||||||
var phrases = page.DocumentNode.Descendants().Where(i =>
|
|
||||||
i.NodeType == HtmlNodeType.Text &&
|
|
||||||
i.ParentNode.Name != "script" &&
|
|
||||||
i.ParentNode.Name != "style" &&
|
|
||||||
!string.IsNullOrEmpty(i.InnerText.Trim())
|
|
||||||
).Select(i => FormatText(i.InnerText));
|
|
||||||
|
|
||||||
foreach (var phrase in phrases)
|
|
||||||
{
|
{
|
||||||
if (!string.IsNullOrEmpty(phrase))
|
foreach (var word in content.Split(' '))
|
||||||
{
|
{
|
||||||
foreach (var word in phrase.Split(' '))
|
if (word.Length >= minLength && !ignoreWords.Contains(word))
|
||||||
{
|
{
|
||||||
if (word.Length >= minLength && !ignoreWords.Contains(word))
|
if (!words.ContainsKey(word))
|
||||||
{
|
{
|
||||||
if (!words.ContainsKey(word))
|
words.Add(word, 1);
|
||||||
{
|
}
|
||||||
words.Add(word, 1);
|
else
|
||||||
}
|
{
|
||||||
else
|
words[word] += 1;
|
||||||
{
|
|
||||||
words[word] += 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -288,8 +271,38 @@ namespace Oqtane.Providers
|
|||||||
text = text.Replace(punctuation, ' ');
|
text = text.Replace(punctuation, ' ');
|
||||||
}
|
}
|
||||||
text = text.Replace(" ", " ").ToLower().Trim();
|
text = text.Replace(" ", " ").ToLower().Trim();
|
||||||
return text;
|
|
||||||
|
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void CleanSearchContent(SearchContent searchContent)
|
||||||
|
{
|
||||||
|
searchContent.Title = GetCleanContent(searchContent.Title);
|
||||||
|
searchContent.Description = GetCleanContent(searchContent.Description);
|
||||||
|
searchContent.Body = GetCleanContent(searchContent.Body);
|
||||||
|
searchContent.AdditionalContent = GetCleanContent(searchContent.AdditionalContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
private string GetCleanContent(string content)
|
||||||
|
{
|
||||||
|
if (string.IsNullOrWhiteSpace(content))
|
||||||
|
{
|
||||||
|
return string.Empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
content = WebUtility.HtmlDecode(content);
|
||||||
|
|
||||||
|
var page = new HtmlDocument();
|
||||||
|
page.LoadHtml(content);
|
||||||
|
|
||||||
|
var phrases = page.DocumentNode.Descendants().Where(i =>
|
||||||
|
i.NodeType == HtmlNodeType.Text &&
|
||||||
|
i.ParentNode.Name != "script" &&
|
||||||
|
i.ParentNode.Name != "style" &&
|
||||||
|
!string.IsNullOrEmpty(i.InnerText.Trim())
|
||||||
|
).Select(i => i.InnerText);
|
||||||
|
|
||||||
|
return string.Join(" ", phrases);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,8 @@ namespace Oqtane.Repository
|
|||||||
using var db = _dbContextFactory.CreateDbContext();
|
using var db = _dbContextFactory.CreateDbContext();
|
||||||
var searchContentList = db.SearchContent.AsNoTracking()
|
var searchContentList = db.SearchContent.AsNoTracking()
|
||||||
.Include(i => i.Properties)
|
.Include(i => i.Properties)
|
||||||
|
.Include(i => i.Words)
|
||||||
|
.ThenInclude(w => w.WordSource)
|
||||||
.Where(i => i.SiteId == searchQuery.SiteId && i.IsActive);
|
.Where(i => i.SiteId == searchQuery.SiteId && i.IsActive);
|
||||||
|
|
||||||
if (searchQuery.EntityNames != null && searchQuery.EntityNames.Any())
|
if (searchQuery.EntityNames != null && searchQuery.EntityNames.Any())
|
||||||
|
@ -186,7 +186,6 @@ namespace Oqtane.Services
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
CleanSearchContent(searchContent);
|
|
||||||
searchProvider.SaveSearchContent(searchContent);
|
searchProvider.SaveSearchContent(searchContent);
|
||||||
}
|
}
|
||||||
catch(Exception ex)
|
catch(Exception ex)
|
||||||
@ -231,35 +230,5 @@ namespace Oqtane.Services
|
|||||||
|
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void CleanSearchContent(SearchContent searchContent)
|
|
||||||
{
|
|
||||||
searchContent.Title = GetCleanContent(searchContent.Title);
|
|
||||||
searchContent.Description = GetCleanContent(searchContent.Description);
|
|
||||||
searchContent.Body = GetCleanContent(searchContent.Body);
|
|
||||||
searchContent.AdditionalContent = GetCleanContent(searchContent.AdditionalContent);
|
|
||||||
}
|
|
||||||
|
|
||||||
private string GetCleanContent(string content)
|
|
||||||
{
|
|
||||||
if(string.IsNullOrWhiteSpace(content))
|
|
||||||
{
|
|
||||||
return string.Empty;
|
|
||||||
}
|
|
||||||
|
|
||||||
content = WebUtility.HtmlDecode(content);
|
|
||||||
|
|
||||||
var page = new HtmlDocument();
|
|
||||||
page.LoadHtml(content);
|
|
||||||
|
|
||||||
var phrases = page.DocumentNode.Descendants().Where(i =>
|
|
||||||
i.NodeType == HtmlNodeType.Text &&
|
|
||||||
i.ParentNode.Name != "script" &&
|
|
||||||
i.ParentNode.Name != "style" &&
|
|
||||||
!string.IsNullOrEmpty(i.InnerText.Trim())
|
|
||||||
).Select(i => i.InnerText);
|
|
||||||
|
|
||||||
return string.Join(" ", phrases);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ namespace Oqtane.Shared
|
|||||||
{
|
{
|
||||||
if (!string.IsNullOrWhiteSpace(keyword.Trim()))
|
if (!string.IsNullOrWhiteSpace(keyword.Trim()))
|
||||||
{
|
{
|
||||||
keywordsList.Add(keyword.Trim());
|
keywordsList.Add(keyword.Trim().ToLower());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user