improve search result performance and relevancy

This commit is contained in:
sbwalker 2024-08-08 14:11:27 -04:00
parent ef791aa22a
commit 340ef46469
4 changed files with 91 additions and 54 deletions

View File

@ -51,7 +51,7 @@ namespace Oqtane.Providers
ContentModifiedOn = searchContent.ContentModifiedOn,
SearchContentProperties = searchContent.SearchContentProperties,
Snippet = BuildSnippet(searchContent, searchQuery),
Score = CalculateScore(searchContent, searchQuery)
Score = (searchContent.Count / 100f)
};
return searchResult;
@ -99,17 +99,6 @@ namespace Oqtane.Providers
return snippet;
}
/// <summary>
/// Scores a piece of search content against the query keywords.
/// </summary>
/// <param name="searchContent">The content whose indexed words are inspected.</param>
/// <param name="searchQuery">The query supplying the raw keyword string.</param>
/// <returns>
/// The summed occurrence count of every indexed word that starts with one of the
/// query keywords, divided by 100.
/// </returns>
private float CalculateScore(SearchContent searchContent, SearchQuery searchQuery)
{
    // Fold over the keywords: each keyword contributes the total count of the
    // indexed words it prefixes (a word prefixed by several keywords counts once per keyword).
    var total = SearchUtils.GetKeywords(searchQuery.Keywords).Aggregate(
        0f,
        (runningTotal, term) => runningTotal +
            (from contentWord in searchContent.SearchContentWords
             where contentWord.SearchWord.Word.StartsWith(term)
             select contentWord.Count).Sum());

    return total / 100;
}
public Task SaveSearchContent(SearchContent searchContent, Dictionary<string, string> siteSettings)
{
// remove existing search content

View File

@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;
using Oqtane.Models;
@ -20,50 +21,82 @@ namespace Oqtane.Repository
/// <summary>
/// Returns the search content of a site that matches the supplied query. Each returned
/// row carries a <c>Count</c> value: the summed occurrence count of its indexed words
/// that start with any of the query keywords.
/// </summary>
/// <param name="searchQuery">
/// Site id, keywords and the optional entity, date-range and property filters.
/// </param>
/// <returns>The matching <see cref="SearchContent"/> rows.</returns>
/// <remarks>
/// When the query has no keywords, no word filter is applied and every content row of
/// the site (that has indexed words and passes the other filters) is returned.
/// </remarks>
public async Task<IEnumerable<SearchContent>> GetSearchContentsAsync(SearchQuery searchQuery)
{
    using var db = _dbContextFactory.CreateDbContext();

    var keywords = SearchUtils.GetKeywords(searchQuery.Keywords);

    // using dynamic SQL for query performance (this could be replaced with linq if the exact query structure can be replicated)
    // Every user-supplied value is passed as a positional parameter ({0}, {1}, ...) and never
    // concatenated into the SQL text.
    var parameters = new List<object>();
    var query = new System.Text.StringBuilder();

    // Registers a value and returns the placeholder that refers to it.
    string Parameter(object value)
    {
        parameters.Add(value);
        return "{" + (parameters.Count - 1) + "}";
    }

    // Appends "AND sc.EntityName [NOT] IN ( ... )" for a comma separated list of entity names.
    // A list that contains no names (null, empty or only separators) adds no filter, because
    // an empty "IN ( )" is not valid SQL.
    void AppendEntityFilter(string entityNames, string sqlOperator)
    {
        if (string.IsNullOrEmpty(entityNames))
        {
            return;
        }

        var entities = entityNames.Split(',', StringSplitOptions.RemoveEmptyEntries);
        if (entities.Length == 0)
        {
            return;
        }

        query.Append("AND sc.EntityName ").Append(sqlOperator).Append(" ( ");
        for (int index = 0; index < entities.Length; index++)
        {
            query.Append(index == 0 ? "" : ", ").Append(Parameter(entities[index])).Append(' ');
        }
        query.Append(" ) ");
    }

    var siteId = Parameter(searchQuery.SiteId);

    // inner query: one row per content item with the summed count of its matching words
    query.Append("SELECT sc.*, Count ");
    query.Append("FROM ( ");
    query.Append("SELECT sc.SearchContentId, SUM(Count) AS Count ");
    query.Append("FROM SearchContent sc ");
    query.Append("INNER JOIN SearchContentWord scw ON sc.SearchContentId = scw.SearchContentId ");
    query.Append("INNER JOIN SearchWord sw ON scw.SearchWordId = sw.SearchWordId ");
    query.Append("WHERE sc.SiteId = ").Append(siteId).Append(' ');
    if (keywords.Count > 0)
    {
        // prefix match: a word qualifies when it starts with any keyword
        query.Append("AND ( ");
        for (int index = 0; index < keywords.Count; index++)
        {
            query.Append(index == 0 ? "" : "OR ").Append("Word LIKE ").Append(Parameter(keywords[index] + "%")).Append(' ');
        }
        query.Append(" ) ");
    }
    query.Append("GROUP BY sc.SearchContentId ");
    query.Append(") AS Scores ");

    // outer query: join the scores back to the content rows and apply the remaining filters
    query.Append("INNER JOIN SearchContent sc ON sc.SearchContentId = Scores.SearchContentId ");
    query.Append("WHERE sc.SiteId = ").Append(siteId).Append(' ');

    AppendEntityFilter(searchQuery.IncludeEntities, "IN");
    AppendEntityFilter(searchQuery.ExcludeEntities, "NOT IN");

    // The date bounds are compared through their string form, which ignores sub-second precision.
    // NOTE(review): presumably so a MinValue/MaxValue that lost precision in transit still counts as "not set" - confirm.
    if (searchQuery.FromDate.ToString() != DateTime.MinValue.ToString())
    {
        query.Append("AND sc.ContentModifiedOn >= ").Append(Parameter(searchQuery.FromDate)).Append(' ');
    }
    if (searchQuery.ToDate.ToString() != DateTime.MaxValue.ToString())
    {
        query.Append("AND sc.ContentModifiedOn <= ").Append(Parameter(searchQuery.ToDate)).Append(' ');
    }

    if (searchQuery.Properties != null)
    {
        // One EXISTS per requested property, so content must carry ALL requested name/value pairs.
        // A single join row can never satisfy two different names at once, and the column that
        // holds the property name is Name (not Key).
        foreach (var property in searchQuery.Properties)
        {
            query.Append("AND EXISTS ( SELECT 1 FROM SearchContentProperty scp ");
            query.Append("WHERE scp.SearchContentId = sc.SearchContentId ");
            query.Append("AND scp.Name = ").Append(Parameter(property.Key)).Append(' ');
            query.Append("AND scp.Value = ").Append(Parameter(property.Value)).Append(" ) ");
        }
    }

    // FromSql converts the positional placeholders into database parameters; the rows are
    // read-only, so change tracking is switched off.
    return await db.SearchContent
        .FromSql(FormattableStringFactory.Create(query.ToString(), parameters.ToArray()))
        .AsNoTracking()
        .ToListAsync();
}
public SearchContent AddSearchContent(SearchContent searchContent)

View File

@ -37,12 +37,29 @@ namespace Oqtane.Services
var searchProvider = GetSearchProvider(searchQuery.SiteId);
var searchResults = await searchProvider.GetSearchResultsAsync(searchQuery);
var totalResults = 0;
// security trim results
var results = searchResults.Where(item => HasViewPermission(item, searchQuery))
.OrderBy(item => item.Url).ThenByDescending(item => item.Score);
// trim results
var results = searchResults.Where(i => HasViewPermission(i, searchQuery))
.OrderBy(i => i.Url).ThenByDescending(i => i.Score)
.DistinctBy(i => i.Url);
// aggregate by Url
results.GroupBy(group => group.Url)
.Select(result => new SearchResult
{
SearchContentId = result.First().SearchContentId,
SiteId = result.First().SiteId,
EntityName = result.First().EntityName,
EntityId = result.First().EntityId,
Title = result.First().Title,
Description = result.First().Description,
Body = result.First().Body,
Url = result.First().Url,
Permissions = result.First().Permissions,
ContentModifiedBy = result.First().ContentModifiedBy,
ContentModifiedOn = result.First().ContentModifiedOn,
SearchContentProperties = result.First().SearchContentProperties,
Snippet = result.First().Snippet,
Score = result.Sum(group => group.Score) // recalculate score
});
// sort results
if (searchQuery.SortOrder == SearchSortOrder.Descending)
@ -76,12 +93,10 @@ namespace Oqtane.Services
}
}
totalResults = results.Count();
return new SearchResults
{
Results = results.Skip(searchQuery.PageIndex * searchQuery.PageSize).Take(searchQuery.PageSize).ToList(),
TotalResults = totalResults
TotalResults = results.Count()
};
}

View File

@ -31,21 +31,21 @@ namespace Oqtane.Models
public string AdditionalContent { get; set; }
[NotMapped]
public bool IsDeleted { get; set; }
public List<SearchContentProperty> SearchContentProperties { get; set; }
public DateTime CreatedOn { get; set; }
public int Count { get; set; } // only populated for queries
public List<SearchContentProperty> SearchContentProperties { get; set; } // only used during updates
[NotMapped]
public int TenantId { get; set; }
public bool IsDeleted { get; set; } // only used during updates
[NotMapped]
public int TenantId { get; set; } // only used during updates
[NotMapped]
public string UniqueKey => $"{TenantId}:{SiteId}:{EntityName}:{EntityId}";
public List<SearchContentWord> SearchContentWords { get; set; }
// constructors
public SearchContent() { }