Lucene is an elegant solution for adding content indexing and search to a .NET application.
After doing a few Google searches and also studying this excellent basic sample, I wrote a simple implementation which automatically indexes entities implementing ISearchable when they are saved, and offers a simple way to search the website content.
Here it is:
- in the application infrastructure:
- an interface and a general class implementing the interface:
/// <summary>
/// Contract for anything that can be stored in and read back from the search index.
/// </summary>
public interface ISearchable
{
/// <summary>Key of the item in the index; stored untokenized in the "id" field and used to replace an existing document.</summary>
string Identifier { get; }
/// <summary>Text stored in the tokenized "name" field, which is the default field for searches.</summary>
string Name { get; }
/// <summary>Additional text stored in the tokenized "details" field.</summary>
string Details { get; }
/// <summary>Returns the items that should be indexed when this item is saved.</summary>
IEnumerable<ISearchable> GetSearchableList();
/// <summary>Marker stored untokenized in the "type" field; used to filter results by kind.</summary>
string SearchType { get; }
}
/// <summary>
/// Plain, settable implementation of <see cref="ISearchable"/>; the search
/// service uses it to hand back documents read from the index.
/// </summary>
internal class SearchableItem : ISearchable
{
    /// <summary>Value of the "id" field.</summary>
    public string Identifier { get; set; }

    /// <summary>Value of the "type" field.</summary>
    public string SearchType { get; set; }

    /// <summary>Value of the "name" field.</summary>
    public string Name { get; set; }

    /// <summary>Value of the "details" field.</summary>
    public string Details { get; set; }

    /// <summary>Yields this item only; it has no sub-items.</summary>
    public IEnumerable<ISearchable> GetSearchableList()
    {
        yield return this;
    }
}
/// <summary>
/// Typed access to application configuration values.
/// </summary>
public class ApplicationSetting
{
    /// <summary>
    /// Value of the "SearchIndexingDirectory" application setting, read from
    /// configuration on every access.
    /// </summary>
    public static string SearchIndexingDirectory
    {
        get
        {
            const string settingKey = "SearchIndexingDirectory";
            return ConfigurationManager.AppSettings[settingKey];
        }
    }
}
/// <summary>
/// Application-wide constants, grouped by area.
/// </summary>
public class Constants
{
/// <summary>Constants used by the search infrastructure.</summary>
public class Search
{
/// <summary>Separator appended after each value packed into the "details" field; also used to split the field back.</summary>
public const string SearchFieldDetailsDelimiter = " | ";
/// <summary>Search type marker for tags; Tag uses it both as its SearchType and as the prefix of its Identifier.</summary>
public const string TagSearchIdentifier = "T";
}
/// <summary>Constants used by the domain model.</summary>
public class Domain
{
/// <summary>Returned by SearchService.RetrieveIdFromIdentifier when no numeric id can be parsed.</summary>
public const int InvalidId = -1;
}
}
SearchIndexingDirectory uses an application setting from web.config, but you can replace it with a variable or anything representing the path where you want Lucene to generate its index.
/// <summary>
/// Writes <see cref="ISearchable"/> items to the Lucene index located at
/// <see cref="ApplicationSetting.SearchIndexingDirectory"/> and queries that index.
/// </summary>
public class SearchService
{
    // True once the index directory has been checked (and created if it was missing),
    // so the check is not repeated on every call.
    private static bool _indexDirectoryExists;

    /// <summary>
    /// Creates an empty index in <paramref name="indexDirectory"/> if none exists yet.
    /// </summary>
    /// <param name="indexDirectory">Path of the Lucene index directory.</param>
    private void EnsureIndexExists(string indexDirectory)
    {
        if (_indexDirectoryExists)
        {
            return;
        }

        if (!IndexReader.IndexExists(indexDirectory))
        {
            IndexWriter writer = new IndexWriter(indexDirectory, new StandardAnalyzer(), true);
            writer.Close();
        }

        // Remember the result whether the index was just created or was already there.
        _indexDirectoryExists = true;
    }

    /// <summary>
    /// Adds the given items to the index, replacing any document that has the same identifier.
    /// When several items share an identifier only the first one is indexed.
    /// </summary>
    /// <param name="listToIndex">Items to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="listToIndex"/> is null.</exception>
    public void IndexData(IEnumerable<ISearchable> listToIndex)
    {
        if (listToIndex == null)
        {
            throw new ArgumentNullException("listToIndex");
        }

        string indexDirectory = ApplicationSetting.SearchIndexingDirectory;
        EnsureIndexExists(indexDirectory);

        // Keep the first item for each identifier; the set makes the check O(1) per item.
        HashSet<string> seenIdentifiers = new HashSet<string>();
        List<ISearchable> listWithoutDuplicates = new List<ISearchable>();
        foreach (ISearchable item in listToIndex)
        {
            if (seenIdentifiers.Add(item.Identifier))
            {
                listWithoutDuplicates.Add(item);
            }
        }

        if (listWithoutDuplicates.Count == 0)
        {
            // Nothing to write, so there is no reason to open and optimize the index.
            return;
        }

        IndexModifier modifier = new IndexModifier(indexDirectory, new StandardAnalyzer(), false);
        try
        {
            foreach (ISearchable item in listWithoutDuplicates)
            {
                // Delete-then-add gives "update" semantics for an existing identifier.
                modifier.DeleteDocuments(new Term("id", item.Identifier));
                InsertDoc(modifier, item);
            }

            modifier.Optimize();
        }
        finally
        {
            // Always close the modifier, even if indexing failed part-way.
            modifier.Close();
        }
    }

    /// <summary>
    /// Builds a document with the "id", "name", "details" and "type" fields of
    /// <paramref name="dataToIndex"/> and adds it through <paramref name="modifier"/>.
    /// </summary>
    private void InsertDoc(IndexModifier modifier, ISearchable dataToIndex)
    {
        Document doc = new Document();
        doc.Add(new Field("id", dataToIndex.Identifier, Field.Store.YES, Field.Index.UN_TOKENIZED));
        // A missing name or details value is indexed as empty text instead of failing the whole batch.
        doc.Add(new Field("name", dataToIndex.Name ?? String.Empty, Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("details", dataToIndex.Details ?? String.Empty, Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("type", dataToIndex.SearchType, Field.Store.YES, Field.Index.UN_TOKENIZED));
        modifier.AddDocument(doc);
    }

    /// <summary>
    /// Packs several values into one "details" string: every value is followed by
    /// <see cref="Constants.Search.SearchFieldDetailsDelimiter"/>.
    /// </summary>
    /// <param name="values">Values to pack, in order.</param>
    /// <returns>The packed string, or an empty string when there are no values.</returns>
    public static string PrepareSearchDetailsFieldFor(params string[] values)
    {
        if (values == null || values.Length == 0)
        {
            return String.Empty;
        }

        // Joining and appending one trailing delimiter equals "value + delimiter" per value.
        return String.Join(Constants.Search.SearchFieldDetailsDelimiter, values)
               + Constants.Search.SearchFieldDetailsDelimiter;
    }

    /// <summary>
    /// Searches the "name" and "details" fields for <paramref name="searchString"/>.
    /// The text is handed to the Lucene query parser unchanged, so query syntax in it is honoured.
    /// </summary>
    /// <param name="searchString">Text (or Lucene query) to search for.</param>
    /// <returns>The matching documents; empty when the search text is null or blank.</returns>
    public IList<ISearchable> Search(string searchString)
    {
        IList<ISearchable> ret = new List<ISearchable>();
        if (searchString == null || searchString.Trim().Length == 0)
        {
            // A blank query cannot be parsed; treat it as "no results".
            return ret;
        }

        string indexDirectory = ApplicationSetting.SearchIndexingDirectory;
        // Searching before anything has been indexed should yield no hits rather than fail.
        EnsureIndexExists(indexDirectory);

        IndexSearcher searcher = new IndexSearcher(indexDirectory);
        try
        {
            QueryParser parser = new QueryParser("name", new StandardAnalyzer());
            // Field grouping "details:( ... )" applies the field to every term;
            // "details: a b" would only apply it to the first one.
            string searchQuery = "(" + searchString + ") OR details:(" + searchString + ")";
            Hits hitColl = searcher.Search(parser.Parse(searchQuery));
            for (int i = 0; i < hitColl.Length(); i++)
            {
                Document doc = hitColl.Doc(i);
                ret.Add(new SearchableItem
                {
                    Details = doc.Get("details"),
                    Identifier = doc.Get("id"),
                    Name = doc.Get("name"),
                    SearchType = doc.Get("type")
                });
            }
        }
        finally
        {
            // Release the searcher even when parsing or searching throws.
            searcher.Close();
        }

        return ret;
    }

    /// <summary>
    /// Splits a packed "details" string on <see cref="Constants.Search.SearchFieldDetailsDelimiter"/>,
    /// dropping empty entries.
    /// </summary>
    /// <param name="details">Packed details string.</param>
    /// <returns>The non-empty values; an empty array when <paramref name="details"/> is null.</returns>
    public static string[] SeparateDetailsFrom(string details)
    {
        if (details == null)
        {
            return new string[0];
        }

        return details.Split(
            new[] { Constants.Search.SearchFieldDetailsDelimiter },
            StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>
    /// Extracts the numeric id from an identifier of the form "type marker + id".
    /// </summary>
    /// <param name="input">Identifier as stored in the index.</param>
    /// <param name="searchTypeIdentifier">Type marker expected at the start of <paramref name="input"/>.</param>
    /// <returns>The parsed id, or <see cref="Constants.Domain.InvalidId"/> when no id can be parsed.</returns>
    public static int RetrieveIdFromIdentifier(string input, string searchTypeIdentifier)
    {
        if (String.IsNullOrEmpty(input))
        {
            return Constants.Domain.InvalidId;
        }

        // Strip the marker only where it is a prefix; removing every occurrence
        // would turn a malformed identifier into a plausible-looking id.
        string number = input;
        if (!String.IsNullOrEmpty(searchTypeIdentifier)
            && input.StartsWith(searchTypeIdentifier, StringComparison.Ordinal))
        {
            number = input.Substring(searchTypeIdentifier.Length);
        }

        int ret;
        if (Int32.TryParse(number, out ret))
        {
            return ret;
        }

        return Constants.Domain.InvalidId;
    }
}
- a search type visitor, which is used to select the results of a certain type from a list of search results (this can easily be replaced with simple LINQ queries):
/// <summary>
/// Visitor that collects the visited items whose <see cref="ISearchable.SearchType"/>
/// equals the type given at construction.
/// </summary>
internal class SearchTypeVisitor : IValueReturningVisitor<IList<ISearchable>, ISearchable>
{
    private readonly IList<ISearchable> _matches = new List<ISearchable>();
    private readonly string _wantedType;

    /// <param name="searchType">Search type an item must have to be collected.</param>
    public SearchTypeVisitor(string searchType)
    {
        _wantedType = searchType;
    }

    /// <summary>Collects <paramref name="item"/> when its search type matches.</summary>
    public void Visit(ISearchable item)
    {
        if (item.SearchType != _wantedType)
        {
            return;
        }

        _matches.Add(item);
    }

    /// <summary>Returns the items collected so far, in visiting order.</summary>
    public IList<ISearchable> GetResult()
    {
        return _matches;
    }
}
- in the Domain:
- in the repository, when we save an entity we are also indexing the information:
/// <summary>
/// Saves or updates the entity in the current session and, when the entity
/// implements <see cref="ISearchable"/>, indexes its searchable items.
/// </summary>
/// <param name="entity">Entity to persist.</param>
public void Save(EntityType entity)
{
    SessionHolder.Current.SaveOrUpdate(entity);

    ISearchable searchable = entity as ISearchable;
    if (searchable == null)
    {
        // Not a searchable entity: nothing to index.
        return;
    }

    _searchService.IndexData(searchable.GetSearchableList());
}
If you are using a BaseRepository or a generic repository, you will only need to write this once.
- in the entities which should be searchable, we implement ISearchable:
/// <summary>
/// Tag entity exposed to the search index through <see cref="ISearchable"/>.
/// </summary>
/// <remarks>
/// Id, TagName, TagCount and TagType are used below but declared in the part of
/// the entity that this excerpt does not show.
/// </remarks>
public class Tag : ISearchable
{
    /// <summary>Search type marker shared by all tags.</summary>
    public virtual string SearchType
    {
        get { return Constants.Search.TagSearchIdentifier; }
    }

    /// <summary>Tag marker followed by the tag's Id.</summary>
    public virtual string Identifier
    {
        get { return Constants.Search.TagSearchIdentifier + Id; }
    }

    /// <summary>The tag's name, as indexed.</summary>
    public virtual string Name
    {
        get { return TagName; }
    }

    /// <summary>
    /// Packed details: the tag count first, then the tag type as its integer value.
    /// </summary>
    public virtual string Details
    {
        get
        {
            string count = TagCount.ToString();
            string type = ((int) TagType).ToString();
            return SearchService.PrepareSearchDetailsFieldFor(count, type);
        }
    }

    /// <summary>Yields this tag only.</summary>
    public virtual IEnumerable<ISearchable> GetSearchableList()
    {
        yield return this;
    }
}
If the entity is an aggregate containing subentities which should be indexed, we can also add those in GetSearchableList:
/// <summary>
/// Yields the aggregate itself followed by each of its subentities, so that all
/// of them are indexed together.
/// </summary>
public virtual IEnumerable<ISearchable> GetSearchableList()
{
    yield return this;

    foreach (Subentity child in Subentities)
    {
        yield return child;
    }
}
- we can also implement mappers, if we want to generate entities from the search results:
/// <summary>
/// Rebuilds a <see cref="Tag"/> from a search result whose details were packed
/// as "tag count, tag type".
/// </summary>
internal class TagFromSearchableMapper : IMapper<ISearchable, Tag>
{
    /// <summary>
    /// Maps a search result to a tag.
    /// </summary>
    /// <param name="input">Search result to map.</param>
    /// <returns>A tag carrying the name, type, count and id read from the result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="FormatException">The details do not contain both a tag count and a tag type.</exception>
    public Tag MapFrom(ISearchable input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        string[] details = SearchService.SeparateDetailsFrom(input.Details ?? String.Empty);
        if (details.Length < 2)
        {
            // Fail with a message that names the offending document instead of an index-out-of-range error.
            throw new FormatException(
                "Search result '" + input.Identifier + "' does not contain the tag count and tag type in its details.");
        }

        // An unparsable count is left at 0 on purpose.
        int tagCount;
        Int32.TryParse(details[0], out tagCount);

        enumTagType tagType = (enumTagType)Enum.Parse(typeof(enumTagType), details[1]);

        return new Tag(input.Name, tagType)
        {
            TagCount = tagCount,
            Id = SearchService.RetrieveIdFromIdentifier(input.Identifier, Constants.Search.TagSearchIdentifier)
        };
    }
}
Otherwise, we will display the information directly from the list of search results.
- in the application layer, a SearchTask which can return the result (a search data transfer object) when the user runs a search:
/// <summary>
/// Application-layer entry point that runs a search and packages the result as a <see cref="SearchDTO"/>.
/// </summary>
public class SearchTask
{
    private readonly SearchService _searchService = new SearchService();

    /// <summary>
    /// Searches for <paramref name="text"/>, keeps the results whose search type is the
    /// tag marker and maps them to tags.
    /// </summary>
    /// <param name="text">Text to search for.</param>
    /// <returns>A DTO holding the matching tags.</returns>
    public SearchDTO Search(string text)
    {
        IList<ISearchable> searchResults = _searchService.Search(text);

        SearchTypeVisitor tagFilter = new SearchTypeVisitor(Constants.Search.TagSearchIdentifier);
        IEnumerable<Tag> tags = searchResults
            .GetResultOfVisitingAllItemsWith(tagFilter)
            .MapAllUsing(new TagFromSearchableMapper());

        SearchDTO dto = new SearchDTO();
        dto.Tags = tags.ToList();
        return dto;
    }
}
/// <summary>
/// Data transfer object returned by SearchTask.Search.
/// </summary>
public class SearchDTO
{
/// <summary>Tags that matched the search.</summary>
public IList<Tag> Tags { get; set; }
}
This class uses the SearchTypeVisitor to filter the tags, and maps the list of ISearchable instances into an IEnumerable<Tag> with the help of the TagFromSearchableMapper class.
GetResultOfVisitingAllItemsWith and MapAllUsing are extension methods which apply a visitor or a mapper to an entire list of items. To obtain the same result without them, just go over the list manually and apply the desired action to each of the items.
The SearchTask and the SearchDTO from this example are very simple, but they can contain more logic or information depending on the case.
- in the UI, calls to the SearchTask will bring the desired information:
// Field of the UI class: the task used to run searches.
readonly SearchTask _searchTask = new SearchTask();
// Inside the handler that receives the user's text: run the search and get the DTO to display.
SearchDTO dto = _searchTask.Search(searchText);
This solution is only a starting point, and it can surely be improved.
One of the things which should change is the fact that the domain contains information about the search infrastructure, which is not a very good thing. A better place for this information would be DTO classes in the application layer.
Maybe I will talk more about this in a future post.