Skip to content

Commit

Permalink
Merge pull request #1 from ilya-g/APIchange
Browse files Browse the repository at this point in the history
Api reworked
  • Loading branch information
ilya-g committed Feb 9, 2015
2 parents 3be2360 + df4218f commit 36afa99
Show file tree
Hide file tree
Showing 20 changed files with 555 additions and 433 deletions.
45 changes: 36 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

This library provides a way to index text documents by the words they contain.

## Usage
## Basic usage

First you should create the `Indexer` instance
First you should create the `IndexerSet` instance

````C#
var indexer = Indexer.Create();
var indexerSet = IndexerSet.Create();
````

An `IndexerCreationOptions` instance with the following additional options can be specified as an argument to
Expand All @@ -16,23 +16,50 @@ this method:
- string comparison type used to compare words in index
- `IStreamParser` or `ILineParser` that defines the way words would be extracted from documents content

Then you can add one or several document sources to obtain documents
Then you can add one or more document sources to obtain documents
from. There are two standard implementations of document source:
`SingleFileDocumentSource` and `DirectoryDocumentSource`.

`````C#
indexer.AddSource(new DirectoryDocumentSource(baseDirectory, "*.cs"));
indexer.AddSource(new SingleFileDocumentSource(Path.Combine(baseDirectory, "example.txt"));
indexerSet.Add(new DirectoryDocumentSource(baseDirectory, "*.cs"));
indexerSet.Add(new SingleFileDocumentSource(Path.Combine(baseDirectory, "example.txt")));
`````

The `Indexer` provides the `Index` property which can be used then to query
The `IndexerSet` provides the `Index` property, which can then be used to query
documents from the index:

`````C#
// matches only "apple" word, returns single WordDocuments collection
var appleDocuments = indexer.Index.GetExactWord("apple");
var appleDocuments = indexerSet.Index.GetExactWord("apple");
`````

The returned instance of `WordDocuments` is a collection of `DocumentInfo`s, each pointing to the original document containing the word being searched for.

`````C#
// matches all words starting with "ban" and returns the list of WordDocuments
// for each word matched
var bananaDocuments = indexer.Index.GetWordsStartWith("ban");
var banWords = indexerSet.Index.GetWordsStartWith("ban");
`````

This query returns a sequence of `WordDocuments`, one for each matching word. You can then flatten this sequence with the `SelectMany` operation:
`var banDocuments = banWords.SelectMany(wordDocuments => wordDocuments);`

## Advanced usage

An `IIndex` instance can be used without creating an `IndexerSet`. The following example shows how to create an index and attach an `Indexer` to it:

`````C#
var options = new IndexerCreationOptions();
var index = options.CreateIndex();
var parser = options.GetDefaultStreamParser();

var indexer = new Indexer(
index,
new DirectoryDocumentSource(AppDomain.CurrentDomain.BaseDirectory, "*.txt"),
parser);
indexer.StartIndexing();
`````

## Custom document parsers

To create a custom document parser, implement the `ILineParser` or `IStreamParser` interface. The former is given the text line by line and should extract all the words it can from each individual line; an `ILineParser` implementation should not share state between calls. The latter is given a `TextReader` and can read the entire document with it.
34 changes: 17 additions & 17 deletions examples/Primitive.Text.Indexing.UI/IndexerViewModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Input;
using JetBrains.Annotations;
using Primitive.Text.Documents;
Expand All @@ -24,22 +23,23 @@ public class IndexerViewModel : INotifyPropertyChanged
public IndexerViewModel()
{
DefaultSearchPattern = new SearchPattern("*.txt");
RemoveDocumentSourceCommand = new DelegateCommand<SourceIndexingAgent>(RemoveDocumentSource);
RemoveDocumentSourceCommand = new DelegateCommand<Indexer>(RemoveDocumentSource);
SearchCommand = new DelegateCommand(ExecuteQuery);

var baseDirectory = MoveUpThroughHierarhy(new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory), 5).FullName;

Indexer = Indexer.Create(new IndexerCreationOptions() { IndexLocking = IndexLocking.ReadWrite});
Indexer.AddSource(new DirectoryDocumentSource(baseDirectory, "*.cs"), autoStartIndexing: false);
Indexer.AddSource(new DirectoryDocumentSource(baseDirectory, "*.xml"), autoStartIndexing: false);
IndexerSet = IndexerSet.Create(new IndexerCreationOptions() { IndexLocking = IndexLocking.ReadWrite });
IndexerSet.Add(new DirectoryDocumentSource(baseDirectory, "*.cs"), autoStartIndexing: false);
IndexerSet.Add(new DirectoryDocumentSource(baseDirectory, "*.xml"), autoStartIndexing: false);
}


public Indexer Indexer { get; private set; }
public IndexerSet IndexerSet { get; private set; }

public IReadOnlyList<SourceIndexingAgent> DocumentSources

public IReadOnlyList<Indexer> Indexers
{
get { return Indexer.Sources; }
get { return IndexerSet.Indexers; }
}

public SearchPattern DefaultSearchPattern { get; set; }
Expand Down Expand Up @@ -79,7 +79,7 @@ public void ExecuteQuery()
return;
}

var index = terms.Length > 1 ? Indexer.Index.Snapshot() : Indexer.Index;
var index = terms.Length > 1 ? IndexerSet.Index.Snapshot() : IndexerSet.Index;

HashSet<DocumentInfo> resultDocumentSet = null;
foreach (var term in terms)
Expand Down Expand Up @@ -128,8 +128,8 @@ private static DirectoryInfo MoveUpThroughHierarhy([NotNull] DirectoryInfo direc

public void StartIndexingAllSources()
{
foreach (var documentSourceIndexer in DocumentSources)
documentSourceIndexer.StartIndexing();
foreach (var indexer in Indexers)
indexer.StartIndexing();
}

public void AddDocumentSourcesFromPathList([NotNull] IEnumerable<string> files)
Expand All @@ -151,19 +151,19 @@ public void AddDocumentSourcesFromPathList([NotNull] IEnumerable<string> files)
Console.WriteLine(e);
continue;
}
Indexer.AddSource(documentSource);
IndexerSet.Add(documentSource);
}
OnPropertyChanged("DocumentSources");
OnPropertyChanged("Indexers");
}



private void RemoveDocumentSource(SourceIndexingAgent sourceIndexingAgent)
private void RemoveDocumentSource(Indexer indexer)
{
if (sourceIndexingAgent != null)
if (indexer != null)
{
Indexer.RemoveSource(sourceIndexingAgent);
OnPropertyChanged("DocumentSources");
IndexerSet.Remove(indexer);
OnPropertyChanged("Indexers");
ExecuteQuery();
}
}
Expand Down
6 changes: 3 additions & 3 deletions examples/Primitive.Text.Indexing.UI/MainWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<TextBlock VerticalAlignment="Center">Drop files or directories to this window to include them in index</TextBlock>
</DockPanel>
<ListBox DockPanel.Dock="Top" Height="200"
ItemsSource="{Binding DocumentSources}">
ItemsSource="{Binding Indexers}">
<ListBox.ItemTemplate>
<DataTemplate>
<StackPanel Orientation="Vertical">
Expand All @@ -43,13 +43,13 @@
</TextBlock>
</DataTemplate>
</StackPanel.Resources>
<ContentPresenter Content="{Binding DocumentSource}" TextElement.FontSize="14" />
<ContentPresenter Content="{Binding Source}" TextElement.FontSize="14" />
<StackPanel Orientation="Horizontal">
<TextBlock Text="{Binding State}" Width="200">
<TextBlock.Style>
<Style TargetType="TextBlock">
<Style.Triggers>
<DataTrigger Binding="{Binding State}" Value="{x:Static indexing:SourceIndexingState.Failed}">
<DataTrigger Binding="{Binding State}" Value="{x:Static indexing:IndexingState.Failed}">
<Setter Property="Foreground" Value="OrangeRed" />
<Setter Property="ToolTip" Value="{Binding Error}" />
</DataTrigger>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ public abstract class DocumentSourceBase : IDocumentSource


/// <summary>
/// Extracts words to index from the <paramref name="document"/> with the specified <paramref name="streamParser"/>
/// Extracts words to index from the <paramref name="document"/> with the specified <paramref name="textParser"/>
/// </summary>
/// <param name="document">The document from this source</param>
/// <param name="streamParser">The parser to be used to extract words from the document stream</param>
/// <param name="textParser">The parser to be used to extract words from the document stream</param>
/// <returns>
/// Returns an observable sequence of document words, that being subscribed to
/// pushes all words from the document and then completes. This sequence also completes with a failure if there was
Expand All @@ -38,15 +38,15 @@ public abstract class DocumentSourceBase : IDocumentSource
/// <remarks>
/// This method can be overridden in derived classes to add some behavior to the returned observable sequence
/// </remarks>
public virtual IObservable<string> ExtractDocumentWords(DocumentInfo document, IStreamParser streamParser)
public virtual IObservable<string> ExtractDocumentWords(DocumentInfo document, ITextParser textParser)
{
EnsureOwnDocument(document);

return Observable.Using(
() => OpenDocument(document),
reader =>
reader != null
? streamParser.ExtractWords(reader)
? textParser.ExtractWords(reader)
: Observable.Empty<string>());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ public override TextReader OpenDocument(DocumentInfo document)


/// <summary>
/// Extracts words to index from the <paramref name="document"/> with the specified <paramref name="streamParser"/>
/// Extracts words to index from the <paramref name="document"/> with the specified <paramref name="textParser"/>
/// </summary>
/// <param name="document">The document from this source</param>
/// <param name="streamParser">The parser to be used to extract words from the document stream</param>
/// <param name="textParser">The parser to be used to extract words from the document stream</param>
/// <returns>
/// Returns an observable sequence of document words, that being subscribed to
/// pushes all words from the document and then completes. This sequence also completes with a failure if there was
Expand All @@ -73,10 +73,10 @@ public override TextReader OpenDocument(DocumentInfo document)
/// This override adds retry semantics in case the document file is locked or cannot be opened due to some other
/// <see cref="IOException"/>
/// </remarks>
public override IObservable<string> ExtractDocumentWords(DocumentInfo document, IStreamParser streamParser)
public override IObservable<string> ExtractDocumentWords(DocumentInfo document, ITextParser textParser)
{
return RetryOn(
base.ExtractDocumentWords(document, streamParser),
base.ExtractDocumentWords(document, textParser),
shouldRetry: e => e is IOException, retryTimes: 3, retryDelay: TimeSpan.FromSeconds(1));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ public interface IDocumentSource
IObservable<DocumentInfo> WatchForChangedDocuments();

/// <summary>
/// Extracts words to index from the <paramref name="document"/> with the specified <paramref name="streamParser"/>
/// Extracts words to index from the <paramref name="document"/> with the specified <paramref name="textParser"/>
/// </summary>
/// <param name="document">The document from this source</param>
/// <param name="streamParser">The parser to be used to extract words from the document stream</param>
/// <param name="textParser">The parser to be used to extract words from the document stream</param>
/// <returns>
/// Returns an observable sequence of document words, that being subscribed to
/// pushes all words from the document and then completes.
/// This sequence can also complete with a failure if there was an error opening or reading the document.
/// </returns>
IObservable<string> ExtractDocumentWords([NotNull] DocumentInfo document, [NotNull] IStreamParser streamParser);
IObservable<string> ExtractDocumentWords([NotNull] DocumentInfo document, [NotNull] ITextParser textParser);
}
}
4 changes: 3 additions & 1 deletion src/Primitive.Text.Indexing/Documents/WordDocuments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@ public struct WordDocuments : IGrouping<string, DocumentInfo>, IReadOnlyCollecti
private readonly string word;
private readonly IReadOnlyCollection<DocumentInfo> documents;

internal WordDocuments([CanBeNull] string word, [NotNull] IReadOnlyCollection<DocumentInfo> documents)
internal WordDocuments([NotNull] string word, [NotNull] IReadOnlyCollection<DocumentInfo> documents)
{
if (word == null) throw new ArgumentNullException("word");
if (documents == null) throw new ArgumentNullException("documents");
this.word = word;
this.documents = documents;
}
/// <summary>
/// Gets the word that all the documents have
/// </summary>
[NotNull]
public string Word { get { return word; } }


Expand Down
4 changes: 2 additions & 2 deletions src/Primitive.Text.Indexing/Indexing/IIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public interface IReadOnlyIndex
/// <see cref="WordDocuments"/> structure, containing the word that was queried
/// and the documents from the index associated with that word
/// </returns>
WordDocuments GetExactWord([CanBeNull] string word);
WordDocuments GetExactWord([NotNull] string word);

/// <summary>
/// Queries the index for all words starting with the specified <paramref name="wordBeginning"/> part
Expand Down Expand Up @@ -86,7 +86,7 @@ public interface IReadOnlyIndex
public interface IIndex : IReadOnlyIndex
{
/// <summary>
/// Creates the copy of this index, that will remain unchanged even if this instace is changed later.
/// Creates the copy of this index, that will remain unchanged even if this instance is changed later.
/// </summary>
/// <returns><see cref="IReadOnlyIndex"/> instance that holds the frozen snapshot of this index</returns>
/// <remarks>
Expand Down
Loading

0 comments on commit 36afa99

Please sign in to comment.