// csharp/ABTSoftware/SciChart.Wpf.Examples/Examples/SciChart.Examples.Demo/Search/ExampleSearchProvider.cs

// ExampleSearchProvider.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using SciChart.Core.Extensions;
using SciChart.UI.Bootstrap;

namespace SciChart.Examples.Demo.Search
{

    /// <summary>
    /// Wraps a <see cref="Guid"/> identifier. The search code in this file creates one
    /// <see cref="ExampleId"/> for every page id it returns.
    /// </summary>
    public struct ExampleId
    {
        /// <summary>The wrapped identifier.</summary>
        public Guid Id;
    }

    /// <summary>
    /// Contract for searching examples, either from raw text or from terms that were already split.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every method is declared as returning <c>IEnumerable</c> with no type argument;
    /// <c>ExampleSearchProvider</c> in this file produces <see cref="ExampleId"/> items - confirm the
    /// intended element type.
    /// </remarks>
    public interface IExampleSearchProvider
    {
        /// <summary>Searches using raw query text.</summary>
        /// <param name="text">The text to search for.</param>
        IEnumerable Query(string text);

        /// <summary>Searches using an array of terms, intended for the single-term case.</summary>
        /// <param name="terms">The query terms.</param>
        IEnumerable OneWordQuery(string[] terms);

        /// <summary>Searches using an array of several terms.</summary>
        /// <param name="terms">The query terms.</param>
        IEnumerable FreeTextQuery(string[] terms);
    }

    /// <summary>
    /// Searches examples by looking query terms up in two inverted indexes: the one returned by
    /// <c>CreateInvertedIndex.GetInvertedIndex()</c> and the one returned by
    /// <c>CreateInvertedIndex.GetCodeInvertedIndex()</c>.
    /// </summary>
    /// <remarks>
    /// Matching pages are ordered by the dot product of two vectors that have one slot per query
    /// term: the page's <c>TermFrequency</c> values and the postings'
    /// <c>InvertedDocameentFrequency</c> values.
    /// NOTE(review): the index fields and the <c>IEnumerable</c>/<c>IDictionary</c> signatures carry
    /// no type arguments in this file. They are left exactly as found because the posting type is
    /// declared elsewhere - confirm and restore them.
    /// </remarks>
    [ExportType(typeof(IExampleSearchProvider), CreateAs.Singleton)]
    public class ExampleSearchProvider : IExampleSearchProvider
    {
        // Built once instead of on every Query call.
        private static readonly Regex NonWordCharacter = new Regex(@"\W");
        private static readonly char[] TermSeparators = { ' ' };

        private readonly Dictionary _invertedIndex;
        private readonly Dictionary _codeInvertedIndex;

        /// <summary>
        /// Loads both inverted indexes from <c>CreateInvertedIndex</c>.
        /// </summary>
        public ExampleSearchProvider()
        {
            _invertedIndex = CreateInvertedIndex.GetInvertedIndex();
            _codeInvertedIndex = CreateInvertedIndex.GetCodeInvertedIndex();
        }

        /// <summary>
        /// Normalises <paramref name="text"/> (lower-case, every non-word character replaced by a
        /// space), splits it into terms and dispatches to <see cref="OneWordQuery"/> for a single
        /// term or <see cref="FreeTextQuery"/> for several.
        /// </summary>
        /// <param name="text">Raw search text; may be null or empty.</param>
        /// <returns>
        /// The matching example ids, best match first, or <c>null</c> when the text is null/empty,
        /// contains no word characters, or the single term is in neither index.
        /// </returns>
        public IEnumerable Query(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            // NOTE(review): ToLower() is culture-sensitive; presumably the index keys were
            // lower-cased the same way - confirm before switching to ToLowerInvariant().
            var normalized = NonWordCharacter.Replace(text.ToLower(), " ");

            // Drop the empty tokens produced by repeated, leading or trailing separators so that
            // input such as "line " or "line!" is still treated as a one-word query.
            var terms = normalized.Split(TermSeparators, StringSplitOptions.RemoveEmptyEntries);

            if (terms.Length > 1)
            {
                return FreeTextQuery(terms);
            }

            if (terms.Length == 1)
            {
                return OneWordQuery(terms);
            }

            return null;
        }

        /// <summary>
        /// Looks up <c>terms[0]</c> in the main index and then in the code index. Code-index pages
        /// that were already found through the main index are skipped, and the ranked code results
        /// are appended after the ranked main results.
        /// </summary>
        /// <param name="terms">Query terms; only the first element is looked up.</param>
        /// <returns>
        /// The matching example ids, or <c>null</c> when <paramref name="terms"/> is null or empty
        /// or when neither index contains the term.
        /// </returns>
        public IEnumerable OneWordQuery(string[] terms)
        {
            if (terms == null || terms.Length == 0)
            {
                return null;
            }

            var term = terms[0];
            IEnumerable<ExampleId> result = null;

            // Sets rather than lists: these collections are only used for membership tests.
            var pageIds = new HashSet<Guid>();
            if (_invertedIndex.ContainsKey(term))
            {
                foreach (var termInfo in _invertedIndex[term].TermInfos)
                {
                    pageIds.Add(termInfo.ExamplePageId);
                }

                result = RankDocuments(terms, pageIds, _invertedIndex).Select(guid => new ExampleId { Id = guid });
            }

            if (_codeInvertedIndex.ContainsKey(term))
            {
                var codePageIds = new HashSet<Guid>();
                foreach (var termInfo in _codeInvertedIndex[term].TermInfos)
                {
                    if (!pageIds.Contains(termInfo.ExamplePageId))
                    {
                        codePageIds.Add(termInfo.ExamplePageId);
                    }
                }

                var codeResults = RankDocuments(terms, codePageIds, _codeInvertedIndex).Select(guid => new ExampleId { Id = guid });
                result = result != null ? result.Concat(codeResults) : codeResults;
            }

            return result;
        }

        /// <summary>
        /// Collects every page that contains at least one of <paramref name="terms"/> according to
        /// the main index and ranks those pages against all terms. The code index is not consulted.
        /// </summary>
        /// <param name="terms">Query terms.</param>
        /// <returns>
        /// The matching example ids, best match first (possibly empty), or <c>null</c> when
        /// <paramref name="terms"/> is null or empty.
        /// </returns>
        public IEnumerable FreeTextQuery(string[] terms)
        {
            if (terms == null || terms.Length == 0)
            {
                return null;
            }

            var pageIds = new HashSet<Guid>();
            foreach (var term in terms)
            {
                if (!_invertedIndex.ContainsKey(term))
                {
                    continue;
                }

                foreach (var termInfo in _invertedIndex[term].TermInfos)
                {
                    pageIds.Add(termInfo.ExamplePageId);
                }
            }

            return RankDocuments(terms, pageIds, _invertedIndex).Select(guid => new ExampleId { Id = guid });
        }

        /// <summary>
        /// Orders the given pages by descending score, where a page's score is the dot product of
        /// its per-term <c>TermFrequency</c> vector and the query vector built from each posting's
        /// <c>InvertedDocameentFrequency</c>.
        /// </summary>
        /// <param name="terms">Query terms; terms missing from the index contribute zero.</param>
        /// <param name="examplePageIds">Pages that are allowed to appear in the result.</param>
        /// <param name="invertedIndex">Index in which the terms are looked up.</param>
        /// <returns>The ids of the pages that contain at least one term, highest score first.</returns>
        private IEnumerable RankDocuments(string[] terms, IEnumerable examplePageIds, IDictionary invertedIndex)
        {
            var queryVector = new double[terms.Length];
            var docVectors = new Dictionary<Guid, double[]>();

            for (int i = 0; i < terms.Length; i++)
            {
                var term = terms[i];
                if (!invertedIndex.ContainsKey(term))
                {
                    continue;
                }

                var posting = invertedIndex[term];

                // NOTE(review): member name kept exactly as referenced here; it looks like a typo
                // of "InvertedDocumentFrequency" but its declaration is not in this file - confirm.
                queryVector[i] = posting.InvertedDocameentFrequency;

                foreach (var termInfo in posting.TermInfos)
                {
                    var examplePageId = termInfo.ExamplePageId;
                    if (!examplePageIds.Contains(examplePageId))
                    {
                        continue;
                    }

                    double[] docVector;
                    if (!docVectors.TryGetValue(examplePageId, out docVector))
                    {
                        docVector = new double[terms.Length];
                        docVectors[examplePageId] = docVector;
                    }

                    docVector[i] = termInfo.TermFrequency;
                }
            }

            // Neither local is modified after this point, so scoring lazily while sorting gives
            // the same ordering as pre-computing a separate score table.
            return docVectors
                .OrderByDescending(pair => DotProduct(pair.Value, queryVector))
                .Select(pair => pair.Key);
        }

        /// <summary>
        /// Returns the dot product of two vectors, or 0 when their lengths differ.
        /// </summary>
        private static double DotProduct(double[] vector1, double[] vector2)
        {
            if (vector1.Length != vector2.Length)
            {
                return 0;
            }

            return vector1.Zip(vector2, (x, y) => x * y).Sum();
        }
    }
}