mirror of
https://github.com/Flow-Launcher/Flow.Launcher.git
synced 2026-03-11 08:54:32 +00:00
357 lines
16 KiB
C#
357 lines
16 KiB
C#
using Flow.Launcher.Plugin.SharedModels;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
|
|
namespace Flow.Launcher.Infrastructure
|
|
{
|
|
public class StringMatcher
|
|
{
|
|
/// <summary>
/// Options handed to the three-argument FuzzyMatch overload when the caller uses the two-argument one.
/// </summary>
private readonly MatchOption _defaultMatchOption = new();

/// <summary>
/// Precision setting passed into every <see cref="MatchResult"/> this matcher creates; its integer value
/// is also the minimum score an acronym match must reach to be returned.
/// </summary>
public SearchPrecisionScore UserSettingSearchPrecision { get; set; }

/// <summary>
/// Optional translator. When it is non-null and reports that the query should be translated,
/// FuzzyMatch replaces the compare string with the translated text. May be null.
/// </summary>
private readonly IAlphabet _alphabet;
|
|
|
|
/// <summary>
/// Creates a matcher.
/// </summary>
/// <param name="alphabet">
/// Optional translator stored for later use by FuzzyMatch; when null, no translation is attempted.
/// </param>
public StringMatcher(IAlphabet alphabet = null) => _alphabet = alphabet;
|
|
|
|
/// <summary>
/// Shared matcher dereferenced by the static FuzzySearch helper. The property has no initializer and
/// its setter is internal, so it has to be assigned before FuzzySearch is called.
/// </summary>
public static StringMatcher Instance { get; internal set; }
|
|
|
|
/// <summary>
/// Static convenience wrapper that forwards to the two-argument FuzzyMatch on <see cref="Instance"/>.
/// </summary>
/// <param name="query">Text typed by the user.</param>
/// <param name="stringToCompare">Candidate text the query is matched against.</param>
/// <returns>Whatever <see cref="Instance"/>'s FuzzyMatch returns for the same arguments.</returns>
public static MatchResult FuzzySearch(string query, string stringToCompare)
    => Instance.FuzzyMatch(query, stringToCompare);
|
|
|
|
/// <summary>
/// Matches <paramref name="query"/> against <paramref name="stringToCompare"/> using this matcher's
/// default <see cref="MatchOption"/>.
/// </summary>
/// <param name="query">Text typed by the user.</param>
/// <param name="stringToCompare">Candidate text the query is matched against.</param>
/// <returns>The result of the three-argument overload called with the default options.</returns>
public MatchResult FuzzyMatch(string query, string stringToCompare)
    => FuzzyMatch(query, stringToCompare, _defaultMatchOption);
|
|
|
|
/// <summary>
/// Current method has two parts, Acronym Match and Fuzzy Search:
///
/// Acronym Match:
/// Characters listed below will be considered as acronyms
/// 1. Character on index 0
/// 2. Character appears after a space
/// 3. Character that is UpperCase
/// 4. Character that is number
///
/// Acronym Match will succeed when all query characters match with acronyms in stringToCompare.
/// If any of the characters in the query isn't matched with stringToCompare, Acronym Match will fail.
/// Score will be calculated based on the percentage of all query characters matched with total acronyms in stringToCompare.
///
/// Fuzzy Search:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
/// <param name="query">Text typed by the user; it is trimmed before matching.</param>
/// <param name="stringToCompare">Candidate text the query is matched against.</param>
/// <param name="opt">Match options; <c>IgnoreCase</c> decides whether both strings are lower-cased first.</param>
/// <returns>
/// A <see cref="MatchResult"/> created with <c>false</c> when <paramref name="stringToCompare"/> is null or empty,
/// when <paramref name="query"/> is null, empty or whitespace-only, or when neither the acronym match nor the
/// fuzzy search succeeds. Otherwise a result created with <c>true</c>, the matched indices (mapped back to the
/// untranslated string when a translation was applied) and the computed score.
/// </returns>
public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
{
    // The query is checked with IsNullOrWhiteSpace rather than IsNullOrEmpty: a whitespace-only query
    // trims to "" and splits into zero substrings, which would make querySubstrings[0] below throw.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
        return new MatchResult(false, UserSettingSearchPrecision);

    query = query.Trim();
    TranslationMapping translationMapping = null;
    if (_alphabet is not null && _alphabet.ShouldTranslate(query))
    {
        // We assume that if a query can be translated (containing characters of a language, like Chinese)
        // it actually means user doesn't want it to be translated to English letters.
        (stringToCompare, translationMapping) = _alphabet.Translate(stringToCompare);
    }

    var currentAcronymQueryIndex = 0;
    var acronymMatchData = new List<int>();
    int acronymsTotalCount = 0;
    int acronymsMatched = 0;

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();
    List<int> spaceIndices = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        // If acronyms matching successfully finished, this gets the remaining not matched acronyms for score calculation
        if (currentAcronymQueryIndex >= query.Length && acronymsMatched == query.Length)
        {
            if (IsAcronymCount(stringToCompare, compareStringIndex))
                acronymsTotalCount++;
            continue;
        }

        // Guard for the queryWithoutCase[currentAcronymQueryIndex] access below. The original condition was
        // "A || (A && allQuerySubstringsMatched)", which is logically just "A".
        if (currentAcronymQueryIndex >= query.Length)
            break;

        // To maintain a list of indices which correspond to spaces in the string to compare
        // To populate the list only for the first query substring
        if (fullStringToCompareWithoutCase[compareStringIndex] == ' ' && currentQuerySubstringIndex == 0)
            spaceIndices.Add(compareStringIndex);

        // Acronym Match
        if (IsAcronym(stringToCompare, compareStringIndex))
        {
            if (fullStringToCompareWithoutCase[compareStringIndex] ==
                queryWithoutCase[currentAcronymQueryIndex])
            {
                acronymMatchData.Add(compareStringIndex);
                acronymsMatched++;

                currentAcronymQueryIndex++;
            }
        }

        if (IsAcronymCount(stringToCompare, compareStringIndex))
            acronymsTotalCount++;

        if (allQuerySubstringsMatched || fullStringToCompareWithoutCase[compareStringIndex] !=
            currentQuerySubstring[currentQuerySubstringCharacterIndex])
        {
            matchFoundInPreviousLoop = false;

            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex,
                    fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex,
                    firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString =
                matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched =
                AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);

            if (allQuerySubstringsMatched)
                continue;

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // return acronym match if all query char matched
    if (acronymsMatched > 0 && acronymsMatched == query.Length)
    {
        int acronymScore = acronymsMatched * 100 / acronymsTotalCount;

        if (acronymScore >= (int)UserSettingSearchPrecision)
        {
            acronymMatchData = acronymMatchData.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
            return new MatchResult(true, UserSettingSearchPrecision, acronymMatchData, acronymScore);
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);

        // firstMatchIndex - nearestSpaceIndex - 1 is to set the firstIndex as the index of the first matched char
        // preceded by a space e.g. 'world' matching 'hello world' firstIndex would be 0 not 6
        // giving more weight than 'we or donald' by allowing the distance calculation to treat the starting position at after the space.
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, spaceIndices,
            lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

        var resultList = indexList.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
        return new MatchResult(true, UserSettingSearchPrecision, resultList, score);
    }

    return new MatchResult(false, UserSettingSearchPrecision);
}
|
|
|
|
/// <summary>
/// Tells whether the character at <paramref name="compareStringIndex"/> may be matched as an acronym:
/// true when either IsAcronymChar or IsAcronymNumber accepts it.
/// </summary>
/// <param name="stringToCompare">String being inspected (original casing).</param>
/// <param name="compareStringIndex">Index of the character to test.</param>
private bool IsAcronym(string stringToCompare, int compareStringIndex)
    => IsAcronymChar(stringToCompare, compareStringIndex)
       || IsAcronymNumber(stringToCompare, compareStringIndex);
|
|
|
|
// When counting acronyms, treat a set of numbers as one acronym ie. Visual 2019 as 2 acronyms instead of 5.
// A character counts when IsAcronymChar accepts it, or when IsAcronymNumber accepts it and it sits at
// index 0 or directly after a whitespace character.
private bool IsAcronymCount(string stringToCompare, int compareStringIndex)
{
    var countsAsChar = IsAcronymChar(stringToCompare, compareStringIndex);

    return countsAsChar
           || (IsAcronymNumber(stringToCompare, compareStringIndex)
               && (compareStringIndex == 0 || char.IsWhiteSpace(stringToCompare[compareStringIndex - 1])));
}
|
|
|
|
/// <summary>
/// Tells whether the character at <paramref name="compareStringIndex"/> is an acronym position:
/// an upper-case character, the first character of the string, or a character directly after whitespace.
/// </summary>
/// <param name="stringToCompare">String being inspected (original casing).</param>
/// <param name="compareStringIndex">Index of the character to test.</param>
private bool IsAcronymChar(string stringToCompare, int compareStringIndex)
{
    if (char.IsUpper(stringToCompare[compareStringIndex]))
        return true;

    // 0 index means char is the start of the compare string, which is an acronym
    if (compareStringIndex == 0)
        return true;

    return char.IsWhiteSpace(stringToCompare[compareStringIndex - 1]);
}
|
|
|
|
/// <summary>
/// Tells whether the character at <paramref name="compareStringIndex"/> is an ASCII decimal digit
/// ('0' through '9').
/// </summary>
/// <param name="stringToCompare">String being inspected.</param>
/// <param name="compareStringIndex">Index of the character to test.</param>
/// <returns>True for '0'..'9', false for every other character.</returns>
private bool IsAcronymNumber(string stringToCompare, int compareStringIndex)
    // Compare against the character literals '0' and '9'. Comparing against the integers 0 and 9
    // tests the code points U+0000..U+0009 (control characters) and never matches a digit.
    => stringToCompare[compareStringIndex] is >= '0' and <= '9';
|
|
|
|
// To get the index of the closest space which precedes the first matching index.
// Returns -1 when no entry in front of the first entry that is >= firstMatchIndex qualifies.
private int CalculateClosestSpaceIndex(List<int> spaceIndices, int firstMatchIndex)
{
    var nearest = -1;

    // spaceIndices should be ordered asc, so scanning stops at the first entry that is not
    // strictly before firstMatchIndex
    for (var position = 0;
         position < spaceIndices.Count && spaceIndices[position] < firstMatchIndex;
         position++)
    {
        nearest = spaceIndices[position];
    }

    return nearest;
}
|
|
|
|
/// <summary>
/// Checks whether the first <paramref name="currentQuerySubstringCharacterIndex"/> characters of
/// <paramref name="currentQuerySubstring"/> equal the characters of
/// <paramref name="fullStringToCompareWithoutCase"/> starting at <paramref name="startIndexToVerify"/>.
/// </summary>
/// <returns>True when every compared pair is equal (also when there is nothing to compare).</returns>
private static bool AllPreviousCharsMatched(int startIndexToVerify, int currentQuerySubstringCharacterIndex,
    string fullStringToCompareWithoutCase, string currentQuerySubstring)
{
    var everyCharMatches = true;
    var offset = 0;

    // Every position is visited even after a mismatch, so the result is just the AND of all comparisons.
    while (offset < currentQuerySubstringCharacterIndex)
    {
        everyCharMatches &= fullStringToCompareWithoutCase[startIndexToVerify + offset] ==
                            currentQuerySubstring[offset];
        offset++;
    }

    return everyCharMatches;
}
|
|
|
|
/// <summary>
/// Builds a new index list in which everything from <paramref name="firstMatchIndexInWord"/> onwards is
/// replaced by the consecutive run of <paramref name="currentQuerySubstringCharacterIndex"/> indices
/// starting at <paramref name="startIndexToVerify"/>.
/// </summary>
/// <remarks>
/// Entries greater than or equal to <paramref name="firstMatchIndexInWord"/> are also removed from
/// <paramref name="indexList"/> itself.
/// </remarks>
/// <returns>A new list: the surviving entries followed by the consecutive run.</returns>
private static List<int> GetUpdatedIndexList(int startIndexToVerify, int currentQuerySubstringCharacterIndex,
    int firstMatchIndexInWord, List<int> indexList)
{
    // Drop the indices recorded for the current word; they are superseded by the contiguous run below.
    indexList.RemoveAll(index => index >= firstMatchIndexInWord);

    var rebuilt = new List<int>(indexList);

    var offset = 0;
    while (offset < currentQuerySubstringCharacterIndex)
    {
        rebuilt.Add(startIndexToVerify + offset);
        offset++;
    }

    return rebuilt;
}
|
|
|
|
/// <summary>
/// Tells whether the substring cursor has moved past the last query substring.
/// </summary>
/// <param name="currentQuerySubstringIndex">Index of the next query substring to process.</param>
/// <param name="querySubstringsLength">Total number of query substrings.</param>
// Acronym won't utilize the substring to match
private static bool AllQuerySubstringsMatched(int currentQuerySubstringIndex, int querySubstringsLength)
    => !(currentQuerySubstringIndex < querySubstringsLength);
|
|
|
|
/// <summary>
/// Computes the fuzzy-search score from where the match starts, how far it spreads, how much longer the
/// compared string is than the query, and whether every query substring was found contiguously.
/// </summary>
/// <param name="query">The trimmed query.</param>
/// <param name="stringToCompare">The string the query was matched against.</param>
/// <param name="firstIndex">Start of the match, measured from the closest preceding space.</param>
/// <param name="spaceIndices">Indices of spaces recorded while matching the first query substring.</param>
/// <param name="matchLen">Distance between the first and the last matched character.</param>
/// <param name="allSubstringsContainedInCompareString">Whether every query substring matched as a contiguous run.</param>
/// <returns>The score; larger means a better match.</returns>
private static int CalculateSearchScore(string query, string stringToCompare, int firstIndex, List<int> spaceIndices, int matchLen,
    bool allSubstringsContainedInCompareString)
{
    // A match found near the beginning of a string is scored more than a match found near the end
    // A match is scored more if the characters in the patterns are closer to each other,
    // while the score is lower if they are more spread out
    var spread = (1 + firstIndex) + (matchLen + 1);
    var total = 100 * (query.Length + 1) / spread;

    // 'world' scores firstIndex 0 against both 'hello world' and 'world hello' because the distance is
    // measured from the preceding space; deducting the number of recorded spaces keeps 'world hello'
    // slightly ahead of 'hello world'.
    if (firstIndex == 0 && allSubstringsContainedInCompareString)
        total -= spaceIndices.Count;

    // A match with less characters assigning more weights
    var extraLength = stringToCompare.Length - query.Length;
    total += extraLength switch
    {
        < 5 => 20,
        < 10 => 10,
        _ => 0
    };

    if (!allSubstringsContainedInCompareString)
        return total;

    // 10 per char is too much for long query strings, this threshold is to avoid where long strings
    // will override the other results too much
    const int threshold = 4;
    var nonWhitespaceCount = query.Count(c => !char.IsWhiteSpace(c));
    var contiguousBonus = nonWhitespaceCount <= threshold
        ? nonWhitespaceCount * 10
        : threshold * 10 + (nonWhitespaceCount - threshold) * 5;

    return total + contiguousBonus;
}
|
|
}
|
|
|
|
/// <summary>
/// Options that control how StringMatcher compares a query with a candidate string.
/// </summary>
public class MatchOption
{
    /// <summary>
    /// When true (the default), the query and the compared string are both lower-cased before matching.
    /// </summary>
    public bool IgnoreCase { get; set; } = true;
}
|
|
}
|