Flow.Launcher/Flow.Launcher.Infrastructure/PinyinAlphabet.cs

197 lines
7.4 KiB
C#
Raw Permalink Normal View History

2025-04-10 01:56:27 +00:00
using System;
using System.Collections.Concurrent;
2024-06-02 06:19:27 +00:00
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
2025-04-05 15:02:28 +00:00
using System.Text;
using System.Text.Json;
using CommunityToolkit.Mvvm.DependencyInjection;
2025-04-05 15:02:28 +00:00
using Flow.Launcher.Infrastructure.UserSettings;
using ToolGood.Words.Pinyin;
using Flow.Launcher.Infrastructure.Logger;
2020-04-21 09:12:17 +00:00
namespace Flow.Launcher.Infrastructure
{
public class PinyinAlphabet : IAlphabet
{
2025-07-13 15:29:41 +00:00
// Translations produced by BuildCacheFromContent, keyed by the original content string.
// Read by Translate and emptied by Reload.
private readonly ConcurrentDictionary<string, (string translation, TranslationMapping map)> _pinyinCache = new();
2025-04-05 15:01:09 +00:00
// Settings instance resolved from the IoC container in the constructor.
private readonly Settings _settings;
// Lookup from a full pinyin syllable to its double pinyin form for the selected schema.
// Replaced with an empty table when double pinyin is disabled or the table cannot be loaded.
private ReadOnlyDictionary<string, string> currentDoublePinyinTable;
2025-04-05 15:01:09 +00:00
/// <summary>
/// Resolves <see cref="Settings"/> from the IoC container, loads the double pinyin table and
/// subscribes to the setting changes that invalidate previously cached translations.
/// </summary>
public PinyinAlphabet()
{
    _settings = Ioc.Default.GetRequiredService<Settings>();
    LoadDoublePinyinTable();

    _settings.PropertyChanged += (sender, e) =>
    {
        switch (e.PropertyName)
        {
            case nameof(Settings.ShouldUsePinyin):
                // Nothing to refresh when pinyin is switched off: Translate bypasses the cache then.
                if (_settings.ShouldUsePinyin)
                {
                    Reload();
                }
                break;
            case nameof(Settings.UseDoublePinyin):
                // Reload in both directions. When double pinyin is switched off, the cache still
                // holds double pinyin translations; without clearing it Translate would keep
                // returning them instead of full pinyin.
                Reload();
                break;
            case nameof(Settings.DoublePinyinSchema):
                // The schema only affects translations while double pinyin is enabled.
                if (_settings.UseDoublePinyin)
                {
                    Reload();
                }
                break;
        }
    };
}
2025-06-14 14:22:25 +00:00
/// <summary>
/// Rebuilds the double pinyin table from the current settings and then discards every
/// cached translation.
/// </summary>
public void Reload()
{
    // Table first, cache second: entries built after the clear use the refreshed table.
    LoadDoublePinyinTable();
    _pinyinCache.Clear();
}
/// <summary>
/// Reads a JSON document of the form { schemaName: { fullPinyin: doublePinyin } } from
/// <paramref name="jsonStream"/> and installs the entry matching the configured schema as the
/// current double pinyin table.
/// </summary>
/// <param name="jsonStream">Stream positioned at the start of the JSON table.</param>
/// <exception cref="InvalidOperationException">The JSON deserialized to null.</exception>
/// <exception cref="ArgumentException">The configured schema has no entry in the table.</exception>
private void CreateDoublePinyinTableFromStream(Stream jsonStream)
{
    var allSchemas = JsonSerializer.Deserialize<Dictionary<string, Dictionary<string, string>>>(jsonStream);
    if (allSchemas is null)
    {
        throw new InvalidOperationException("Failed to deserialize double pinyin table: result is null");
    }

    var schemaKey = _settings.DoublePinyinSchema.ToString();
    if (allSchemas.TryGetValue(schemaKey, out var selectedSchema))
    {
        currentDoublePinyinTable = new ReadOnlyDictionary<string, string>(selectedSchema);
        return;
    }

    throw new ArgumentException($"DoublePinyinSchema '{schemaKey}' is invalid or double pinyin table is broken.");
}
/// <summary>
/// Loads the double pinyin table from Resources/double_pinyin.json under the application base
/// directory. Installs an empty table when double pinyin is disabled, or when loading fails
/// (the failure is logged and not rethrown).
/// </summary>
private void LoadDoublePinyinTable()
{
    static ReadOnlyDictionary<string, string> EmptyTable() =>
        new ReadOnlyDictionary<string, string>(new Dictionary<string, string>());

    if (!_settings.UseDoublePinyin)
    {
        currentDoublePinyinTable = EmptyTable();
        return;
    }

    var tablePath = Path.Combine(AppContext.BaseDirectory, "Resources", "double_pinyin.json");
    try
    {
        using (var tableStream = File.OpenRead(tablePath))
        {
            CreateDoublePinyinTableFromStream(tableStream);
        }
    }
    catch (System.Exception e)
    {
        // Pick the most specific message first; the arms are tested in the listed order.
        var message = e switch
        {
            FileNotFoundException => $"Double pinyin table file not found: {tablePath}",
            DirectoryNotFoundException => $"Directory not found for double pinyin table: {tablePath}",
            UnauthorizedAccessException => $"Access denied to double pinyin table: {tablePath}",
            _ => $"Failed to load double pinyin table from file: {tablePath}"
        };

        Log.Exception(nameof(PinyinAlphabet), message, e);
        currentDoublePinyinTable = EmptyTable();
    }
}
/// <summary>
/// Decides whether matching against <paramref name="stringToTranslate"/> should use pinyin.
/// </summary>
/// <param name="stringToTranslate">The query text.</param>
/// <returns>
/// True when pinyin is enabled and the query contains no Chinese characters; false otherwise.
/// </returns>
public bool ShouldTranslate(string stringToTranslate)
{
    if (!_settings.ShouldUsePinyin)
    {
        return false;
    }

    // A query without Chinese characters may be typed pinyin, so the target text
    // should be translated to pinyin for matching.
    return !ContainsChinese(stringToTranslate);
}
/// <summary>
/// Returns the pinyin translation of <paramref name="content"/> together with its index mapping.
/// </summary>
/// <param name="content">Text to translate.</param>
/// <returns>
/// The original text with a null mapping when pinyin is disabled or the text contains no
/// Chinese characters; otherwise the cached translation, built on first use.
/// </returns>
public (string translation, TranslationMapping map) Translate(string content)
{
    if (!_settings.ShouldUsePinyin)
    {
        return (content, null);
    }

    if (!ContainsChinese(content))
    {
        return (content, null);
    }

    if (_pinyinCache.TryGetValue(content, out var cachedEntry))
    {
        return cachedEntry;
    }

    return BuildCacheFromContent(content);
}
2022-11-16 15:32:27 +00:00
/// <summary>
/// Translates <paramref name="content"/> to pinyin (or double pinyin when enabled), records for
/// every input character where its translation sits in the output, stores the result in the
/// cache and returns it.
/// </summary>
/// <param name="content">Text to translate; <c>Translate</c> only calls this for text that contains Chinese characters.</param>
/// <returns>The translated string and the mapping built alongside it.</returns>
private (string translation, TranslationMapping map) BuildCacheFromContent(string content)
{
    // Read the setting once so a single translation never mixes full and double pinyin.
    var useDoublePinyin = _settings.UseDoublePinyin;
    var resultList = WordsHelper.GetPinyinList(content);

    // Rough per-character estimate (syllable plus separator) scaled by the input length,
    // so the builder does not have to regrow for every appended syllable.
    var estimatedCharsPerInputChar = useDoublePinyin ? 3 : 4;
    var resultBuilder = new StringBuilder(content.Length * estimatedCharsPerInputChar);

    var map = new TranslationMapping();
    var previousIsChinese = false;

    // NOTE(review): content[i] is indexed with resultList's bound, which assumes
    // GetPinyinList returns one entry per input char - confirm against ToolGood.Words.
    for (var i = 0; i < resultList.Length; i++)
    {
        if (IsChineseCharacter(content[i]))
        {
            var translated = useDoublePinyin ? ToDoublePinyin(resultList[i]) : resultList[i];

            // Separate this syllable from whatever precedes it, unless the original
            // text already has a space at that position.
            if (i > 0 && content[i - 1] != ' ')
            {
                resultBuilder.Append(' ');
            }

            // Register the start offset and length of this character's translation.
            map.AddNewIndex(resultBuilder.Length, translated.Length);
            resultBuilder.Append(translated);
            previousIsChinese = true;
        }
        else
        {
            // Add a space after Chinese characters before non-Chinese characters,
            // unless the non-Chinese character is itself a space.
            if (previousIsChinese)
            {
                previousIsChinese = false;
                if (content[i] != ' ')
                {
                    resultBuilder.Append(' ');
                }
            }

            // Non-Chinese characters are copied through unchanged, one output char each.
            map.AddNewIndex(resultBuilder.Length, 1);
            resultBuilder.Append(content[i]);
        }
    }

    map.EndConstruct();

    var translation = resultBuilder.ToString();
    var result = (translation, map);

    return _pinyinCache[content] = result;
}
2024-06-02 06:19:27 +00:00
2025-07-13 15:29:41 +00:00
/// <summary>
/// Reports whether <paramref name="text"/> holds at least one character that
/// <c>IsChineseCharacter</c> accepts. Stops at the first match; empty input yields false.
/// </summary>
private static bool ContainsChinese(ReadOnlySpan<char> text)
{
    for (var index = 0; index < text.Length; index++)
    {
        if (IsChineseCharacter(text[index]))
        {
            return true;
        }
    }

    return false;
}
2025-04-05 15:01:09 +00:00
2025-07-13 15:29:41 +00:00
/// <summary>
/// Tests whether <paramref name="c"/> lies in one of the two CJK blocks handled here:
/// CJK Unified Ideographs Extension A (U+3400..U+4DBF) or
/// CJK Unified Ideographs (U+4E00..U+9FFF).
/// </summary>
private static bool IsChineseCharacter(char c)
{
    return c is (>= '\u3400' and <= '\u4DBF')   // CJK Extension A
             or (>= '\u4E00' and <= '\u9FFF');  // CJK Unified Ideographs
}
/// <summary>
/// Maps a full pinyin syllable to its double pinyin form using the current table.
/// </summary>
/// <param name="fullPinyin">Full pinyin syllable to look up.</param>
/// <returns>The table entry when one exists; otherwise <paramref name="fullPinyin"/> unchanged.</returns>
private string ToDoublePinyin(string fullPinyin)
{
    if (currentDoublePinyinTable.TryGetValue(fullPinyin, out var mapped))
    {
        return mapped;
    }

    return fullPinyin;
}
}
2022-11-16 15:32:27 +00:00
}