using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using NUnit.Framework;
using NUnit.Framework.Legacy;
using Flow.Launcher.Infrastructure;
using ToolGood.Words.Pinyin;
namespace Flow.Launcher.Test
{
/// <summary>
/// Performance test comparing ContainsChinese() vs WordsHelper.HasChinese().
///
/// This test verifies:
/// 1. Both methods produce identical results (correctness)
/// 2. Performance characteristics of both implementations
/// 3. Memory allocation patterns (listed as a goal; no test in this fixture currently measures allocations)
///
/// The ContainsChinese() method uses optimized Unicode range checking with ReadOnlySpan
/// while WordsHelper.HasChinese() uses the ToolGood.Words library implementation.
/// </summary>
[TestFixture]
public class ChineseDetectionPerformanceTest
{
    /// <summary>
    /// Inputs shared by the equivalence and benchmark tests, grouped by the kind of text they represent.
    /// </summary>
    private readonly List<string> _testStrings = new()
    {
        // Pure English - should return false
        "Hello World",
        "Visual Studio Code",
        "Microsoft Office 2023",
        "Adobe Photoshop Creative Suite",
        "Google Chrome Browser Application",
        // Pure Chinese - should return true
        "你好世界",
        "微软办公软件",
        "谷歌浏览器",
        "北京大学计算机科学与技术学院",
        "中华人民共和国国家发展和改革委员会",
        // Mixed content - should return true
        "Hello 世界",
        "Visual Studio 代码编辑器",
        "QQ音乐 Music Player",
        "Windows 10 操作系统",
        "GitHub 代码仓库管理平台",
        // Edge cases
        "",
        " ",
        "123456",
        "!@#$%^&*()",
        "café résumé naïve", // Accented characters (not Chinese)
        // Long strings for performance testing
        "This is a very long English string that contains no Chinese characters but is designed to test performance with longer text content that might appear in file names or application descriptions",
        "这是一个非常长的中文字符串,包含了很多汉字,用来测试在处理较长中文文本时的性能表现,比如可能出现在文件名或应用程序描述中的文本内容",
        "This is a mixed 混合内容的字符串 that contains both English and Chinese characters 中英文混合 to test performance with 复杂的文本内容 in real-world scenarios 真实场景中的应用"
    };

    /// <summary>
    /// Spot-checks the local ContainsChinese helper against inputs with a known answer.
    /// </summary>
    [Test]
    public void ContainsChinese_CorrectnessTest()
    {
        // Verify ContainsChinese works correctly for known cases
        ClassicAssert.IsFalse(ContainsChinese("Hello World"), "Pure English should return false");
        ClassicAssert.IsTrue(ContainsChinese("你好世界"), "Pure Chinese should return true");
        ClassicAssert.IsTrue(ContainsChinese("Hello 世界"), "Mixed content should return true");
        ClassicAssert.IsFalse(ContainsChinese(""), "Empty string should return false");
        ClassicAssert.IsFalse(ContainsChinese("123456"), "Numbers should return false");
        ClassicAssert.IsFalse(ContainsChinese("café résumé"), "Accented characters should return false");
    }

    /// <summary>
    /// Verifies that ideographs outside the Basic Multilingual Plane (encoded as surrogate pairs)
    /// are detected, and that other surrogate content is neither misclassified nor causes a throw.
    /// </summary>
    [Test]
    public void ContainsChinese_SupplementaryPlaneTest()
    {
        ClassicAssert.IsTrue(ContainsChinese("\U00020000"), "CJK Extension B ideograph should return true");
        ClassicAssert.IsTrue(ContainsChinese("abc \U0002A700 def"), "CJK Extension C ideograph should return true");
        ClassicAssert.IsTrue(ContainsChinese("\U0002F800"), "CJK Compatibility Supplement ideograph should return true");
        ClassicAssert.IsFalse(ContainsChinese("\U0001F600"), "Emoji (non-CJK surrogate pair) should return false");
        ClassicAssert.IsFalse(ContainsChinese("abc\uD840"), "Trailing lone high surrogate should return false");
        ClassicAssert.IsFalse(ContainsChinese("\uDC00\uD840"), "Unpaired surrogates should return false");
    }

    /// <summary>
    /// Spot-checks WordsHelper.HasChinese against the same known-answer inputs.
    /// </summary>
    [Test]
    public void WordsHelper_CorrectnessTest()
    {
        // Verify WordsHelper.HasChinese works correctly for known cases
        ClassicAssert.IsFalse(WordsHelper.HasChinese("Hello World"), "Pure English should return false");
        ClassicAssert.IsTrue(WordsHelper.HasChinese("你好世界"), "Pure Chinese should return true");
        ClassicAssert.IsTrue(WordsHelper.HasChinese("Hello 世界"), "Mixed content should return true");
        ClassicAssert.IsFalse(WordsHelper.HasChinese(""), "Empty string should return false");
        ClassicAssert.IsFalse(WordsHelper.HasChinese("123456"), "Numbers should return false");
        ClassicAssert.IsFalse(WordsHelper.HasChinese("café résumé"), "Accented characters should return false");
    }

    /// <summary>
    /// Asserts that both implementations agree on every shared test string.
    /// </summary>
    [Test]
    public void BothMethods_ShouldProduceSameResults()
    {
        // Critical test: verify both methods produce identical results for all test cases
        foreach (var testString in _testStrings)
        {
            var wordsHelperResult = WordsHelper.HasChinese(testString);
            var containsChineseResult = ContainsChinese(testString);
            ClassicAssert.AreEqual(wordsHelperResult, containsChineseResult,
                $"Results differ for string: '{testString}'. WordsHelper: {wordsHelperResult}, ContainsChinese: {containsChineseResult}");
        }

        Console.WriteLine($"✓ Both methods produce identical results for all {_testStrings.Count} test cases");
    }

    /// <summary>
    /// Times both implementations over the full shared string list and prints the comparison.
    /// This is a measurement-only test: it reports timings and never fails on them.
    /// </summary>
    [Test]
    public void PerformanceComparison_BasicBenchmark()
    {
        const int iterations = 1000000;
        Console.WriteLine("=== CHINESE CHARACTER DETECTION PERFORMANCE TEST ===");
        Console.WriteLine($"Test iterations: {iterations:N0}");
        Console.WriteLine($"Test strings: {_testStrings.Count}");
        // Multiply in 64-bit so the reported total cannot overflow if the string list grows.
        Console.WriteLine($"Total operations: {(long)iterations * _testStrings.Count:N0}");
        Console.WriteLine();

        // Warmup to ensure JIT compilation
        Console.WriteLine("Warming up...");
        for (int i = 0; i < 1000; i++)
        {
            foreach (var testString in _testStrings)
            {
                _ = ContainsChinese(testString);
                _ = WordsHelper.HasChinese(testString);
            }
        }

        // Benchmark ContainsChinese method (collect first so earlier garbage is not charged to this run)
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var sw1 = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < iterations; i++)
        {
            foreach (var testString in _testStrings)
            {
                _ = ContainsChinese(testString);
            }
        }
        sw1.Stop();

        // Benchmark WordsHelper.HasChinese method
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var sw2 = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < iterations; i++)
        {
            foreach (var testString in _testStrings)
            {
                _ = WordsHelper.HasChinese(testString);
            }
        }
        sw2.Stop();

        // Calculate and display results
        var containsChineseMs = sw1.Elapsed.TotalMilliseconds;
        var wordsHelperMs = sw2.Elapsed.TotalMilliseconds;
        var speedRatio = wordsHelperMs / containsChineseMs;
        var timeDifference = wordsHelperMs - containsChineseMs;
        Console.WriteLine("RESULTS:");
        Console.WriteLine($"ContainsChinese(): {containsChineseMs:F3} ms");
        Console.WriteLine($"WordsHelper.HasChinese(): {wordsHelperMs:F3} ms");
        Console.WriteLine($"Time difference: {timeDifference:F3} ms");
        Console.WriteLine($"Speed improvement: {speedRatio:F2}x");
        Console.WriteLine($"Performance gain: {((speedRatio - 1) * 100):F1}%");
        Console.WriteLine();
        if (speedRatio > 1.0)
        {
            Console.WriteLine($"✓ ContainsChinese() is {speedRatio:F2}x faster than WordsHelper.HasChinese()");
        }
        else
        {
            Console.WriteLine($"⚠ WordsHelper.HasChinese() is {(1/speedRatio):F2}x faster than ContainsChinese()");
        }

        // Test always passes - this is a measurement test
        Assert.Pass("Measurement-only test; see console output for timings.");
    }

    /// <summary>
    /// Times both implementations separately for each category of input (English, Chinese, mixed, edge cases)
    /// and prints which one was faster. This is a measurement-only test.
    /// </summary>
    [Test]
    public void PerformanceComparison_ByStringType()
    {
        const int warmupIterations = 100;
        const int categoryIterations = 5000;

        Console.WriteLine("=== PERFORMANCE BY STRING TYPE ===");
        var categories = new Dictionary<string, List<string>>
        {
            ["Pure English"] = _testStrings.Where(s => !ContainsChinese(s) && s.All(c => c <= 127)).ToList(),
            ["Pure Chinese"] = _testStrings.Where(s => ContainsChinese(s) && s.All(c => IsChineseCharacter(c) || char.IsWhiteSpace(c))).ToList(),
            ["Mixed Content"] = _testStrings.Where(s => ContainsChinese(s) && s.Any(c => c <= 127 && char.IsLetter(c))).ToList(),
            ["Edge Cases"] = _testStrings.Where(s => string.IsNullOrWhiteSpace(s) || s.All(c => !char.IsLetter(c))).ToList()
        };

        // Warm up both code paths so JIT time is not charged to whichever method is measured first.
        for (int i = 0; i < warmupIterations; i++)
        {
            foreach (var str in _testStrings)
            {
                _ = ContainsChinese(str);
                _ = WordsHelper.HasChinese(str);
            }
        }

        foreach (var category in categories)
        {
            var strings = category.Value;
            if (strings.Count == 0)
            {
                continue;
            }

            Console.WriteLine($"\n{category.Key} ({strings.Count} strings):");
            var sample = strings[0];
            var displayText = sample.Length > 40 ? sample.Substring(0, 40) + "..." : sample;
            Console.WriteLine($" Sample: '{displayText}'");

            // Test each method
            var sw1 = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < categoryIterations; i++)
            {
                foreach (var str in strings)
                {
                    _ = ContainsChinese(str);
                }
            }
            sw1.Stop();

            var sw2 = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < categoryIterations; i++)
            {
                foreach (var str in strings)
                {
                    _ = WordsHelper.HasChinese(str);
                }
            }
            sw2.Stop();

            // A zero tick count would turn the ratio into Infinity/NaN, so report it explicitly instead.
            if (sw1.ElapsedTicks == 0 || sw2.ElapsedTicks == 0)
            {
                Console.WriteLine(" Performance: too fast to measure reliably");
                continue;
            }

            // Report the direction honestly rather than always claiming ContainsChinese won.
            var ratio = (double)sw2.ElapsedTicks / sw1.ElapsedTicks;
            Console.WriteLine(ratio >= 1.0
                ? $" Performance: ContainsChinese is {ratio:F2}x faster"
                : $" Performance: WordsHelper.HasChinese is {(1 / ratio):F2}x faster");
        }

        Assert.Pass("Measurement-only test; see console output for timings.");
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> contains at least one CJK ideograph.
    /// Scans UTF-16 code units without allocating; ideographs outside the Basic Multilingual Plane
    /// are recognised by decoding their surrogate pair into a single code point.
    /// </summary>
    /// <param name="text">The text to scan; may be empty.</param>
    /// <returns>True if any CJK ideograph is found; otherwise false.</returns>
    private static bool ContainsChinese(ReadOnlySpan<char> text)
    {
        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];
            if (IsChineseCharacter(c))
            {
                return true;
            }

            // Extensions B-F and the Compatibility Supplement live above U+FFFF, so a single char can
            // never match them. Only a well-formed high+low surrogate pair is decoded; lone surrogates
            // are skipped so ConvertToUtf32 cannot throw.
            if (char.IsHighSurrogate(c) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
            {
                if (IsSupplementaryChineseCodePoint(char.ConvertToUtf32(c, text[i + 1])))
                {
                    return true;
                }

                i++; // the low surrogate has been consumed together with its high surrogate
            }
        }

        return false;
    }

    /// <summary>
    /// Checks whether a single UTF-16 code unit is a CJK ideograph in the Basic Multilingual Plane.
    /// A <see cref="char"/> cannot hold values above U+FFFF, so supplementary-plane ideographs are
    /// not (and cannot be) detected here; surrogate code units always yield false.
    /// </summary>
    /// <param name="c">The UTF-16 code unit to classify.</param>
    /// <returns>True if <paramref name="c"/> falls in one of the BMP CJK ideograph blocks.</returns>
    private static bool IsChineseCharacter(char c)
    {
        return (c >= 0x4E00 && c <= 0x9FFF) || // CJK Unified Ideographs (most common Chinese characters)
               (c >= 0x3400 && c <= 0x4DBF) || // CJK Extension A
               (c >= 0xF900 && c <= 0xFAFF);   // CJK Compatibility Ideographs
    }

    /// <summary>
    /// Checks whether a decoded Unicode code point is a CJK ideograph in a supplementary plane.
    /// </summary>
    /// <param name="codePoint">A code point above U+FFFF, obtained by decoding a surrogate pair.</param>
    /// <returns>True if the code point is in CJK Extension B-F or the Compatibility Supplement.</returns>
    private static bool IsSupplementaryChineseCodePoint(int codePoint)
    {
        return (codePoint >= 0x20000 && codePoint <= 0x2A6DF) || // CJK Extension B
               (codePoint >= 0x2A700 && codePoint <= 0x2B73F) || // CJK Extension C
               (codePoint >= 0x2B740 && codePoint <= 0x2B81F) || // CJK Extension D
               (codePoint >= 0x2B820 && codePoint <= 0x2CEAF) || // CJK Extension E
               (codePoint >= 0x2CEB0 && codePoint <= 0x2EBEF) || // CJK Extension F
               (codePoint >= 0x2F800 && codePoint <= 0x2FA1F);   // CJK Compatibility Supplement
    }
}
}