diff --git a/src/libse/Common/StringExtensions.cs b/src/libse/Common/StringExtensions.cs
index d7c8c2cec9..fc563be64e 100644
--- a/src/libse/Common/StringExtensions.cs
+++ b/src/libse/Common/StringExtensions.cs
@@ -771,7 +771,7 @@ public static bool HasSentenceEnding(this string value)
             return value.HasSentenceEnding(string.Empty);
         }
 
-        private static readonly HashSet<char> NeutralSentenceEndingChars = new HashSet<char>
+        public static readonly IReadOnlyCollection<char> NeutralSentenceEndingChars = new HashSet<char>
         {
             '.', '!', '?', ']', ')', '…', '♪', '؟', '。', '?'
         };
diff --git a/src/libse/Forms/FixCommonErrors/FixCommas.cs b/src/libse/Forms/FixCommonErrors/FixCommas.cs
index 53a369cfbf..6cc1427e8d 100644
--- a/src/libse/Forms/FixCommonErrors/FixCommas.cs
+++ b/src/libse/Forms/FixCommonErrors/FixCommas.cs
@@ -1,4 +1,6 @@
 using System;
+using System.Collections.Generic;
+using System.Linq;
 using Nikse.SubtitleEdit.Core.Interfaces;
 using System.Text.RegularExpressions;
 using Nikse.SubtitleEdit.Core.Common;
@@ -12,6 +14,17 @@ public static class Language
             public static string FixCommas { get; set; } = "Fix commas";
         }
 
+        // Arabic-comma replacements, applied in exactly this order (later patterns rely on the earlier ones having run).
+        // An array is used because Dictionary<TKey, TValue> does not guarantee its enumeration order.
+        private static readonly Lazy<KeyValuePair<Regex, string>[]> LazyCommaRegex = new Lazy<KeyValuePair<Regex, string>[]>(() => new[]
+        {
+            new KeyValuePair<Regex, string>(new Regex(@"([\p{L}\d\s]) *،،([\p{L}\d\s])", RegexOptions.Compiled), "$1،$2"),
+            new KeyValuePair<Regex, string>(new Regex(@"([\p{L}\d\s]) *، *، *،([\p{L}\d\s])", RegexOptions.Compiled), "$1...$2"),
+            new KeyValuePair<Regex, string>(new Regex(@"([\p{L}\d\s]) *، *، *،$", RegexOptions.Compiled), "$1..."),
+            new KeyValuePair<Regex, string>(new Regex(@"([\p{L}\d\s]) *،\s+،([\p{L}\d\s])", RegexOptions.Compiled), "$1،$2"),
+            new KeyValuePair<Regex, string>(new Regex(@"،(\p{L})", RegexOptions.Compiled), "، $1"),
+        });
+
         public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
         {
             var commaDouble = new Regex(@"([\p{L}\d\s]),,([\p{L}\d\s])");
@@ -30,7 +43,7 @@ public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
                 var s = p.Text;
                 var oldText = s;
 
-                if (p.Text.IndexOf(',') >= 0)
+                if (p.Text.Contains(','))
                 {
                     s = commaDouble.Replace(s, "$1,$2");
                     s = commaTriple.Replace(s, "$1...$2");
@@ -38,7 +51,7 @@ public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
                     s = commaWhiteSpaceBetween.Replace(s, "$1,$2");
 
                     var match = commaFollowedByLetter.Match(s);
-                    if (match.Success && (!(match.Index > 0 && s[match.Index-1] == 'ό' && s.Substring(match.Index).StartsWith(",τι", StringComparison.OrdinalIgnoreCase)) || callbacks.Language != "el"))
+                    if (match.Success && (!(match.Index > 0 && s[match.Index - 1] == 'ό' && s.Substring(match.Index).StartsWith(",τι", StringComparison.OrdinalIgnoreCase)) || callbacks.Language != "el"))
                     {
                         s = commaFollowedByLetter.Replace(s, ", $1");
                     }
@@ -46,18 +59,12 @@ public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
                     s = RemoveCommaBeforeSentenceEndingChar(s, ',');
                 }
 
-                if (p.Text.IndexOf('،') >= 0)
+                if (p.Text.Contains('،'))
                 {
-                    var commaDoubleAr = new Regex(@"([\p{L}\d\s]) *،،([\p{L}\d\s])");
-                    var commaTripleAr = new Regex(@"([\p{L}\d\s]) *، *، *،([\p{L}\d\s])");
-                    var commaTripleEndOfLineAr = new Regex(@"([\p{L}\d\s]) *، *، *،$");
-                    var commaWhiteSpaceBetweenAr = new Regex(@"([\p{L}\d\s]) *،\s+،([\p{L}\d\s])");
-                    var commaFollowedByLetterAr = new Regex(@"،(\p{L})");
-                    s = commaDoubleAr.Replace(s, "$1،$2");
-                    s = commaTripleAr.Replace(s, "$1...$2");
-                    s = commaTripleEndOfLineAr.Replace(s, "$1...");
-                    s = commaWhiteSpaceBetweenAr.Replace(s, "$1،$2");
-                    s = commaFollowedByLetterAr.Replace(s, "، $1");
+                    foreach (var keyValue in LazyCommaRegex.Value)
+                    {
+                        s = keyValue.Key.Replace(s, keyValue.Value);
+                    }
 
                     s = RemoveCommaBeforeSentenceEndingChar(s, '،');
                 }
@@ -75,30 +82,34 @@ public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
 
         private static string RemoveCommaBeforeSentenceEndingChar(string input, char comma)
         {
-            var s = input;
-            for (var i = s.Length - 1; i >= 0; i--)
-            {
-                var ch = s[i];
-                if (i - 1 >= 0 && s[i - 1] == comma && IsSentenceEndingChar(ch))
-                {
-                    var k = i;
-
-                    do
-                    {
-                        i--;
-                    } while (i - 1 >= 0 && s[i - 1] == comma);
-
-                    // remove commas
-                    if (k - i > 0)
-                    {
-                        s = s.Remove(i, k - i);
-                    }
-                }
-            }
-
-            return s;
-        }
-
-        private static bool IsSentenceEndingChar(char ch) => ch == '.' || ch == '!' || ch == '?' || ch == ')' || ch == ']' || ch == '؟';
+            // foo,,,,! bar => foo! bar
+            var chars = input.ToCharArray();
+            var pos = 0;
+            for (var i = 0; i < input.Length; i++)
+            {
+                var ch = input[i];
+                if (ch == comma && IsCommaRunBeforeSentenceClosingSymbol(i))
+                {
+                    // Drop every comma of the run, not only the one touching the closing symbol.
+                    continue;
+                }
+
+                chars[pos++] = ch;
+            }
+
+            return new string(chars, 0, pos);
+
+            // True when the comma at "index" is followed (after any further commas) by a sentence-closing symbol.
+            bool IsCommaRunBeforeSentenceClosingSymbol(int index)
+            {
+                var next = index + 1;
+                while (next < input.Length && input[next] == comma)
+                {
+                    next++;
+                }
+
+                return next < input.Length && StringExtensions.NeutralSentenceEndingChars.Contains(input[next]);
+            }
+        }
     }
 }