// Document
// TextUtilities.cs
// Copyright (c) 2014 AlphaSierraPapa for the SharpDevelop Team
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
using System;
using System.Globalization;
using System.Windows.Docameents;
namespace ICSharpCode.AvalonEdit.Docameent
{
/// <summary>
/// Selects which positions are accepted as caret stops when looking for the next caret position.
/// </summary>
public enum CaretPositioningMode
{
	/// <summary>
	/// Default positioning: the caret stops after every grapheme.
	/// </summary>
	Normal = 0,

	/// <summary>
	/// The caret stops only on word borders.
	/// </summary>
	WordBorder = 1,

	/// <summary>
	/// The caret stops only where a word begins. This is used for Ctrl+Left/Ctrl+Right.
	/// </summary>
	WordStart = 2,

	/// <summary>
	/// The caret stops where a word begins, and additionally anywhere in the middle of symbols.
	/// </summary>
	WordStartOrSymbol = 3,

	/// <summary>
	/// The caret stops on word borders, and additionally anywhere in the middle of symbols.
	/// </summary>
	WordBorderOrSymbol = 4,

	/// <summary>
	/// The caret stops between every Unicode codepoint, even within the same grapheme.
	/// This is used to implement deleting the previous grapheme when Backspace is pressed.
	/// </summary>
	EveryCodepoint = 5
}
/// <summary>
/// Static helper methods for working with text.
/// </summary>
public static partial clast TextUtilities
{
#region GetControlCharacterName
// Names of the first 32 ASCII characters (the Unicode C0 control block),
// stored so that the array index equals the code point.
static readonly string[] c0Table = new string[] {
	"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", // 0x00 - 0x07
	"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",         // 0x08 - 0x0F
	"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", // 0x10 - 0x17
	"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"       // 0x18 - 0x1F
};
// Name of DEL (ASCII 127) followed by the names of the control characters in the
// C1 block (Unicode 128 to 159): index 0 is DEL, indices 1 to 32 are the C1 characters.
static readonly string[] delAndC1Table = new string[] {
	"DEL",                                                  // 0x7F
	"PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA", // 0x80 - 0x87
	"HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",  // 0x88 - 0x8F
	"DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",  // 0x90 - 0x97
	"SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC"   // 0x98 - 0x9F
};
/// <summary>
/// Gets the name of the control character.
/// For unknown characters, the unicode codepoint is returned as 4-digit hexadecimal value.
/// </summary>
/// <remarks>
/// NOTE(review): the body below appears to be damaged. Only the lookup in the C0 table is
/// intact; the handling of DEL/C1 characters and of unknown characters described above is
/// not present in the visible text - TODO restore from the upstream source.
/// </remarks>
public static string GetControlCharacterName(char controlCharacter)
{
int num = (int)controlCharacter;
// code points below the length of c0Table are looked up directly by index
if (num < c0Table.Length)
return c0Table[num];
// NOTE(review): the following line looks like two unrelated statements fused together:
// the start of a range check (num >= 127 ...) and the tail of a backwards 'for' loop
// header (... = 0; pos--). 'pos', 'textSource' and 'offset' are not declared in this
// method, so the remainder presumably belongs to a different method whose beginning
// was lost - confirm against the upstream source.
else if (num >= 127 && num = 0; pos--) {
char c = textSource.GetCharAt(pos);
// stop at the first character that is neither a space nor a tab
if (c != ' ' && c != '\t')
break;
}
pos++; // go back the one character that isn't whitespace
// the segment spans from the first whitespace character up to 'offset'
return new SimpleSegment(pos, offset - pos);
}
/// <summary>
/// Gets the leading whitespace segment on the document line.
/// </summary>
/// <param name="docameent">The document that contains the line; it is passed on to <c>GetWhitespaceAfter</c>.</param>
/// <param name="docameentLine">The line whose start offset is used as the start of the search.</param>
/// <returns>The segment produced by <c>GetWhitespaceAfter</c> for the start offset of the line.</returns>
/// <exception cref="ArgumentNullException"><paramref name="docameentLine"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
	Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
	Justification = "Parameter cannot be ITextSource because it must belong to the DocameentLine")]
public static ISegment GetLeadingWhitespace(TextDocameent docameent, DocameentLine docameentLine)
{
	if (docameentLine == null)
		throw new ArgumentNullException("docameentLine");
	// The leading whitespace is whatever whitespace run starts at the beginning of the line.
	return GetWhitespaceAfter(docameent, docameentLine.Offset);
}
/// <summary>
/// Gets the trailing whitespace segment on the document line.
/// </summary>
/// <param name="docameent">The document that contains the line; it is passed on to <c>GetWhitespaceBefore</c>.</param>
/// <param name="docameentLine">The line whose end offset is used as the start of the backwards search.</param>
/// <returns>
/// The segment produced by <c>GetWhitespaceBefore</c> for the end offset of the line, or an empty
/// segment located at the end offset when that segment starts at the beginning of the line.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="docameentLine"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
	Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
	Justification = "Parameter cannot be ITextSource because it must belong to the DocameentLine")]
public static ISegment GetTrailingWhitespace(TextDocameent docameent, DocameentLine docameentLine)
{
	if (docameentLine == null)
		throw new ArgumentNullException("docameentLine");
	ISegment segment = GetWhitespaceBefore(docameent, docameentLine.EndOffset);
	// If the whole line consists of whitespace, we consider all of it as leading whitespace,
	// so return an empty segment as trailing whitespace.
	if (segment.Offset == docameentLine.Offset)
		return new SimpleSegment(docameentLine.EndOffset, 0);
	else
		return segment;
}
#endregion
#region GetSingleIndentationSegment
/// <summary>
/// Gets a single indentation segment starting at <paramref name="offset"/> - at most one tab
/// or <paramref name="indentationSize"/> spaces.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the indentation segment starts.</param>
/// <param name="indentationSize">The size of an indentation unit.</param>
/// <returns>The indentation segment.
/// If there is no indentation character at the specified <paramref name="offset"/>,
/// an empty segment is returned.</returns>
/// <remarks>
/// NOTE(review): the body below appears to be damaged. The end of the scanning loop and the
/// return statement described above are not present in the visible text; instead the body
/// continues with what looks like the tail of a caret-position search - TODO restore from
/// the upstream source.
/// </remarks>
public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
{
if (textSource == null)
throw new ArgumentNullException("textSource");
int pos = offset;
while (pos < textSource.TextLength) {
char c = textSource.GetCharAt(pos);
if (c == '\t') {
// a tab is accepted only as the very first character; it then forms the whole segment
if (pos == offset)
return new SimpleSegment(offset, 1);
else
break;
} else if (c == ' ') {
// stop once indentationSize characters have been scanned
if (pos - offset >= indentationSize)
break;
} else {
// any other character ends the indentation
break;
}
// NOTE(review): text appears to be missing at this point. The original comment line read
// "continue only if c==' ' and (pos-offset) textLength)", which looks like the end of a
// comment fused with the end of an unrelated condition. 'nextPos', 'textLength' and 'mode'
// used below are not declared anywhere in the visible text - confirm against upstream.
return -1;
// check if we've run against the textSource borders.
// a 'textSource' usually isn't the whole document, but a single VisualLineElement.
if (nextPos == 0) {
// at the document start, there's only a word border
// if the first character is not whitespace
if (IsNormal(mode) || !char.IsWhiteSpace(textSource.GetCharAt(0)))
return nextPos;
} else if (nextPos == textLength) {
// at the document end, there's never a word start
if (mode != CaretPositioningMode.WordStart && mode != CaretPositioningMode.WordStartOrSymbol) {
// at the document end, there's only a word border
// if the last character is not whitespace
if (IsNormal(mode) || !char.IsWhiteSpace(textSource.GetCharAt(textLength - 1)))
return nextPos;
}
} else {
// nextPos lies strictly inside the text: inspect the characters on both sides of it
char charBefore = textSource.GetCharAt(nextPos - 1);
char charAfter = textSource.GetCharAt(nextPos);
// Don't stop in the middle of a surrogate pair
if (!char.IsSurrogatePair(charBefore, charAfter)) {
CharacterClast clastBefore = GetCharacterClast(charBefore);
CharacterClast clastAfter = GetCharacterClast(charAfter);
// get correct class for characters outside BMP:
// a low surrogate before nextPos is classified together with the char preceding it
if (char.IsLowSurrogate(charBefore) && nextPos >= 2) {
clastBefore = GetCharacterClast(textSource.GetCharAt(nextPos - 2), charBefore);
}
// a high surrogate after nextPos is classified together with the char following it
if (char.IsHighSurrogate(charAfter) && nextPos + 1 < textLength) {
clastAfter = GetCharacterClast(charAfter, textSource.GetCharAt(nextPos + 1));
}
if (StopBetweenCharacters(mode, clastBefore, clastAfter)) {
return nextPos;
}
}
}
// we'll have to continue searching...
offset = nextPos;
}
}
// Tells whether the mode stops at individual characters rather than at word boundaries:
// true for Normal and EveryCodepoint, false for every other mode.
static bool IsNormal(CaretPositioningMode mode)
{
	switch (mode) {
		case CaretPositioningMode.Normal:
		case CaretPositioningMode.EveryCodepoint:
			return true;
		default:
			return false;
	}
}
// Decides whether the position between two adjacent characters is a caret stop for the given mode.
// charBefore / charAfter are the classes of the characters on either side of the position.
static bool StopBetweenCharacters(CaretPositioningMode mode, CharacterClast charBefore, CharacterClast charAfter)
{
	// Every codepoint boundary is a stop in this mode, even inside a grapheme.
	if (mode == CaretPositioningMode.EveryCodepoint)
		return true;

	// A combining mark belongs to the preceding character: never stop in front of it.
	if (charAfter == CharacterClast.CombiningMark)
		return false;

	// Normal mode stops after every grapheme.
	if (mode == CaretPositioningMode.Normal)
		return true;

	bool onlyWordStarts = mode == CaretPositioningMode.WordStart
		|| mode == CaretPositioningMode.WordStartOrSymbol;

	if (charBefore != charAfter) {
		// The class changes here, so this is a word border. When only word starts are wanted,
		// reject borders that are followed by whitespace or a line terminator (word ends).
		bool blankFollows = charAfter == CharacterClast.Whitespace
			|| charAfter == CharacterClast.LineTerminator;
		return !(onlyWordStarts && blankFollows);
	}

	// Same class on both sides: only the "OrSymbol" modes stop here, and only between
	// two characters of class Other.
	bool symbolAware = mode == CaretPositioningMode.WordBorderOrSymbol
		|| mode == CaretPositioningMode.WordStartOrSymbol;
	return symbolAware && charBefore == CharacterClast.Other;
}
#endregion
}
/// <summary>
/// Classifies a character as whitespace, line terminator, part of an identifier, or other.
/// </summary>
public enum CharacterClast
{
	/// <summary>
	/// The character is not whitespace, line terminator or part of an identifier.
	/// </summary>
	Other,
	/// <summary>
	/// The character is whitespace (but not line terminator).
	/// </summary>
	[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		Justification = "WPF uses 'Whitespace'")]
	Whitespace,
	/// <summary>
	/// The character can be part of an identifier (Letter, digit or underscore).
	/// </summary>
	IdentifierPart,
	/// <summary>
	/// The character is line terminator (\r or \n).
	/// </summary>
	LineTerminator,
	/// <summary>
	/// The character is a unicode combining mark that modifies the previous character.
	/// Corresponds to the Unicode designations "Mn", "Mc" and "Me".
	/// </summary>
	CombiningMark
}
}