mirror of
https://github.com/zyedidia/micro
synced 2024-11-05 17:41:24 +00:00
96 lines
2.3 KiB
Go
96 lines
2.3 KiB
Go
package util
|
|
|
|
import (
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Unicode is annoying. A "code point" (rune in Go-speak) may need up to
|
|
// 4 bytes to represent it. In general, a code point will represent a
|
|
// complete character, but this is not always the case. A character with
|
|
// accents may be made up of multiple code points (the code point for the
|
|
// original character, and additional code points for each accent/marking).
|
|
// The functions below are meant to help deal with these additional "combining"
|
|
// code points. In underlying operations (search, replace, etc...), micro will
|
|
// treat a character with combining code points as just the original code point.
|
|
// For rendering, micro will display the combining characters. It's not perfect
|
|
// but it's pretty good.
|
|
|
|
var minMark = rune(unicode.Mark.R16[0].Lo)
|
|
|
|
func isMark(r rune) bool {
|
|
// Fast path
|
|
if r < minMark {
|
|
return false
|
|
}
|
|
return unicode.In(r, unicode.Mark)
|
|
}
|
|
|
|
// DecodeCharacter returns the next character from an array of bytes
|
|
// A character is a rune along with any accompanying combining runes
|
|
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
|
r, size := utf8.DecodeRune(b)
|
|
b = b[size:]
|
|
c, s := utf8.DecodeRune(b)
|
|
|
|
var combc []rune
|
|
for isMark(c) {
|
|
combc = append(combc, c)
|
|
size += s
|
|
|
|
b = b[s:]
|
|
c, s = utf8.DecodeRune(b)
|
|
}
|
|
|
|
return r, combc, size
|
|
}
|
|
|
|
// DecodeCharacterInString returns the next character from a string
|
|
// A character is a rune along with any accompanying combining runes
|
|
func DecodeCharacterInString(str string) (rune, []rune, int) {
|
|
r, size := utf8.DecodeRuneInString(str)
|
|
str = str[size:]
|
|
c, s := utf8.DecodeRuneInString(str)
|
|
|
|
var combc []rune
|
|
for isMark(c) {
|
|
combc = append(combc, c)
|
|
size += s
|
|
|
|
str = str[s:]
|
|
c, s = utf8.DecodeRuneInString(str)
|
|
}
|
|
|
|
return r, combc, size
|
|
}
|
|
|
|
// CharacterCount returns the number of characters in a byte array
|
|
// Similar to utf8.RuneCount but for unicode characters
|
|
func CharacterCount(b []byte) int {
|
|
s := 0
|
|
|
|
for len(b) > 0 {
|
|
r, size := utf8.DecodeRune(b)
|
|
if !isMark(r) {
|
|
s++
|
|
}
|
|
|
|
b = b[size:]
|
|
}
|
|
|
|
return s
|
|
}
|
|
|
|
// CharacterCount returns the number of characters in a string
|
|
// Similar to utf8.RuneCountInString but for unicode characters
|
|
func CharacterCountInString(str string) int {
|
|
s := 0
|
|
|
|
for _, r := range str {
|
|
if !isMark(r) {
|
|
s++
|
|
}
|
|
}
|
|
|
|
return s
|
|
}
|