package language
import "github.com/go-text/typesetting/language"
Index ¶
- Constants
- Variables
- type LangID
- func NewLangID(l Language) (LangID, bool)
- func (lang LangID) Language() Language
- func (lang LangID) UseScript(s Script) bool
- type Language
- func DefaultLanguage() Language
- func NewLanguage(language string) Language
- func (l Language) Compare(other Language) LanguageComparison
- func (l Language) IsDerivedFrom(root Language) bool
- func (l Language) IsUndetermined() bool
- func (l Language) Primary() Language
- func (l Language) SimpleInheritance() []Language
- func (l Language) SplitExtensionTags() (prefix, private Language)
- type LanguageComparison
- type Script
- func LookupScript(r rune) Script
- func ParseScript(script string) (Script, error)
- func (s Script) String() string
- func (s Script) Strong() bool
- type ScriptRange
Constants ¶
const ( Adlam = Script(0x41646c6d) // Adlm Afaka = Script(0x4166616b) // Afak Ahom = Script(0x41686f6d) // Ahom Anatolian_Hieroglyphs = Script(0x486c7577) // Hluw Arabic = Script(0x41726162) // Arab Armenian = Script(0x41726d6e) // Armn Avestan = Script(0x41767374) // Avst Balinese = Script(0x42616c69) // Bali Bamum = Script(0x42616d75) // Bamu Bassa_Vah = Script(0x42617373) // Bass Batak = Script(0x4261746b) // Batk Bengali = Script(0x42656e67) // Beng Bhaiksuki = Script(0x42686b73) // Bhks Blissymbols = Script(0x426c6973) // Blis Book_Pahlavi = Script(0x50686c76) // Phlv Bopomofo = Script(0x426f706f) // Bopo Brahmi = Script(0x42726168) // Brah Braille = Script(0x42726169) // Brai Buginese = Script(0x42756769) // Bugi Buhid = Script(0x42756864) // Buhd Canadian_Aboriginal = Script(0x43616e73) // Cans Carian = Script(0x43617269) // Cari Caucasian_Albanian = Script(0x41676862) // Aghb Chakma = Script(0x43616b6d) // Cakm Cham = Script(0x4368616d) // Cham Cherokee = Script(0x43686572) // Cher Chorasmian = Script(0x43687273) // Chrs Cirth = Script(0x43697274) // Cirt Code_for_unwritten_documents = Script(0x5a787878) // Zxxx Common = Script(0x5a797979) // Zyyy Coptic = Script(0x436f7074) // Copt Cuneiform = Script(0x58737578) // Xsux Cypriot = Script(0x43707274) // Cprt Cypro_Minoan = Script(0x43706d6e) // Cpmn Cyrillic = Script(0x4379726c) // Cyrl Deseret = Script(0x44737274) // Dsrt Devanagari = Script(0x44657661) // Deva Dives_Akuru = Script(0x4469616b) // Diak Dogra = Script(0x446f6772) // Dogr Duployan = Script(0x4475706c) // Dupl Egyptian_Hieroglyphs = Script(0x45677970) // Egyp Egyptian_demotic = Script(0x45677964) // Egyd Egyptian_hieratic = Script(0x45677968) // Egyh Elbasan = Script(0x456c6261) // Elba Elymaic = Script(0x456c796d) // Elym Ethiopic = Script(0x45746869) // Ethi Georgian = Script(0x47656f72) // Geor Glagolitic = Script(0x476c6167) // Glag Gothic = Script(0x476f7468) // Goth Grantha = Script(0x4772616e) // Gran Greek = Script(0x4772656b) // Grek 
Gujarati = Script(0x47756a72) // Gujr Gunjala_Gondi = Script(0x476f6e67) // Gong Gurmukhi = Script(0x47757275) // Guru Han = Script(0x48616e69) // Hani Hangul = Script(0x48616e67) // Hang Hanifi_Rohingya = Script(0x526f6867) // Rohg Hanunoo = Script(0x48616e6f) // Hano Hatran = Script(0x48617472) // Hatr Hebrew = Script(0x48656272) // Hebr Hiragana = Script(0x48697261) // Hira Imperial_Aramaic = Script(0x41726d69) // Armi Inherited = Script(0x5a696e68) // Zinh Inscriptional_Pahlavi = Script(0x50686c69) // Phli Inscriptional_Parthian = Script(0x50727469) // Prti Javanese = Script(0x4a617661) // Java Jurchen = Script(0x4a757263) // Jurc Kaithi = Script(0x4b746869) // Kthi Kannada = Script(0x4b6e6461) // Knda Katakana = Script(0x4b616e61) // Kana Katakana_Or_Hiragana = Script(0x48726b74) // Hrkt Kawi = Script(0x4b617769) // Kawi Kayah_Li = Script(0x4b616c69) // Kali Kharoshthi = Script(0x4b686172) // Khar Khitan_Small_Script = Script(0x4b697473) // Kits Khitan_large_script = Script(0x4b69746c) // Kitl Khmer = Script(0x4b686d72) // Khmr Khojki = Script(0x4b686f6a) // Khoj Khudawadi = Script(0x53696e64) // Sind Kpelle = Script(0x4b70656c) // Kpel Lao = Script(0x4c616f6f) // Laoo Latin = Script(0x4c61746e) // Latn Leke = Script(0x4c656b65) // Leke Lepcha = Script(0x4c657063) // Lepc Limbu = Script(0x4c696d62) // Limb Linear_A = Script(0x4c696e61) // Lina Linear_B = Script(0x4c696e62) // Linb Lisu = Script(0x4c697375) // Lisu Loma = Script(0x4c6f6d61) // Loma Lycian = Script(0x4c796369) // Lyci Lydian = Script(0x4c796469) // Lydi Mahajani = Script(0x4d61686a) // Mahj Makasar = Script(0x4d616b61) // Maka Malayalam = Script(0x4d6c796d) // Mlym Mandaic = Script(0x4d616e64) // Mand Manichaean = Script(0x4d616e69) // Mani Marchen = Script(0x4d617263) // Marc Masaram_Gondi = Script(0x476f6e6d) // Gonm Mathematical_notation = Script(0x5a6d7468) // Zmth Mayan_hieroglyphs = Script(0x4d617961) // Maya Medefaidrin = Script(0x4d656466) // Medf Meetei_Mayek = Script(0x4d746569) // 
Mtei Mende_Kikakui = Script(0x4d656e64) // Mend Meroitic_Cursive = Script(0x4d657263) // Merc Meroitic_Hieroglyphs = Script(0x4d65726f) // Mero Miao = Script(0x506c7264) // Plrd Modi = Script(0x4d6f6469) // Modi Mongolian = Script(0x4d6f6e67) // Mong Mro = Script(0x4d726f6f) // Mroo Multani = Script(0x4d756c74) // Mult Myanmar = Script(0x4d796d72) // Mymr Nabataean = Script(0x4e626174) // Nbat Nag_Mundari = Script(0x4e61676d) // Nagm Nandinagari = Script(0x4e616e64) // Nand New_Tai_Lue = Script(0x54616c75) // Talu Newa = Script(0x4e657761) // Newa Nko = Script(0x4e6b6f6f) // Nkoo Nushu = Script(0x4e736875) // Nshu Nyiakeng_Puachue_Hmong = Script(0x486d6e70) // Hmnp Ogham = Script(0x4f67616d) // Ogam Ol_Chiki = Script(0x4f6c636b) // Olck Old_Hungarian = Script(0x48756e67) // Hung Old_Italic = Script(0x4974616c) // Ital Old_North_Arabian = Script(0x4e617262) // Narb Old_Permic = Script(0x5065726d) // Perm Old_Persian = Script(0x5870656f) // Xpeo Old_Sogdian = Script(0x536f676f) // Sogo Old_South_Arabian = Script(0x53617262) // Sarb Old_Turkic = Script(0x4f726b68) // Orkh Old_Uyghur = Script(0x4f756772) // Ougr Oriya = Script(0x4f727961) // Orya Osage = Script(0x4f736765) // Osge Osmanya = Script(0x4f736d61) // Osma Pahawh_Hmong = Script(0x486d6e67) // Hmng Palmyrene = Script(0x50616c6d) // Palm Pau_Cin_Hau = Script(0x50617563) // Pauc Phags_Pa = Script(0x50686167) // Phag Phoenician = Script(0x50686e78) // Phnx Psalter_Pahlavi = Script(0x50686c70) // Phlp Ranjana = Script(0x52616e6a) // Ranj Rejang = Script(0x526a6e67) // Rjng Rongorongo = Script(0x526f726f) // Roro Runic = Script(0x52756e72) // Runr Samaritan = Script(0x53616d72) // Samr Sarati = Script(0x53617261) // Sara Saurashtra = Script(0x53617572) // Saur Sharada = Script(0x53687264) // Shrd Shavian = Script(0x53686177) // Shaw Shuishu = Script(0x53687569) // Shui Siddham = Script(0x53696464) // Sidd SignWriting = Script(0x53676e77) // Sgnw Sinhala = Script(0x53696e68) // Sinh Sogdian = Script(0x536f6764) // 
Sogd Sora_Sompeng = Script(0x536f7261) // Sora Soyombo = Script(0x536f796f) // Soyo Sundanese = Script(0x53756e64) // Sund Sunuwar = Script(0x53756e75) // Sunu Syloti_Nagri = Script(0x53796c6f) // Sylo Symbols = Script(0x5a73796d) // Zsym Syriac = Script(0x53797263) // Syrc Tagalog = Script(0x54676c67) // Tglg Tagbanwa = Script(0x54616762) // Tagb Tai_Le = Script(0x54616c65) // Tale Tai_Tham = Script(0x4c616e61) // Lana Tai_Viet = Script(0x54617674) // Tavt Takri = Script(0x54616b72) // Takr Tamil = Script(0x54616d6c) // Taml Tangsa = Script(0x546e7361) // Tnsa Tangut = Script(0x54616e67) // Tang Telugu = Script(0x54656c75) // Telu Tengwar = Script(0x54656e67) // Teng Thaana = Script(0x54686161) // Thaa Thai = Script(0x54686169) // Thai Tibetan = Script(0x54696274) // Tibt Tifinagh = Script(0x54666e67) // Tfng Tirhuta = Script(0x54697268) // Tirh Toto = Script(0x546f746f) // Toto Ugaritic = Script(0x55676172) // Ugar Unknown = Script(0x5a7a7a7a) // Zzzz Vai = Script(0x56616969) // Vaii Visible_Speech = Script(0x56697370) // Visp Vithkuqi = Script(0x56697468) // Vith Wancho = Script(0x5763686f) // Wcho Warang_Citi = Script(0x57617261) // Wara Woleai = Script(0x576f6c65) // Wole Yezidi = Script(0x59657a69) // Yezi Yi = Script(0x59696969) // Yiii Zanabazar_Square = Script(0x5a616e62) // Zanb )
Variables ¶
var ScriptRanges = [...]ScriptRange{ /* 952 elements not displayed */ }
ScriptRanges is a sorted list of script ranges.
var ScriptToLang = map[Script]LangID{ Arabic: LangAr, Armenian: LangHy, Bengali: LangBn, Bopomofo: 0, Cherokee: LangChr, Coptic: LangCop, Cyrillic: LangRu, Deseret: 0, Devanagari: LangHi, Ethiopic: LangAm, Georgian: LangKa, Gothic: 0, Greek: LangEl, Gujarati: LangGu, Gurmukhi: LangPa, Han: 0, Hangul: LangKo, Hebrew: LangHe, Hiragana: LangJa, Kannada: LangKn, Katakana: LangJa, Khmer: LangKm, Lao: LangLo, Latin: LangEn, Malayalam: LangMl, Mongolian: LangMn, Myanmar: LangMy, Ogham: 0, Old_Italic: 0, Oriya: LangOr, Runic: 0, Sinhala: LangSi, Syriac: LangSyr, Tamil: LangTa, Telugu: LangTe, Thaana: LangDv, Thai: LangTh, Tibetan: LangBo, Canadian_Aboriginal: LangIu, Yi: 0, Tagalog: LangTl, Hanunoo: LangHnn, Buhid: LangBku, Tagbanwa: LangTbw, Braille: 0, Cypriot: 0, Limbu: 0, Osmanya: 0, Shavian: 0, Linear_B: 0, Tai_Le: 0, Ugaritic: LangUga, New_Tai_Lue: 0, Buginese: LangBug, Glagolitic: 0, Tifinagh: 0, Syloti_Nagri: LangSyl, Old_Persian: LangPeo, Nko: LangNqo, }
ScriptToLang maps a script to a language that is reasonably representative of the script. This will usually be the most widely spoken or used language written in that script: for instance, the sample language for `Cyrillic` is 'ru' (Russian), the sample language for `Arabic` is 'ar'.
For some scripts, no sample language will be returned because there is no language that is sufficiently representative. The best example of this is `Han`, where various different variants of written Chinese, Japanese, and Korean all use significantly different sets of Han characters and forms of shared characters. No sample language can be provided for many historical scripts as well.
inspired by pango/pango-language.c
Types ¶
type LangID ¶
type LangID uint16
LangID is a compact representation of a language this package has orthographic knowledge of.
The zero value represents a language not known by the package.
const ( LangAa LangID = 1 LangAb LangID = 2 LangAf LangID = 3 LangAgr LangID = 4 LangAk LangID = 5 LangAm LangID = 6 LangAn LangID = 7 LangAnp LangID = 8 LangAr LangID = 9 LangAs LangID = 10 LangAst LangID = 11 LangAv LangID = 12 LangAy LangID = 13 LangAyc LangID = 14 LangAz_Az LangID = 15 LangAz_Ir LangID = 16 LangBa LangID = 17 LangBe LangID = 18 LangBem LangID = 19 LangBer_Dz LangID = 20 LangBer_Ma LangID = 21 LangBg LangID = 22 LangBh LangID = 23 LangBhb LangID = 24 LangBho LangID = 25 LangBi LangID = 26 LangBin LangID = 27 LangBm LangID = 28 LangBn LangID = 29 LangBo LangID = 30 LangBr LangID = 31 LangBrx LangID = 32 LangBs LangID = 33 LangBua LangID = 34 LangByn LangID = 35 LangCa LangID = 36 LangCe LangID = 37 LangCh LangID = 38 LangChm LangID = 39 LangChr LangID = 40 LangCkb LangID = 41 LangCmn LangID = 42 LangCo LangID = 43 LangCop LangID = 44 LangCrh LangID = 45 LangCs LangID = 46 LangCsb LangID = 47 LangCu LangID = 48 LangCv LangID = 49 LangCy LangID = 50 LangDa LangID = 51 LangDe LangID = 52 LangDoi LangID = 53 LangDsb LangID = 54 LangDv LangID = 55 LangDz LangID = 56 LangEe LangID = 57 LangEl LangID = 58 LangEn LangID = 59 LangEo LangID = 60 LangEs LangID = 61 LangEt LangID = 62 LangEu LangID = 63 LangFa LangID = 64 LangFat LangID = 65 LangFf LangID = 66 LangFi LangID = 67 LangFil LangID = 68 LangFj LangID = 69 LangFo LangID = 70 LangFr LangID = 71 LangFur LangID = 72 LangFy LangID = 73 LangGa LangID = 74 LangGd LangID = 75 LangGez LangID = 76 LangGl LangID = 77 LangGn LangID = 78 LangGot LangID = 79 LangGu LangID = 80 LangGv LangID = 81 LangHa LangID = 82 LangHak LangID = 83 LangHaw LangID = 84 LangHe LangID = 85 LangHi LangID = 86 LangHif LangID = 87 LangHne LangID = 88 LangHo LangID = 89 LangHr LangID = 90 LangHsb LangID = 91 LangHt LangID = 92 LangHu LangID = 93 LangHy LangID = 94 LangHz LangID = 95 LangIa LangID = 96 LangId LangID = 97 LangIe LangID = 98 LangIg LangID = 99 LangIi LangID = 100 LangIk LangID = 101 LangIo LangID = 102 LangIs LangID = 
103 LangIt LangID = 104 LangIu LangID = 105 LangJa LangID = 106 LangJv LangID = 107 LangKa LangID = 108 LangKaa LangID = 109 LangKab LangID = 110 LangKi LangID = 111 LangKj LangID = 112 LangKk LangID = 113 LangKl LangID = 114 LangKm LangID = 115 LangKn LangID = 116 LangKo LangID = 117 LangKok LangID = 118 LangKr LangID = 119 LangKs LangID = 120 LangKu_Am LangID = 121 LangKu_Iq LangID = 122 LangKu_Ir LangID = 123 LangKu_Tr LangID = 124 LangKum LangID = 125 LangKv LangID = 126 LangKw LangID = 127 LangKwm LangID = 128 LangKy LangID = 129 LangLa LangID = 130 LangLah LangID = 131 LangLb LangID = 132 LangLez LangID = 133 LangLg LangID = 134 LangLi LangID = 135 LangLij LangID = 136 LangLn LangID = 137 LangLo LangID = 138 LangLt LangID = 139 LangLv LangID = 140 LangLzh LangID = 141 LangMag LangID = 142 LangMai LangID = 143 LangMfe LangID = 144 LangMg LangID = 145 LangMh LangID = 146 LangMhr LangID = 147 LangMi LangID = 148 LangMiq LangID = 149 LangMjw LangID = 150 LangMk LangID = 151 LangMl LangID = 152 LangMn_Cn LangID = 153 LangMn_Mn LangID = 154 LangMni LangID = 155 LangMnw LangID = 156 LangMo LangID = 157 LangMr LangID = 158 LangMs LangID = 159 LangMt LangID = 160 LangMy LangID = 161 LangNa LangID = 162 LangNan LangID = 163 LangNb LangID = 164 LangNds LangID = 165 LangNe LangID = 166 LangNg LangID = 167 LangNhn LangID = 168 LangNiu LangID = 169 LangNl LangID = 170 LangNn LangID = 171 LangNo LangID = 172 LangNqo LangID = 173 LangNr LangID = 174 LangNso LangID = 175 LangNv LangID = 176 LangNy LangID = 177 LangOc LangID = 178 LangOm LangID = 179 LangOr LangID = 180 LangOs LangID = 181 LangOta LangID = 182 LangPa LangID = 183 LangPa_Pk LangID = 184 LangPap_An LangID = 185 LangPap_Aw LangID = 186 LangPes LangID = 187 LangPl LangID = 188 LangPrs LangID = 189 LangPs_Af LangID = 190 LangPs_Pk LangID = 191 LangPt LangID = 192 LangQu LangID = 193 LangQuz LangID = 194 LangRaj LangID = 195 LangRif LangID = 196 LangRm LangID = 197 LangRn LangID = 198 LangRo LangID = 199 LangRu 
LangID = 200 LangRw LangID = 201 LangSa LangID = 202 LangSah LangID = 203 LangSat LangID = 204 LangSc LangID = 205 LangSco LangID = 206 LangSd LangID = 207 LangSe LangID = 208 LangSel LangID = 209 LangSg LangID = 210 LangSgs LangID = 211 LangSh LangID = 212 LangShn LangID = 213 LangShs LangID = 214 LangSi LangID = 215 LangSid LangID = 216 LangSk LangID = 217 LangSl LangID = 218 LangSm LangID = 219 LangSma LangID = 220 LangSmj LangID = 221 LangSmn LangID = 222 LangSms LangID = 223 LangSn LangID = 224 LangSo LangID = 225 LangSq LangID = 226 LangSr LangID = 227 LangSs LangID = 228 LangSt LangID = 229 LangSu LangID = 230 LangSv LangID = 231 LangSw LangID = 232 LangSyr LangID = 233 LangSzl LangID = 234 LangTa LangID = 235 LangTcy LangID = 236 LangTe LangID = 237 LangTg LangID = 238 LangTh LangID = 239 LangThe LangID = 240 LangTi_Er LangID = 241 LangTi_Et LangID = 242 LangTig LangID = 243 LangTk LangID = 244 LangTl LangID = 245 LangTn LangID = 246 LangTo LangID = 247 LangTpi LangID = 248 LangTr LangID = 249 LangTs LangID = 250 LangTt LangID = 251 LangTw LangID = 252 LangTy LangID = 253 LangTyv LangID = 254 LangUg LangID = 255 LangUk LangID = 256 LangUnd_Zmth LangID = 257 LangUnd_Zsye LangID = 258 LangUnm LangID = 259 LangUr LangID = 260 LangUz LangID = 261 LangVe LangID = 262 LangVi LangID = 263 LangVo LangID = 264 LangVot LangID = 265 LangWa LangID = 266 LangWae LangID = 267 LangWal LangID = 268 LangWen LangID = 269 LangWo LangID = 270 LangXh LangID = 271 LangYap LangID = 272 LangYi LangID = 273 LangYo LangID = 274 LangYue LangID = 275 LangYuw LangID = 276 LangZa LangID = 277 LangZh_Cn LangID = 278 LangZh_Hk LangID = 279 LangZh_Mo LangID = 280 LangZh_Sg LangID = 281 LangZh_Tw LangID = 282 LangZu LangID = 283 LangBku LangID = 284 LangBug LangID = 285 LangHnn LangID = 286 LangKs_Devanagari LangID = 287 LangMl_In LangID = 288 LangMn LangID = 289 LangPeo LangID = 290 LangSd_Devanagari LangID = 291 LangSyl LangID = 292 LangTbw LangID = 293 LangUga LangID = 294 )
func NewLangID ¶
NewLangID returns the compact index of the given language, or false if it is not supported by this package.
Derived languages not exactly supported are mapped to their primary part: for instance, 'fr-be' is mapped to 'fr'.
func (LangID) Language ¶
func (LangID) UseScript ¶
UseScript returns true if 's' is used to write the language.
If nothing is known about the language (including if 'lang' is 0), true will be returned.
type Language ¶
type Language string
Language stores the canonicalized BCP 47 tag, which has the generic form <lang>-<country>-<other tags>...
func DefaultLanguage ¶
func DefaultLanguage() Language
DefaultLanguage returns the language found in environment variables LC_ALL, LC_CTYPE or LANG (in that order), or the zero value if not found.
func NewLanguage ¶
NewLanguage canonicalizes the language input (as a BCP 47 language tag), by converting it to lowercase, mapping '_' to '-', and stripping all characters other than letters, numbers and '-'.
func (Language) Compare ¶
func (l Language) Compare(other Language) LanguageComparison
Compare compares `other` and `l`. Undetermined languages are only compared using the remaining tags, meaning that "und-fr" and "und-be" are compared as LanguagesDiffer, not LanguagePrimaryMatch.
func (Language) IsDerivedFrom ¶
IsDerivedFrom returns `true` if `l` has `root` as its primary language.
func (Language) IsUndetermined ¶
IsUndetermined returns `true` if its primary language is "und". It is a shortcut for IsDerivedFrom("und").
func (Language) Primary ¶
Primary returns the root language of l, that is the part before the first '-' separator
func (Language) SimpleInheritance ¶
SimpleInheritance returns the list of matching languages, using simple truncation inheritance. The resulting slice starts with the given whole language. See http://www.unicode.org/reports/tr35/#Locale_Inheritance for more information.
func (Language) SplitExtensionTags ¶
SplitExtensionTags splits the language at the extension and private-use subtags, which are marked by a "-<one char>-" pattern. It returns the language before the first pattern, and, if any, the private-use subtag.
(l, "") is returned if the language has no extension or private-use tag.
type LanguageComparison ¶
type LanguageComparison uint8
LanguageComparison is a three state enum resulting from comparing two languages
const ( LanguagesDiffer LanguageComparison = iota // the two languages are totally different LanguagesExactMatch // the two languages are exactly the same LanguagePrimaryMatch // the two languages have the same primary language, but differ. )
type Script ¶
type Script uint32
Script identifies different writing systems. It is represented as the binary encoding of a script tag of 4 (case sensitive) letters, as specified by ISO 15924. Note that the default value is usually the Unknown script, not the 0 value (which is invalid)
func LookupScript ¶
LookupScript looks up the script for a particular character (as defined by Unicode Standard Annex #24), and returns Unknown if not found.
func ParseScript ¶
ParseScript converts a 4-byte string into its binary encoding, enforcing the conventional capitalized case. If [script] is longer, only its first 4 bytes are used.
func (Script) String ¶
String returns the four-letter ISO 15924 code of the script.
func (Script) Strong ¶
Strong returns true if the script is not Common or Inherited
type ScriptRange ¶
ScriptRange is an inclusive range of runes with constant script.
Source Files ¶
language.go language_table_gen.go scripts.go scripts_table.go
- Version
- v0.3.0 (latest)
- Published
- Feb 21, 2025
- Platform
- linux/amd64
- Imports
- 5 packages
- Last checked
- 16 hours ago –
Tools for package owners.