Module:Unicode chart/display: Difference between revisions

Content deleted Content added
made it this far
done unless i fell asleep and missed something in the CJK blocks, pls double-check
Line 1:
--[[-----------------
return {
 
Values that look like meaningless pass-through assignments actually have the purpose of forcing a dashed box
on printable characters that otherwise would not be identified as needing one. This avoids setting up a second config table for the box status.
 
The character U+1F3F7 LABEL actually looks like the tags in the Tags block. At least according to the standard. Reality will vary by font. See:
* https://www.unicode.org/charts/PDF/U1F300.pdf
* https://www.unicode.org/charts/PDF/UE0000.pdf
 
--]]-----------------
 
local display = {
-- Basic Latin
[0x0000] = "NUL",
Line 73 ⟶ 84:
[0x034F] = "CGJ ◌",
-- Arabic
[0x0600] = mw.ustring.char(0x0600),
[0x0601] = mw.ustring.char(0x0601),
[0x0602] = mw.ustring.char(0x0602),
[0x0603] = mw.ustring.char(0x0603),
[0x0604] = mw.ustring.char(0x0604),
[0x061C] = "ALM",
[0x06DD] = mw.ustring.char(0x06DD),
-- Syriac
[0x070F] = mw.ustring.char(0x070F).." SAM",
-- Arabic Extended-A
[0x08E2] = mw.ustring.char(0x08E2),
-- Kannada
[0x0CF1] = mw.ustring.char(0x0CF1),
[0x0CF2] = mw.ustring.char(0x0CF2),
-- Malayalam
[0x0D4E] = mw.ustring.char(0x0D4E),
-- Tibetan
[0x0F0C] = mw.ustring.char(0x0F0C).." NB",
-- Myanmar
[0x1039] = mw.ustring.char(0x1039),
-- Hangul Jamo
[0x115F] = "HC F",
Line 101 ⟶ 112:
[0x17B4] = "KIV AQ",
[0x17B5] = "KIV AA",
[0x17D2] = mw.ustring.char(0x17D2),
-- Mongolian
[0x180B] = "FV S1",
Line 108 ⟶ 119:
[0x180E] = "MV S",
-- Tai Tham
[0x1A60] = mw.ustring.char(0x1A60),
-- Vedic Extensions
[0x1CF5] = mw.ustring.char(0x1CF5),
[0x1CF6] = mw.ustring.char(0x1CF6),
-- General Punctuation
[0x2000] = "NQ SP",
Line 150 ⟶ 161:
[0x206F] = "NO DS",
-- Tifinagh
[0x2D7F] = mw.ustring.char(0x2D7F),
-- Supplemental Punctuation
[0x2E3A] = "2M "..mw.ustring.char(0x2E3A),
[0x2E3B] = "3M "..mw.ustring.char(0x2E3B),
-- CJK Symbols and Punctuation
[0x3000] = "ID SP", --whitespace anyway
-- Hangul Compatibility Jamo
[0x3164] = "HF",
 
-->>> BREAK
 
-- CJK Unified Ideographs Extension A
 
-- Yijing Hexagram Symbols
 
-- CJK Unified Ideographs
 
-- Yi Syllables
 
-- Yi Radicals
 
-- Lisu
 
-- Vai
 
-- Cyrillic Extended-B
 
-- Bamum
 
-- Modifier Tone Letters
 
-- Latin Extended-D
 
-- Syloti Nagri
 
-- Common Indic Number Forms
 
-- Phags-pa
 
-- Saurashtra
 
-- Devanagari Extended
 
-- Kayah Li
 
-- Rejang
 
-- Hangul Jamo Extended-A
 
-- Javanese
 
-- Myanmar Extended-B
 
-- Cham
 
-- Myanmar Extended-A
 
-- Tai Viet
 
-- Meetei Mayek Extensions
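-- [0x3164] = mw.ustring.char(0x3164), -- NOTE(review): duplicate key; 0x3164 already has a text label under "Hangul Compatibility Jamo" and a later duplicate would silently override it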
 
-- Ethiopic Extended-A
 
-- Latin Extended-E
 
-- Cherokee Supplement
 
-- Meetei Mayek
 
-- Hangul Syllables
 
-- Hangul Jamo Extended-B
 
-- High Surrogates
 
-- High Private Use Surrogates
 
-- Low Surrogates
 
-- Private Use Area
 
-- CJK Compatibility Ideographs
 
-- Alphabetic Presentation Forms
 
-- Arabic Presentation Forms-A
 
-- Variation Selectors
[0xFE00] = "VS 1",
 
[0xFE01] = "VS 2",
-- Vertical Forms
[0xFE02] = "VS 3",
 
[0xFE03] = "VS 4",
-- Combining Half Marks
[0xFE04] = "VS 5",
 
[0xFE05] = "VS 6",
-- CJK Compatibility Forms
[0xFE06] = "VS 7",
 
[0xFE07] = "VS 8",
-- Small Form Variants
[0xFE08] = "VS 9",
 
[0xFE09] = "VS 10",
[0xFE0A] = "VS 11",
[0xFE0B] = "VS 12",
[0xFE0C] = "VS 13",
[0xFE0D] = "VS 14",
[0xFE0E] = "VS 15",
[0xFE0F] = "VS 16",
-- Arabic Presentation Forms-B
[0xFEFF] = "ZWN BSP",
 
-- Halfwidth and Fullwidth Forms
[0xFFA0] = "HW HF",
 
-- Specials
[0xFFF9] = "IA A",
 
[0xFFFA] = "IA S",
-- Linear B Syllabary
[0xFFFB] = "IA T",
 
[0xFFFC] = "OBJ",
-- Linear B Ideograms
 
-- Aegean Numbers
 
-- Ancient Greek Numbers
 
-- Ancient Symbols
 
-- Phaistos Disc
 
-- Lycian
 
-- Carian
 
-- Coptic Epact Numbers
 
-- Old Italic
 
-- Gothic
 
-- Old Permic
 
-- Ugaritic
 
-- Old Persian
 
-- Deseret
 
-- Shavian
 
-- Osmanya
 
-- Osage
 
-- Elbasan
 
-- Caucasian Albanian
 
-- Linear A
 
-- Cypriot Syllabary
 
-- Imperial Aramaic
 
-- Palmyrene
 
-- Nabataean
 
-- Hatran
 
-- Phoenician
 
-- Lydian
 
-- Meroitic Hieroglyphs
 
-- Meroitic Cursive
 
-- Kharoshthi
[0x10A3F] = mw.ustring.char(0x10A3F),
 
-- Old South Arabian
 
-- Old North Arabian
 
-- Manichaean
 
-- Avestan
 
-- Inscriptional Parthian
 
-- Inscriptional Pahlavi
 
-- Psalter Pahlavi
 
-- Old Turkic
 
-- Old Hungarian
 
-- Hanifi Rohingya
 
-- Rumi Numeral Symbols
 
-- Old Sogdian
 
-- Sogdian
 
-- Elymaic
 
-- Brahmi
[0x11003] = mw.ustring.char(0x11003),
 
[0x11004] = mw.ustring.char(0x11004),
[0x1107F] = "BNJ",
-- Kaithi
[0x110BD] = mw.ustring.char(0x110BD),
 
[0x110CD] = mw.ustring.char(0x110CD),
-- Sora Sompeng
 
-- Chakma
[0x11133] = mw.ustring.char(0x11133),
 
-- Mahajani
 
-- Sharada
[0x111C2] = mw.ustring.char(0x111C2),
 
[0x111C3] = mw.ustring.char(0x111C3),
-- Sinhala Archaic Numbers
 
-- Khojki
 
-- Multani
 
-- Khudawadi
 
-- Grantha
 
-- Newa
 
-- Tirhuta
 
-- Siddham
 
-- Modi
 
-- Mongolian Supplement
 
-- Takri
 
-- Ahom
 
-- Dogra
 
-- Warang Citi
 
-- Nandinagari
 
-- Zanabazar Square
[0x11A3A] = mw.ustring.char(0x11A3A),
 
[0x11A47] = mw.ustring.char(0x11A47),
-- Soyombo
[0x11A84] = mw.ustring.char(0x11A84),
 
[0x11A85] = mw.ustring.char(0x11A85),
-- Pau Cin Hau
[0x11A86] = mw.ustring.char(0x11A86),
 
[0x11A87] = mw.ustring.char(0x11A87),
-- Bhaiksuki
[0x11A88] = mw.ustring.char(0x11A88),
 
[0x11A89] = mw.ustring.char(0x11A89),
-- Marchen
[0x11A99] = mw.ustring.char(0x11A99),
 
-- Masaram Gondi
[0x11D45] = mw.ustring.char(0x11D45),
 
[0x11D46] = mw.ustring.char(0x11D46),
-- Gunjala Gondi
[0x11D97] = mw.ustring.char(0x11D97),
 
-- Makasar
 
-- Tamil Supplement
 
-- Cuneiform
 
-- Cuneiform Numbers and Punctuation
 
-- Early Dynastic Cuneiform
 
-- Egyptian Hieroglyphs
 
-- Egyptian Hieroglyph Format Controls
[0x13430] = mw.ustring.char(0x13430),
 
[0x13431] = mw.ustring.char(0x13431),
-- Anatolian Hieroglyphs
[0x13432] = mw.ustring.char(0x13432),
 
[0x13433] = mw.ustring.char(0x13433),
-- Bamum Supplement
[0x13434] = mw.ustring.char(0x13434),
 
[0x13435] = mw.ustring.char(0x13435),
-- Mro
[0x13436] = mw.ustring.char(0x13436),
 
[0x13437] = mw.ustring.char(0x13437),
-- Bassa Vah
[0x13438] = mw.ustring.char(0x13438),
 
-- Pahawh Hmong
 
-- Medefaidrin
 
-- Miao
[0x16F8F] = mw.ustring.char(0x16F8F),
 
[0x16F90] = mw.ustring.char(0x16F90),
-- Ideographic Symbols and Punctuation
[0x16F91] = mw.ustring.char(0x16F91),
 
[0x16F92] = mw.ustring.char(0x16F92),
-- Tangut
 
-- Tangut Components
 
-- Kana Supplement
 
-- Kana Extended-A
 
-- Small Kana Extension
 
-- Nushu
 
-- Duployan
[0x1BC9D] = "DT LS",
 
-- Shorthand Format Controls
[0x1BCA0] = "⇸",
 
[0x1BCA1] = "⮡", -- not a very good approximation of pdf glyph https://www.unicode.org/charts/PDF/U1BCA0.pdf
-- Byzantine Musical Symbols
[0x1BCA2] = "↑",
 
[0x1BCA3] = "↓",
-- Musical Symbols
[0x1D159] = "NULL NOTE HEAD",
[0x1D173] = "BEGIN BEAM ",
[0x1D174] = "END BEAM",
[0x1D175] = "BEGIN TIE ",
[0x1D176] = "END TIE",
[0x1D177] = "BEGIN SLUR ",
[0x1D178] = "END SLUR",
[0x1D179] = "BEGIN PHR. ",
[0x1D17A] = "END PHR.",
-- Ancient Greek Musical Notation
 
-- Mayan Numerals
 
-- Tai Xuan Jing Symbols
 
-- Counting Rod Numerals
 
-- Mathematical Alphanumeric Symbols
 
-- Sutton SignWriting
[0x1DA9B] = "SW F2",
 
[0x1DA9C] = "SW F3",
-- Glagolitic Supplement
[0x1DA9D] = "SW F4",
 
[0x1DA9E] = "SW F5",
-- Nyiakeng Puachue Hmong
[0x1DA9F] = "SW F6",
 
[0x1DAA1] = "SW R2",
-- Wancho
[0x1DAA2] = "SW R3",
 
[0x1DAA3] = "SW R4",
-- Mende Kikakui
[0x1DAA4] = "SW R5",
 
[0x1DAA5] = "SW R6",
-- Adlam
[0x1DAA6] = "SW R7",
 
[0x1DAA7] = "SW R8",
-- Indic Siyaq Numbers
[0x1DAA8] = "SW R9",
 
[0x1DAA9] = "SW R10",
-- Ottoman Siyaq Numbers
[0x1DAAA] = "SW R11",
 
[0x1DAAB] = "SW R12",
-- Arabic Mathematical Alphabetic Symbols
[0x1DAAC] = "SW R13",
 
[0x1DAAD] = "SW R14",
-- Mahjong Tiles
[0x1DAAE] = "SW R15",
 
[0x1DAAF] = "SW R16",
-- Domino Tiles
 
-- Playing Cards
 
-- Enclosed Alphanumeric Supplement
[0x1F1E6] = mw.ustring.char(0x1F1E6),
 
[0x1F1E7] = mw.ustring.char(0x1F1E7),
-- Enclosed Ideographic Supplement
[0x1F1E8] = mw.ustring.char(0x1F1E8),
 
[0x1F1E9] = mw.ustring.char(0x1F1E9),
-- Miscellaneous Symbols and Pictographs
[0x1F1EA] = mw.ustring.char(0x1F1EA),
 
[0x1F1EB] = mw.ustring.char(0x1F1EB),
-- Emoticons
[0x1F1EC] = mw.ustring.char(0x1F1EC),
 
[0x1F1ED] = mw.ustring.char(0x1F1ED),
-- Ornamental Dingbats
[0x1F1EE] = mw.ustring.char(0x1F1EE),
 
[0x1F1EF] = mw.ustring.char(0x1F1EF),
-- Transport and Map Symbols
[0x1F1F0] = mw.ustring.char(0x1F1F0),
 
[0x1F1F1] = mw.ustring.char(0x1F1F1),
-- Alchemical Symbols
[0x1F1F2] = mw.ustring.char(0x1F1F2),
 
[0x1F1F3] = mw.ustring.char(0x1F1F3),
-- Geometric Shapes Extended
[0x1F1F4] = mw.ustring.char(0x1F1F4),
 
[0x1F1F5] = mw.ustring.char(0x1F1F5),
-- Supplemental Arrows-C
[0x1F1F6] = mw.ustring.char(0x1F1F6),
 
[0x1F1F7] = mw.ustring.char(0x1F1F7),
-- Supplemental Symbols and Pictographs
[0x1F1F8] = mw.ustring.char(0x1F1F8),
 
[0x1F1F9] = mw.ustring.char(0x1F1F9),
-- Chess Symbols
[0x1F1FA] = mw.ustring.char(0x1F1FA),
 
[0x1F1FB] = mw.ustring.char(0x1F1FB),
-- Symbols and Pictographs Extended-A
[0x1F1FC] = mw.ustring.char(0x1F1FC),
 
[0x1F1FD] = mw.ustring.char(0x1F1FD),
-- CJK Unified Ideographs Extension B
[0x1F1FE] = mw.ustring.char(0x1F1FE),
 
[0x1F1FF] = mw.ustring.char(0x1F1FF),
-- CJK Unified Ideographs Extension C
}
 
-- CJK Unified Ideographs Extension D
 
-- CJK Unified Ideographs Extension E
 
-- CJK Unified Ideographs Extension F
 
-- CJK Compatibility Ideographs Supplement
 
-- Tags
-- Each tag character is shown as its ASCII counterpart followed by U+1F3F7 LABEL,
-- which stands in for the tag-shaped glyph used in the Tags block code chart.
local tag = ' '..mw.ustring.char(0x1F3F7)
display[0xE0001] = "BEGIN"..tag
display[0xE0020] = "SP"..tag
-- U+E0021..U+E007E mirror the printable ASCII range 0x21..0x7E.
for n = 0xE0021, 0xE007E do display[n] = (string.char(n - 0xE0000)..tag) end
-- U+E007F (CANCEL TAG) would mirror 0x7F (DEL), a control byte with no visible form,
-- so it gets a text label like BEGIN and SP instead of a raw control character.
-- NOTE(review): "END" chosen because this character terminates tag sequences; confirm wording.
display[0xE007F] = "END"..tag
 
-- Variation Selectors Supplement
-- Labels U+E0100..U+E01EF as "VS 17" through "VS 256".
do
	local first_codepoint = 0xE0100
	local first_number = 17
	for offset = 0, 0xEF do
		display[first_codepoint + offset] = "VS "..(first_number + offset)
	end
end
 
return display
-- Supplementary Private Use Area-A
 
-- Supplementary Private Use Area-B
 
}
 
-- {{list-stub}}