Characters are not bytes and bytes are not characters (not any more). The Python string type supports Unicode, which defines a numeric value (a "code point") for each of a very large set of characters.
ASCII is a subset of Unicode but is limited to the first 128 characters (code points 0 to 127).
# ASCII covers code points 0-127. Code points 0-31 are control characters,
# so start at 32 (space). The printable characters end at 126 ('~');
# 127 is the DEL control character and shows up as a blank entry.
# Each line shows the code point in binary followed by its character.
for i in range(32, 128):
    print(bin(i), chr(i))
0b100000 0b100001 ! 0b100010 " 0b100011 # 0b100100 $ 0b100101 % 0b100110 & 0b100111 ' 0b101000 ( 0b101001 ) 0b101010 * 0b101011 + 0b101100 , 0b101101 - 0b101110 . 0b101111 / 0b110000 0 0b110001 1 0b110010 2 0b110011 3 0b110100 4 0b110101 5 0b110110 6 0b110111 7 0b111000 8 0b111001 9 0b111010 : 0b111011 ; 0b111100 < 0b111101 = 0b111110 > 0b111111 ? 0b1000000 @ 0b1000001 A 0b1000010 B 0b1000011 C 0b1000100 D 0b1000101 E 0b1000110 F 0b1000111 G 0b1001000 H 0b1001001 I 0b1001010 J 0b1001011 K 0b1001100 L 0b1001101 M 0b1001110 N 0b1001111 O 0b1010000 P 0b1010001 Q 0b1010010 R 0b1010011 S 0b1010100 T 0b1010101 U 0b1010110 V 0b1010111 W 0b1011000 X 0b1011001 Y 0b1011010 Z 0b1011011 [ 0b1011100 \ 0b1011101 ] 0b1011110 ^ 0b1011111 _ 0b1100000 ` 0b1100001 a 0b1100010 b 0b1100011 c 0b1100100 d 0b1100101 e 0b1100110 f 0b1100111 g 0b1101000 h 0b1101001 i 0b1101010 j 0b1101011 k 0b1101100 l 0b1101101 m 0b1101110 n 0b1101111 o 0b1110000 p 0b1110001 q 0b1110010 r 0b1110011 s 0b1110100 t 0b1110101 u 0b1110110 v 0b1110111 w 0b1111000 x 0b1111001 y 0b1111010 z 0b1111011 { 0b1111100 | 0b1111101 } 0b1111110 ~ 0b1111111
# Non-ASCII Unicode characters have code points of 128 and above
# (ASCII characters are Unicode characters too, with code points 0-127).
# Pick a random starting code point and show the 64 characters from there,
# each as its hexadecimal code point followed by the character.
from random import randint

start = randint(128, 20000)  # randint includes both endpoints
for i in range(start, start + 64):
    print(hex(i), chr(i), end=' .. ')
0x3211 ㈑ .. 0x3212 ㈒ .. 0x3213 ㈓ .. 0x3214 ㈔ .. 0x3215 ㈕ .. 0x3216 ㈖ .. 0x3217 ㈗ .. 0x3218 ㈘ .. 0x3219 ㈙ .. 0x321a ㈚ .. 0x321b ㈛ .. 0x321c ㈜ .. 0x321d ㈝ .. 0x321e ㈞ .. 0x321f .. 0x3220 ㈠ .. 0x3221 ㈡ .. 0x3222 ㈢ .. 0x3223 ㈣ .. 0x3224 ㈤ .. 0x3225 ㈥ .. 0x3226 ㈦ .. 0x3227 ㈧ .. 0x3228 ㈨ .. 0x3229 ㈩ .. 0x322a ㈪ .. 0x322b ㈫ .. 0x322c ㈬ .. 0x322d ㈭ .. 0x322e ㈮ .. 0x322f ㈯ .. 0x3230 ㈰ .. 0x3231 ㈱ .. 0x3232 ㈲ .. 0x3233 ㈳ .. 0x3234 ㈴ .. 0x3235 ㈵ .. 0x3236 ㈶ .. 0x3237 ㈷ .. 0x3238 ㈸ .. 0x3239 ㈹ .. 0x323a ㈺ .. 0x323b ㈻ .. 0x323c ㈼ .. 0x323d ㈽ .. 0x323e ㈾ .. 0x323f ㈿ .. 0x3240 ㉀ .. 0x3241 ㉁ .. 0x3242 ㉂ .. 0x3243 ㉃ .. 0x3244 ㉄ .. 0x3245 ㉅ .. 0x3246 ㉆ .. 0x3247 ㉇ .. 0x3248 ㉈ .. 0x3249 ㉉ .. 0x324a ㉊ .. 0x324b ㉋ .. 0x324c ㉌ .. 0x324d ㉍ .. 0x324e ㉎ .. 0x324f ㉏ .. 0x3250 ㉐ ..
# ord() gives the number (ordinal value, or Unicode "code point") of a
# character. It requires a string of exactly one character; an empty string
# raises TypeError.
# The character is written as an escape because U+0A31 has no visible glyph
# in most fonts and is easily lost when the source is copied or rendered.
c = '\u0a31'
print(c, hex(ord(c)))
# chr() is the inverse: it gives the character for an ordinal.
print(0x2551, chr(0x2551))
0xa31 9553 ║
# The same single character occupies a different number of bytes depending
# on the encoding chosen. A str is a sequence of characters; encoding it
# produces a bytes object.
c = chr(0x2551)
utf8_bytes = c.encode('utf-8')
# Python's 'utf-16' codec prepends a 2-byte byte-order mark, so a character
# that needs one 16-bit unit is reported as 4 bytes.
utf16_bytes = c.encode('utf-16')
print(c, len(c), len(utf8_bytes), len(utf16_bytes))
print(type(c), type(utf8_bytes), type(utf16_bytes))
║ 1 3 4 <class 'str'> <class 'bytes'> <class 'bytes'>
# Examples of characters that are each one character long (len(c) == 1)
# but need a different number of bytes once encoded.
# UTF-8 uses 1 to 4 bytes per character. The 'utf-16' lengths include the
# 2-byte byte-order mark that Python's codec prepends, so they are 4 bytes
# for characters up to U+FFFF and 6 bytes for characters beyond that.
s = 'RÖ猫𐒎'
for c in s:
    print(
        c,
        hex(ord(c)),
        len(c),
        len(c.encode('utf-8')),
        len(c.encode('utf-16')),
    )
R 0x52 1 1 4 Ö 0xd6 1 2 4 猫 0x732b 1 3 4 𐒎 0x1048e 1 4 6