How to check whether a character belongs to a specific code page?

63 Views Asked by At

I want to print a character only if it does not belong to a specific code page.

What function can I use for this purpose?

# Scan in.txt character by character and print each printable character
# that fails the code-page membership test (the test itself is the open question).
with open('in.txt', 'r', encoding="utf-16-le") as f:
    while True:
        c = f.read(1)  # read a single character from the text stream
        if not c:
            break  # an empty string means end of file
        if not c.isprintable():
            continue  # skip non-printable characters
        if not ?????(c):  # placeholder for the membership check being asked about
            print(c)

Version 2:

def is_supported(char, encoding):
    """Return True if *char* can be encoded with *encoding*, else False.

    The check is done by attempting the encoding: a UnicodeEncodeError
    means the codec has no mapping for the character.
    """
    try:
        char.encode(encoding)
    except UnicodeEncodeError:
        return False
    else:
        return True
    
# Walk in.txt one character at a time and report, for every printable
# character, whether cp950 can encode it.
with open('in.txt', 'r', encoding="utf-16-le") as f:
    # iter() with a sentinel stops as soon as read() returns '' (end of file).
    for c in iter(lambda: f.read(1), ''):
        if c.isprintable():
            suffix = "(yes)" if is_supported(c, 'cp950') else "(no)"
            print(c + suffix)
1

There is 1 best solution below

8
Mark Tolonen On

Try to encode the character in that code page. If it fails, it is not supported:

def is_supported(char, encoding):
    """Tell whether *char* is representable in the given *encoding*.

    Returns True when encoding succeeds and False when the codec raises
    UnicodeEncodeError for the character.
    """
    encodable = True
    try:
        char.encode(encoding)
    except UnicodeEncodeError:
        # The codec cannot represent this character.
        encodable = False
    return encodable

# Demo: check one character that cp1252 can encode and one that it cannot.
for sample in ('ç', '马'):
    print(is_supported(sample, 'cp1252'))
True
False