Python and Turkish capitalization

Question 1

You should use PyICU

>>> from icu import UnicodeString, Locale
>>> tr = Locale("TR")
>>> s = UnicodeString("i")
>>> print(unicode(s.toUpper(tr)))
İ
>>> s = UnicodeString("I")
>>> print(unicode(s.toLower(tr)))
ı
>>>

Question 2

You can define your own hard-coded functions to handle the Turkish character problem.

import re

def tr_upper(self):
    """Return the text upper-cased according to Turkish casing rules.

    The built-in ``str.upper()`` is locale-independent and turns "i" into
    "I".  Turkish instead pairs dotted "i" with "İ" and dotless "ı" with "I".

    Args:
        self: The string to convert.  (The unusual parameter name is kept so
            existing keyword callers keep working.)

    Returns:
        The upper-cased string.
    """
    # Only "i" needs special treatment: str.upper() already maps
    # ı -> I, ç -> Ç, ş -> Ş, ü -> Ü and ğ -> Ğ, so a single literal
    # replacement is enough and no regular expressions are required.
    return self.replace("i", "İ").upper()


def tr_lower(self):
    """Return the text lower-cased according to Turkish casing rules.

    The built-in ``str.lower()`` is locale-independent: it turns "I" into
    "i" and "İ" into "i" followed by a combining dot.  Turkish instead pairs
    "I" with dotless "ı" and "İ" with plain "i".

    Args:
        self: The string to convert.  (The unusual parameter name is kept so
            existing keyword callers keep working.)

    Returns:
        The lower-cased string.
    """
    # "İ" must be handled before "I" is irrelevant for correctness (they are
    # distinct code points), but both must be replaced before str.lower()
    # runs.  Ç, Ş, Ü and Ğ are already lower-cased correctly by str.lower().
    return self.replace("İ", "i").replace("I", "ı").lower()

regular upper:

>>>print("ulvido".upper())
ULVIDO

our custom upper:

>>>print(tr_upper("ulvido"))
ULVİDO

If you need this conversion often, you can put these functions in their own .py file. For example, save it as trtextstyle.py and import it into your projects.

If trtextstyle.py is in the same package as your module (for a standalone script in the same directory, drop the leading dot):

from .trtextstyle import tr_upper, tr_lower

hope this helps.

Question 3

def tr_capitalize(param_word):
    """Capitalize every space-separated word using Turkish casing rules.

    The first character of each word is upper-cased (dotted "i" becomes
    "İ") and the remainder is lower-cased ("I" becomes dotless "ı", "İ"
    becomes "i").

    Args:
        param_word: The text to capitalize; words are separated by a single
            space character.

    Returns:
        The capitalized text with surrounding whitespace stripped.  Empty
        input yields an empty string, and runs of several spaces between
        words are preserved instead of raising ``IndexError``.
    """
    capitalized_words = []
    for word in param_word.split(" "):
        if not word:
            # Leading, trailing or repeated spaces (and empty input) produce
            # empty tokens; keep them so the interior spacing is unchanged.
            capitalized_words.append(word)
            continue

        # Only the Turkish-specific pairs need explicit handling;
        # str.capitalize() takes care of every other letter.
        first_letter = word[0].replace("i", "İ")
        last_part = word[1:].replace("İ", "i").replace("I", "ı")
        capitalized_words.append((first_letter + last_part).capitalize())

    return " ".join(capitalized_words).strip()

Question 4

Python cannot do locale-sensitive Unicode case mapping, at least not without a third-party library. The manual states it explicitly:

There is no way to perform case conversions and character classifications according to the locale.

Source: https://docs.python.org/3.10/library/locale.html

You should write your own custom function (as ulvido and hasan içli suggested) or use, for example, PyICU (as ismail suggested), which is not very Pythonic. And, of course, do not use cmd.exe for anything Unicode-related.

Python 3 example:

import locale
import unicodedata

from icu import UnicodeString, Locale  # pip install pyicu


# Demonstration: ICU applies the Turkish dotted/dotless-i case mapping,
# while the built-in str.upper()/str.lower() do not, even after setlocale().
# The trailing comments show the Unicode character name each call yields.
lowercase_i_w_dot = "i"
uppercase_i_wo_dot = "I"

# Locale object passed to ICU's case-conversion methods below
tr = Locale("TR")

# ICU uppercasing with the Turkish locale: "i" becomes dotted capital "İ"
uppercase_i_w_dot = str(UnicodeString(lowercase_i_w_dot).toUpper(tr))
unicodedata.name(uppercase_i_w_dot)  # 'LATIN CAPITAL LETTER I WITH DOT ABOVE'

# ICU lowercasing with the Turkish locale: "I" becomes dotless small "ı"
lowercase_i_wo_dot = str(UnicodeString(uppercase_i_wo_dot).toLower(tr))
unicodedata.name(lowercase_i_wo_dot)  # 'LATIN SMALL LETTER DOTLESS I'

# Even after switching the process locale to Turkish...
# NOTE(review): presumably raises locale.Error if tr_TR.UTF-8 is not
# installed on the system -- confirm before running.
locale.setlocale(locale.LC_ALL, 'tr_TR.UTF-8')

# ...the built-in uppercasing still yields the plain (dotless) capital "I"
unicodedata.name(lowercase_i_w_dot.upper())  # 'LATIN CAPITAL LETTER I'

# ...and the built-in lowercasing still yields the plain (dotted) small "i"
unicodedata.name(uppercase_i_wo_dot.lower())  # 'LATIN SMALL LETTER I'