third_party/llvm-7.0/llvm/utils/unicode-case-fold.py - SwiftShader - Git at Google

 #!/usr/bin/env python
 """
 Unicode case folding database conversion utility

 Parses the database and generates a C++ function which implements the case
 folding algorithm. The database entries are of the form:

   <code>; <status>; <mapping>; # <name>

 <status> can be one of four characters:
   C - Common mappings
   S - mappings for Simple case folding
   F - mappings for Full case folding
   T - special case for Turkish I characters

 Right now this generates a function which implements simple case folding (C+S
 entries).
 """

 import sys
 import re
 import urllib2

 # This variable will body of the mappings function
 body = ""

 # Reads file line-by-line, extracts Common and Simple case fold mappings and
 # returns a (from_char, to_char, from_name) tuple.
 def mappings(f):
     previous_from = -1
     expr = re.compile(r'^(.*); [CS]; (.*); # (.*)')
     for line in f:
         m = expr.match(line)
         if not m: continue
         from_char = int(m.group(1), 16)
         to_char = int(m.group(2), 16)
         from_name = m.group(3)

         if from_char <= previous_from:
             raise Exception("Duplicate or unsorted characters in input")
         yield from_char, to_char, from_name
         previous_from = from_char

 # Computes the shift (to_char - from_char) in a mapping.
 def shift(mapping):
     return mapping[1] - mapping[0]

 # Computes the stride (from_char2 - from_char1) of two mappings.
 def stride2(mapping1, mapping2):
     return mapping2[0] - mapping1[0]

 # Computes the stride of a list of mappings. The list should have at least two
 # mappings. All mappings in the list are assumed to have the same stride.
 def stride(block):
     return stride2(block[0], block[1])


 # b is a list of mappings. All the mappings are assumed to have the same
 # shift and the stride between adjecant mappings (if any) is constant.
 def dump_block(b):
     global body

     if len(b) == 1:
         # Special case for handling blocks of length 1. We don't even need to
         # emit the "if (C < X) return C" check below as all characters in this
         # range will be caught by the "C < X" check emitted by the first
         # non-trivial block.
         body  += "  // {2}\n  if (C == {0:#06x})\n    return {1:#06x};\n".format(*b[0])
         return

     first = b[0][0]
     last = first + stride(b) * (len(b)-1)
     modulo = first % stride(b)

     # All characters before this block map to themselves.
     body += "  if (C < {0:#06x})\n    return C;\n".format(first)
     body += "  // {0} characters\n".format(len(b))

     # Generic pattern: check upper bound (lower bound is checked by the "if"
     # above) and modulo of C, return C+shift.
     pattern = "  if (C <= {0:#06x} && C % {1} == {2})\n    return C + {3};\n"

     if stride(b) == 2 and shift(b[0]) == 1 and modulo == 0:
         # Special case:
         # We can elide the modulo-check because the expression "C|1" will map
         # the intervening characters to themselves.
         pattern = "  if (C <= {0:#06x})\n    return C | 1;\n"
     elif stride(b) == 1:
         # Another special case: X % 1 is always zero, so don't emit the
         # modulo-check.
         pattern = "  if (C <= {0:#06x})\n    return C + {3};\n"

     body += pattern.format(last, stride(b), modulo, shift(b[0]))

 current_block = []
 f = urllib2.urlopen(sys.argv[1])
 for m in mappings(f):
     if len(current_block) == 0:
         current_block.append(m)
         continue

     if shift(current_block[0]) != shift(m):
         # Incompatible shift, start a new block.
         dump_block(current_block)
         current_block = [m]
         continue

     if len(current_block) == 1 or stride(current_block) == stride2(current_block[-1], m):
         current_block.append(m)
         continue

     # Incompatible stride, start a new block.
     dump_block(current_block)
     current_block = [m]
 f.close()

 dump_block(current_block)

 print '//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//'
 print '//'
 print '// This file was generated by utils/unicode-case-fold.py from the Unicode'
 print '// case folding database at'
 print '//   ', sys.argv[1]
 print '//'
 print '// To regenerate this file, run:'
 print '//   utils/unicode-case-fold.py \\'
 print '//     "{}" \\'.format(sys.argv[1])
 print '//     > lib/Support/UnicodeCaseFold.cpp'
 print '//'
 print '//===----------------------------------------------------------------------===//'
 print ''
 print '#include "llvm/Support/Unicode.h"'
 print ''
 print "int llvm::sys::unicode::foldCharSimple(int C) {"
 print body
 print "  return C;"
 print "}"
	#!/usr/bin/env python
	"""
	Unicode case folding database conversion utility

	Parses the database and generates a C++ function which implements the case
	folding algorithm. The database entries are of the form:

	<code>; <status>; <mapping>; # <name>

	<status> can be one of four characters:
	C - Common mappings
	S - mappings for Simple case folding
	F - mappings for Full case folding
	T - special case for Turkish I characters

	Right now this generates a function which implements simple case folding (C+S
	entries).
	"""

	import sys
	import re
	import urllib2

	# This variable will body of the mappings function
	body = ""

	# Reads file line-by-line, extracts Common and Simple case fold mappings and
	# returns a (from_char, to_char, from_name) tuple.
	def mappings(f):
	previous_from = -1
	expr = re.compile(r'^(.); [CS]; (.); # (.*)')
	for line in f:
	m = expr.match(line)
	if not m: continue
	from_char = int(m.group(1), 16)
	to_char = int(m.group(2), 16)
	from_name = m.group(3)

	if from_char <= previous_from:
	raise Exception("Duplicate or unsorted characters in input")
	yield from_char, to_char, from_name
	previous_from = from_char

	# Computes the shift (to_char - from_char) in a mapping.
	def shift(mapping):
	return mapping[1] - mapping[0]

	# Computes the stride (from_char2 - from_char1) of two mappings.
	def stride2(mapping1, mapping2):
	return mapping2[0] - mapping1[0]

	# Computes the stride of a list of mappings. The list should have at least two
	# mappings. All mappings in the list are assumed to have the same stride.
	def stride(block):
	return stride2(block[0], block[1])


	# b is a list of mappings. All the mappings are assumed to have the same
	# shift and the stride between adjecant mappings (if any) is constant.
	def dump_block(b):
	global body

	if len(b) == 1:
	# Special case for handling blocks of length 1. We don't even need to
	# emit the "if (C < X) return C" check below as all characters in this
	# range will be caught by the "C < X" check emitted by the first
	# non-trivial block.
	body += " // {2}\n if (C == {0:#06x})\n return {1:#06x};\n".format(*b[0])
	return

	first = b[0][0]
	last = first + stride(b) * (len(b)-1)
	modulo = first % stride(b)

	# All characters before this block map to themselves.
	body += " if (C < {0:#06x})\n return C;\n".format(first)
	body += " // {0} characters\n".format(len(b))

	# Generic pattern: check upper bound (lower bound is checked by the "if"
	# above) and modulo of C, return C+shift.
	pattern = " if (C <= {0:#06x} && C % {1} == {2})\n return C + {3};\n"

	if stride(b) == 2 and shift(b[0]) == 1 and modulo == 0:
	# Special case:
	# We can elide the modulo-check because the expression "C\|1" will map
	# the intervening characters to themselves.
	pattern = " if (C <= {0:#06x})\n return C \| 1;\n"
	elif stride(b) == 1:
	# Another special case: X % 1 is always zero, so don't emit the
	# modulo-check.
	pattern = " if (C <= {0:#06x})\n return C + {3};\n"

	body += pattern.format(last, stride(b), modulo, shift(b[0]))

	current_block = []
	f = urllib2.urlopen(sys.argv[1])
	for m in mappings(f):
	if len(current_block) == 0:
	current_block.append(m)
	continue

	if shift(current_block[0]) != shift(m):
	# Incompatible shift, start a new block.
	dump_block(current_block)
	current_block = [m]
	continue

	if len(current_block) == 1 or stride(current_block) == stride2(current_block[-1], m):
	current_block.append(m)
	continue

	# Incompatible stride, start a new block.
	dump_block(current_block)
	current_block = [m]
	f.close()

	dump_block(current_block)

	print '//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//'
	print '//'
	print '// This file was generated by utils/unicode-case-fold.py from the Unicode'
	print '// case folding database at'
	print '// ', sys.argv[1]
	print '//'
	print '// To regenerate this file, run:'
	print '// utils/unicode-case-fold.py \\'
	print '// "{}" \\'.format(sys.argv[1])
	print '// > lib/Support/UnicodeCaseFold.cpp'
	print '//'
	print '//===----------------------------------------------------------------------===//'
	print ''
	print '#include "llvm/Support/Unicode.h"'
	print ''
	print "int llvm::sys::unicode::foldCharSimple(int C) {"
	print body
	print " return C;"
	print "}"