Jump to content

మాడ్యూల్:Unicode data/patterns

విక్షనరీ నుండి

Documentation for this module may be created at మాడ్యూల్:Unicode data/patterns/doc

local export = {}
local Array = require "Module:array"

-- Formats a code point as a hexadecimal HTML numeric character reference,
-- zero-padded to at least four uppercase hex digits (0x41 -> "&#x0041;").
local function numeric_character_reference(code_point)
	local reference = string.format("&#x%04X;", code_point)
	return reference
end

-- Groups every code point covered by a data module by the value it maps to.
--
-- data_module.singles is read as a map from code point to value, and
-- data_module.ranges as a list of { low, high, value } triples.
-- Returns a table mapping each value to an Array of { low, high } pairs;
-- a single code point is stored as { code_point, code_point }.
-- The returned table keeps its metatable, so indexing it with a value that
-- has no entry yet creates, stores and returns an empty Array.
function export.all_ranges_per_value(data_module)
	-- __index handler: lazily create the bucket for a value on first access.
	local function new_bucket(buckets, key)
		local bucket = Array()
		buckets[key] = bucket
		return bucket
	end

	local value_to_ranges = setmetatable({}, { __index = new_bucket })

	for code_point, value in pairs(data_module.singles) do
		local bucket = value_to_ranges[value]
		bucket:insert { code_point, code_point }
	end

	for _, range in ipairs(data_module.ranges) do
		local bucket = value_to_ranges[range[3]]
		bucket:insert { range[1], range[2] }
	end

	return value_to_ranges
end

-- Collects the code point ranges of a data module that map to value_to_find.
--
-- data_module.singles is read as a map from code point to value, and
-- data_module.ranges as a list of { low, high, value } triples.
-- Returns an Array of { low, high } pairs: matching singles first (in the
-- unspecified order of pairs), then matching ranges in list order.
-- The result is therefore not sorted by code point.
function export.ranges_per_value(data_module, value_to_find)
	local found = Array()

	-- Record the span only when its value equals the one being searched for.
	local function keep_if_match(low, high, value)
		if value == value_to_find then
			found:insert { low, high }
		end
	end

	for code_point, value in pairs(data_module.singles) do
		keep_if_match(code_point, code_point, value)
	end

	for _, range in ipairs(data_module.ranges) do
		keep_if_match(range[1], range[2], range[3])
	end

	return found
end

-- Sorts a list of { low, high } ranges in place, ascending by lower bound.
local function sort_ranges(ranges)
	local function starts_earlier(left, right)
		return left[1] < right[1]
	end

	table.sort(ranges, starts_earlier)
end

-- Builds the text that goes between the brackets of [...] or [^...]
-- in a Lua pattern or regular expression, from a list of { low, high }
-- code point ranges.
--
-- When char_ref is truthy every code point is written as a numeric
-- character reference; otherwise it is written as the character itself
-- via mw.ustring.char. A range whose two bounds are equal produces one
-- item, any other range produces "low-high".
-- NOTE: items are emitted verbatim; nothing here escapes characters
-- that are special inside a character class.
local function make_pattern(ranges, char_ref)
	-- Renders one code point in the representation selected by char_ref.
	local function render(code_point)
		if char_ref then
			return numeric_character_reference(code_point)
		end
		return mw.ustring.char(code_point)
	end

	local pieces = Array()

	for _, range in ipairs(ranges) do
		local low, high = range[1], range[2]
		pieces:insert(render(low))
		if low ~= high then
			pieces:insert "-"
			pieces:insert(render(high))
		end
	end

	return pieces:concat()
end

-- Strips the characters U+0000-U+001F out of a list of { low, high }
-- ranges, modifying the list (and the affected range tables) in place.
--
-- A range lying wholly inside U+0000-U+001F is removed; a range that
-- starts inside it but ends above it has its lower bound raised to U+0020.
-- The list does not need to be sorted, and any number of ranges may
-- touch the control block.
-- Treats all characters U+0000-U+001F as invalid in wikitext, but only some are.
local function sanitize_ranges(ranges)
	-- Walk backwards so that table.remove never shifts an entry that
	-- has not been visited yet.
	for i = #ranges, 1, -1 do
		local range = ranges[i]
		if 0 <= range[1] and range[1] <= 0x1F then
			if 0 <= range[2] and range[2] <= 0x1F then
				table.remove(ranges, i)
			else
				range[1] = 0x20
			end
		end
	end
end

-- Template entry point: returns character-class text (see the local
-- make_pattern) for every code point that a Unicode data submodule maps
-- to a given value, written as literal characters.
--
-- frame.args.module: name of the submodule of Module:Unicode data to load.
-- frame.args.value:  value to search for in that submodule.
-- Raises an error when either argument is missing, or when the module
-- name is empty.
function export.make_pattern(frame)
	local module_name = frame.args.module
	-- An empty |module= would otherwise surface as an obscure failure
	-- to load "Module:Unicode data/".
	if not module_name or module_name == "" then
		error("Provide name of submodule of Module:Unicode data in |module= parameter.")
	end

	local value = frame.args.value
	if not value then
		error("Provide value to search for in |value= parameter.")
	end

	local data_module = require("Module:Unicode data/" .. module_name)
	local ranges = export.ranges_per_value(data_module, value)

	-- ranges_per_value collects singles in the unspecified order of pairs,
	-- so sort first: sanitize_ranges is documented as expecting sorted
	-- input, and sorting also makes the generated text deterministic.
	sort_ranges(ranges)
	sanitize_ranges(ranges)

	return make_pattern(ranges, false)
end

-- Template entry point: returns a wikitext bullet list with one item per
-- value found in a Unicode data submodule. Each item shows the value
-- followed by its character-class text inside <code>...</code>, with code
-- points written as numeric character references.
--
-- frame.args.module: name of the submodule of Module:Unicode data to load.
-- Raises an error when that argument is missing.
function export.show_all_patterns(frame)
	local module_name = frame.args.module
	if not module_name then
		error("Provide name of submodule of Module:Unicode data in |module=.")
	end

	local data_module = require("Module:Unicode data/" .. module_name)
	local value_to_ranges = export.all_ranges_per_value(data_module)

	-- Put each value's ranges in ascending code point order.
	for _, ranges in pairs(value_to_ranges) do
		sort_ranges(ranges)
	end

	local lines = Array()
	local sorted_pairs = require("Module:table").sortedPairs
	for value, ranges in sorted_pairs(value_to_ranges) do
		local heading = "\n* " .. value .. ": "
		local pattern = make_pattern(ranges, true)
		lines:insert(heading .. "<code>" .. pattern .. "</code>")
	end

	return lines:concat()
end

return export