|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Text;
namespace System.Globalization
{
////////////////////////////////////////////////////////////////////////////
//
// Used in HebrewNumber.ParseByChar to maintain the context information (
// the state in the state machine and current Hebrew number values, etc.)
// when parsing Hebrew number character by character.
//
////////////////////////////////////////////////////////////////////////////
internal struct HebrewNumberParsingContext
{
    // Where the parser currently stands in the Hebrew number state machine.
    internal HebrewNumber.HS state;

    // Running total of the Hebrew digit values consumed so far.
    // It is only the final value once a FoundEndOfHebrewNumber result is returned.
    internal int result;

    public HebrewNumberParsingContext(int result)
    {
        this.result = result;

        // Every parse begins in the start state of the state machine.
        this.state = HebrewNumber.HS.Start;
    }
}
////////////////////////////////////////////////////////////////////////////
//
// Please see ParseByChar() for comments about different states defined here.
//
////////////////////////////////////////////////////////////////////////////
internal enum HebrewNumberParsingState
{
    // The character leads the state machine into an error state.
    InvalidHebrewNumber = 0,

    // The character is not a Hebrew digit at all.
    NotHebrewDigit = 1,

    // The character completes a valid Hebrew number.
    FoundEndOfHebrewNumber = 2,

    // The character is valid so far; more characters are expected.
    ContinueParsing = 3,
}
////////////////////////////////////////////////////////////////////////////
//
// class HebrewNumber
//
// Provides static methods for formatting integer values into
// Hebrew text and parsing Hebrew number text.
//
// Limitations:
// Parse can only handle value 1 ~ 999.
// Append() can only handle 1 ~ 999. If value is greater than 5000,
// 5000 will be subtracted from the value.
//
////////////////////////////////////////////////////////////////////////////
internal static class HebrewNumber
{
////////////////////////////////////////////////////////////////////////////
//
// Append
//
// Converts the given number to Hebrew letters according to the numeric
// value of each Hebrew letter, appending to the supplied StringBuilder.
// Basically, this converts the lunar year and the lunar month to letters.
//
// The character of a year is described by three letters of the Hebrew
// alphabet, the first and third giving, respectively, the days of the
// weeks on which the New Year occurs and Passover begins, while the
// second is the initial of the Hebrew word for defective, normal, or
// complete.
//
// Defective Year : Both Heshvan and Kislev are defective (353 or 383 days)
// Normal Year : Heshvan is defective, Kislev is full (354 or 384 days)
// Complete Year : Both Heshvan and Kislev are full (355 or 385 days)
//
////////////////////////////////////////////////////////////////////////////
//
// Parameters:
//      outputBuffer - Builder that receives the Hebrew text. TChar is either
//                     char (UTF-16) or byte (UTF-8).
//      Number       - Value to format. If it is greater than 5000, 5000 is
//                     subtracted first; the remaining value must be 1 ~ 999
//                     (this is only checked by Debug.Assert).
//
// Output:
//      A one-letter number is followed by a single quote.
//      A number with two or more letters gets a double quote inserted
//      before its last letter.
//
internal static void Append<TChar>(ref ValueListBuilder<TChar> outputBuffer, int Number) where TChar : unmanaged, IUtfChar<TChar>
{
    // Counts Hebrew letters, not buffer elements. In UTF-8 a single Hebrew
    // letter occupies more than one byte, so the growth of the buffer cannot
    // be used to tell a one-letter number from a multi-letter one.
    int letterCount = 0;

    //
    //  Adjust the number if greater than 5000.
    //
    if (Number > 5000)
    {
        Number -= 5000;
    }

    Debug.Assert(Number > 0 && Number <= 999, "Number is out of range.");

    //
    //  Get the Hundreds.
    //
    int Hundreds = Number / 100;
    if (Hundreds > 0)
    {
        Number -= Hundreds * 100;

        // \x05e7 = 100
        // \x05e8 = 200
        // \x05e9 = 300
        // \x05ea = 400
        // If the number is greater than 400, use the multiples of 400.
        for (int i = 0; i < (Hundreds / 4); i++)
        {
            DateTimeFormat.AppendChar(ref outputBuffer, '\x05ea');
            letterCount++;
        }

        int remains = Hundreds % 4;
        if (remains > 0)
        {
            // '\x05e6' + 1..3 yields the letters for 100, 200 and 300.
            DateTimeFormat.AppendChar(ref outputBuffer, (char)('\x05e6' + remains));
            letterCount++;
        }
    }

    //
    //  Get the Tens.
    //
    int Tens = Number / 10;
    Number %= 10;

    char cTens = Tens switch
    {
        1 => '\x05d9',  // Hebrew Letter Yod
        2 => '\x05db',  // Hebrew Letter Kaf
        3 => '\x05dc',  // Hebrew Letter Lamed
        4 => '\x05de',  // Hebrew Letter Mem
        5 => '\x05e0',  // Hebrew Letter Nun
        6 => '\x05e1',  // Hebrew Letter Samekh
        7 => '\x05e2',  // Hebrew Letter Ayin
        8 => '\x05e4',  // Hebrew Letter Pe
        9 => '\x05e6',  // Hebrew Letter Tsadi
        _ => '\x0',     // No tens letter.
    };

    //
    //  Get the Units.
    //
    char cUnits = (char)(Number > 0 ? ((int)'\x05d0' + Number - 1) : 0);

    // 15 is written as 9 + 6 instead of 10 + 5.
    if ((cUnits == '\x05d4') &&            // Hebrew Letter He  (5)
        (cTens == '\x05d9'))               // Hebrew Letter Yod (10)
    {
        cUnits = '\x05d5';                 // Hebrew Letter Vav (6)
        cTens = '\x05d8';                  // Hebrew Letter Tet (9)
    }

    // 16 is written as 9 + 7 instead of 10 + 6. This cannot fire after the
    // substitution above because cTens is no longer Yod at that point.
    if ((cUnits == '\x05d5') &&            // Hebrew Letter Vav (6)
        (cTens == '\x05d9'))               // Hebrew Letter Yod (10)
    {
        cUnits = '\x05d6';                 // Hebrew Letter Zayin (7)
        cTens = '\x05d8';                  // Hebrew Letter Tet (9)
    }

    //
    //  Copy the appropriate info to the given buffer.
    //
    if (cTens != '\x0')
    {
        DateTimeFormat.AppendChar(ref outputBuffer, cTens);
        letterCount++;
    }

    if (cUnits != '\x0')
    {
        DateTimeFormat.AppendChar(ref outputBuffer, cUnits);
        letterCount++;
    }

    if (letterCount > 1)
    {
        // Two or more letters: re-emit the last letter after a double quote.
        if (typeof(TChar) == typeof(char))
        {
            TChar last = outputBuffer[outputBuffer.Length - 1];
            outputBuffer.Length--;
            outputBuffer.Append(TChar.CastFrom('"'));
            outputBuffer.Append(last);
        }
        else
        {
            Debug.Assert(typeof(TChar) == typeof(byte));

            // The last letter spans several bytes; decode it to learn how many to remove.
            Rune.DecodeLastFromUtf8(MemoryMarshal.AsBytes(outputBuffer.AsSpan()), out Rune value, out int bytesConsumed);
            outputBuffer.Length -= bytesConsumed;
            outputBuffer.Append(TChar.CastFrom('"'));
            DateTimeFormat.AppendChar(ref outputBuffer, (char)value.Value);
        }
    }
    else
    {
        // A single letter is simply followed by a single quote.
        DateTimeFormat.AppendChar(ref outputBuffer, '\'');
    }
}
////////////////////////////////////////////////////////////////////////////
//
// Tokens that a Hebrew number string is broken into so that they can be
// fed to the state machine.
//
////////////////////////////////////////////////////////////////////////////
// Classifies one input character for the parsing state machine.
// The non-negative values are used as column indexes into s_numberPassingState
// (ParseByChar indexes it with state * HebrewTokenCount + token), so they must
// stay contiguous from 0 and match the column order of that table.
private enum HebrewToken : short
{
Invalid = -1, // Not a Hebrew digit; ParseByChar returns before using it as an index.
Digit400 = 0, // 400
Digit200_300 = 1, // 200 or 300
Digit100 = 2, // 100
Digit10 = 3, // 10 ~ 90
Digit1 = 4, // 1, 2, 3, 4, 5, 8
Digit6_7 = 5, // 6 or 7
Digit7 = 6, // Not assigned to any entry in s_hebrewValues; 7 is mapped to Digit6_7.
Digit9 = 7, // 9
SingleQuote = 8, // '
DoubleQuote = 9, // "
}
////////////////////////////////////////////////////////////////////////////
//
// This struct pairs a token with its Hebrew digit value.
//
////////////////////////////////////////////////////////////////////////////
private readonly struct HebrewValue
{
    // Token class fed to the state machine for this character.
    internal readonly HebrewToken token;

    // Numeric value contributed by this character (-1 when it has none).
    internal readonly short value;

    internal HebrewValue(HebrewToken token, short value)
    {
        this.value = value;
        this.token = token;
    }
}
//
// Map a Hebrew character from U+05D0 ~ U+05EA to its digit value.
// The value is -1 if the Hebrew character does not have an associated value.
//
// One entry per code point, in order, starting at minHebrewNumberCh.
// Final-form letters carry no numeric value and are marked Invalid / -1.
private static readonly HebrewValue[] s_hebrewValues =
{
    new(HebrewToken.Digit1, 1),          // U+05D0 Alef
    new(HebrewToken.Digit1, 2),          // U+05D1 Bet
    new(HebrewToken.Digit1, 3),          // U+05D2 Gimel
    new(HebrewToken.Digit1, 4),          // U+05D3 Dalet
    new(HebrewToken.Digit1, 5),          // U+05D4 He
    new(HebrewToken.Digit6_7, 6),        // U+05D5 Vav
    new(HebrewToken.Digit6_7, 7),        // U+05D6 Zayin
    new(HebrewToken.Digit1, 8),          // U+05D7 Het
    new(HebrewToken.Digit9, 9),          // U+05D8 Tet
    new(HebrewToken.Digit10, 10),        // U+05D9 Yod
    new(HebrewToken.Invalid, -1),        // U+05DA Final Kaf
    new(HebrewToken.Digit10, 20),        // U+05DB Kaf
    new(HebrewToken.Digit10, 30),        // U+05DC Lamed
    new(HebrewToken.Invalid, -1),        // U+05DD Final Mem
    new(HebrewToken.Digit10, 40),        // U+05DE Mem
    new(HebrewToken.Invalid, -1),        // U+05DF Final Nun
    new(HebrewToken.Digit10, 50),        // U+05E0 Nun
    new(HebrewToken.Digit10, 60),        // U+05E1 Samekh
    new(HebrewToken.Digit10, 70),        // U+05E2 Ayin
    new(HebrewToken.Invalid, -1),        // U+05E3 Final Pe
    new(HebrewToken.Digit10, 80),        // U+05E4 Pe
    new(HebrewToken.Invalid, -1),        // U+05E5 Final Tsadi
    new(HebrewToken.Digit10, 90),        // U+05E6 Tsadi
    new(HebrewToken.Digit100, 100),      // U+05E7 Qof
    new(HebrewToken.Digit200_300, 200),  // U+05E8 Resh
    new(HebrewToken.Digit200_300, 300),  // U+05E9 Shin
    new(HebrewToken.Digit400, 400),      // U+05EA Tav
};

// First code point covered by s_hebrewValues.
private const int minHebrewNumberCh = 0x05D0;

// Last code point covered by s_hebrewValues. Must be declared after the table,
// because static initializers run in textual order and this reads its length.
private static readonly char s_maxHebrewNumberCh = (char)(minHebrewNumberCh + s_hebrewValues.Length - 1);
////////////////////////////////////////////////////////////////////////////
//
// Hebrew number parsing State
// The current state and the next token will lead to the next state in the state machine.
// DQ = Double Quote
//
////////////////////////////////////////////////////////////////////////////
// States of the Hebrew number parsing state machine.
// Values 0 through S9_DQ are row indexes into s_numberPassingState
// (ParseByChar indexes it with state * HebrewTokenCount + token).
// _err and END are result markers only and have no row in that table.
internal enum HS : sbyte
{
_err = -1, // an error state
Start = 0, // Initial state; nothing has been consumed yet.
S400 = 1, // a Hebrew digit 400
S400_400 = 2, // Two Hebrew digit 400
S400_X00 = 3, // A Hebrew digit 400 followed by 100, 200 or 300
S400_X0 = 4, // Hebrew digit 400 and followed by 10 ~ 90
X00_DQ = 5, // A hundred number and followed by a double quote.
S400_X00_X0 = 6, // Hundreds reached via S400_X00 or S400_400_100, followed by 10 ~ 90
X0_DQ = 7, // A two-digit number and followed by a double quote.
X = 8, // A single digit Hebrew number.
X0 = 9, // A two-digit Hebrew number
X00 = 10, // A three-digit Hebrew number
S400_DQ = 11, // A Hebrew digit 400 and followed by a double quote.
S400_400_DQ = 12, // Two Hebrew digit 400 followed by a double quote.
S400_400_100 = 13, // Two Hebrew digit 400 followed by 100
S9 = 14, // Hebrew digit 9
X00_S9 = 15, // A hundred number and followed by a digit 9
S9_DQ = 16, // Hebrew digit 9 and followed by a double quote
END = 100, // A terminal state is reached.
}
//
// The state machine for Hebrew number parsing.
//
// Transition table stored as a flat array with one row per state
// (HS.Start through HS.S9_DQ) and HebrewTokenCount columns per row.
// ParseByChar looks up the next state at [state * HebrewTokenCount + token].
// Columns are in HebrewToken order:
// Digit400, Digit200_300, Digit100, Digit10, Digit1, Digit6_7, Digit7, Digit9, SingleQuote, DoubleQuote.
private static readonly HS[] s_numberPassingState =
{
// 400 300/200 100 90~10 8~1 6, 7, 9, ' "
/* 0: Start */
HS.S400, HS.X00, HS.X00, HS.X0, HS.X, HS.X, HS.X, HS.S9, HS._err, HS._err,
/* 1: S400 */
HS.S400_400, HS.S400_X00, HS.S400_X00, HS.S400_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS.END, HS.S400_DQ,
/* 2: S400_400 */
HS._err, HS._err, HS.S400_400_100, HS.S400_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.S400_400_DQ,
/* 3: S400_X00 */
HS._err, HS._err, HS._err, HS.S400_X00_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.X00_DQ,
/* 4: S400_X0 */
HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.X0_DQ,
/* 5: X00_DQ */
HS._err, HS._err, HS._err, HS.END, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err,
/* 6: S400_X00_X0 */
HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.X0_DQ,
/* 7: X0_DQ */
HS._err, HS._err, HS._err, HS._err, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err,
/* 8: X */
HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS._err,
/* 9: X0 */
HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS.X0_DQ,
/* 10: X00 */
HS._err, HS._err, HS._err, HS.S400_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS.END, HS.X00_DQ,
/* 11: S400_DQ */
HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err,
/* 12: S400_400_DQ*/
HS._err, HS._err, HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err,
/* 13: S400_400_100*/
HS._err, HS._err, HS._err, HS.S400_X00_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.X00_DQ,
/* 14: S9 */
HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS.S9_DQ,
/* 15: X00_S9 */
HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.S9_DQ,
/* 16: S9_DQ */
HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS.END, HS._err, HS._err, HS._err
};
// Number of non-negative HebrewToken values, which is the number of columns
// per row in s_numberPassingState.
private const int HebrewTokenCount = 10;
////////////////////////////////////////////////////////////////////////
//
// Actions:
// Parse the Hebrew number by passing one character at a time.
// The state between characters is maintained in HebrewNumberParsingContext.
// Returns:
// Returns a HebrewNumberParsingState value.
// NotHebrewDigit: The specified ch is not a valid Hebrew digit.
// InvalidHebrewNumber: After parsing the specified ch, it will lead into
// an invalid Hebrew number text.
// FoundEndOfHebrewNumber: A terminal state is reached. This means that
// we find a valid Hebrew number text after the specified ch is parsed.
// ContinueParsing: The specified ch is a valid Hebrew digit, and
// it will lead into a valid state in the state machine, we should
// continue to parse incoming characters.
//
////////////////////////////////////////////////////////////////////////
internal static HebrewNumberParsingState ParseByChar(char ch, ref HebrewNumberParsingContext context)
{
    // The flat table must hold exactly one row per state from Start through S9_DQ.
    Debug.Assert(s_numberPassingState.Length == HebrewTokenCount * ((int)HS.S9_DQ + 1));

    // Step 1: classify the character.
    HebrewToken token;
    switch (ch)
    {
        case '\'':
        {
            token = HebrewToken.SingleQuote;
            break;
        }

        case '\"':
        {
            token = HebrewToken.DoubleQuote;
            break;
        }

        default:
        {
            int offset = ch - minHebrewNumberCh;

            // A single unsigned comparison rejects both negative and too-large offsets.
            if ((uint)offset >= (uint)s_hebrewValues.Length)
            {
                // Not in valid Hebrew digit range.
                return HebrewNumberParsingState.NotHebrewDigit;
            }

            HebrewValue entry = s_hebrewValues[offset];
            if (entry.token == HebrewToken.Invalid)
            {
                // In range, but the letter carries no numeric value.
                return HebrewNumberParsingState.NotHebrewDigit;
            }

            token = entry.token;

            // The digit value is accumulated before the transition is evaluated,
            // so it is added even when the transition below ends in an error.
            context.result += entry.value;
            break;
        }
    }

    // Step 2: advance the state machine.
    HS next = s_numberPassingState[(int)context.state * HebrewTokenCount + (int)token];
    context.state = next;

    // Step 3: report what the new state means to the caller.
    return next switch
    {
        // Invalid Hebrew state. This indicates an incorrect Hebrew number.
        HS._err => HebrewNumberParsingState.InvalidHebrewNumber,

        // Reached a terminal state.
        HS.END => HebrewNumberParsingState.FoundEndOfHebrewNumber,

        // Anything else means more characters are needed.
        _ => HebrewNumberParsingState.ContinueParsing,
    };
}
////////////////////////////////////////////////////////////////////////
//
// Actions:
// Check if the ch is a valid Hebrew number digit.
// This function will return true if the specified char is a legal Hebrew
// digit character, single quote, or double quote.
// Returns:
// true if the specified character is a valid Hebrew number character.
//
////////////////////////////////////////////////////////////////////////
internal static bool IsDigit(char ch)
{
    // Outside the Hebrew letter range only the two quote characters qualify.
    if (ch < minHebrewNumberCh || ch > s_maxHebrewNumberCh)
    {
        return ch is '\'' or '\"';
    }

    // Inside the range, letters without a numeric value are stored as -1.
    HebrewValue entry = s_hebrewValues[ch - minHebrewNumberCh];
    return entry.value >= 0;
}
}
}
|