22 references to Cl100kBase
Microsoft.ML.Tokenizers (22)
Model\TiktokenTokenizer.cs (22)
1052( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k
1053( "gpt-3.5-", ModelEncoding.Cl100kBase), // e.g, gpt-3.5-turbo-0301, -0401, etc.
1054( "gpt-35-", ModelEncoding.Cl100kBase ), // Azure deployment name
1059( "ft:gpt-4", ModelEncoding.Cl100kBase ),
1060( "ft:gpt-3.5-turbo", ModelEncoding.Cl100kBase ),
1061( "ft:davinci-002", ModelEncoding.Cl100kBase ),
1062( "ft:babbage-002", ModelEncoding.Cl100kBase ),
1077{ "gpt-4", ModelEncoding.Cl100kBase },
1078{ "gpt-3.5-turbo", ModelEncoding.Cl100kBase },
1079{ "gpt-3.5", ModelEncoding.Cl100kBase },
1080{ "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase },
1081{ "gpt-35", ModelEncoding.Cl100kBase }, // Azure deployment name
1082{ "gpt-35-turbo", ModelEncoding.Cl100kBase }, // Azure deployment name
1083{ "gpt-35-turbo-16k", ModelEncoding.Cl100kBase }, // Azure deployment name
1086{ "davinci-002", ModelEncoding.Cl100kBase },
1087{ "babbage-002", ModelEncoding.Cl100kBase },
1091{ "text-embedding-ada-002", ModelEncoding.Cl100kBase },
1092{ "text-embedding-3-small", ModelEncoding.Cl100kBase },
1093{ "text-embedding-3-large", ModelEncoding.Cl100kBase },
1138{ Phi4ModelName, ModelEncoding.Cl100kBase },
1195case ModelEncoding.Cl100kBase:
1535modelEncoding = ModelEncoding.Cl100kBase;