22 references to Cl100kBase
Microsoft.ML.Tokenizers (22)
Model\TiktokenTokenizer.cs (22)
1056( "gpt-4-", ModelEncoding.Cl100kBase ), // e.g., gpt-4-0314, etc., plus gpt-4-32k 1057( "gpt-3.5-", ModelEncoding.Cl100kBase ), // e.g, gpt-3.5-turbo-0301, -0401, etc. 1058( "gpt-35-", ModelEncoding.Cl100kBase ), // Azure deployment name 1063( "ft:gpt-4", ModelEncoding.Cl100kBase ), 1064( "ft:gpt-3.5-turbo", ModelEncoding.Cl100kBase ), 1065( "ft:davinci-002", ModelEncoding.Cl100kBase ), 1066( "ft:babbage-002", ModelEncoding.Cl100kBase ), 1085{ "gpt-4", ModelEncoding.Cl100kBase }, 1086{ "gpt-3.5-turbo", ModelEncoding.Cl100kBase }, 1087{ "gpt-3.5", ModelEncoding.Cl100kBase }, 1088{ "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase }, 1089{ "gpt-35", ModelEncoding.Cl100kBase }, // Azure deployment name 1090{ "gpt-35-turbo", ModelEncoding.Cl100kBase }, // Azure deployment name 1091{ "gpt-35-turbo-16k", ModelEncoding.Cl100kBase }, // Azure deployment name 1094{ "davinci-002", ModelEncoding.Cl100kBase }, 1095{ "babbage-002", ModelEncoding.Cl100kBase }, 1099{ "text-embedding-ada-002", ModelEncoding.Cl100kBase }, 1100{ "text-embedding-3-small", ModelEncoding.Cl100kBase }, 1101{ "text-embedding-3-large", ModelEncoding.Cl100kBase }, 1146{ Phi4ModelName, ModelEncoding.Cl100kBase }, 1203case ModelEncoding.Cl100kBase: 1543modelEncoding = ModelEncoding.Cl100kBase;