15 references to Cl100kBase
Microsoft.ML.Tokenizers (15)
Model\TiktokenTokenizer.cs (15)
1037: ( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k
1038: ( "gpt-3.5-", ModelEncoding.Cl100kBase), // e.g, gpt-3.5-turbo-0301, -0401, etc.
1039: ( "gpt-35-", ModelEncoding.Cl100kBase ) // Azure deployment name
1051: { "gpt-4", ModelEncoding.Cl100kBase },
1052: { "gpt-3.5-turbo", ModelEncoding.Cl100kBase },
1053: { "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase },
1054: { "gpt-35", ModelEncoding.Cl100kBase }, // Azure deployment name
1055: { "gpt-35-turbo", ModelEncoding.Cl100kBase }, // Azure deployment name
1056: { "gpt-35-turbo-16k", ModelEncoding.Cl100kBase }, // Azure deployment name
1084: { "text-embedding-ada-002", ModelEncoding.Cl100kBase },
1085: { "text-embedding-3-small", ModelEncoding.Cl100kBase },
1086: { "text-embedding-3-large", ModelEncoding.Cl100kBase },
1104: { Phi4ModelName, ModelEncoding.Cl100kBase },
1135: case ModelEncoding.Cl100kBase:
1471: modelEncoding = ModelEncoding.Cl100kBase;