32 references to GPT4
Microsoft.ML.Tokenizers.Tests (32)
TiktokenTests.cs (32)
41TestGPT4TokenizationEncoding(GPT4); 43Assert.True(GPT4 is TiktokenTokenizer); 44IReadOnlyDictionary<string, int>? specialTokens = (GPT4 as TiktokenTokenizer)!.SpecialTokens; 59Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.PreTokenizer, null, specialTokens); 64tokenizer = TiktokenTokenizer.Create(stream, GPT4.PreTokenizer, null, specialTokens); 68tokenizer = await TiktokenTokenizer.CreateAsync(tokenizerDataFileName, GPT4.PreTokenizer, normalizer: null, specialTokens); 73tokenizer = await TiktokenTokenizer.CreateAsync(stream, GPT4.PreTokenizer, normalizer: null, specialTokens); 100yield return new object[] { GPT4, @"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" }; 191IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 193Assert.Equal(text, GPT4.Decode(encoded)); 194TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text); 196IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 197int idsCount = GPT4.CountTokens(text); 234IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 236Assert.Equal(text, GPT4.Decode(encoded)); 237TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text); 239IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 244int idsCount = GPT4.CountTokens(text); 255IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 258IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 259int idsCount = GPT4.CountTokens(text); 268IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 269int idsCount = GPT4.CountTokens(text); 271Assert.Equal(text, GPT4.Decode(encoded)); 272TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text); 274IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 566Tokenizer tokenizer = GPT4; 683IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out _); 689Assert.Equal(expectedIds, GPT4.EncodeToIds(text)); 690Assert.Equal(expectedIds.Length, GPT4.CountTokens(text)); 694int length = GPT4.GetIndexByTokenCount(text, tokenCount, out _, out int count); 712int index = GPT4.GetIndexByTokenCountFromEnd(text, tokenCount, out _, out count);