32 references to GPT4
Microsoft.ML.Tokenizers.Tests (32)
TiktokenTests.cs (32)
45TestGPT4TokenizationEncoding(GPT4); 48Assert.True(GPT4 is TiktokenTokenizer); 49IReadOnlyDictionary<string, int>? specialTokens = (GPT4 as TiktokenTokenizer)!.SpecialTokens; 64Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.PreTokenizer, null, specialTokens); 69tokenizer = TiktokenTokenizer.Create(stream, GPT4.PreTokenizer, null, specialTokens); 73tokenizer = await TiktokenTokenizer.CreateAsync(tokenizerDataFileName, GPT4.PreTokenizer, normalizer: null, specialTokens); 78tokenizer = await TiktokenTokenizer.CreateAsync(stream, GPT4.PreTokenizer, normalizer: null, specialTokens); 105yield return new object[] { GPT4, @"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" }; 196IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 198Assert.Equal(text, GPT4.Decode(encoded)); 199TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text); 201IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 202int idsCount = GPT4.CountTokens(text); 239IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 241Assert.Equal(text, GPT4.Decode(encoded)); 242TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text); 244IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 249int idsCount = GPT4.CountTokens(text); 260IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 263IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 264int idsCount = GPT4.CountTokens(text); 273IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 274int idsCount = GPT4.CountTokens(text); 276Assert.Equal(text, GPT4.Decode(encoded)); 277TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text); 279IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText); 608TestTokenizerEncodingForTokenizer(GPT4, text, expectedTokens, expectedOffsets, expectedIds); 729IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out _); 735Assert.Equal(expectedIds, GPT4.EncodeToIds(text)); 736Assert.Equal(expectedIds.Length, GPT4.CountTokens(text)); 740int length = GPT4.GetIndexByTokenCount(text, tokenCount, out _, out int count); 758int index = GPT4.GetIndexByTokenCountFromEnd(text, tokenCount, out _, out count);