32 references to GPT4
Microsoft.ML.Tokenizers.Tests (32)
TiktokenTests.cs (32)
43TestGPT4TokenizationEncoding(GPT4);
46Assert.True(GPT4 is TiktokenTokenizer);
47IReadOnlyDictionary<string, int>? specialTokens = (GPT4 as TiktokenTokenizer)!.SpecialTokens;
62Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.PreTokenizer, null, specialTokens);
67tokenizer = TiktokenTokenizer.Create(stream, GPT4.PreTokenizer, null, specialTokens);
71tokenizer = await TiktokenTokenizer.CreateAsync(tokenizerDataFileName, GPT4.PreTokenizer, normalizer: null, specialTokens);
76tokenizer = await TiktokenTokenizer.CreateAsync(stream, GPT4.PreTokenizer, normalizer: null, specialTokens);
103yield return new object[] { GPT4, @"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" };
194IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
196Assert.Equal(text, GPT4.Decode(encoded));
197TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text);
199IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
200int idsCount = GPT4.CountTokens(text);
237IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
239Assert.Equal(text, GPT4.Decode(encoded));
240TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text);
242IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
247int idsCount = GPT4.CountTokens(text);
258IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
261IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
262int idsCount = GPT4.CountTokens(text);
271IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
272int idsCount = GPT4.CountTokens(text);
274Assert.Equal(text, GPT4.Decode(encoded));
275TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text);
277IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
576TestTokenizerEncodingForTokenizer(GPT4, text, expectedTokens, expectedOffsets, expectedIds);
697IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out _);
703Assert.Equal(expectedIds, GPT4.EncodeToIds(text));
704Assert.Equal(expectedIds.Length, GPT4.CountTokens(text));
708int length = GPT4.GetIndexByTokenCount(text, tokenCount, out _, out int count);
726int index = GPT4.GetIndexByTokenCountFromEnd(text, tokenCount, out _, out count);