32 references to GPT4
Microsoft.ML.Tokenizers.Tests (32)
TiktokenTests.cs (32)
41TestGPT4TokenizationEncoding(GPT4);
43Assert.True(GPT4 is TiktokenTokenizer);
44IReadOnlyDictionary<string, int>? specialTokens = (GPT4 as TiktokenTokenizer)!.SpecialTokens;
59Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.PreTokenizer, null, specialTokens);
64tokenizer = TiktokenTokenizer.Create(stream, GPT4.PreTokenizer, null, specialTokens);
68tokenizer = await TiktokenTokenizer.CreateAsync(tokenizerDataFileName, GPT4.PreTokenizer, normalizer: null, specialTokens);
73tokenizer = await TiktokenTokenizer.CreateAsync(stream, GPT4.PreTokenizer, normalizer: null, specialTokens);
100yield return new object[] { GPT4, @"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" };
191IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
193Assert.Equal(text, GPT4.Decode(encoded));
194TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text);
196IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
197int idsCount = GPT4.CountTokens(text);
234IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
236Assert.Equal(text, GPT4.Decode(encoded));
237TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text);
239IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
244int idsCount = GPT4.CountTokens(text);
255IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
258IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
259int idsCount = GPT4.CountTokens(text);
268IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
269int idsCount = GPT4.CountTokens(text);
271Assert.Equal(text, GPT4.Decode(encoded));
272TestDecodingWithSpan((GPT4 as TiktokenTokenizer)!, encoded.ToArray(), text);
274IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out string? normalizedText);
566Tokenizer tokenizer = GPT4;
683IReadOnlyList<EncodedToken> result = GPT4.EncodeToTokens(text, out _);
689Assert.Equal(expectedIds, GPT4.EncodeToIds(text));
690Assert.Equal(expectedIds.Length, GPT4.CountTokens(text));
694int length = GPT4.GetIndexByTokenCount(text, tokenCount, out _, out int count);
712int index = GPT4.GetIndexByTokenCountFromEnd(text, tokenCount, out _, out count);