File: Storage\SQLite\v2\SQLitePersistentStorage.cs
Web Access
Project: src\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Internal.Log;
using Microsoft.CodeAnalysis.Shared.TestHooks;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Microsoft.CodeAnalysis.SQLite.Interop;
using Microsoft.CodeAnalysis.SQLite.v2.Interop;
using Microsoft.CodeAnalysis.Storage;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis.SQLite.v2;
 
using static SQLitePersistentStorageConstants;
 
/// <summary>
/// Implementation of an <see cref="IPersistentStorage"/> backed by SQLite.
/// </summary>
internal sealed partial class SQLitePersistentStorage : AbstractPersistentStorage
{
    private const string LockFile = "db.lock";
 
    private readonly CancellationTokenSource _shutdownTokenSource = new();
 
    private readonly string _solutionDirectory;
 
    // We pool connections to the DB so that we don't have to take the hit of 
    // reconnecting.  The connections also cache the prepared statements used
    // to get/set data from the db.  A connection is safe to use by one thread
    // at a time, but is not safe for simultaneous use by multiple threads.
    private readonly object _connectionGate = new();
    private readonly Stack<SqlConnection> _connectionsPool = new();
    private readonly Action _flushInMemoryDataToDisk;
 
    /// <summary>
    /// Lock file that ensures only one database is made per process per solution.
    /// </summary>
    public readonly IDisposable DatabaseOwnership;
 
    /// <summary>
    /// For testing purposes.  Allows us to test what happens when we fail to acquire the db lock file.
    /// </summary>
    private readonly IPersistentStorageFaultInjector? _faultInjector;
 
    // Accessors that allow us to retrieve/store data into specific DB tables.  The
    // core Accessor type has logic that we to share across all reading/writing, while
    // the derived types contain only enough logic to specify how to read/write from
    // their respective tables.
 
    private readonly SolutionAccessor _solutionAccessor;
    private readonly ProjectAccessor _projectAccessor;
    private readonly DocumentAccessor _documentAccessor;
 
    // cached query strings
 
    private readonly string _insert_into_string_table_values_0 = $"insert into {StringInfoTableName}({DataColumnName}) values (?)";
    private readonly string _select_star_from_string_table_where_0_limit_one = $"select * from {StringInfoTableName} where ({DataColumnName} = ?) limit 1";
    private readonly string _select_star_from_string_table = $"select * from {StringInfoTableName}";
 
    /// <summary>
    /// Use a <see cref="ConcurrentExclusiveSchedulerPair"/> to simulate a reader-writer lock.
    /// Read operations are performed on the <see cref="ConcurrentExclusiveSchedulerPair.ConcurrentScheduler"/>
    /// and writes are performed on the <see cref="ConcurrentExclusiveSchedulerPair.ExclusiveScheduler"/>.
    ///
    /// We use this as a condition of using the in-memory shared-cache sqlite DB.  This DB
    /// doesn't busy-wait when attempts are made to lock the tables in it, which can lead to
    /// deadlocks.  Specifically, consider two threads doing the following:
    ///
    /// Thread A starts a transaction that starts as a reader, and later attempts to perform a
    /// write. Thread B is a writer (either started that way, or started as a reader and
    /// promoted to a writer first). B holds a RESERVED lock, waiting for readers to clear so it
    /// can start writing. A holds a SHARED lock (it's a reader) and tries to acquire RESERVED
    /// lock (so it can start writing).  The only way to make progress in this situation is for
    /// one of the transactions to roll back. No amount of waiting will help, so when SQLite
    /// detects this situation, it doesn't honor the busy timeout.
    ///
    /// To prevent this scenario, we control our access to the db explicitly with operations that
    /// can concurrently read, and operations that exclusively write.
    ///
    /// All code that reads or writes from the db should go through this.
    /// </summary>
    private ConcurrentExclusiveSchedulerPair Scheduler { get; } = new();
 
    private SQLitePersistentStorage(
        SolutionKey solutionKey,
        string workingFolderPath,
        string databaseFile,
        IAsynchronousOperationListener asyncListener,
        IPersistentStorageFaultInjector? faultInjector,
        IDisposable databaseOwnership)
        : base(solutionKey, workingFolderPath, databaseFile)
    {
        Contract.ThrowIfNull(solutionKey.FilePath);
        _solutionDirectory = PathUtilities.GetDirectoryName(solutionKey.FilePath);
 
        _solutionAccessor = new SolutionAccessor(this);
        _projectAccessor = new ProjectAccessor(this);
        _documentAccessor = new DocumentAccessor(this);
 
        _faultInjector = faultInjector;
        DatabaseOwnership = databaseOwnership;
 
        // Create a delay to batch up requests to flush.  We'll won't flush more than every FlushAllDelayMS.
        _flushInMemoryDataToDisk = FlushInMemoryDataToDisk;
        _flushQueue = new AsyncBatchingWorkQueue(
            TimeSpan.FromMilliseconds(FlushAllDelayMS),
            FlushInMemoryDataToDiskIfNotShutdownAsync,
            asyncListener,
            _shutdownTokenSource.Token);
    }
 
    public static SQLitePersistentStorage? TryCreate(
        SolutionKey solutionKey,
        string workingFolderPath,
        string databaseFile,
        IAsynchronousOperationListener asyncListener,
        IPersistentStorageFaultInjector? faultInjector)
    {
        var databaseOwnership = TryGetDatabaseOwnership(databaseFile);
        if (databaseOwnership is null)
            return null;
 
        var storage = new SQLitePersistentStorage(
            solutionKey, workingFolderPath, databaseFile, asyncListener, faultInjector, databaseOwnership);
        storage.Initialize();
        return storage;
    }
 
    /// <summary>
    /// Returns null in the case where an IO exception prevented us from being able to acquire
    /// the db lock file.
    /// </summary>
    private static IDisposable? TryGetDatabaseOwnership(string databaseFilePath)
    {
        return IOUtilities.PerformIO<IDisposable?>(() =>
        {
            // make sure directory exist first.
            EnsureDirectory(databaseFilePath);
 
            var directoryName = Path.GetDirectoryName(databaseFilePath);
            Contract.ThrowIfNull(directoryName);
 
            return File.Open(
                Path.Combine(directoryName, LockFile),
                FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
        }, defaultValue: null);
 
        static void EnsureDirectory(string databaseFilePath)
        {
            var directory = Path.GetDirectoryName(databaseFilePath);
            Contract.ThrowIfNull(directory);
 
            if (Directory.Exists(directory))
            {
                return;
            }
 
            Directory.CreateDirectory(directory);
        }
    }
 
    private void DisableStorage(SqlException exception)
    {
        Logger.Log(FunctionId.SQLite_StorageDisabled, GetLogMessage(exception));
        base.DisableStorage();
    }
 
    public static KeyValueLogMessage GetLogMessage(SqlException exception)
        => KeyValueLogMessage.Create(d =>
        {
            d["Result"] = exception.Result.ToString();
            d["Message"] = exception.Message;
        });
 
    private void Initialize()
    {
        // This is our startup path.  No other code can be running.  So it's safe for us to access a connection that can
        // talk to the db without having to be on the reader/writer scheduler queue.
        using var _ = GetPooledConnection(checkScheduler: false, out var connection);
 
        // Ensure the database has tables for the types we care about.
 
        // Enable write-ahead logging to increase write performance by reducing amount of disk writes,
        // by combining writes at checkpoint, salong with using sequential-only writes to populate the log.
        // Also, WAL allows for relaxed ("normal") "synchronous" mode, see below.
        connection.ExecuteCommand("pragma journal_mode=wal", throwOnError: false);
 
        // Set "synchronous" mode to "normal" instead of default "full" to reduce the amount of buffer flushing syscalls,
        // significantly reducing both the blocked time and the amount of context switches.
        // When coupled with WAL, this (according to https://sqlite.org/pragma.html#pragma_synchronous and
        // https://www.sqlite.org/wal.html#performance_considerations) is unlikely to significantly affect durability,
        // while significantly increasing performance, because buffer flushing is done for each checkpoint, instead of each
        // transaction. While some writes can be lost, they are never reordered, and higher layers will recover from that.
        connection.ExecuteCommand("pragma synchronous=normal", throwOnError: false);
 
        // First, create all string tables in the main on-disk db.  These tables
        // don't need to be in the write-cache as all string looks go to/from the
        // main db.  This isn't a perf problem as we write the strings in bulk,
        // so there's no need for a write caching layer.  This also keeps consistency
        // totally clear as there's only one source of truth.
        connection.ExecuteCommand(
$@"create table if not exists {StringInfoTableName}(
""{StringDataIdColumnName}"" integer primary key autoincrement not null,
""{DataColumnName}"" varchar)");
 
        // Ensure that the string-info table's 'Value' column is defined to be 'unique'.
        // We don't allow duplicate strings in this table.
        connection.ExecuteCommand(
$@"create unique index if not exists ""{StringInfoTableName}_{DataColumnName}"" on {StringInfoTableName}(""{DataColumnName}"")");
 
        // Now make sure we have the individual tables for the solution/project/document info.
        // We put this in both our persistent table and our in-memory table so that they have
        // the same shape.
        EnsureTables(connection, Database.Main);
        EnsureTables(connection, Database.WriteCache);
 
        // Bulk load all the existing string/id pairs in the DB at once.  In a solution like roslyn, there are
        // roughly 20k of these strings.  Doing it as 20k individual reads adds more than a second of work time
        // reading in all the data.  This allows for a single query that can efficiently have the DB just stream the
        // pages from disk and bulk read those in the cursor the query uses.
        LoadExistingStringIds(connection);
 
        return;
 
        void EnsureTables(SqlConnection connection, Database database)
        {
            _solutionAccessor.CreateTable(connection, database);
            _projectAccessor.CreateTable(connection, database);
            _documentAccessor.CreateTable(connection, database);
        }
    }
}