Skip to content

Commit

Permalink
Merge pull request #72621 from CyrusNajmabadi/docStates
Browse files Browse the repository at this point in the history
Break added documents into batches when processing solution/compilation translation states.
  • Loading branch information
CyrusNajmabadi authored Mar 21, 2024
2 parents 9bd2481 + 49a6f4a commit c4f4202
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 45 deletions.
101 changes: 96 additions & 5 deletions src/Compilers/Core/Portable/InternalUtilities/EnumerableExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,13 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.PooledObjects;
using Roslyn.Utilities;
using System.Threading;

#if DEBUG
using System.Diagnostics;
#endif

namespace Roslyn.Utilities
{
Expand Down Expand Up @@ -868,5 +865,99 @@ public static bool SequenceEqual<T>(this IEnumerable<T>? first, IEnumerable<T>?
return result;
}
}

#if NETSTANDARD

// Copied from https://github.com/dotnet/runtime/blob/main/src/libraries/System.Linq/src/System/Linq/Chunk.cs
/// <summary>
/// Splits <paramref name="source"/> into consecutive arrays that each hold at most
/// <paramref name="size"/> elements. Only the last array produced may be shorter than
/// <paramref name="size"/>.
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="size">Maximum number of elements per array; must be at least 1.</param>
/// <returns>A sequence of arrays covering the elements of <paramref name="source"/> in order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static IEnumerable<TSource[]> Chunk<TSource>(this IEnumerable<TSource> source, int size)
{
    if (source is null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (size < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    // Anything that is not an array goes through the general-purpose enumerator-based iterator.
    if (source is not TSource[] array)
    {
        return EnumerableChunkIterator(source, size);
    }

    // Arrays have an immutable length, so emptiness can be detected up front and no iterator
    // object needs to be allocated for that case.
    if (array.Length == 0)
    {
        return [];
    }

    // Non-empty arrays get the simpler slicing-based iterator.
    return ArrayChunkIterator(array, size);
}

/// <summary>
/// Lazily yields copies of consecutive slices of <paramref name="source"/>, each slice holding at
/// most <paramref name="size"/> elements.
/// </summary>
private static IEnumerable<TSource[]> ArrayChunkIterator<TSource>(TSource[] source, int size)
{
    for (int start = 0; start < source.Length;)
    {
        // The final slice is whatever remains, which may be fewer than `size` elements.
        int length = Math.Min(size, source.Length - start);

        // The span is only a temporary within this expression; it never lives across the yield.
        TSource[] piece = new ReadOnlySpan<TSource>(source, start, length).ToArray();

        // Advance before yielding so the next resume continues from the following slice.
        start += length;
        yield return piece;
    }
}

/// <summary>
/// Lazily pulls elements from <paramref name="source"/> and yields them in arrays of at most
/// <paramref name="size"/> elements. Only the last array yielded may be shorter.
/// </summary>
private static IEnumerable<TSource[]> EnumerableChunkIterator<TSource>(IEnumerable<TSource> source, int size)
{
    using IEnumerator<TSource> enumerator = source.GetEnumerator();

    // Nothing is allocated unless the source has at least one element.
    if (!enumerator.MoveNext())
    {
        yield break;
    }

    // Capacity of the next buffer to allocate. It starts small so that a source with far fewer
    // elements than `size` does not cause a large over-allocation, and it grows to `size` while
    // the first chunk is being filled.
    int capacity = Math.Min(size, 4);
    int count;

    do
    {
        var buffer = new TSource[capacity];

        // The enumerator is already positioned on the first element of this chunk.
        buffer[0] = enumerator.Current;
        count = 1;

        if (buffer.Length == size)
        {
            // The buffer is already full-sized (every chunk after the first, or a small `size`),
            // so simply fill it until it is full or the source runs out.
            TSource[] local = buffer; // cached local avoids bounds checks (`buffer` is lifted to a field of the iterator object)
            Debug.Assert(local.Length == size);

            while ((uint)count < (uint)local.Length && enumerator.MoveNext())
            {
                local[count] = enumerator.Current;
                count++;
            }
        }
        else
        {
            // The buffer is still smaller than `size`: double it on demand, capped at `size`.
            while (count < size && enumerator.MoveNext())
            {
                if (count >= buffer.Length)
                {
                    capacity = (int)Math.Min((uint)size, 2 * (uint)buffer.Length);
                    Array.Resize(ref buffer, capacity);
                }

                buffer[count] = enumerator.Current;
                count++;
            }
        }

        // Trim any unused tail so the yielded array holds exactly the elements that were read.
        if (count != buffer.Length)
        {
            Array.Resize(ref buffer, count);
        }

        yield return buffer;
    }
    while (count >= size && enumerator.MoveNext()); // a short chunk means the source is exhausted
}

#endif
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis;
Expand Down Expand Up @@ -310,20 +311,20 @@ InProgressState BuildInProgressStateFromNoCompilationState()
{
try
{
// Create a chain of translation steps where we add each document one at a time to an initially
// empty compilation. This allows us to then process that chain of actions like we would do any
// other. It also means that if we're in the process of parsing documents in that chain, that
// we'll see the results of how far we've gotten if someone asks for a frozen snapshot midway
// through.
// Create a chain of translation steps where we add a chunk of documents at a time to an
// initially empty compilation. This allows us to then process that chain of actions like we
// would do any other. It also means that if we're in the process of parsing documents in that
// chain, that we'll see the results of how far we've gotten if someone asks for a frozen
// snapshot midway through.
var initialProjectState = this.ProjectState.RemoveAllNormalDocuments();
var initialCompilation = this.CreateEmptyCompilation();

var translationActionsBuilder = ImmutableList.CreateBuilder<TranslationAction>();

var oldProjectState = initialProjectState;
foreach (var documentState in this.ProjectState.DocumentStates.GetStatesInCompilationOrder())
foreach (var chunk in this.ProjectState.DocumentStates.GetStatesInCompilationOrder().Chunk(TranslationAction.AddDocumentsAction.AddDocumentsBatchSize))
{
var documentStates = ImmutableArray.Create(documentState);
var documentStates = ImmutableCollectionsMarshal.AsImmutableArray(chunk);
var newProjectState = oldProjectState.AddDocuments(documentStates);
translationActionsBuilder.Add(new TranslationAction.AddDocumentsAction(
oldProjectState, newProjectState, documentStates));
Expand Down Expand Up @@ -359,26 +360,6 @@ async Task<InProgressState> CollapseInProgressStateAsync(InProgressState initial
{
var currentState = initialState;

// To speed things up, we look for all the added documents in the chain and we preemptively kick
// off work to parse the documents for it in parallel. This has the added benefit that if
// someone asks for a frozen partial snapshot while we're in the middle of doing this, they can
// use however many document states have successfully parsed their syntax trees. For example,
// if you had one extremely large file that took a long time to parse, and dozens of tiny ones,
// it's more likely that the frozen tree would have many more documents in it.
//
// Note: we intentionally kick these off in a fire-and-forget fashion. If we get canceled, all
// the tasks will attempt to cancel. If we complete, that's only because these tasks would
// complete as well. There's no need to track this with any sort of listener as this work just
// acts to speed up getting to a compilation, but is otherwise unobservable.
foreach (var action in currentState.PendingTranslationActions)
{
if (action is TranslationAction.AddDocumentsAction { Documents: var documents })
{
foreach (var document in documents)
_ = Task.Run(async () => await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false), cancellationToken);
}
}

// Then, we serially process the chain while that parsing is happening concurrently.
while (currentState.PendingTranslationActions.Count > 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Immutable;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Diagnostics;
Expand Down Expand Up @@ -126,16 +128,46 @@ internal sealed class AddDocumentsAction(
ImmutableArray<DocumentState> documents)
: TranslationAction(oldProjectState, newProjectState)
{
/// <summary>
/// Maximum number of documents placed in a single batch. Batching lets us process documents in parallel
/// without creating too many individual actions that then need to be processed.
/// </summary>
public const int AddDocumentsBatchSize = 32;

public readonly ImmutableArray<DocumentState> Documents = documents;

public override async Task<Compilation> TransformCompilationAsync(Compilation oldCompilation, CancellationToken cancellationToken)
{
// Parse all the documents in parallel.
using var _ = ArrayBuilder<Task<SyntaxTree>>.GetInstance(this.Documents.Length, out var tasks);
#if NETSTANDARD
using var _1 = ArrayBuilder<Task>.GetInstance(this.Documents.Length, out var tasks);

// We want to parse in parallel. But we don't want to have too many parses going on at the same time.
// So we use a semaphore here to only allow that many in at a time. Once we hit that amount, it will
// block further parallel work. However, as the semaphore is released, new work will be let in.
var semaphore = new SemaphoreSlim(initialCount: AddDocumentsBatchSize);
foreach (var document in this.Documents)
{
tasks.Add(Task.Run(async () =>
{
using (await semaphore.DisposableWaitAsync(cancellationToken).ConfigureAwait(false))
await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false);
}, cancellationToken));
}

await Task.WhenAll(tasks).ConfigureAwait(false);
#else
await Parallel.ForEachAsync(
this.Documents,
cancellationToken,
static async (document, cancellationToken) =>
await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false)).ConfigureAwait(false);
#endif

using var _2 = ArrayBuilder<SyntaxTree>.GetInstance(this.Documents.Length, out var trees);

foreach (var document in this.Documents)
tasks.Add(Task.Run(async () => await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false), cancellationToken));
trees.Add(await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false));

var trees = await Task.WhenAll(tasks).ConfigureAwait(false);
return oldCompilation.AddSyntaxTrees(trees);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,10 @@ static SolutionCompilationState ComputeFrozenPartialState(
CancellationToken cancellationToken)
{
var currentState = frozenCompilationState;

using var _ = PooledDictionary<ProjectId, ArrayBuilder<DocumentState>>.GetInstance(out var missingDocumentStates);

// First, either update documents that have changed, or keep track of documents that are missing.
foreach (var newDocumentState in documentStates)
{
var documentId = newDocumentState.Id;
Expand All @@ -1361,19 +1365,22 @@ static SolutionCompilationState ComputeFrozenPartialState(

if (oldDocumentState is null)
{
// Project doesn't have this document, attempt to fork it with the document added.
currentState = currentState.AddDocumentsToMultipleProjects(
[(oldProjectState, [newDocumentState])],
static (oldProjectState, newDocumentStates) =>
new TranslationAction.AddDocumentsAction(oldProjectState, oldProjectState.AddDocuments(newDocumentStates), newDocumentStates));
missingDocumentStates.MultiAdd(documentId.ProjectId, newDocumentState);
}
else
{
// Project has this document, attempt to fork it with the new contents.
currentState = currentState.WithDocumentState(newDocumentState);
}
}

// Now, add all missing documents per project.
currentState = currentState.AddDocumentsToMultipleProjects(
// Do a SelectAsArray here to ensure that we realize the array once, and as such only call things like
// ToImmutableAndFree once per ArrayBuilder.
missingDocumentStates.SelectAsArray(kvp => (kvp.Key, kvp.Value.ToImmutableAndFree())),
static (oldProjectState, newDocumentStates) =>
new TranslationAction.AddDocumentsAction(oldProjectState, oldProjectState.AddDocuments(newDocumentStates), newDocumentStates));

return currentState;
}
}
Expand Down Expand Up @@ -1447,20 +1454,21 @@ private SolutionCompilationState AddDocumentsToMultipleProjects<TDocumentState>(
var projectId = g.Key;
this.SolutionState.CheckContainsProject(projectId);
var projectState = this.SolutionState.GetRequiredProjectState(projectId);
return (projectState, newDocumentStates: g.SelectAsArray(di => createDocumentState(di, projectState)));
return (projectId, newDocumentStates: g.SelectAsArray(di => createDocumentState(di, projectState)));
}),
addDocumentsToProjectState);
}

private SolutionCompilationState AddDocumentsToMultipleProjects<TDocumentState>(
IEnumerable<(ProjectState oldProjectState, ImmutableArray<TDocumentState> newDocumentStates)> projectIdAndNewDocuments,
IEnumerable<(ProjectId projectId, ImmutableArray<TDocumentState> newDocumentStates)> projectIdAndNewDocuments,
Func<ProjectState, ImmutableArray<TDocumentState>, TranslationAction> addDocumentsToProjectState)
where TDocumentState : TextDocumentState
{
var newCompilationState = this;

foreach (var (oldProjectState, newDocumentStates) in projectIdAndNewDocuments)
foreach (var (projectId, newDocumentStates) in projectIdAndNewDocuments)
{
var oldProjectState = newCompilationState.SolutionState.GetRequiredProjectState(projectId);
var compilationTranslationAction = addDocumentsToProjectState(oldProjectState, newDocumentStates);
var newProjectState = compilationTranslationAction.NewProjectState;

Expand Down

0 comments on commit c4f4202

Please sign in to comment.