Skip to content

Commit 0c89067

Browse files
octogonzLPegasus
andauthored
[node-core-library] Fix LockFile API issues that caused Rush autoinstaller failures (#5734)
* - Fix incorrect "dirty" detection caused by transient/racy lockfiles (e.g., files disappearing mid-acquire or partially written files) - Change approach to defer cleanup: first determine lock ownership, then clean up truly stale files only after successfully acquiring - Tighten stale vs valid lock distinction (including PID reuse cases) so we don’t misclassify active or benign states as dirty * rush change * test(node-core-library): update test case of LockFile#acquireAsync --------- Co-authored-by: LPegasus <lpegasus@users.noreply.github.com>
1 parent 4e87cfd commit 0c89067

4 files changed

Lines changed: 97 additions & 29 deletions

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@rushstack/node-core-library",
5+
"comment": "Fix an issue where the LockFile API sometimes incorrectly reported a dirty acquisition, causing Rush autoinstaller failures (GitHub #5684)",
6+
"type": "patch"
7+
}
8+
],
9+
"packageName": "@rushstack/node-core-library"
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@rushstack/node-core-library",
5+
"comment": "Regression risk: This narrows when a lock is considered \"dirty\". Although the previous behavior was incorrect, the fix could break consumers that implicitly relied on those false positives.",
6+
"type": "minor"
7+
}
8+
],
9+
"packageName": "@rushstack/node-core-library"
10+
}

libraries/node-core-library/src/LockFile.ts

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,13 @@ export class LockFile {
296296
resourceName: string,
297297
pidLockFilePath: string
298298
): LockFile | undefined {
299-
let dirtyWhenAcquired: boolean = false;
300-
301-
// get the current process' pid
299+
// get the current process identifier (PID)
302300
const pid: number = process.pid;
301+
302+
// Suppose that a process terminates unexpectedly without deleting its PID-based lockfile,
303+
// then we check to see if the process is still alive. The OS may have given the same PID
304+
// to a new process, how to detect that? We will rely on getProcessStartTime() which
305+
// is stored in the file itself for comparison.
303306
const startTime: string | undefined = LockFile._getStartTime(pid);
304307

305308
if (!startTime) {
@@ -327,6 +330,11 @@ export class LockFile {
327330
// look for anything ending with # then numbers and ".lock"
328331
const lockFileRegExp: RegExp = /^(.+)#([0-9]+)\.lock$/;
329332

333+
// If we are the process to acquire the lock, it becomes our responsibility to clean up these
334+
// stale files. If there is at least 1 stale file, then the resource is assumed to be "dirty"
335+
// (for example, the previous process was interrupted before releasing or while acquiring).
336+
const staleFilesToDelete: string[] = [];
337+
330338
let match: RegExpMatchArray | null;
331339
let otherPid: string;
332340
for (const fileInFolder of files) {
@@ -335,14 +343,16 @@ export class LockFile {
335343
match[1] === resourceName &&
336344
(otherPid = match[2]) !== pid.toString()
337345
) {
338-
// we found at least one lockfile hanging around that isn't ours
346+
// We found at least one lockfile hanging around that isn't ours
339347
const fileInFolderPath: string = `${resourceFolder}/${fileInFolder}`;
340-
dirtyWhenAcquired = true;
341348

342349
// console.log(`FOUND OTHER LOCKFILE: ${otherPid}`);
343350

351+
// Actual start time of the other PID
344352
const otherPidCurrentStartTime: string | undefined = LockFile._getStartTime(parseInt(otherPid, 10));
345353

354+
// The start time from the file, which we will compare with otherPidCurrentStartTime
355+
// to determine whether the PID got reused by a new process.
346356
let otherPidOldStartTime: string | undefined;
347357
let otherBirthtimeMs: number | undefined;
348358
try {
@@ -351,47 +361,62 @@ export class LockFile {
351361
otherBirthtimeMs = FileSystem.getStatistics(fileInFolderPath).birthtime.getTime();
352362
} catch (error) {
353363
if (FileSystem.isNotExistError(error)) {
354-
// the file is already deleted by other process, skip it
364+
// ==> Properly closed lockfile, safe to ignore:
365+
// The other process deleted the file, which we assume means it completed successfully,
366+
// so the state is not dirty. This is equivalent to if readFolderItemNames() never saw
367+
// the file in the firstplace.
355368
continue;
356369
}
357370
}
358371

359-
// if the otherPidOldStartTime is invalid, then we should look at the timestamp,
360-
// if this file was created after us, ignore it
361-
// if it was created within 1 second before us, then it could be good, so we
362-
// will conservatively fail
363-
// otherwise it is an old lock file and will be deleted
364-
if (otherPidOldStartTime === '' && otherBirthtimeMs !== undefined) {
372+
// What the other process's file exists, but it is an empty file?
373+
// Either they were terminated while acquiring, or else they haven't finished writing it yet.
374+
if (otherBirthtimeMs !== undefined && otherPidOldStartTime === '') {
365375
if (otherBirthtimeMs > currentBirthTimeMs) {
366-
// ignore this file, he will be unable to get the lock since this process
367-
// will hold it
376+
// ==> Safe to ignore
377+
// If the other process was terminated, it happened before they finished acquiring.
378+
// If the other process is alive, their file is newer, so we will acquire instead of them.
379+
368380
// console.log(`Ignoring lock for pid ${otherPid} because its lockfile is newer than ours.`);
369381
continue;
370382
} else if (
371-
otherBirthtimeMs - currentBirthTimeMs < 0 && // it was created before us AND
383+
otherBirthtimeMs - currentBirthTimeMs < 0 &&
372384
otherBirthtimeMs - currentBirthTimeMs > -1000
373385
) {
374-
// it was created less than a second before
375-
376-
// conservatively be unable to keep the lock
377-
return undefined;
386+
// ==> Race condition
387+
// The other process created their file first, so they will probably acquire the lock
388+
// after they finish writing the contents. But what if their process is actually dead
389+
// and replaced by a new process with the same PID? Normally the otherPidOldStartTime
390+
// gives the answer, but in this edge case we are missing that information.
391+
// So we conservatively assume that it should not take them more than 1000ms to
392+
// open a file, write a PID, and close the file.
393+
return undefined; // fail to acquire and retry later
378394
}
379395
}
380396

381397
// console.log(`Other pid ${otherPid} lockfile has start time: "${otherPidOldStartTime}"`);
382398
// console.log(`Other pid ${otherPid} actually has start time: "${otherPidCurrentStartTime}"`);
383399

384-
// this means the process is no longer executing, delete the file
400+
// Time to compare
385401
if (!otherPidCurrentStartTime || otherPidOldStartTime !== otherPidCurrentStartTime) {
402+
// ==> Stale lockfile
403+
// This file doesn't prevent us from acquiring the lock, but it does indicate that
404+
// the resource was left in a dirty state. (If we delete the file right now, that
405+
// information would be lost, so we clean up later when we acquire successfully.)
406+
386407
// console.log(`Other pid ${otherPid} is no longer executing!`);
387-
FileSystem.deleteFile(fileInFolderPath);
408+
staleFilesToDelete.push(fileInFolderPath);
388409
continue;
389410
}
390411

391412
// console.log(`Pid ${otherPid} lockfile has birth time: ${otherBirthtimeMs}`);
392413
// console.log(`Pid ${pid} lockfile has birth time: ${currentBirthTimeMs}`);
393-
// this is a lockfile pointing at something valid
414+
394415
if (otherBirthtimeMs !== undefined) {
416+
// ==> We found a valid file belonging to another process.
417+
// With multiple parties trying to acquire, the winner is the smallestBirthTime,
418+
// so we need to sort.
419+
395420
// the other lock file was created before the current earliest lock file
396421
// or the other lock file was created at the same exact time, but has earlier pid
397422

@@ -418,9 +443,15 @@ export class LockFile {
418443
return undefined;
419444
}
420445

421-
// we have the lock!
446+
let dirtyWhenAcquired: boolean = false;
447+
for (const staleFileToDelete of staleFilesToDelete) {
448+
FileSystem.deleteFile(staleFileToDelete, { throwIfNotExists: false });
449+
dirtyWhenAcquired = true;
450+
}
451+
452+
// We have the lock!
422453
lockFile = new LockFile(lockFileHandle, pidLockFilePath, dirtyWhenAcquired);
423-
lockFileHandle = undefined; // we have handed the descriptor off to the instance
454+
lockFileHandle = undefined; // The LockFile object has taken ownership of our handle
424455
} finally {
425456
if (lockFileHandle) {
426457
// ensure our lock is closed

libraries/node-core-library/src/test/LockFile.test.ts

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ describe(LockFile.name, () => {
130130

131131
const lock2Exists: boolean = await FileSystem.existsAsync(lock2.filePath);
132132
expect(lock2Exists).toEqual(true);
133+
// The second lock should not be dirty since it is acquired after the first lock is released
134+
expect(lock2.dirtyWhenAcquired).toEqual(false);
133135
expect(lock2.isReleased).toEqual(false);
134136

135137
expect(lock2Acquired).toEqual(true);
@@ -244,7 +246,7 @@ describe(LockFile.name, () => {
244246
expect(lock).toBeUndefined();
245247
});
246248

247-
test('deletes other hanging lockfiles if corresponding processes are not running anymore', () => {
249+
test('deletes other hanging lockfiles if corresponding processes are not running anymore and marks dirtyWhenAcquired', () => {
248250
// ensure test folder is clean
249251
const testFolder: string = path.join(libTestFolder, '4');
250252
FileSystem.ensureEmptyFolder(testFolder);
@@ -270,10 +272,19 @@ describe(LockFile.name, () => {
270272
});
271273

272274
const deleteFileSpy = jest.spyOn(FileSystem, 'deleteFile');
273-
LockFile.tryAcquire(testFolder, resourceName);
275+
276+
const lock: LockFile | undefined = LockFile.tryAcquire(testFolder, resourceName);
277+
278+
expect(lock).toBeDefined();
279+
expect(lock!.dirtyWhenAcquired).toEqual(true);
280+
expect(lock!.isReleased).toEqual(false);
274281

275282
expect(deleteFileSpy).toHaveBeenCalledTimes(1);
276-
expect(deleteFileSpy).toHaveBeenNthCalledWith(1, otherPidLockFileName);
283+
expect(deleteFileSpy).toHaveBeenNthCalledWith(1, otherPidLockFileName, {
284+
throwIfNotExists: false
285+
});
286+
287+
lock!.release();
277288
});
278289

279290
test("doesn't attempt deleting other process lockfile if it is released in the middle of acquiring process", () => {
@@ -302,7 +313,7 @@ describe(LockFile.name, () => {
302313
return pid === otherPid ? otherPidStartTime : getProcessStartTime(pid);
303314
});
304315

305-
const originalReadFile = FileSystem.readFile;
316+
const originalReadFile: typeof FileSystem.readFile = FileSystem.readFile;
306317
jest.spyOn(FileSystem, 'readFile').mockImplementation((filePath: string) => {
307318
if (filePath === otherPidLockFileName) {
308319
// simulate other process lock release right before the current process reads
@@ -315,13 +326,19 @@ describe(LockFile.name, () => {
315326

316327
const deleteFileSpy = jest.spyOn(FileSystem, 'deleteFile');
317328

318-
LockFile.tryAcquire(testFolder, resourceName);
329+
const lock: LockFile | undefined = LockFile.tryAcquire(testFolder, resourceName);
330+
331+
expect(lock).toBeDefined();
332+
expect(lock!.dirtyWhenAcquired).toEqual(false);
333+
expect(lock!.isReleased).toEqual(false);
319334

320335
// Ensure there were no other FileSystem.deleteFile calls after our lock release simulation.
321336
// An extra attempt to delete the lockfile might lead to unexpectedly deleting a new lockfile
322337
// created by another process right after releasing/deleting the previous lockfile
323338
expect(deleteFileSpy).toHaveBeenCalledTimes(1);
324339
expect(deleteFileSpy).toHaveBeenNthCalledWith(1, otherPidLockFileName);
340+
341+
lock!.release();
325342
});
326343
});
327344
}

0 commit comments

Comments
 (0)