deploy: detect unchanged files and avoid uploading them again

Armaël Guéneau 2025-02-22 19:07:09 +01:00
parent acfe034631
commit 10047e4622


@@ -1,5 +1,6 @@
import fs from "fs";
import path from "path";
+import crypto from "crypto";
import mime from "mime";
import { WebsiteApi } from "guichet-sdk-ts";
import {
@@ -7,11 +8,14 @@ import {
    ListObjectsV2Command,
    DeleteObjectsCommand,
    DeleteObjectsCommandOutput,
+    HeadObjectCommand,
} from "@aws-sdk/client-s3";
import { Upload } from "@aws-sdk/lib-storage";
import { PromisePool } from "@supercharge/promise-pool";
import { openApiConf } from "./auth.js";
+const MD5METAFIELD = "dfl-md5sum";
// Walks through the local directory at path `dir`, and for each file it contains, returns:
// - `localPath`: its path on the local filesystem (includes `dir`). On Windows, this path
//   will typically use `\` as separator.
@@ -32,8 +36,18 @@ async function getLocalFiles(dir: string, s3Prefix: string | null): Promise<{ lo
    return files.flat()
}
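The body of `getLocalFiles` is elided here. As a rough sketch only (a recursive `readdir` walk is assumed; this is not the commit's actual code), it could look like:

// Hypothetical sketch; the real getLocalFiles is not shown in this diff.
async function getLocalFilesSketch(dir: string, s3Prefix: string | null): Promise<{ localPath: string, s3Path: string }[]> {
    const entries = await fs.promises.readdir(dir, { withFileTypes: true });
    const files = await Promise.all(entries.map(async (entry) => {
        const localPath = path.join(dir, entry.name);
        // bucket keys always use `/`, hence path.posix for the s3 side
        const s3Path = s3Prefix === null ? entry.name : path.posix.join(s3Prefix, entry.name);
        if (entry.isDirectory()) {
            return getLocalFilesSketch(localPath, s3Path);
        }
        return [{ localPath, s3Path }];
    }));
    return files.flat();
}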
-async function getBucketFiles(client: S3Client, Bucket: string): Promise<string[]> {
-    const files = [];
+async function getFileMd5(file: string): Promise<string> {
+    const hash = crypto.createHash('md5');
+    for await (const chunk of fs.createReadStream(file)) {
+        hash.update(chunk as Buffer);
+    }
+    return hash.digest('hex')
+}
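`getFileMd5` streams the file through the hash, so memory use stays bounded even for large assets. A usage sketch (the file name is illustrative):

const md5 = await getFileMd5("dist/index.html");
// -> a hex digest such as "d41d8cd98f00b204e9800998ecf8427e"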
+async function getBucketFiles(client: S3Client, Bucket: string):
+    Promise<Map<string, { size: number }>>
+{
+    const files = new Map();
    let done = false;
    let cmd = new ListObjectsV2Command({ Bucket });
    while (!done) {
@@ -45,7 +59,7 @@ async function getBucketFiles(client: S3Client, Bucket: string): Promise<string[
        }
        for (var item of resp.Contents!) {
-            files.push(item.Key!)
+            files.set(item.Key!, { size: item.Size! })
        }
        if (resp.NextContinuationToken) {
@@ -60,14 +74,22 @@ async function getBucketFiles(client: S3Client, Bucket: string): Promise<string[
    return files
}
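`ListObjectsV2` returns at most 1,000 keys per response, so the loop keeps following `NextContinuationToken` until the listing is exhausted. A typical call site (the bucket name is illustrative):

const remote = await getBucketFiles(s3client, "my-website-bucket");
console.log(`${remote.size} objects currently in the bucket`);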
-async function uploadFile(client: S3Client, Bucket: string, Key: string, Body: any) {
+async function uploadFile(client: S3Client, Bucket: string, Key: string, Body: any, md5: string) {
    // use `path.posix` because `Key` is a path in a bucket that uses `/` as separator.
    let ContentType = mime.getType(path.posix.extname(Key)) ?? undefined;
    // add charset=utf-8 by default on text files (TODO: allow the user to override this)
    if (ContentType && ContentType.startsWith("text/")) {
        ContentType = ContentType + "; charset=utf-8";
    }
-    const parallelUpload = new Upload({ client, params: { Bucket, Key, Body, ContentType } });
+    // store the md5 checksum in the object metadata; it will be used to skip
+    // subsequent uploads if the file has not changed.
+    const Metadata = { [MD5METAFIELD]: md5 };
+    const params = { Bucket, Key, Body, ContentType, Metadata };
+    const parallelUpload = new Upload({ client, params });
    parallelUpload.on("httpUploadProgress", progress => {
        process.stdout.moveCursor(0, -1)
        process.stdout.clearLine(1)
@@ -92,6 +114,43 @@ async function deleteFiles(client: S3Client, Bucket: string, files: string[]): P
    }));
}
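User-defined metadata passed via `Metadata` is stored by S3 under the `x-amz-meta-` prefix (here `x-amz-meta-dfl-md5sum`) and comes back on `HeadObject` responses, so the checksum can later be compared without downloading the object. A minimal read-back sketch (bucket and key are illustrative):

const head = await s3client.send(new HeadObjectCommand({ Bucket: "my-website-bucket", Key: "index.html" }));
// the SDK strips the `x-amz-meta-` prefix when populating `Metadata`
const storedMd5 = head.Metadata?.[MD5METAFIELD];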
+// Checks whether a remote file needs to be updated by its local copy.
+//
+// We first check whether the file sizes differ, and if not, compare the md5
+// checksum we previously stored in the object metadata (if it exists) with
+// the local file's md5 checksum.
+async function needsUpdate(
+    client: S3Client,
+    localFile: string,
+    localMd5: string,
+    Bucket: string,
+    Key: string,
+    remoteSize: number,
+): Promise<boolean> {
+    const localSize = (await fs.promises.stat(localFile)).size;
+    if (
+        localSize == 0 /* stat can return 0 in case of error */
+        || localSize != remoteSize
+    ) {
+        return true
+    }
+    // fetch metadata for the object and see if we previously stored its md5
+    const resp = await client.send(new HeadObjectCommand({ Bucket, Key }));
+    if (resp.$metadata.httpStatusCode != 200) {
+        // TODO: better error handling?
+        throw resp
+    }
+    const remoteMd5 = resp.Metadata ? resp.Metadata[MD5METAFIELD] : null;
+    if (!remoteMd5) {
+        return true
+    }
+    // we have a remote md5, compare it with the local one
+    return (localMd5 != remoteMd5)
+}
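Note that comparing against the object's ETag would not work in general: for multipart uploads (which `@aws-sdk/lib-storage`'s `Upload` performs for larger bodies) the ETag is not an md5 of the content, which is presumably why the commit stores its own checksum in metadata. An illustrative check for a single file:

const remoteSize = 12345; // illustrative: the size previously returned by getBucketFiles
const localMd5 = await getFileMd5("dist/index.html");
if (await needsUpdate(s3client, "dist/index.html", localMd5, "my-website-bucket", "index.html", remoteSize)) {
    // sizes or checksums differ: re-upload the file
}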
export async function deploy(vhost: string, localFolder: string) {
    const conf = await openApiConf();
@@ -136,7 +195,9 @@ export async function deploy(vhost: string, localFolder: string) {
    const resp = await deleteFiles(
        s3client,
        Bucket,
-        remoteFiles.filter(f => !localFiles.find(({ s3Path }) => s3Path == f))
+        [...remoteFiles]
+            .filter(([name, _]) => !localFiles.find(({ s3Path }) => s3Path == name))
+            .map(([name, _]) => name)
    );
    if (resp && resp!.$metadata.httpStatusCode != 200) {
        // TODO: better error handling?
@@ -144,9 +205,21 @@ export async function deploy(vhost: string, localFolder: string) {
        process.exit(1)
    }
-    // Control concurrence while uploading
+    // Uploads a local file unless the remote copy is the same
+    async function processFile(localPath: string, s3Path: string) {
+        const localMd5 = await getFileMd5(localPath);
+        const remoteFile = remoteFiles.get(s3Path);
+        if (
+            !remoteFile ||
+            await needsUpdate(s3client, localPath, localMd5, Bucket, s3Path, remoteFile.size)
+        ) {
+            await uploadFile(s3client, Bucket, s3Path, fs.createReadStream(localPath), localMd5)
+        }
+    }
+    // Control concurrency while uploading
    await PromisePool
        .for(localFiles)
        .withConcurrency(6)
-        .process(({ localPath, s3Path }) => uploadFile(s3client, Bucket, s3Path,fs.createReadStream(localPath)));
+        .process(({ localPath, s3Path }) => processFile(localPath, s3Path));
}
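Assuming the CLI wires this command up elsewhere, an invocation might look like (the arguments are illustrative):

await deploy("www.example.com", "./public");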