From 05178c924f39934bdd7cb2a1b4b7cb6fdd9e2365 Mon Sep 17 00:00:00 2001 From: Andy Burke Date: Wed, 2 Jul 2025 17:46:04 -0700 Subject: [PATCH] refactor: make all() take `filter` and `sort` options refactor: return unload item entries from all()/find() --- README.md | 15 -- deno.json | 2 +- fsdb.ts | 133 +++++------- indexers/symlinks.ts | 2 +- tests/03_index_items.test.ts | 8 +- tests/04_indexing_sanity_checks.test.ts | 16 +- tests/06_test_all.test.ts | 276 +++++++++++++++--------- utils/walk.ts | 62 ++++++ 8 files changed, 308 insertions(+), 206 deletions(-) create mode 100644 utils/walk.ts diff --git a/README.md b/README.md index 7b27462..89f5642 100644 --- a/README.md +++ b/README.md @@ -168,18 +168,3 @@ SQL against data still stored on disk in a nicely human browsable format. | FSDB_LOG_EVENTS | set to true to log the events system | ## TODO - - - [ ] make all()/find() return something like - -``` -{ - file_info, - entry: { - private data = undefined; - load() => { - data = data ?? await Deno.readTextFile(this.file_info.path); - return data; - } - } -} -``` diff --git a/deno.json b/deno.json index 1502e0a..3540e04 100644 --- a/deno.json +++ b/deno.json @@ -1,6 +1,6 @@ { "name": "@andyburke/fsdb", - "version": "0.7.0", + "version": "0.8.0", "license": "MIT", "exports": { ".": "./fsdb.ts", diff --git a/fsdb.ts b/fsdb.ts index fed59ec..b0a0a39 100644 --- a/fsdb.ts +++ b/fsdb.ts @@ -2,6 +2,9 @@ import * as fs from '@std/fs'; import * as path from '@std/path'; import by_lurid from './organizers/by_lurid.ts'; import { Optional } from './utils/optional.ts'; +import { walk, WALK_ENTRY } from './utils/walk.ts'; + +export type { WALK_ENTRY }; export type FSDB_COLLECTION_CONFIG = { name: string; @@ -12,22 +15,18 @@ export type FSDB_COLLECTION_CONFIG = { }; export type FSDB_COLLECTION_CONFIG_INPUT = Optional; -export type FSDB_SEARCH_OPTIONS = { - limit: number; +export type FSDB_SEARCH_OPTIONS = { + limit?: number; offset?: number; - before?: string; - after?: string; - modified_before?: string; - modified_after?: string; - id_before?: string; - id_after?: string; + filter?: (entry: WALK_ENTRY) => boolean; + sort?: (a: WALK_ENTRY, b: WALK_ENTRY) => number; }; export interface FSDB_INDEXER { set_fsdb_root(root: string): void; index(item: T, authoritative_path: string): Promise; remove(item: T, authoritative_path: string): Promise; - lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise; + lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise; } /** Represents a collection of like items within the database on disk. */ @@ -243,21 +242,16 @@ export class FSDB_COLLECTION> { } /** Iterate through the items. */ - async all(input_options?: FSDB_SEARCH_OPTIONS): Promise { + async all({ + limit = 100, + offset = 0, + filter = undefined, + sort = undefined + }: FSDB_SEARCH_OPTIONS = {}): Promise[]> { if (Deno.env.get('FSDB_PERF')) performance.mark('fsdb_all_begin'); - const options: FSDB_SEARCH_OPTIONS = { - ...{ - limit: 100, - offset: 0 - }, - ...(input_options ?? {}) - }; + const results: WALK_ENTRY[] = []; - const results: T[] = []; - - const limit = options?.limit ?? 100; - const offset = options?.offset ?? 0; let counter = 0; // TODO: better way to get a pattern to match files in this collection? @@ -270,65 +264,33 @@ export class FSDB_COLLECTION> { } for await ( - const entry of fs.walk(this.config.root, { - includeDirs: false, - includeSymlinks: false, - skip: [/\.fsdb\.collection\.json$/], - exts: ['json'] + const entry of walk(this.config.root, { + filter: (entry: WALK_ENTRY): boolean => { + const extension = path.extname(entry.path); + if (extension.toLowerCase() !== '.json') { + return false; + } + + if (entry.info.isDirectory || entry.info.isSymlink) { + return false; + } + + const filename = path.basename(entry.path); + if (filename === '.fsdb.collection.json') { + return false; + } + + return filter ? filter(entry) : true; + }, + sort }) ) { - let item_stat = null; - if (options.before) { - item_stat = item_stat ?? await Deno.lstat(entry.path); - const birthtime = (item_stat.birthtime ?? new Date(0)).toISOString(); - if (birthtime > options.before) { - continue; - } - } - - if (options.after) { - item_stat = item_stat ?? await Deno.lstat(entry.path); - if ((item_stat.birthtime ?? new Date(0)).toISOString() < options.after) { - continue; - } - } - - if (options.modified_before) { - item_stat = item_stat ?? await Deno.lstat(entry.path); - if ((item_stat.mtime ?? new Date(0)).toISOString() > options.modified_before) { - continue; - } - } - - if (options.modified_after) { - item_stat = item_stat ?? await Deno.lstat(entry.path); - if ((item_stat.mtime ?? new Date(0)).toISOString() < options.modified_after) { - continue; - } - } - - let item_id = null; - if (options.id_before) { - item_id = item_id ?? entry.name.replace(/\.json$/, ''); - if (item_id >= options.id_before) { - continue; - } - } - - if (options.id_after) { - item_id = item_id ?? entry.name.replace(/\.json$/, ''); - if (item_id <= options.id_after) { - continue; - } - } - if (counter < offset) { ++counter; continue; } - const content = await Deno.readTextFile(entry.path); - results.push(JSON.parse(content)); + results.push(entry); ++counter; if (counter >= (offset + limit)) { @@ -340,7 +302,12 @@ export class FSDB_COLLECTION> { if (Deno.env.get('FSDB_PERF')) console.dir(performance.measure('fsdb all items time', 'fsdb_all_begin', 'fsdb_all_end')); this.emit('all', { - options, + options: { + limit, + offset, + filter, + sort + }, results }); @@ -348,10 +315,10 @@ export class FSDB_COLLECTION> { } /** Use indexes to search for matching items. */ - async find(criteria: Record, input_options?: FSDB_SEARCH_OPTIONS): Promise { + async find(criteria: Record, input_options?: FSDB_SEARCH_OPTIONS): Promise[]> { if (Deno.env.get('FSDB_PERF')) performance.mark('fsdb_find_begin'); - const options: FSDB_SEARCH_OPTIONS = { + const options: FSDB_SEARCH_OPTIONS = { ...{ limit: 100, offset: 0 @@ -359,7 +326,7 @@ export class FSDB_COLLECTION> { ...(input_options ?? {}) }; - const results: T[] = []; + const results: WALK_ENTRY[] = []; const item_paths: string[] = []; for (const search_key of Object.keys(criteria)) { @@ -380,8 +347,16 @@ export class FSDB_COLLECTION> { continue; } - const content = await Deno.readTextFile(item_path); - results.push(JSON.parse(content)); + const info: Deno.FileInfo = await Deno.lstat(item_path); + results.push({ + path: item_path, + info, + depth: -1, + load: function () { + return JSON.parse(Deno.readTextFileSync(this.path)) as T; + } + }); + ++counter; if (counter >= (offset + limit)) { diff --git a/indexers/symlinks.ts b/indexers/symlinks.ts index 5e6d86c..df97c90 100644 --- a/indexers/symlinks.ts +++ b/indexers/symlinks.ts @@ -67,7 +67,7 @@ export class FSDB_INDEXER_SYMLINKS implements FSDB_INDEXER { return [value]; } - async lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise { + async lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise { if (typeof this.config.root !== 'string') { throw new Error('root should have been set by FSDB instance'); } diff --git a/tests/03_index_items.test.ts b/tests/03_index_items.test.ts index 60d8335..3f1ad1d 100644 --- a/tests/03_index_items.test.ts +++ b/tests/03_index_items.test.ts @@ -65,19 +65,21 @@ Deno.test({ } for (const item of items) { - const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email }); + const fetched_by_email: ITEM[] = (await item_collection.find({ email: item.email })).map((entry) => entry.load()); asserts.assertLess(fetched_by_email.length, items.length); asserts.assertGreater(fetched_by_email.length, 0); asserts.assert(fetched_by_email.find((email_item) => email_item.id === item.id)); - const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone }); + const fetched_by_phone: ITEM[] = (await item_collection.find({ phone: item.phone })).map((entry) => entry.load()); asserts.assertLess(fetched_by_phone.length, items.length); asserts.assertGreater(fetched_by_phone.length, 0); asserts.assert(fetched_by_phone.find((phone_item) => phone_item.id === item.id)); const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3); const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)]; - const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value }); + const fetched_by_word_in_value: ITEM[] = (await item_collection.find({ by_character_test: random_word_in_value })).map(( + entry + ) => entry.load()); asserts.assertLess(fetched_by_word_in_value.length, items.length); asserts.assertGreater(fetched_by_word_in_value.length, 0); asserts.assert(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id)); diff --git a/tests/04_indexing_sanity_checks.test.ts b/tests/04_indexing_sanity_checks.test.ts index 46cf993..ab5acb0 100644 --- a/tests/04_indexing_sanity_checks.test.ts +++ b/tests/04_indexing_sanity_checks.test.ts @@ -67,19 +67,21 @@ Deno.test({ } for (const item of items) { - const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email }); + const fetched_by_email: ITEM[] = (await item_collection.find({ email: item.email })).map((entry) => entry.load()); asserts.assertLess(fetched_by_email.length, items.length); asserts.assertGreater(fetched_by_email.length, 0); asserts.assert(fetched_by_email.find((email_item) => email_item.id === item.id)); - const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone }); + const fetched_by_phone: ITEM[] = (await item_collection.find({ phone: item.phone })).map((entry) => entry.load()); asserts.assertLess(fetched_by_phone.length, items.length); asserts.assertGreater(fetched_by_phone.length, 0); asserts.assert(fetched_by_phone.find((phone_item) => phone_item.id === item.id)); const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3); const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)]; - const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value }); + const fetched_by_word_in_value: ITEM[] = (await item_collection.find({ by_character_test: random_word_in_value })).map(( + entry + ) => entry.load()); asserts.assertLess(fetched_by_word_in_value.length, items.length); asserts.assertGreater(fetched_by_word_in_value.length, 0); asserts.assert(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id)); @@ -89,15 +91,17 @@ Deno.test({ for (const item of items.slice(1)) { await item_collection.delete(item); - const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email }); + const fetched_by_email: ITEM[] = (await item_collection.find({ email: item.email })).map((entry) => entry.load()); asserts.assertFalse(fetched_by_email.find((email_item) => email_item.id === item.id)); - const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone }); + const fetched_by_phone: ITEM[] = (await item_collection.find({ phone: item.phone })).map((entry) => entry.load()); asserts.assertFalse(fetched_by_phone.find((phone_item) => phone_item.id === item.id)); const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3); const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)]; - const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value }); + const fetched_by_word_in_value: ITEM[] = (await item_collection.find({ by_character_test: random_word_in_value })).map(( + entry + ) => entry.load()); asserts.assertFalse(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id)); } } diff --git a/tests/06_test_all.test.ts b/tests/06_test_all.test.ts index d69f908..43e63a5 100644 --- a/tests/06_test_all.test.ts +++ b/tests/06_test_all.test.ts @@ -1,93 +1,148 @@ import * as asserts from '@std/assert'; import * as fsdb from '../fsdb.ts'; -import { get_data_dir, random_email_address, random_phone_number } from './helpers.ts'; +import * as path from '@std/path'; +import { get_data_dir } from './helpers.ts'; import lurid from '@andyburke/lurid'; +type ITEM = { + id: string; + value: string; + created: string; + written_by_time?: string; +}; + +const item_collection: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({ + name: 'test-06-items', + root: get_data_dir() + '/test-06-items' +}); + +const items: ITEM[] = []; +const item_count: number = 1_000; +const midpoint: number = Math.floor(item_count / 2); +let first_id = null; +let time_mid = null; +let mid_id = null; +let time_end = null; +let last_id = null; +for (let i = 0; i < item_count; ++i) { + const item: ITEM = { + id: lurid(), + value: `${Math.random() * 10_000_000}`, + created: new Date().toISOString() + }; + + first_id = first_id ?? item.id; + + items.push(item); + + const stored_item: ITEM = await item_collection.create(item); + item.written_by_time = new Date().toISOString(); + + asserts.assertObjectMatch(stored_item, item); + + if (i === midpoint) { + time_mid = item.written_by_time; + mid_id = item.id; + } + + time_end = item.written_by_time; + last_id = item.id; + + // wait a ms between items to help with sort stuff later + await new Promise((resolve) => setTimeout(resolve, 1)); +} + +const sorted_items = { + by_id: items.sort((lhs, rhs) => lhs.id.localeCompare(rhs.id)).map((item) => { + return { id: item.id, value: item.value, created: item.created }; + }), + + by_created: items.sort((lhs, rhs) => lhs.created.localeCompare(rhs.created)).map((item) => { + return { id: item.id, value: item.value, created: item.created }; + }) +}; + +const LIMIT_MIN = 11; +const LIMIT_MAX = 333; + Deno.test({ - name: 'iterate over all items', + name: 'sort all() by id', permissions: { env: true, - // https://github.com/denoland/deno/discussions/17258 read: true, write: true }, fn: async () => { - type ITEM = { - id: string; - email: string; - phone: string; - created: string; - written_by_time?: string; - }; - - const item_collection: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({ - name: 'test-06-items', - root: get_data_dir() + '/test-06-items' - }); - asserts.assert(item_collection); - const items: ITEM[] = []; - const item_count: number = 500; - const midpoint: number = Math.floor(item_count / 2); - let first_id = null; - let time_mid = null; - let mid_id = null; - let time_end = null; - let last_id = null; - for (let i = 0; i < item_count; ++i) { - const item: ITEM = { - id: lurid(), - email: random_email_address(), - phone: random_phone_number(), - created: new Date().toISOString() - }; - - first_id = first_id ?? item.id; - - items.push(item); - - const stored_item: ITEM = await item_collection.create(item); - item.written_by_time = new Date().toISOString(); - - asserts.assertObjectMatch(stored_item, item); - - if (i === midpoint) { - time_mid = item.written_by_time; - mid_id = item.id; - } - - time_end = item.written_by_time; - last_id = item.id; - } - - const LIMIT_MIN = 11; - const LIMIT_MAX = 333; - - let fetch_for_sort_offset = 0; - const fetched_for_sort = []; - let more_to_fetch_for_sorting = true; + let offset = 0; + const fetched = []; + let more = true; do { // fuzz the limit const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN; const fetched_items = await item_collection.all({ limit, - offset: fetch_for_sort_offset + offset: offset, + sort: (a: fsdb.WALK_ENTRY, b: fsdb.WALK_ENTRY) => + path.basename(a.path).replace(/\.json$/i, '').localeCompare(path.basename(b.path).replace(/\.json/i, '')) }); - fetched_for_sort.push(...fetched_items); - fetch_for_sort_offset += fetched_items.length; - more_to_fetch_for_sorting = fetched_items.length === limit; - } while (more_to_fetch_for_sorting); + fetched.push(...(fetched_items.map((item) => item.load()))); + offset += fetched_items.length; + more = fetched_items.length === limit; + } while (more); - const sorted_items = items.sort((lhs, rhs) => lhs.id.localeCompare(rhs.id)).map((item) => { - return { id: item.id, email: item.email, phone: item.phone, created: item.created }; - }); - const sorted_fetched = fetched_for_sort.sort((lhs, rhs) => lhs.id.localeCompare(rhs.id)); + asserts.assertEquals(fetched, sorted_items.by_id); + } +}); - asserts.assertEquals(sorted_fetched, sorted_items); +Deno.test({ + name: 'sort all() by birthtime', + permissions: { + env: true, + // https://github.com/denoland/deno/discussions/17258 + read: true, + write: true + }, + fn: async () => { + asserts.assert(item_collection); + + let offset = 0; + const fetched = []; + let more = true; + do { + // fuzz the limit + const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN; + + const fetched_items = await item_collection.all({ + limit, + offset: offset, + sort: (a: fsdb.WALK_ENTRY, b: fsdb.WALK_ENTRY) => + (a.info.birthtime?.toISOString() ?? '').localeCompare(b.info.birthtime?.toISOString() ?? '') + }); + + fetched.push(...(fetched_items.map((item) => item.load()))); + offset += fetched_items.length; + more = fetched_items.length === limit; + } while (more); + + asserts.assertEquals(fetched, sorted_items.by_created); + } +}); + +Deno.test({ + name: 'filter all() by birthtime', + permissions: { + env: true, + // https://github.com/denoland/deno/discussions/17258 + read: true, + write: true + }, + fn: async () => { + asserts.assert(item_collection); asserts.assert(time_mid); asserts.assert(time_end); @@ -106,10 +161,10 @@ Deno.test({ const fetched_items = await item_collection.all({ limit, offset: fetch_for_before_offset, - before: time_mid + filter: (entry) => (entry.info.birthtime?.toISOString() ?? '') < time_mid }); - fetched_for_before.push(...fetched_items); + fetched_for_before.push(...(fetched_items.map((item) => item.load()))); fetch_for_before_offset += fetched_items.length; more_to_fetch_for_before = fetched_items.length === limit; } while (more_to_fetch_for_before); @@ -128,35 +183,6 @@ Deno.test({ // note: we use less or equal because we don't have the actual file write time asserts.assertLessOrEqual(newest, time_mid); - // test id_before - let fetch_for_id_before_offset = 0; - const fetched_for_id_before = []; - let more_to_fetch_for_id_before = true; - do { - // fuzz the limit - const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN; - - const fetched_items = await item_collection.all({ - limit, - offset: fetch_for_id_before_offset, - id_before: mid_id - }); - - fetched_for_id_before.push(...fetched_items); - fetch_for_id_before_offset += fetched_items.length; - more_to_fetch_for_id_before = fetched_items.length === limit; - } while (more_to_fetch_for_id_before); - - let newest_id = first_id; - asserts.assert(newest_id); - for (const item of fetched_for_id_before) { - if (item.id > newest_id) { - newest_id = item.id; - } - } - - asserts.assertLess(newest_id, mid_id); - // test after let fetch_for_after_offset = 0; const fetched_for_after = []; @@ -168,10 +194,10 @@ Deno.test({ const fetched_items = await item_collection.all({ limit, offset: fetch_for_after_offset, - after: time_mid + filter: (entry) => (entry.info.birthtime?.toISOString() ?? '') > time_mid }); - fetched_for_after.push(...fetched_items); + fetched_for_after.push(...(fetched_items.map((item) => item.load()))); fetch_for_after_offset += fetched_items.length; more_to_fetch_for_after = fetched_items.length === limit; } while (more_to_fetch_for_after); @@ -189,6 +215,54 @@ Deno.test({ // again with the file write time slop asserts.assertGreaterOrEqual(oldest, time_mid); + } +}); + +Deno.test({ + name: 'filter all() by id', + permissions: { + env: true, + // https://github.com/denoland/deno/discussions/17258 + read: true, + write: true + }, + fn: async () => { + asserts.assert(item_collection); + + asserts.assert(time_mid); + asserts.assert(time_end); + + asserts.assert(mid_id); + asserts.assert(last_id); + + // test id_before + let fetch_for_id_before_offset = 0; + const fetched_for_id_before = []; + let more_to_fetch_for_id_before = true; + do { + // fuzz the limit + const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN; + + const fetched_items = await item_collection.all({ + limit, + offset: fetch_for_id_before_offset, + filter: (entry) => path.basename(entry.path).replace(/\.json$/i, '') < mid_id + }); + + fetched_for_id_before.push(...(fetched_items.map((item) => item.load()))); + fetch_for_id_before_offset += fetched_items.length; + more_to_fetch_for_id_before = fetched_items.length === limit; + } while (more_to_fetch_for_id_before); + + let newest_id = first_id; + asserts.assert(newest_id); + for (const item of fetched_for_id_before) { + if (item.id > newest_id) { + newest_id = item.id; + } + } + + asserts.assertLess(newest_id, mid_id); // test id_after let fetch_for_id_after_offset = 0; @@ -201,10 +275,10 @@ Deno.test({ const fetched_items = await item_collection.all({ limit, offset: fetch_for_id_after_offset, - id_after: mid_id + filter: (entry) => path.basename(entry.path).replace(/\.json$/i, '') > mid_id }); - fetched_for_id_after.push(...fetched_items); + fetched_for_id_after.push(...(fetched_items.map((item) => item.load()))); fetch_for_id_after_offset += fetched_items.length; more_to_fetch_for_id_after = fetched_items.length === limit; } while (more_to_fetch_for_id_after); diff --git a/utils/walk.ts b/utils/walk.ts new file mode 100644 index 0000000..e31b401 --- /dev/null +++ b/utils/walk.ts @@ -0,0 +1,62 @@ +import * as path from '@std/path'; + +export type WALK_OPTIONS = { + max_depth?: number; + depth?: number; + index?: number; + filter?: (entry: WALK_ENTRY) => boolean; + sort?: (a: WALK_ENTRY, b: WALK_ENTRY) => number; +}; + +export type WALK_ENTRY = { + path: string; + info: Deno.FileInfo; + depth: number; + load: () => T; +}; + +export async function* walk( + root: string | URL, + { + depth = 0, + filter = undefined, + sort = undefined + }: WALK_OPTIONS = {} +): AsyncIterableIterator> { + const root_path = typeof root === 'string' ? path.resolve(root) : path.resolve(path.fromFileUrl(root)); + + const entries: WALK_ENTRY[] = []; + + for await (const dir_entry of Deno.readDir(root_path)) { + const full_path = path.join(root, dir_entry.name); + const info = await Deno.lstat(full_path); + const entry = { + path: full_path, + info, + depth, + load: function () { + return JSON.parse(Deno.readTextFileSync(this.path)) as T; + } + }; + + entries.push(entry); + } + + if (sort) { + entries.sort(sort); + } + + for (const entry of entries) { + if (!filter || filter(entry)) { + yield entry; + } + + if (entry.info.isDirectory) { + yield* walk(entry.path, { + depth: depth + 1, + filter, + sort + }); + } + } +}