refactor: make all() take filter and sort options

refactor: return unloaded item entries from all()/find()
Andy Burke 2025-07-02 17:46:04 -07:00
parent 3214d17b80
commit 05178c924f
8 changed files with 308 additions and 206 deletions
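In short: `all()` and `find()` no longer return parsed items; they return lazy `WALK_ENTRY<T>` records whose `filter`/`sort` callbacks see only the file path and `Deno.FileInfo`, and whose `load()` reads and parses the JSON on demand. A minimal sketch of the new call pattern, pieced together from the diff below (the `ITEM` type, collection name, and comparison values are illustrative, not part of this commit):

```ts
import * as fsdb from '@andyburke/fsdb';
import * as path from '@std/path';

type ITEM = { id: string; value: string };

const collection = new fsdb.FSDB_COLLECTION<ITEM>({ name: 'example-items' });

// filter/sort operate on WALK_ENTRY<ITEM> (path + Deno.FileInfo), before any JSON is read
const entries: fsdb.WALK_ENTRY<ITEM>[] = await collection.all({
    limit: 50,
    offset: 0,
    filter: (entry) => path.basename(entry.path).replace(/\.json$/i, '') > 'some-id', // replaces the old id_after option
    sort: (a, b) => (a.info.birthtime?.toISOString() ?? '').localeCompare(b.info.birthtime?.toISOString() ?? '')
});

// call load() only on the entries you actually need
const loaded: ITEM[] = entries.map((entry) => entry.load());
```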


@@ -168,18 +168,3 @@ SQL against data still stored on disk in a nicely human browsable format.
| FSDB_LOG_EVENTS | set to true to log the events system |
## TODO
- [ ] make all()/find() return something like
```
{
file_info,
entry: {
private data = undefined;
load() => {
data = data ?? await Deno.readTextFile(this.file_info.path);
return data;
}
}
}
```


@@ -1,6 +1,6 @@
{
"name": "@andyburke/fsdb",
"version": "0.7.0",
"version": "0.8.0",
"license": "MIT",
"exports": {
".": "./fsdb.ts",

fsdb.ts

@@ -2,6 +2,9 @@ import * as fs from '@std/fs';
import * as path from '@std/path';
import by_lurid from './organizers/by_lurid.ts';
import { Optional } from './utils/optional.ts';
import { walk, WALK_ENTRY } from './utils/walk.ts';
export type { WALK_ENTRY };
export type FSDB_COLLECTION_CONFIG = {
name: string;
@@ -12,22 +15,18 @@ export type FSDB_COLLECTION_CONFIG = {
};
export type FSDB_COLLECTION_CONFIG_INPUT = Optional<FSDB_COLLECTION_CONFIG, 'id_field' | 'organize' | 'root'>;
export type FSDB_SEARCH_OPTIONS = {
limit: number;
export type FSDB_SEARCH_OPTIONS<T> = {
limit?: number;
offset?: number;
before?: string;
after?: string;
modified_before?: string;
modified_after?: string;
id_before?: string;
id_after?: string;
filter?: (entry: WALK_ENTRY<T>) => boolean;
sort?: (a: WALK_ENTRY<T>, b: WALK_ENTRY<T>) => number;
};
export interface FSDB_INDEXER<T> {
set_fsdb_root(root: string): void;
index(item: T, authoritative_path: string): Promise<string[]>;
remove(item: T, authoritative_path: string): Promise<string[]>;
lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise<string[]>;
lookup(value: string, options?: FSDB_SEARCH_OPTIONS<T>): Promise<string[]>;
}
/** Represents a collection of like items within the database on disk. */
@@ -243,21 +242,16 @@ export class FSDB_COLLECTION<T extends Record<string, any>> {
}
/** Iterate through the items. */
async all(input_options?: FSDB_SEARCH_OPTIONS): Promise<T[]> {
async all({
limit = 100,
offset = 0,
filter = undefined,
sort = undefined
}: FSDB_SEARCH_OPTIONS<T> = {}): Promise<WALK_ENTRY<T>[]> {
if (Deno.env.get('FSDB_PERF')) performance.mark('fsdb_all_begin');
const options: FSDB_SEARCH_OPTIONS = {
...{
limit: 100,
offset: 0
},
...(input_options ?? {})
};
const results: WALK_ENTRY<T>[] = [];
const results: T[] = [];
const limit = options?.limit ?? 100;
const offset = options?.offset ?? 0;
let counter = 0;
// TODO: better way to get a pattern to match files in this collection?
@@ -270,65 +264,33 @@ export class FSDB_COLLECTION<T extends Record<string, any>> {
}
for await (
const entry of fs.walk(this.config.root, {
includeDirs: false,
includeSymlinks: false,
skip: [/\.fsdb\.collection\.json$/],
exts: ['json']
const entry of walk(this.config.root, {
filter: (entry: WALK_ENTRY<T>): boolean => {
const extension = path.extname(entry.path);
if (extension.toLowerCase() !== '.json') {
return false;
}
if (entry.info.isDirectory || entry.info.isSymlink) {
return false;
}
const filename = path.basename(entry.path);
if (filename === '.fsdb.collection.json') {
return false;
}
return filter ? filter(entry) : true;
},
sort
})
) {
let item_stat = null;
if (options.before) {
item_stat = item_stat ?? await Deno.lstat(entry.path);
const birthtime = (item_stat.birthtime ?? new Date(0)).toISOString();
if (birthtime > options.before) {
continue;
}
}
if (options.after) {
item_stat = item_stat ?? await Deno.lstat(entry.path);
if ((item_stat.birthtime ?? new Date(0)).toISOString() < options.after) {
continue;
}
}
if (options.modified_before) {
item_stat = item_stat ?? await Deno.lstat(entry.path);
if ((item_stat.mtime ?? new Date(0)).toISOString() > options.modified_before) {
continue;
}
}
if (options.modified_after) {
item_stat = item_stat ?? await Deno.lstat(entry.path);
if ((item_stat.mtime ?? new Date(0)).toISOString() < options.modified_after) {
continue;
}
}
let item_id = null;
if (options.id_before) {
item_id = item_id ?? entry.name.replace(/\.json$/, '');
if (item_id >= options.id_before) {
continue;
}
}
if (options.id_after) {
item_id = item_id ?? entry.name.replace(/\.json$/, '');
if (item_id <= options.id_after) {
continue;
}
}
if (counter < offset) {
++counter;
continue;
}
const content = await Deno.readTextFile(entry.path);
results.push(JSON.parse(content));
results.push(entry);
++counter;
if (counter >= (offset + limit)) {
@@ -340,7 +302,12 @@ export class FSDB_COLLECTION<T extends Record<string, any>> {
if (Deno.env.get('FSDB_PERF')) console.dir(performance.measure('fsdb all items time', 'fsdb_all_begin', 'fsdb_all_end'));
this.emit('all', {
options,
options: {
limit,
offset,
filter,
sort
},
results
});
@@ -348,10 +315,10 @@ export class FSDB_COLLECTION<T extends Record<string, any>> {
}
/** Use indexes to search for matching items. */
async find(criteria: Record<string, any>, input_options?: FSDB_SEARCH_OPTIONS): Promise<T[]> {
async find(criteria: Record<string, any>, input_options?: FSDB_SEARCH_OPTIONS<T>): Promise<WALK_ENTRY<T>[]> {
if (Deno.env.get('FSDB_PERF')) performance.mark('fsdb_find_begin');
const options: FSDB_SEARCH_OPTIONS = {
const options: FSDB_SEARCH_OPTIONS<T> = {
...{
limit: 100,
offset: 0
@@ -359,7 +326,7 @@ export class FSDB_COLLECTION<T extends Record<string, any>> {
...(input_options ?? {})
};
const results: T[] = [];
const results: WALK_ENTRY<T>[] = [];
const item_paths: string[] = [];
for (const search_key of Object.keys(criteria)) {
@@ -380,8 +347,16 @@ export class FSDB_COLLECTION<T extends Record<string, any>> {
continue;
}
const content = await Deno.readTextFile(item_path);
results.push(JSON.parse(content));
const info: Deno.FileInfo = await Deno.lstat(item_path);
results.push({
path: item_path,
info,
depth: -1,
load: function () {
return JSON.parse(Deno.readTextFileSync(this.path)) as T;
}
});
++counter;
if (counter >= (offset + limit)) {


@@ -67,7 +67,7 @@ export class FSDB_INDEXER_SYMLINKS<T> implements FSDB_INDEXER<T> {
return [value];
}
async lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise<string[]> {
async lookup(value: string, options?: FSDB_SEARCH_OPTIONS<T>): Promise<string[]> {
if (typeof this.config.root !== 'string') {
throw new Error('root should have been set by FSDB instance');
}


@@ -65,19 +65,21 @@ Deno.test({
}
for (const item of items) {
const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email });
const fetched_by_email: ITEM[] = (await item_collection.find({ email: item.email })).map((entry) => entry.load());
asserts.assertLess(fetched_by_email.length, items.length);
asserts.assertGreater(fetched_by_email.length, 0);
asserts.assert(fetched_by_email.find((email_item) => email_item.id === item.id));
const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone });
const fetched_by_phone: ITEM[] = (await item_collection.find({ phone: item.phone })).map((entry) => entry.load());
asserts.assertLess(fetched_by_phone.length, items.length);
asserts.assertGreater(fetched_by_phone.length, 0);
asserts.assert(fetched_by_phone.find((phone_item) => phone_item.id === item.id));
const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3);
const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)];
const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value });
const fetched_by_word_in_value: ITEM[] = (await item_collection.find({ by_character_test: random_word_in_value })).map((
entry
) => entry.load());
asserts.assertLess(fetched_by_word_in_value.length, items.length);
asserts.assertGreater(fetched_by_word_in_value.length, 0);
asserts.assert(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));


@@ -67,19 +67,21 @@ Deno.test({
}
for (const item of items) {
const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email });
const fetched_by_email: ITEM[] = (await item_collection.find({ email: item.email })).map((entry) => entry.load());
asserts.assertLess(fetched_by_email.length, items.length);
asserts.assertGreater(fetched_by_email.length, 0);
asserts.assert(fetched_by_email.find((email_item) => email_item.id === item.id));
const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone });
const fetched_by_phone: ITEM[] = (await item_collection.find({ phone: item.phone })).map((entry) => entry.load());
asserts.assertLess(fetched_by_phone.length, items.length);
asserts.assertGreater(fetched_by_phone.length, 0);
asserts.assert(fetched_by_phone.find((phone_item) => phone_item.id === item.id));
const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3);
const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)];
const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value });
const fetched_by_word_in_value: ITEM[] = (await item_collection.find({ by_character_test: random_word_in_value })).map((
entry
) => entry.load());
asserts.assertLess(fetched_by_word_in_value.length, items.length);
asserts.assertGreater(fetched_by_word_in_value.length, 0);
asserts.assert(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));
@@ -89,15 +91,17 @@ Deno.test({
for (const item of items.slice(1)) {
await item_collection.delete(item);
const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email });
const fetched_by_email: ITEM[] = (await item_collection.find({ email: item.email })).map((entry) => entry.load());
asserts.assertFalse(fetched_by_email.find((email_item) => email_item.id === item.id));
const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone });
const fetched_by_phone: ITEM[] = (await item_collection.find({ phone: item.phone })).map((entry) => entry.load());
asserts.assertFalse(fetched_by_phone.find((phone_item) => phone_item.id === item.id));
const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3);
const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)];
const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value });
const fetched_by_word_in_value: ITEM[] = (await item_collection.find({ by_character_test: random_word_in_value })).map((
entry
) => entry.load());
asserts.assertFalse(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));
}
}


@@ -1,93 +1,148 @@
import * as asserts from '@std/assert';
import * as fsdb from '../fsdb.ts';
import { get_data_dir, random_email_address, random_phone_number } from './helpers.ts';
import * as path from '@std/path';
import { get_data_dir } from './helpers.ts';
import lurid from '@andyburke/lurid';
type ITEM = {
id: string;
value: string;
created: string;
written_by_time?: string;
};
const item_collection: fsdb.FSDB_COLLECTION<ITEM> = new fsdb.FSDB_COLLECTION<ITEM>({
name: 'test-06-items',
root: get_data_dir() + '/test-06-items'
});
const items: ITEM[] = [];
const item_count: number = 1_000;
const midpoint: number = Math.floor(item_count / 2);
let first_id = null;
let time_mid = null;
let mid_id = null;
let time_end = null;
let last_id = null;
for (let i = 0; i < item_count; ++i) {
const item: ITEM = {
id: lurid(),
value: `${Math.random() * 10_000_000}`,
created: new Date().toISOString()
};
first_id = first_id ?? item.id;
items.push(item);
const stored_item: ITEM = await item_collection.create(item);
item.written_by_time = new Date().toISOString();
asserts.assertObjectMatch(stored_item, item);
if (i === midpoint) {
time_mid = item.written_by_time;
mid_id = item.id;
}
time_end = item.written_by_time;
last_id = item.id;
// wait a ms between items to help with sort stuff later
await new Promise((resolve) => setTimeout(resolve, 1));
}
const sorted_items = {
by_id: items.sort((lhs, rhs) => lhs.id.localeCompare(rhs.id)).map((item) => {
return { id: item.id, value: item.value, created: item.created };
}),
by_created: items.sort((lhs, rhs) => lhs.created.localeCompare(rhs.created)).map((item) => {
return { id: item.id, value: item.value, created: item.created };
})
};
const LIMIT_MIN = 11;
const LIMIT_MAX = 333;
Deno.test({
name: 'iterate over all items',
name: 'sort all() by id',
permissions: {
env: true,
// https://github.com/denoland/deno/discussions/17258
read: true,
write: true
},
fn: async () => {
type ITEM = {
id: string;
email: string;
phone: string;
created: string;
written_by_time?: string;
};
const item_collection: fsdb.FSDB_COLLECTION<ITEM> = new fsdb.FSDB_COLLECTION<ITEM>({
name: 'test-06-items',
root: get_data_dir() + '/test-06-items'
});
asserts.assert(item_collection);
const items: ITEM[] = [];
const item_count: number = 500;
const midpoint: number = Math.floor(item_count / 2);
let first_id = null;
let time_mid = null;
let mid_id = null;
let time_end = null;
let last_id = null;
for (let i = 0; i < item_count; ++i) {
const item: ITEM = {
id: lurid(),
email: random_email_address(),
phone: random_phone_number(),
created: new Date().toISOString()
};
first_id = first_id ?? item.id;
items.push(item);
const stored_item: ITEM = await item_collection.create(item);
item.written_by_time = new Date().toISOString();
asserts.assertObjectMatch(stored_item, item);
if (i === midpoint) {
time_mid = item.written_by_time;
mid_id = item.id;
}
time_end = item.written_by_time;
last_id = item.id;
}
const LIMIT_MIN = 11;
const LIMIT_MAX = 333;
let fetch_for_sort_offset = 0;
const fetched_for_sort = [];
let more_to_fetch_for_sorting = true;
let offset = 0;
const fetched = [];
let more = true;
do {
// fuzz the limit
const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN;
const fetched_items = await item_collection.all({
limit,
offset: fetch_for_sort_offset
offset: offset,
sort: (a: fsdb.WALK_ENTRY<ITEM>, b: fsdb.WALK_ENTRY<ITEM>) =>
path.basename(a.path).replace(/\.json$/i, '').localeCompare(path.basename(b.path).replace(/\.json$/i, ''))
});
fetched_for_sort.push(...fetched_items);
fetch_for_sort_offset += fetched_items.length;
more_to_fetch_for_sorting = fetched_items.length === limit;
} while (more_to_fetch_for_sorting);
fetched.push(...(fetched_items.map((item) => item.load())));
offset += fetched_items.length;
more = fetched_items.length === limit;
} while (more);
const sorted_items = items.sort((lhs, rhs) => lhs.id.localeCompare(rhs.id)).map((item) => {
return { id: item.id, email: item.email, phone: item.phone, created: item.created };
});
const sorted_fetched = fetched_for_sort.sort((lhs, rhs) => lhs.id.localeCompare(rhs.id));
asserts.assertEquals(fetched, sorted_items.by_id);
}
});
asserts.assertEquals(sorted_fetched, sorted_items);
Deno.test({
name: 'sort all() by birthtime',
permissions: {
env: true,
// https://github.com/denoland/deno/discussions/17258
read: true,
write: true
},
fn: async () => {
asserts.assert(item_collection);
let offset = 0;
const fetched = [];
let more = true;
do {
// fuzz the limit
const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN;
const fetched_items = await item_collection.all({
limit,
offset: offset,
sort: (a: fsdb.WALK_ENTRY<ITEM>, b: fsdb.WALK_ENTRY<ITEM>) =>
(a.info.birthtime?.toISOString() ?? '').localeCompare(b.info.birthtime?.toISOString() ?? '')
});
fetched.push(...(fetched_items.map((item) => item.load())));
offset += fetched_items.length;
more = fetched_items.length === limit;
} while (more);
asserts.assertEquals(fetched, sorted_items.by_created);
}
});
Deno.test({
name: 'filter all() by birthtime',
permissions: {
env: true,
// https://github.com/denoland/deno/discussions/17258
read: true,
write: true
},
fn: async () => {
asserts.assert(item_collection);
asserts.assert(time_mid);
asserts.assert(time_end);
@@ -106,10 +161,10 @@ Deno.test({
const fetched_items = await item_collection.all({
limit,
offset: fetch_for_before_offset,
before: time_mid
filter: (entry) => (entry.info.birthtime?.toISOString() ?? '') < time_mid
});
fetched_for_before.push(...fetched_items);
fetched_for_before.push(...(fetched_items.map((item) => item.load())));
fetch_for_before_offset += fetched_items.length;
more_to_fetch_for_before = fetched_items.length === limit;
} while (more_to_fetch_for_before);
@@ -128,35 +183,6 @@ Deno.test({
// note: we use less or equal because we don't have the actual file write time
asserts.assertLessOrEqual(newest, time_mid);
// test id_before
let fetch_for_id_before_offset = 0;
const fetched_for_id_before = [];
let more_to_fetch_for_id_before = true;
do {
// fuzz the limit
const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN;
const fetched_items = await item_collection.all({
limit,
offset: fetch_for_id_before_offset,
id_before: mid_id
});
fetched_for_id_before.push(...fetched_items);
fetch_for_id_before_offset += fetched_items.length;
more_to_fetch_for_id_before = fetched_items.length === limit;
} while (more_to_fetch_for_id_before);
let newest_id = first_id;
asserts.assert(newest_id);
for (const item of fetched_for_id_before) {
if (item.id > newest_id) {
newest_id = item.id;
}
}
asserts.assertLess(newest_id, mid_id);
// test after
let fetch_for_after_offset = 0;
const fetched_for_after = [];
@@ -168,10 +194,10 @@ Deno.test({
const fetched_items = await item_collection.all({
limit,
offset: fetch_for_after_offset,
after: time_mid
filter: (entry) => (entry.info.birthtime?.toISOString() ?? '') > time_mid
});
fetched_for_after.push(...fetched_items);
fetched_for_after.push(...(fetched_items.map((item) => item.load())));
fetch_for_after_offset += fetched_items.length;
more_to_fetch_for_after = fetched_items.length === limit;
} while (more_to_fetch_for_after);
@@ -189,6 +215,54 @@ Deno.test({
// again with the file write time slop
asserts.assertGreaterOrEqual(oldest, time_mid);
}
});
Deno.test({
name: 'filter all() by id',
permissions: {
env: true,
// https://github.com/denoland/deno/discussions/17258
read: true,
write: true
},
fn: async () => {
asserts.assert(item_collection);
asserts.assert(time_mid);
asserts.assert(time_end);
asserts.assert(mid_id);
asserts.assert(last_id);
// test id_before
let fetch_for_id_before_offset = 0;
const fetched_for_id_before = [];
let more_to_fetch_for_id_before = true;
do {
// fuzz the limit
const limit = Math.floor(Math.random() * (LIMIT_MAX - LIMIT_MIN + 1)) + LIMIT_MIN;
const fetched_items = await item_collection.all({
limit,
offset: fetch_for_id_before_offset,
filter: (entry) => path.basename(entry.path).replace(/\.json$/i, '') < mid_id
});
fetched_for_id_before.push(...(fetched_items.map((item) => item.load())));
fetch_for_id_before_offset += fetched_items.length;
more_to_fetch_for_id_before = fetched_items.length === limit;
} while (more_to_fetch_for_id_before);
let newest_id = first_id;
asserts.assert(newest_id);
for (const item of fetched_for_id_before) {
if (item.id > newest_id) {
newest_id = item.id;
}
}
asserts.assertLess(newest_id, mid_id);
// test id_after
let fetch_for_id_after_offset = 0;
@@ -201,10 +275,10 @@ Deno.test({
const fetched_items = await item_collection.all({
limit,
offset: fetch_for_id_after_offset,
id_after: mid_id
filter: (entry) => path.basename(entry.path).replace(/\.json$/i, '') > mid_id
});
fetched_for_id_after.push(...fetched_items);
fetched_for_id_after.push(...(fetched_items.map((item) => item.load())));
fetch_for_id_after_offset += fetched_items.length;
more_to_fetch_for_id_after = fetched_items.length === limit;
} while (more_to_fetch_for_id_after);

utils/walk.ts (new file)

@@ -0,0 +1,62 @@
import * as path from '@std/path';
export type WALK_OPTIONS<T> = {
max_depth?: number;
depth?: number;
index?: number;
filter?: (entry: WALK_ENTRY<T>) => boolean;
sort?: (a: WALK_ENTRY<T>, b: WALK_ENTRY<T>) => number;
};
export type WALK_ENTRY<T> = {
path: string;
info: Deno.FileInfo;
depth: number;
load: () => T;
};
export async function* walk<T>(
root: string | URL,
{
depth = 0,
filter = undefined,
sort = undefined
}: WALK_OPTIONS<T> = {}
): AsyncIterableIterator<WALK_ENTRY<T>> {
const root_path = typeof root === 'string' ? path.resolve(root) : path.resolve(path.fromFileUrl(root));
const entries: WALK_ENTRY<T>[] = [];
for await (const dir_entry of Deno.readDir(root_path)) {
const full_path = path.join(root_path, dir_entry.name);
const info = await Deno.lstat(full_path);
const entry = {
path: full_path,
info,
depth,
load: function () {
return JSON.parse(Deno.readTextFileSync(this.path)) as T;
}
};
entries.push(entry);
}
if (sort) {
entries.sort(sort);
}
for (const entry of entries) {
if (!filter || filter(entry)) {
yield entry;
}
if (entry.info.isDirectory) {
yield* walk(entry.path, {
depth: depth + 1,
filter,
sort
});
}
}
}
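
For reference, a minimal standalone sketch of using this new walk utility (the directory path and item type here are illustrative, not part of this commit):

```ts
import { walk, WALK_ENTRY } from './utils/walk.ts';

type ITEM = { id: string };

// walk a directory, yielding only JSON files, newest-first by mtime within each directory level
for await (
    const entry of walk<ITEM>('./.fsdb/example-items', {
        filter: (entry: WALK_ENTRY<ITEM>) => !entry.info.isDirectory && entry.path.endsWith('.json'),
        sort: (a, b) => (b.info.mtime?.getTime() ?? 0) - (a.info.mtime?.getTime() ?? 0)
    })
) {
    const item: ITEM = entry.load(); // lazily read + parse the JSON for this entry
    console.log(entry.path, item.id);
}
```

Note that `filter` only controls which entries are yielded; directories are still recursed into, and `sort` orders entries within each directory level rather than globally.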