1
0
mirror of git://jb55.com/damus synced 2024-10-04 19:00:42 +00:00

nostrdb/search: allow searching from newest-to-oldest and oldest-to-newest

Signed-off-by: William Casarin <jb55@jb55.com>
This commit is contained in:
William Casarin 2023-12-01 17:04:01 -08:00
parent 01c239c0eb
commit e537c7cef4
2 changed files with 84 additions and 20 deletions

View File

@ -177,7 +177,7 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize,
// TODO: need to update this to uint64_t // TODO: need to update this to uint64_t
// we push this first because our query function can pull this off // we push this first because our query function can pull this off
// quickly to check matches // quickly to check matches
if (!push_varint(&cur, (int)note_id)) if (!push_varint(&cur, (int32_t)note_id))
return 0; return 0;
// string length // string length
@ -238,12 +238,14 @@ static int ndb_make_text_search_key_high(unsigned char *buf, int bufsize,
int *keysize) int *keysize)
{ {
uint64_t timestamp, note_id; uint64_t timestamp, note_id;
timestamp = UINT64_MAX; timestamp = INT32_MAX;
note_id = UINT64_MAX; note_id = INT32_MAX;
return ndb_make_text_search_key(buf, bufsize, 0, wordlen, word, return ndb_make_text_search_key(buf, bufsize, 0, wordlen, word,
timestamp, note_id, keysize); timestamp, note_id, keysize);
} }
typedef int (*ndb_text_search_key_order_fn)(unsigned char *buf, int bufsize, int wordlen, const char *word, int *keysize);
/** From LMDB: Compare two items lexically */ /** From LMDB: Compare two items lexically */
static int mdb_cmp_memn(const MDB_val *a, const MDB_val *b) { static int mdb_cmp_memn(const MDB_val *a, const MDB_val *b) {
int diff; int diff;
@ -2469,7 +2471,8 @@ static int prefix_count(const char *str1, int len1, const char *str2, int len2)
static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op, static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op,
MDB_val *k, struct ndb_word *search_word, MDB_val *k, struct ndb_word *search_word,
struct ndb_text_search_result *last_result, struct ndb_text_search_result *last_result,
struct ndb_text_search_result *result) struct ndb_text_search_result *result,
MDB_cursor_op order_op)
{ {
struct cursor key_cursor; struct cursor key_cursor;
MDB_val v; MDB_val v;
@ -2481,8 +2484,18 @@ static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op,
// key. // key.
// //
// Subsequent searches should use MDB_NEXT // Subsequent searches should use MDB_NEXT
if (mdb_cursor_get(cursor, k, &v, op)) if (mdb_cursor_get(cursor, k, &v, op)) {
return 0; // we should only do this if we're going in reverse
if (op == MDB_SET_RANGE && order_op == MDB_PREV) {
// if set range worked and our key exists, it should be
// the one right before this one
if (mdb_cursor_get(cursor, k, &v, MDB_PREV))
return 0;
} else {
return 0;
}
}
make_cursor(k->mv_data, k->mv_data + k->mv_size, &key_cursor); make_cursor(k->mv_data, k->mv_data + k->mv_size, &key_cursor);
@ -2566,28 +2579,68 @@ static void ndb_text_search_results_init(
results->num_results = 0; results->num_results = 0;
} }
// Debug helper: print a one-line rendering of a text search key to stdout,
// in the form K<'word' word_index timestamp note_id:id>. No trailing newline
// is written.
//
// The word is printed with an explicit length (%.*s), so key->str does not
// need to be NUL-terminated for this call.
//
// NOTE(review): %.*s and %d require key->str_len and key->word_index to be
// of type int; the struct definition is not visible here -- confirm, and add
// casts or switch to PRIu64 if either field is wider.
static void ndb_print_text_search_key(struct ndb_text_search_key *key)
{
printf("K<'%.*s' %d %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str,
key->word_index,
key->timestamp,
key->note_id);
}
// Reset a text search config to its defaults: the result limit is set to
// MAX_TEXT_SEARCH_RESULTS and the order to NDB_ORDER_DESCENDING.
//
// cfg must point to writable storage; it is not checked for NULL.
void ndb_default_text_search_config(struct ndb_text_search_config *cfg)
{
	cfg->limit = MAX_TEXT_SEARCH_RESULTS;
	cfg->order = NDB_ORDER_DESCENDING;
}
// Set the result ordering on a text search config.
//
// cfg:   config to modify; not checked for NULL.
// order: NDB_ORDER_DESCENDING or NDB_ORDER_ASCENDING. The value is stored
//        as-is, without validation.
void ndb_text_search_config_set_order(struct ndb_text_search_config *cfg,
enum ndb_search_order order)
{
cfg->order = order;
}
// Set the maximum number of results on a text search config.
//
// cfg:   config to modify; not checked for NULL.
// limit: stored as-is, without validation. ndb_text_search takes the
//        smaller of this value and MAX_TEXT_SEARCH_RESULTS, so larger
//        values have no additional effect there.
void ndb_text_search_config_set_limit(struct ndb_text_search_config *cfg, int limit)
{
cfg->limit = limit;
}
int ndb_text_search(struct ndb_txn *txn, const char *query, int ndb_text_search(struct ndb_txn *txn, const char *query,
struct ndb_text_search_results *results, int limit) struct ndb_text_search_results *results,
struct ndb_text_search_config *config)
{ {
unsigned char buffer[1024], *buf; unsigned char buffer[1024], *buf;
unsigned char saved_buf[1024], *saved; unsigned char saved_buf[1024], *saved;
struct ndb_text_search_result *result, *last_result; struct ndb_text_search_result *result, *last_result;
struct ndb_text_search_result candidate, last_candidate; struct ndb_text_search_result candidate, last_candidate;
struct ndb_search_words search_words; struct ndb_search_words search_words;
//struct ndb_text_search_key search_key;
struct ndb_word *search_word; struct ndb_word *search_word;
struct cursor cur; struct cursor cur;
ndb_text_search_key_order_fn key_order_fn;
MDB_dbi text_db; MDB_dbi text_db;
MDB_cursor *cursor; MDB_cursor *cursor;
MDB_val k, v; MDB_val k, v;
int i, j, keysize, saved_size; int i, j, keysize, saved_size, limit;
MDB_cursor_op op; MDB_cursor_op op, order_op;
//int num_note_ids; //int num_note_ids;
saved = NULL; saved = NULL;
ndb_text_search_results_init(results); ndb_text_search_results_init(results);
ndb_search_words_init(&search_words); ndb_search_words_init(&search_words);
// search config
limit = MAX_TEXT_SEARCH_RESULTS;
order_op = MDB_PREV;
key_order_fn = ndb_make_text_search_key_high;
if (config) {
if (config->order == NDB_ORDER_ASCENDING) {
order_op = MDB_NEXT;
key_order_fn = ndb_make_text_search_key_low;
}
limit = min(limit, config->limit);
}
// end search config
//num_note_ids = 0;
text_db = txn->lmdb->dbs[NDB_DB_NOTE_TEXT]; text_db = txn->lmdb->dbs[NDB_DB_NOTE_TEXT];
make_cursor((unsigned char *)query, (unsigned char *)query + strlen(query), &cur); make_cursor((unsigned char *)query, (unsigned char *)query + strlen(query), &cur);
@ -2600,8 +2653,6 @@ int ndb_text_search(struct ndb_txn *txn, const char *query,
return 0; return 0;
} }
limit = min(MAX_TEXT_SEARCH_RESULTS, limit);
// for each word, we recursively find all of the submatches // for each word, we recursively find all of the submatches
while (results->num_results < limit) { while (results->num_results < limit) {
last_result = NULL; last_result = NULL;
@ -2619,18 +2670,17 @@ int ndb_text_search(struct ndb_txn *txn, const char *query,
// reposition the cursor so we can continue // reposition the cursor so we can continue
if (mdb_cursor_get(cursor, &k, &v, MDB_SET_RANGE)) if (mdb_cursor_get(cursor, &k, &v, MDB_SET_RANGE))
break; return 0;
op = MDB_NEXT; op = order_op;
} else { } else {
// construct a packed fulltext search key using this // construct a packed fulltext search key using this
// word this key doesn't contain any timestamp or index // word this key doesn't contain any timestamp or index
// info, so it should range match instead of exact // info, so it should range match instead of exact
// match // match
if (!ndb_make_text_search_key_low( if (!key_order_fn(buffer, sizeof(buffer),
buffer, sizeof(buffer), search_words.words[0].word_len,
search_words.words[0].word_len, search_words.words[0].word, &keysize))
search_words.words[0].word, &keysize))
{ {
// word is too big to fit in 1024-sized key // word is too big to fit in 1024-sized key
continue; continue;
@ -2677,7 +2727,8 @@ int ndb_text_search(struct ndb_txn *txn, const char *query,
if (!ndb_text_search_next_word(cursor, op, &k, if (!ndb_text_search_next_word(cursor, op, &k,
search_word, search_word,
last_result, last_result,
&candidate)) { &candidate,
order_op)) {
break; break;
} }

View File

@ -277,6 +277,11 @@ enum ndb_generic_element_type {
NDB_ELEMENT_ID = 2, NDB_ELEMENT_ID = 2,
}; };
// Direction in which ndb_text_search walks matching index entries.
enum ndb_search_order {
// Default order (set by ndb_default_text_search_config). The search starts
// from the highest key for the word and steps backwards through the index;
// presumably this yields newest-to-oldest results -- confirm against the
// key layout.
NDB_ORDER_DESCENDING,
// The search starts from the lowest key for the word and steps forwards
// through the index; presumably oldest-to-newest.
NDB_ORDER_ASCENDING,
};
union ndb_filter_element { union ndb_filter_element {
const char *string; const char *string;
const unsigned char *id; const unsigned char *id;
@ -311,6 +316,11 @@ struct ndb_config {
ndb_ingest_filter_fn ingest_filter; ndb_ingest_filter_fn ingest_filter;
}; };
// Options for ndb_text_search. Initialize with
// ndb_default_text_search_config and adjust with the
// ndb_text_search_config_set_* functions.
struct ndb_text_search_config {
// direction in which results are walked; see enum ndb_search_order
enum ndb_search_order order;
// maximum number of results requested; ndb_text_search caps this at
// MAX_TEXT_SEARCH_RESULTS
int limit;
};
// CONFIG // CONFIG
void ndb_default_config(struct ndb_config *); void ndb_default_config(struct ndb_config *);
void ndb_config_set_ingest_threads(struct ndb_config *config, int threads); void ndb_config_set_ingest_threads(struct ndb_config *config, int threads);
@ -377,7 +387,10 @@ void ndb_filter_end_field(struct ndb_filter *);
void ndb_filter_free(struct ndb_filter *filter); void ndb_filter_free(struct ndb_filter *filter);
// FULLTEXT SEARCH // FULLTEXT SEARCH
int ndb_text_search(struct ndb_txn *txn, const char *query, struct ndb_text_search_results *, int limit); int ndb_text_search(struct ndb_txn *txn, const char *query, struct ndb_text_search_results *, struct ndb_text_search_config *);
void ndb_default_text_search_config(struct ndb_text_search_config *);
void ndb_text_search_config_set_order(struct ndb_text_search_config *, enum ndb_search_order);
void ndb_text_search_config_set_limit(struct ndb_text_search_config *, int limit);
// stats // stats
int ndb_stat(struct ndb *ndb, struct ndb_stat *stat); int ndb_stat(struct ndb *ndb, struct ndb_stat *stat);