PostgreSQL Source Code: src/backend/utils/adt/tsvector.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
16
22#include "utils/fmgrprotos.h"
25
26typedef struct
27{
32
33
34
/*
 * NOTE(review): the signature and the whole body of this function were lost
 * when this listing was extracted; only the return type and the braces
 * remain.  Presumably this is compareWordEntryPos(), which the symbol list
 * at the end of this page declares as
 * "int compareWordEntryPos(const void *a, const void *b)" -- confirm against
 * the upstream file before relying on that.
 */
35int
37{
40
42}
43
44
45
46
47
48
49
50
/*
 * Compact the array "a" of "l" elements in place and return how many
 * elements remain at the front of the array.
 *
 * NOTE(review): the signature line is missing from this listing; presumably
 * this is uniquePos(WordEntryPos *a, int l) as named in the symbol list at
 * the end of the page -- confirm.  The statement at upstream line 60, the
 * condition guarding the copy (line 66) and the condition guarding the
 * "break" (lines 70-71) are also missing, so the exact criteria for keeping
 * an element and for stopping early cannot be read from here.
 */
51static int
53{
55 *res;
56
/* Zero or one element: nothing to compact, the length is returned as is. */
57 if (l <= 1)
58 return l;
59
61
/*
 * "res" is the write cursor (the last element kept so far) and "ptr" is the
 * read cursor, which starts one element ahead.
 */
62 res = a;
63 ptr = a + 1;
64 while (ptr - a < l)
65 {
67 {
/* Keep this element: advance the write cursor and copy it down. */
68 res++;
69 *res = *ptr;
72 break;
73 }
76 ptr++;
77 }
78
/* "res" points at the last kept element, so the count is its index plus one. */
79 return res + 1 - a;
80}
81
82
83
84
85
/*
 * Comparison callback that receives an extra "arg" pointer.
 *
 * "arg" is treated as the base of a character buffer; the entry "b" is
 * located inside that buffer by its "pos" offset and "len" length.
 *
 * NOTE(review): the signature and the start of the return expression are
 * missing from this listing.  Presumably this is
 * compareentry(const void *va, const void *vb, void *arg) and the call is to
 * tsCompareString(a, lena, b, lenb, prefix), both named in the symbol list
 * at the end of the page; if so, the trailing "false" is the "prefix"
 * argument -- confirm against the upstream file.
 */
86static int
88{
91 char *BufferStr = (char *) arg;
92
94 &BufferStr[b->pos], b->len,
95 false);
96}
97
98
99
100
101
/*
 * Compact the array "a" in place, report a byte count through "*outbuflen",
 * and return the number of elements left at the front of the array.
 *
 * NOTE(review): the signature is missing from this listing; presumably this
 * is uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen) as named
 * in the symbol list at the end of the page -- confirm.  Most of the loop
 * body (the comparison, the handling of duplicate entries and every update
 * of "buflen") is also missing, so only the overall shape is documented.
 */
102static int
104{
105 int buflen;
107 *res;
108
110
/* The statement controlled by this test is missing from the listing. */
111 if (l > 1)
113
114 buflen = 0;
/* "res" is the last element kept so far; "ptr" scans ahead from the second. */
115 res = a;
116 ptr = a + 1;
117 while (ptr - a < l)
118 {
122 {
123
126 {
130 }
131 res++;
/* The statement controlled by this test is missing from the listing. */
132 if (res != ptr)
134 }
136 {
138 {
139
141
146 res->poslen = newlen;
148 }
149 else
150 {
151
155 }
156 }
157 ptr++;
158 }
159
160
163 {
167 }
168
/* Hand the accumulated byte count back to the caller. */
169 *outbuflen = buflen;
/* "res" points at the last kept element, so the count is its index plus one. */
170 return res + 1 - a;
171}
172
173
/*
 * Builds a value from a sequence of tokens, each with an optional list of
 * positions, enforcing size limits along the way.
 *
 * NOTE(review): the "Datum name(PG_FUNCTION_ARGS)" header is missing from
 * this listing; presumably this is tsvectorin(), named in the symbol list at
 * the end of the page -- confirm.  Several declarations, the parser calls
 * and the statements that report the errors below are also missing.
 */
176{
/* Error context supplied by the caller through the call info. */
178 Node *escontext = fcinfo->context;
181 int totallen;
182 int arrlen;
184 int len = 0;
186 int i;
188 int toklen;
190 int poslen;
191 char *strbuf;
192 int stroff;
193
194
195
196
197
199 char *cur;
/* Starting capacity of the scratch buffer; it is doubled on demand below. */
200 int buflen = 256;
201
203
/* Starting capacity of the entry array; it is doubled once "len" reaches it. */
204 arrlen = 64;
207
209 {
212 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
213 errmsg("word is too long (%ld bytes, max %ld bytes)",
214 (long) toklen,
216
219 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
220 errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
222
223
224
225
/* Entry array is full: double its capacity (the reallocation line is missing). */
226 if (len >= arrlen)
227 {
228 arrlen *= 2;
231 }
/*
 * Keep doubling the scratch buffer until the bytes already used plus this
 * token are strictly less than its capacity.
 */
232 while ((cur - tmpbuf) + toklen >= buflen)
233 {
235
236 buflen *= 2;
239 }
/* Step the scratch-buffer cursor past the token. */
243 cur += toklen;
244
/* Entries with and without positions are recorded differently (bodies missing). */
245 if (poslen != 0)
246 {
250 }
251 else
252 {
256 }
258 }
259
261
262
265
/*
 * "buflen" is reused from here on: with no entries it is reset to 0, and the
 * error message below reports it as a byte count.  The statement for the
 * non-empty case is missing from this listing.
 */
266 if (len > 0)
268 else
269 buflen = 0;
270
273 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
274 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
275
/* "stroff" is the write offset into "strbuf" while the entries are laid out. */
282 stroff = 0;
284 {
288 if (arr[i].entry.haspos)
289 {
290
/* The count is stored as a uint16 just below, so it must fit in 16 bits. */
291 if (arr[i].poslen > 0xFFFF)
292 elog(ERROR, "positions array too long");
293
294
/* Write the position count at the current offset, then step past it. */
296 *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
297 stroff += sizeof(uint16);
298
299
/* The positions themselves follow the count. */
300 memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
302
304 }
306 }
307
/* The write cursor must end exactly "totallen" bytes past the start of "in". */
308 Assert((strbuf + stroff - (char *) in) == totallen);
309
311}
312
/*
 * Renders the value "out" as a NUL-terminated string: each entry is written
 * in single quotes, entries are separated by one space, and an entry with
 * positions is followed by ':' and a comma-separated list.
 *
 * NOTE(review): the "Datum name(PG_FUNCTION_ARGS)" header is missing from
 * this listing; presumably this is tsvectorout(), named in the symbol list
 * at the end of the page -- confirm.  The declarations of "out", "ptr", "i",
 * "len" and "wptr", the code that prints each position number and the return
 * statement are also missing.
 */
315{
317 char *outbuf;
319 lenbuf = 0,
320 pp;
322 char *curbegin,
323 *curin,
324 *curout;
325
/*
 * First pass: compute the output buffer size.  The base term allows two
 * bytes per entry (the opening and closing quote written below), size - 1
 * separating spaces, and 2 more bytes, which cover the final NUL.
 */
326 lenbuf = out->size * 2 + out->size - 1 + 2 ;
327 for (i = 0; i < out->size; i++)
328 {
/*
 * One byte for the ':' plus 7 bytes per position.
 * NOTE(review): presumably 7 covers the printed number, a weight letter and
 * a comma, and the missing upstream line 329 reserves room for the entry
 * text itself -- confirm.
 */
330 if (ptr[i].haspos)
331 lenbuf += 1 + 7 * POSDATALEN(out, &(ptr[i]));
332 }
333
/* Second pass: write the text; "curout" is the output cursor. */
334 curout = outbuf = (char *) palloc(lenbuf);
335 for (i = 0; i < out->size; i++)
336 {
337 curbegin = curin = STRPTR(out) + ptr->pos;
338 if (i != 0)
339 *curout++ = ' ';
340 *curout++ = '\'';
341 while (curin - curbegin < ptr->len)
342 {
344
/*
 * A quote or a backslash is escaped by doubling it: emit one extra copy
 * here, then the character itself is copied by the loop below.
 */
345 if (t_iseq(curin, '\''))
346 *curout++ = '\'';
347 else if (t_iseq(curin, '\\'))
348 *curout++ = '\\';
349
/*
 * Copy "len" bytes of input.
 * NOTE(review): "len" is set on the missing line 343, presumably to the byte
 * length of the current character -- confirm.
 */
350 while (len--)
351 *curout++ = *curin++;
352 }
353
354 *curout++ = '\'';
/* "pp" counts the positions still to be written for this entry. */
355 if ((pp = POSDATALEN(out, ptr)) != 0)
356 {
358
359 *curout++ = ':';
361 while (pp)
362 {
365 {
/*
 * Values 3, 2 and 1 are written as 'A', 'B' and 'C'; 0 and anything else
 * write no letter.
 * NOTE(review): the switch expression is missing; presumably it is the
 * weight of the current position -- confirm.
 */
366 case 3:
367 *curout++ = 'A';
368 break;
369 case 2:
370 *curout++ = 'B';
371 break;
372 case 1:
373 *curout++ = 'C';
374 break;
375 case 0:
376 default:
377 break;
378 }
379
/* Comma after every position except the last one. */
380 if (pp > 1)
381 *curout++ = ',';
382 pp--;
383 wptr++;
384 }
385 }
386 ptr++;
387 }
388
389 *curout = '\0';
392}
393
394
395
396
397
398
399
400
401
402
403
404
405
/*
 * Walks every entry of "vec" (advancing "weptr" once per entry) and, for an
 * entry whose "npos" is positive, walks its positions as well.
 *
 * NOTE(review): the "Datum name(PG_FUNCTION_ARGS)" header and every
 * statement that does the per-entry and per-position work are missing from
 * this listing.  Presumably this is tsvectorsend(), named in the symbol list
 * at the end of the page, which also names the pq_send* helpers -- confirm
 * against the upstream file.
 */
408{
411 int i,
412 j;
414
416
418 for (i = 0; i < vec->size; i++)
419 {
421
422
423
424
425
428
431
/* Only entries that actually have positions run the inner loop. */
432 if (npos > 0)
433 {
435
436 for (j = 0; j < npos; j++)
438 }
439 weptr++;
440 }
441
443}
444
/*
 * Rebuilds a value entry by entry from externally supplied data, validating
 * it as it goes; each failed check is raised with elog(ERROR, ...).
 *
 * NOTE(review): the "Datum name(PG_FUNCTION_ARGS)" header is missing from
 * this listing; presumably this is tsvectorrecv(), named in the symbol list
 * at the end of the page -- confirm.  The conditions in front of each elog()
 * call, the message-reading calls, the reallocation and the final sort and
 * return are also missing.
 */
447{
450 int i;
452 int datalen;
453
454
457 bool needSort = false;
458
461 elog(ERROR, "invalid size of tsvector");
462
464
/* Initial size estimate: twice the header length; doubled on demand below. */
465 len = hdrlen * 2;
467 vec->size = nentries;
468
/* "datalen" is the number of bytes used so far in the string area of "vec". */
469 datalen = 0;
470 for (i = 0; i < nentries; i++)
471 {
472 const char *lexeme;
474 size_t lex_len;
475
478
479
480
481 lex_len = strlen(lexeme);
483 elog(ERROR, "invalid tsvector: lexeme too long");
484
486 elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded");
487
489 elog(ERROR, "unexpected number of tsvector positions");
490
491
492
493
494
495
/* Double the size estimate for as long as this condition holds (remainder of the test is missing). */
496 while (hdrlen + SHORTALIGN(datalen + lex_len) +
498 {
499 len *= 2;
501 }
502
506
/* Exactly "lex_len" bytes are copied, so no terminating NUL is stored. */
507 memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
508
509 datalen += lex_len;
510
/* Remember that a sort is needed; the test that triggers this is missing. */
514 needSort = true;
515
516
517 if (npos > 0)
518 {
521
522
523
524
525
526
528 {
/*
 * Write one zero byte at the current end of the data.
 * NOTE(review): presumably padding so the count below is 2-byte aligned;
 * the guarding condition is missing -- confirm.
 */
529 *(STRPTR(vec) + datalen) = '\0';
531 }
532
/* Store the position count as raw uint16 bytes at the current offset. */
533 memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
534
536 for (j = 0; j < npos; j++)
537 {
540 elog(ERROR, "position information is misordered");
541 }
542
544 }
545 }
546
548
/* Sorting is deferred until every entry has been read (statement missing). */
549 if (needSort)
552
554}
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereturn(context, dummy_value,...)
#define PG_FREE_IF_COPY(ptr, n)
#define PG_RETURN_BYTEA_P(x)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_CSTRING(x)
#define PG_GETARG_CSTRING(n)
Assert(PointerIsAligned(start, uint64))
#define CALCDATASIZE(x, lenstr)
static int pg_cmp_s32(int32 a, int32 b)
int pg_database_encoding_max_length(void)
int pg_mblen(const char *mbstr)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
void * palloc0(Size size)
#define SOFT_ERROR_OCCURRED(escontext)
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
#define qsort(a, b, c, d)
unsigned int pq_getmsgint(StringInfo msg, int b)
void pq_sendtext(StringInfo buf, const char *str, int slen)
const char * pq_getmsgstring(StringInfo msg)
void pq_begintypsend(StringInfo buf)
bytea * pq_endtypsend(StringInfo buf)
static void pq_sendint32(StringInfo buf, uint32 i)
static void pq_sendbyte(StringInfo buf, uint8 byt)
static void pq_sendint16(StringInfo buf, uint16 i)
StringInfoData * StringInfo
WordEntry entries[FLEXIBLE_ARRAY_MEMBER]
#define PG_GETARG_TSVECTOR(n)
#define PG_RETURN_TSVECTOR(x)
#define WEP_SETWEIGHT(x, v)
Datum tsvectorout(PG_FUNCTION_ARGS)
static int uniquePos(WordEntryPos *a, int l)
Datum tsvectorrecv(PG_FUNCTION_ARGS)
static int compareentry(const void *va, const void *vb, void *arg)
Datum tsvectorin(PG_FUNCTION_ARGS)
int compareWordEntryPos(const void *a, const void *b)
Datum tsvectorsend(PG_FUNCTION_ARGS)
static int uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
void close_tsvector_parser(TSVectorParseState state)
bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
TSVectorParseState init_tsvector_parser(char *input, int flags, Node *escontext)
#define SET_VARSIZE(PTR, len)
static StringInfoData tmpbuf