PostgreSQL Source Code: src/backend/utils/adt/tsvector.c Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

16

22#include "utils/fmgrprotos.h"

25

26typedef struct

27{

30 int poslen;

32

33

34

35int

37{

40

42}

43

44

45

46

47

48

49

50

51static int

53{

55 *res;

56

57 if (l <= 1)

58 return l;

59

61

62 res = a;

63 ptr = a + 1;

64 while (ptr - a < l)

65 {

67 {

68 res++;

69 *res = *ptr;

72 break;

73 }

76 ptr++;

77 }

78

79 return res + 1 - a;

80}

81

82

83

84

85

86static int

88{

91 char *BufferStr = (char *) arg;

92

94 &BufferStr[b->pos], b->len,

95 false);

96}

97

98

99

100

101

102static int

104{

105 int buflen;

107 *res;

108

110

111 if (l > 1)

113

114 buflen = 0;

115 res = a;

116 ptr = a + 1;

117 while (ptr - a < l)

118 {

122 {

123

126 {

130 }

131 res++;

132 if (res != ptr)

134 }

136 {

138 {

139

141

146 res->poslen = newlen;

148 }

149 else

150 {

151

155 }

156 }

157 ptr++;

158 }

159

160

163 {

167 }

168

169 *outbuflen = buflen;

170 return res + 1 - a;

171}

172

173

176{

178 Node *escontext = fcinfo->context;

181 int totallen;

182 int arrlen;

184 int len = 0;

186 int i;

188 int toklen;

190 int poslen;

191 char *strbuf;

192 int stroff;

193

194

195

196

197

199 char *cur;

200 int buflen = 256;

201

203

204 arrlen = 64;

207

209 {

212 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

213 errmsg("word is too long (%ld bytes, max %ld bytes)",

214 (long) toklen,

216

219 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

220 errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",

222

223

224

225

226 if (len >= arrlen)

227 {

228 arrlen *= 2;

231 }

232 while ((cur - tmpbuf) + toklen >= buflen)

233 {

235

236 buflen *= 2;

239 }

243 cur += toklen;

244

245 if (poslen != 0)

246 {

250 }

251 else

252 {

256 }

258 }

259

261

262

265

266 if (len > 0)

268 else

269 buflen = 0;

270

273 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

274 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));

275

282 stroff = 0;

283 for (i = 0; i < len; i++)

284 {

288 if (arr[i].entry.haspos)

289 {

290

291 if (arr[i].poslen > 0xFFFF)

292 elog(ERROR, "positions array too long");

293

294

296 *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;

297 stroff += sizeof(uint16);

298

299

300 memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));

302

304 }

306 }

307

308 Assert((strbuf + stroff - (char *) in) == totallen);

309

311}

312

315{

317 char *outbuf;

319 lenbuf = 0,

320 pp;

322 char *curbegin,

323 *curin,

324 *curout;

325

326 lenbuf = out->size * 2 + out->size - 1 + 2 ;

327 for (i = 0; i < out->size; i++)

328 {

330 if (ptr[i].haspos)

331 lenbuf += 1 + 7 * POSDATALEN(out, &(ptr[i]));

332 }

333

334 curout = outbuf = (char *) palloc(lenbuf);

335 for (i = 0; i < out->size; i++)

336 {

337 curbegin = curin = STRPTR(out) + ptr->pos;

338 if (i != 0)

339 *curout++ = ' ';

340 *curout++ = '\'';

341 while (curin - curbegin < ptr->len)

342 {

344

345 if (t_iseq(curin, '\''))

346 *curout++ = '\'';

347 else if (t_iseq(curin, '\\'))

348 *curout++ = '\\';

349

350 while (len--)

351 *curout++ = *curin++;

352 }

353

354 *curout++ = '\'';

355 if ((pp = POSDATALEN(out, ptr)) != 0)

356 {

358

359 *curout++ = ':';

361 while (pp)

362 {

365 {

366 case 3:

367 *curout++ = 'A';

368 break;

369 case 2:

370 *curout++ = 'B';

371 break;

372 case 1:

373 *curout++ = 'C';

374 break;

375 case 0:

376 default:

377 break;

378 }

379

380 if (pp > 1)

381 *curout++ = ',';

382 pp--;

383 wptr++;

384 }

385 }

386 ptr++;

387 }

388

389 *curout = '\0';

392}

393

394

395

396

397

398

399

400

401

402

403

404

405

408{

411 int i,

412 j;

414

416

418 for (i = 0; i < vec->size; i++)

419 {

421

422

423

424

425

428

431

432 if (npos > 0)

433 {

435

436 for (j = 0; j < npos; j++)

438 }

439 weptr++;

440 }

441

443}

444

447{

450 int i;

452 int datalen;

453

454

456 Size len;

457 bool needSort = false;

458

461 elog(ERROR, "invalid size of tsvector");

462

464

465 len = hdrlen * 2;

467 vec->size = nentries;

468

469 datalen = 0;

470 for (i = 0; i < nentries; i++)

471 {

472 const char *lexeme;

474 size_t lex_len;

475

478

479

480

481 lex_len = strlen(lexeme);

483 elog(ERROR, "invalid tsvector: lexeme too long");

484

486 elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded");

487

489 elog(ERROR, "unexpected number of tsvector positions");

490

491

492

493

494

495

496 while (hdrlen + SHORTALIGN(datalen + lex_len) +

498 {

499 len *= 2;

501 }

502

506

507 memcpy(STRPTR(vec) + datalen, lexeme, lex_len);

508

509 datalen += lex_len;

510

514 needSort = true;

515

516

517 if (npos > 0)

518 {

521

522

523

524

525

526

528 {

529 *(STRPTR(vec) + datalen) = '\0';

531 }

532

533 memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));

534

536 for (j = 0; j < npos; j++)

537 {

540 elog(ERROR, "position information is misordered");

541 }

542

544 }

545 }

546

548

549 if (needSort)

552

554}

int errcode(int sqlerrcode)

int errmsg(const char *fmt,...)

#define ereturn(context, dummy_value,...)

#define PG_FREE_IF_COPY(ptr, n)

#define PG_RETURN_BYTEA_P(x)

#define PG_GETARG_POINTER(n)

#define PG_RETURN_CSTRING(x)

#define PG_GETARG_CSTRING(n)

Assert(PointerIsAligned(start, uint64))

#define CALCDATASIZE(x, lenstr)

static int pg_cmp_s32(int32 a, int32 b)

int pg_database_encoding_max_length(void)

int pg_mblen(const char *mbstr)

void * repalloc(void *pointer, Size size)

void pfree(void *pointer)

void * palloc0(Size size)

#define SOFT_ERROR_OCCURRED(escontext)

void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)

#define qsort(a, b, c, d)

unsigned int pq_getmsgint(StringInfo msg, int b)

void pq_sendtext(StringInfo buf, const char *str, int slen)

const char * pq_getmsgstring(StringInfo msg)

void pq_begintypsend(StringInfo buf)

bytea * pq_endtypsend(StringInfo buf)

static void pq_sendint32(StringInfo buf, uint32 i)

static void pq_sendbyte(StringInfo buf, uint8 byt)

static void pq_sendint16(StringInfo buf, uint16 i)

StringInfoData * StringInfo

WordEntry entries[FLEXIBLE_ARRAY_MEMBER]

#define PG_GETARG_TSVECTOR(n)

#define PG_RETURN_TSVECTOR(x)

#define WEP_SETWEIGHT(x, v)

Datum tsvectorout(PG_FUNCTION_ARGS)

static int uniquePos(WordEntryPos *a, int l)

Datum tsvectorrecv(PG_FUNCTION_ARGS)

static int compareentry(const void *va, const void *vb, void *arg)

Datum tsvectorin(PG_FUNCTION_ARGS)

int compareWordEntryPos(const void *a, const void *b)

Datum tsvectorsend(PG_FUNCTION_ARGS)

static int uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)

int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)

void close_tsvector_parser(TSVectorParseState state)

bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)

TSVectorParseState init_tsvector_parser(char *input, int flags, Node *escontext)

#define SET_VARSIZE(PTR, len)

static StringInfoData tmpbuf