Preparers Chunk Text Operation Examples
Examples of using preparers with the ChunkText operation in AI Accelerator.
-- Stand-alone aidb.chunk_text() examples.
-- Each statement is laid out on its own lines: a `--` comment runs to the end
-- of the physical line, so comments and statements must not share one line
-- (otherwise everything after the first `--` is silently commented out).

-- Only specify a desired length
SELECT *
FROM aidb.chunk_text(
    'This is a simple test sentence.',
    '{"desired_length": 10}'
);

-- Specify a desired length and a maximum length
SELECT *
FROM aidb.chunk_text(
    'This is a simple test sentence.',
    '{"desired_length": 10, "max_length": 15}'
);

-- Named parameters
SELECT
    chunk_id,
    chunk
FROM aidb.chunk_text(
    input => 'This is a significantly longer text example that might require splitting into smaller chunks. The purpose of this function is to partition text data into segments of a specified maximum length, for example, this sentence 145 is characters. This enables processing or storage of data in manageable parts.',
    options => '{"desired_length": 10}'
);

-- Semantic chunking to split into the largest continuous semantic chunk that fits in the max_length
SELECT *
FROM aidb.chunk_text(
    'This sentence should be its own chunk. This too.',
    '{"desired_length": 1, "max_length": 1000}'
);
Preparer with table data source
-- End-to-end example: chunk the text stored in a table with a ChunkText preparer.
-- Statements and `--` comments are kept on separate lines so no statement is
-- swallowed by a preceding line comment.

-- Create source test table
CREATE TABLE source_table__1628 (
    id INT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
    content TEXT NOT NULL
);

-- Seed two rows; the explicit column list keeps the INSERT valid if columns
-- are later added to or reordered in the table.
INSERT INTO source_table__1628 (id, content)
VALUES
    (1, 'This is a significantly longer text example that might require splitting into smaller chunks. The purpose of this function is to partition text data into segments of a specified maximum length, for example, this sentence 145 is characters. This enables processing or storage of data in manageable parts.'),
    (2, 'This sentence should be its own chunk. This too.');

-- Define a preparer that applies the ChunkText operation to
-- source_table__1628.content, keyed by id, targeting chunked_data__1628.chunks.
SELECT aidb.create_preparer_for_table(
    name => 'preparer__1628',
    operation => 'ChunkText',
    source_table => 'source_table__1628',
    source_data_column => 'content',
    destination_table => 'chunked_data__1628',
    destination_data_column => 'chunks',
    source_key_column => 'id',
    destination_key_column => 'id',
    -- Configuration for the ChunkText operation
    options => '{"desired_length": 1, "max_length": 1000}'::JSONB
);

-- Run the preparer; its output is read from chunked_data__1628 below.
SELECT aidb.bulk_data_preparation('preparer__1628');

SELECT * FROM chunked_data__1628;

-- Unnest chunk text arrays: one output row per chunk, numbered within each id.
-- The lateral join is spelled out (instead of a comma join) and the result is
-- ordered so the output is deterministic.
SELECT
    cd.id,
    chunk_list.chunk_number,
    chunk_list.chunk
FROM chunked_data__1628 AS cd
CROSS JOIN LATERAL unnest(cd.chunks) WITH ORDINALITY AS chunk_list (chunk, chunk_number)
ORDER BY
    cd.id,
    chunk_list.chunk_number;
