CAMEL - Build Multi-Agent AI Systems (original) (raw)

Chunkr Reader allows you to process PDFs (and other documents) in chunks, with built-in OCR and format control. Below is a basic usage pattern: initialize the `ChunkrReader` and `ChunkrReaderConfig`, set the file path and chunking options, then submit your task and fetch the results:

import asyncio
from camel.loaders import ChunkrReader, ChunkrReaderConfig

async def main():
    """Submit one document to Chunkr and print the task ID and its output.

    Builds a ``ChunkrReader`` and a ``ChunkrReaderConfig``, submits the file
    at ``file_path``, then requests the output for the returned task ID.
    Exceptions raised during submission or retrieval are caught and reported
    with ``print`` instead of propagating to the caller.
    """
    # Replace with your actual file path.
    file_path = "/path/to/your/document.pdf"

    reader = ChunkrReader()
    settings = ChunkrReaderConfig(
        chunk_processing=512,      # Example: target chunk length
        ocr_strategy="Auto",       # Example: OCR strategy
        high_resolution=False,     # False for faster processing (old "Fast" model)
    )

    try:
        task_id = await reader.submit_task(
            file_path=file_path,
            chunkr_config=settings,
        )
        print(f"Task ID: {task_id}")

        # Without a task ID there is nothing to fetch.
        if not task_id:
            return

        # Poll and fetch the output.
        result_json = await reader.get_task_output(task_id=task_id)
        if not result_json:
            print(f"Failed to get output for task {task_id}, or task did not succeed/was cancelled.")
            return

        print("Task Output:")
        print(result_json)
    except ValueError as err:
        print(f"An error occurred during task submission or retrieval: {err}")
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}. Please check the path.")
    except Exception as err:
        # Last-resort handler so the example reports the failure instead of
        # ending with a traceback.
        print(f"An unexpected error occurred: {err}")

if __name__ == "__main__":
    # The call to main() is intentionally left commented out: running this
    # file as-is only prints the setup instructions below.
    usage_hint = "To run this example, replace '/path/to/your/document.pdf' with a real file path, ensure CHUNKR_API_KEY is set, and uncomment 'asyncio.run(main())'."
    print(usage_hint)
    # asyncio.run(main())  # Uncomment to run the example

> > > Task ID: 7becf001-6f07-4f63-bddf-5633df363bbb
> > > Task Output:
> > > { "task_id": "7becf001-6f07-4f63-bddf-5633df363bbb", "status": "Succeeded", "created_at": "2024-11-08T12:45:04.260765Z", "finished_at": "2024-11-08T12:45:48.942365Z", "expires_at": null, "message": "Task succeeded", "output": { "chunks": [ { "segments": [ { "segment_id": "d53ec931-3779-41be-a220-3fe4da2770c5", "bbox": { "left": 224.16666, "top": 370.0, "width": 2101.6665, "height": 64.166664 }, "page_number": 1, "page_width": 2550.0, "page_height": 3300.0, "content": "Large Language Model based Multi-Agents: A Survey of Progress and Challenges", "segment_type": "Title", "ocr": null, "image": "https://chunkmydocs-bucket-prod.storage.googleapis.com/.../d53ec931-3779-41be-a220-3fe4da2770c5.jpg?...", "html": "<h1>Large Language Model based Multi-Agents: A Survey of Progress and Challenges</h1>", "markdown": "# Large Language Model based Multi-Agents: A Survey of Progress and Challenges\n\n" } ], "chunk_length": 11 }, { "segments": [ { "segment_id": "7bb38fc7-c1b3-4153-a3cc-116c0b9caa0a", "bbox": { "left": 432.49997, "top": 474.16666, "width": 1659.9999, "height": 122.49999 }, "page_number": 1, "page_width": 2550.0, "page_height": 3300.0, "content": "Taicheng Guo 1 , Xiuying Chen 2 , Yaqi Wang 3 \u2217 , Ruidi Chang , Shichao Pei 4 , Nitesh V. Chawla 1 , Olaf Wiest 1 , Xiangliang Zhang 1 \u2020", "segment_type": "Text", "ocr": null, "image": "https://chunkmydocs-bucket-prod.storage.googleapis.com/.../7bb38fc7-c1b3-4153-a3cc-116c0b9caa0a.jpg?...", "html": "<p>Taicheng Guo 1 , Xiuying Chen 2 , Yaqi Wang 3 \u2217 , Ruidi Chang , Shichao Pei 4 , Nitesh V. Chawla 1 , Olaf Wiest 1 , Xiangliang Zhang 1 \u2020</p>", "markdown": "Taicheng Guo 1 , Xiuying Chen 2 , Yaqi Wang 3 \u2217 , Ruidi Chang , Shichao Pei 4 , Nitesh V. Chawla 1 , Olaf Wiest 1 , Xiangliang Zhang 1 \u2020\n\n" } ], "chunk_length": 100 } ] } }