@@ -242,6 +242,55 @@ def mtmd_bitmap_init_from_audio(
242242def mtmd_bitmap_free (bitmap : mtmd_bitmap_p , / ): ...
243243
244244
# MTMD_API uint32_t mtmd_bitmap_get_nx(const mtmd_bitmap * bitmap);
@ctypes_function(
    "mtmd_bitmap_get_nx",
    [mtmd_bitmap_p_ctypes],
    c_uint32,
)
def mtmd_bitmap_get_nx(bitmap: mtmd_bitmap_p, /) -> int:
    """Return the width of ``bitmap`` in pixels.

    Binding for the C function ``mtmd_bitmap_get_nx``, whose return type is
    ``uint32_t``.
    """
250+
251+
# MTMD_API uint32_t mtmd_bitmap_get_ny(const mtmd_bitmap * bitmap);
@ctypes_function(
    "mtmd_bitmap_get_ny",
    [mtmd_bitmap_p_ctypes],
    c_uint32,
)
def mtmd_bitmap_get_ny(bitmap: mtmd_bitmap_p, /) -> int:
    """Return the height of ``bitmap`` in pixels.

    Binding for the C function ``mtmd_bitmap_get_ny``, whose return type is
    ``uint32_t``.
    """
257+
258+
# MTMD_API const unsigned char * mtmd_bitmap_get_data(const mtmd_bitmap * bitmap);
@ctypes_function(
    "mtmd_bitmap_get_data",
    [mtmd_bitmap_p_ctypes],
    POINTER(c_uint8),
)
def mtmd_bitmap_get_data(
    bitmap: mtmd_bitmap_p, /
) -> Optional[CtypesArray[c_uint8]]:
    """Return a pointer to the raw data buffer of ``bitmap``.

    The C prototype returns ``const unsigned char *``, so the buffer should be
    treated as read-only.
    NOTE(review): the buffer length is presumably what
    ``mtmd_bitmap_get_n_bytes`` reports -- confirm against mtmd.h.
    """
264+
265+
# MTMD_API size_t mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap);
@ctypes_function(
    "mtmd_bitmap_get_n_bytes",
    [mtmd_bitmap_p_ctypes],
    c_size_t,
)
def mtmd_bitmap_get_n_bytes(bitmap: mtmd_bitmap_p, /) -> int:
    """Return the size of the bitmap data in bytes.

    Binding for the C function ``mtmd_bitmap_get_n_bytes``, whose return type
    is ``size_t``.
    """
271+
272+
# MTMD_API bool mtmd_bitmap_is_audio(const mtmd_bitmap * bitmap);
@ctypes_function(
    "mtmd_bitmap_is_audio",
    [mtmd_bitmap_p_ctypes],
    c_bool,
)
def mtmd_bitmap_is_audio(bitmap: mtmd_bitmap_p, /) -> bool:
    """Report whether ``bitmap`` contains audio data.

    Binding for the C function ``mtmd_bitmap_is_audio``, whose return type is
    ``bool``.
    """
278+
279+
# MTMD_API const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap);
@ctypes_function(
    "mtmd_bitmap_get_id",
    [mtmd_bitmap_p_ctypes],
    c_char_p,
)
def mtmd_bitmap_get_id(bitmap: mtmd_bitmap_p, /) -> Optional[bytes]:
    """Return the optional identifier of ``bitmap``.

    The C function returns ``const char *``; with a ``c_char_p`` restype the
    result arrives as ``bytes``, or ``None`` for a NULL pointer.
    """
285+
286+
# MTMD_API void mtmd_bitmap_set_id(mtmd_bitmap * bitmap, const char * id);
@ctypes_function(
    "mtmd_bitmap_set_id",
    [mtmd_bitmap_p_ctypes, c_char_p],
    None,
)
def mtmd_bitmap_set_id(
    bitmap: mtmd_bitmap_p,
    id: Optional[bytes],
    /,
):
    """Assign the optional identifier of ``bitmap``.

    ``id`` mirrors the parameter name of the C prototype and is passed as a
    ``const char *``; the annotation allows ``None``. The C function returns
    ``void``.
    """
292+
293+
# MTMD_API mtmd_input_chunks * mtmd_input_chunks_init(void);
@ctypes_function(
    "mtmd_input_chunks_init",
    [],
    mtmd_input_chunks_p_ctypes,
)
def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]:
    """Create an ``mtmd_input_chunks`` object.

    Binding for the C function ``mtmd_input_chunks_init``, which takes no
    arguments and returns an ``mtmd_input_chunks *``. The annotated return
    type allows ``None``.
    """
@@ -315,11 +364,146 @@ def mtmd_input_chunk_get_tokens_text(
315364) -> Optional ["_Pointer[llama_cpp.llama_token]" ]: ...
316365
317366
# MTMD_API const mtmd_image_tokens * mtmd_input_chunk_get_tokens_image(const mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_input_chunk_get_tokens_image",
    [mtmd_input_chunk_p_ctypes],
    mtmd_image_tokens_p_ctypes,
)
def mtmd_input_chunk_get_tokens_image(
    chunk: mtmd_input_chunk_p,
    /,
) -> Optional[mtmd_image_tokens_p]:
    """Return the image tokens attached to ``chunk``.

    The C prototype returns ``const mtmd_image_tokens *``. The annotated
    return type allows ``None``.
    NOTE(review): presumably NULL/None for chunks that are not image chunks
    -- confirm against mtmd.h.
    """
376+
377+
# MTMD_API const char * mtmd_input_chunk_get_id(const mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_input_chunk_get_id",
    [mtmd_input_chunk_p_ctypes],
    c_char_p,
)
def mtmd_input_chunk_get_id(chunk: mtmd_input_chunk_p, /) -> Optional[bytes]:
    """Return the optional identifier of ``chunk``.

    The C function returns ``const char *``; with a ``c_char_p`` restype the
    result arrives as ``bytes``, or ``None`` for a NULL pointer.
    """
383+
384+
# MTMD_API llama_pos mtmd_input_chunk_get_n_pos(const mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_input_chunk_get_n_pos", [mtmd_input_chunk_p_ctypes], llama_cpp.llama_pos
)
def mtmd_input_chunk_get_n_pos(
    chunk: mtmd_input_chunk_p,
    /,
) -> int:
    """Return how many positions ``chunk`` consumes.

    Binding for the C function ``mtmd_input_chunk_get_n_pos``, whose return
    type is ``llama_pos``.
    """
394+
395+
# MTMD_API mtmd_input_chunk * mtmd_input_chunk_copy(const mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_input_chunk_copy",
    [mtmd_input_chunk_p_ctypes],
    mtmd_input_chunk_p_ctypes,
)
def mtmd_input_chunk_copy(
    chunk: mtmd_input_chunk_p,
    /,
) -> Optional[mtmd_input_chunk_p]:
    """Duplicate ``chunk``; the caller owns the returned copy.

    The C function returns a non-const ``mtmd_input_chunk *``. The annotated
    return type allows ``None``.
    """
403+
404+
# MTMD_API void mtmd_input_chunk_free(mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_input_chunk_free",
    [mtmd_input_chunk_p_ctypes],
    None,
)
def mtmd_input_chunk_free(chunk: mtmd_input_chunk_p, /):
    """Release an input chunk that the caller owns.

    Binding for the C function ``mtmd_input_chunk_free``, which returns
    ``void``.
    """
410+
411+
# MTMD_API size_t mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens);
@ctypes_function(
    "mtmd_image_tokens_get_n_tokens",
    [mtmd_image_tokens_p_ctypes],
    c_size_t,
)
def mtmd_image_tokens_get_n_tokens(
    image_tokens: mtmd_image_tokens_p,
    /,
) -> int:
    """Return the number of tokens in ``image_tokens``.

    Binding for the C function ``mtmd_image_tokens_get_n_tokens``, whose
    return type is ``size_t``.
    """
419+
420+
# MTMD_API size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens);
@ctypes_function(
    "mtmd_image_tokens_get_nx",
    [mtmd_image_tokens_p_ctypes],
    c_size_t,
)
def mtmd_image_tokens_get_nx(
    image_tokens: mtmd_image_tokens_p,
    /,
) -> int:
    """Return the width of the image token grid.

    Binding for the C function ``mtmd_image_tokens_get_nx``, whose return
    type is ``size_t``.
    """
426+
427+
# MTMD_API size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens);
@ctypes_function(
    "mtmd_image_tokens_get_ny",
    [mtmd_image_tokens_p_ctypes],
    c_size_t,
)
def mtmd_image_tokens_get_ny(
    image_tokens: mtmd_image_tokens_p,
    /,
) -> int:
    """Return the height of the image token grid.

    Binding for the C function ``mtmd_image_tokens_get_ny``, whose return
    type is ``size_t``.
    """
433+
434+
# MTMD_API const char * mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens);
@ctypes_function(
    "mtmd_image_tokens_get_id",
    [mtmd_image_tokens_p_ctypes],
    c_char_p,
)
def mtmd_image_tokens_get_id(
    image_tokens: mtmd_image_tokens_p,
    /,
) -> Optional[bytes]:
    """Return the optional identifier of ``image_tokens``.

    The C function returns ``const char *``; with a ``c_char_p`` restype the
    result arrives as ``bytes``, or ``None`` for a NULL pointer.
    """
440+
441+
# MTMD_API llama_pos mtmd_image_tokens_get_n_pos(const mtmd_image_tokens * image_tokens);
@ctypes_function(
    "mtmd_image_tokens_get_n_pos", [mtmd_image_tokens_p_ctypes], llama_cpp.llama_pos
)
def mtmd_image_tokens_get_n_pos(
    image_tokens: mtmd_image_tokens_p,
    /,
) -> int:
    """Return how many positions ``image_tokens`` consumes.

    Binding for the C function ``mtmd_image_tokens_get_n_pos``, whose return
    type is ``llama_pos``.
    """
451+
452+
# MTMD_API int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens);
@ctypes_function(
    "mtmd_encode", [mtmd_context_p_ctypes, mtmd_image_tokens_p_ctypes], c_int
)
def mtmd_encode(
    ctx: mtmd_context_p,
    image_tokens: mtmd_image_tokens_p,
    /,
) -> int:
    """Run an MTMD encode pass over ``image_tokens``.

    Returns the ``int32_t`` result of the C function ``mtmd_encode``.
    NOTE(review): presumably 0 means success -- confirm against mtmd.h.
    """
462+
463+
# MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx, const mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_encode_chunk", [mtmd_context_p_ctypes, mtmd_input_chunk_p_ctypes], c_int
)
def mtmd_encode_chunk(
    ctx: mtmd_context_p,
    chunk: mtmd_input_chunk_p,
    /,
) -> int:
    """Run an MTMD encode pass over a single ``chunk``.

    Returns the ``int32_t`` result of the C function ``mtmd_encode_chunk``.
    NOTE(review): presumably 0 means success -- confirm against mtmd.h.
    """
473+
474+
# MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
@ctypes_function(
    "mtmd_get_output_embd",
    [mtmd_context_p_ctypes],
    POINTER(c_float),
)
def mtmd_get_output_embd(
    ctx: mtmd_context_p,
    /,
) -> Optional[CtypesArray[c_float]]:
    """Return the embeddings produced by the most recent encode pass.

    The C function returns a ``float *``. The annotated return type allows
    ``None``.
    NOTE(review): the number of floats behind the pointer is not visible
    here -- confirm the expected length against mtmd.h.
    """
480+
481+
# MTMD_API mtmd_input_chunks * mtmd_test_create_input_chunks(void);
@ctypes_function(
    "mtmd_test_create_input_chunks",
    [],
    mtmd_input_chunks_p_ctypes,
)
def mtmd_test_create_input_chunks() -> Optional[mtmd_input_chunks_p]:
    """Build the MTMD test chunks used by the C API tests.

    Binding for the C function ``mtmd_test_create_input_chunks``, which takes
    no arguments and returns an ``mtmd_input_chunks *``.
    """
487+
488+
318489################################################
319490# mtmd-helper.h functions
320491################################################
321492
322493
# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname);
@ctypes_function(
    "mtmd_helper_bitmap_init_from_file", [mtmd_context_p_ctypes, c_char_p], mtmd_bitmap_p_ctypes
)
def mtmd_helper_bitmap_init_from_file(
    ctx: mtmd_context_p,
    fname: bytes,
    /,
) -> Optional[mtmd_bitmap_p]:
    """Create an MTMD bitmap from the file named ``fname``.

    ``fname`` is passed to C as a ``const char *``, so it must be ``bytes``.
    The C function returns an ``mtmd_bitmap *``; the annotated return type
    allows ``None``.
    """
505+
506+
323507# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len);
324508@ctypes_function (
325509 "mtmd_helper_bitmap_init_from_buf" ,
@@ -339,6 +523,52 @@ def mtmd_helper_bitmap_init_from_buf(
339523def mtmd_helper_get_n_tokens (chunks : mtmd_input_chunks_p , / ) -> int : ...
340524
341525
# MTMD_API llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks);
@ctypes_function(
    "mtmd_helper_get_n_pos", [mtmd_input_chunks_p_ctypes], llama_cpp.llama_pos
)
def mtmd_helper_get_n_pos(
    chunks: mtmd_input_chunks_p,
    /,
) -> int:
    """Return the total number of positions consumed by ``chunks``.

    Binding for the C function ``mtmd_helper_get_n_pos``, whose return type
    is ``llama_pos``.
    """
535+
536+
# MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
#                                          struct llama_context * lctx,
#                                          const mtmd_input_chunks * chunks,
#                                          llama_pos n_past,
#                                          llama_seq_id seq_id,
#                                          int32_t n_batch,
#                                          bool logits_last,
#                                          llama_pos * new_n_past);
_mtmd_helper_eval_chunks_argtypes = [
    mtmd_context_p_ctypes,
    llama_cpp.llama_context_p_ctypes,
    mtmd_input_chunks_p_ctypes,
    llama_cpp.llama_pos,
    llama_cpp.llama_seq_id,
    c_int,
    c_bool,
    POINTER(llama_cpp.llama_pos),
]


@ctypes_function("mtmd_helper_eval_chunks", _mtmd_helper_eval_chunks_argtypes, c_int)
def mtmd_helper_eval_chunks(
    ctx: mtmd_context_p,
    lctx: llama_cpp.llama_context_p,
    chunks: mtmd_input_chunks_p,
    n_past: llama_cpp.llama_pos,
    seq_id: llama_cpp.llama_seq_id,
    n_batch: Union[c_int, int],
    logits_last: Union[c_bool, bool],
    new_n_past: "_Pointer[llama_cpp.llama_pos]",
    /,
) -> int:
    """Binding for the C helper ``mtmd_helper_eval_chunks``.

    Args:
        ctx: MTMD context pointer.
        lctx: llama context pointer (``struct llama_context *``).
        chunks: input chunks pointer (``const mtmd_input_chunks *``).
        n_past: ``llama_pos`` value passed by value.
        seq_id: ``llama_seq_id`` value passed by value.
        n_batch: ``int32_t`` value in the C prototype.
        logits_last: ``bool`` value in the C prototype.
        new_n_past: pointer to a ``llama_pos``.
            NOTE(review): presumably an out-parameter that receives the
            updated position -- confirm against mtmd-helper.h.

    Returns:
        The ``int32_t`` result of the C function.
        NOTE(review): presumably 0 on success -- confirm against
        mtmd-helper.h.
    """
570+
571+
342572# MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
343573# struct llama_context * lctx,
344574# const mtmd_input_chunk * chunk,
@@ -374,6 +604,43 @@ def mtmd_helper_eval_chunk_single(
374604) -> int : ...
375605
376606
# MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
#                                                 struct llama_context * lctx,
#                                                 const mtmd_input_chunk * chunk,
#                                                 float * encoded_embd,
#                                                 llama_pos n_past,
#                                                 llama_seq_id seq_id,
#                                                 int32_t n_batch,
#                                                 llama_pos * new_n_past);
_mtmd_helper_decode_image_chunk_argtypes = [
    mtmd_context_p_ctypes,
    llama_cpp.llama_context_p_ctypes,
    mtmd_input_chunk_p_ctypes,
    POINTER(c_float),
    llama_cpp.llama_pos,
    llama_cpp.llama_seq_id,
    c_int,
    POINTER(llama_cpp.llama_pos),
]


@ctypes_function(
    "mtmd_helper_decode_image_chunk",
    _mtmd_helper_decode_image_chunk_argtypes,
    c_int,
)
def mtmd_helper_decode_image_chunk(
    ctx: mtmd_context_p,
    lctx: llama_cpp.llama_context_p,
    chunk: mtmd_input_chunk_p,
    encoded_embd: CtypesArray[c_float],
    n_past: llama_cpp.llama_pos,
    seq_id: llama_cpp.llama_seq_id,
    n_batch: Union[c_int, int],
    new_n_past: "_Pointer[llama_cpp.llama_pos]",
    /,
) -> int:
    """Decode an image chunk whose embeddings were already encoded.

    Args:
        ctx: MTMD context pointer.
        lctx: llama context pointer (``struct llama_context *``).
        chunk: input chunk pointer (``const mtmd_input_chunk *``).
        encoded_embd: ``float *`` buffer holding the pre-encoded embeddings.
        n_past: ``llama_pos`` value passed by value.
        seq_id: ``llama_seq_id`` value passed by value.
        n_batch: ``int32_t`` value in the C prototype.
        new_n_past: pointer to a ``llama_pos``.
            NOTE(review): presumably an out-parameter that receives the
            updated position -- confirm against mtmd-helper.h.

    Returns:
        The ``int32_t`` result of the C function.
        NOTE(review): presumably 0 on success -- confirm against
        mtmd-helper.h.
    """
642+
643+
377644# MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data);
378645@ctypes_function (
379646 "mtmd_log_set" ,
0 commit comments