@@ -23,18 +23,11 @@ const MIME_TYPES: Record<string, string> = {
2323} ;
2424
2525async function toDataUri ( image : string ) : Promise < string > {
26- if ( image . startsWith ( 'data:' ) ) {
27- return image ;
28- }
26+ if ( image . startsWith ( 'data:' ) ) return image ;
2927
3028 if ( image . startsWith ( 'http://' ) || image . startsWith ( 'https://' ) ) {
3129 const res = await fetch ( image ) ;
32- if ( ! res . ok ) {
33- throw new CLIError (
34- `Failed to download image: HTTP ${ res . status } ` ,
35- ExitCode . GENERAL ,
36- ) ;
37- }
30+ if ( ! res . ok ) throw new CLIError ( `Failed to download image: HTTP ${ res . status } ` , ExitCode . GENERAL ) ;
3831 const contentType = res . headers . get ( 'content-type' ) || 'image/jpeg' ;
3932 const mime = contentType . split ( ';' ) [ 0 ] ! . trim ( ) ;
4033 const buf = await res . arrayBuffer ( ) ;
@@ -43,79 +36,93 @@ async function toDataUri(image: string): Promise<string> {
4336 }
4437
4538 // Local file
46- if ( ! existsSync ( image ) ) {
47- throw new CLIError (
48- `File not found: ${ image } ` ,
49- ExitCode . USAGE ,
50- ) ;
51- }
52-
39+ if ( ! existsSync ( image ) ) throw new CLIError ( `File not found: ${ image } ` , ExitCode . USAGE ) ;
5340 const ext = extname ( image ) . toLowerCase ( ) ;
5441 const mime = MIME_TYPES [ ext ] ;
55- if ( ! mime ) {
56- throw new CLIError (
57- `Unsupported image format "${ ext } ". Supported: jpg, jpeg, png, webp` ,
58- ExitCode . USAGE ,
59- ) ;
60- }
61-
42+ if ( ! mime ) throw new CLIError ( `Unsupported image format "${ ext } ". Supported: jpg, jpeg, png, webp` , ExitCode . USAGE ) ;
6243 const buf = readFileSync ( image ) ;
63- const b64 = buf . toString ( 'base64' ) ;
64- return `data:${ mime } ;base64,${ b64 } ` ;
44+ return `data:${ mime } ;base64,${ buf . toString ( 'base64' ) } ` ;
6545}
6646
/**
 * Decides whether free-form interactive input should be treated as an image
 * source (handed to `toDataUri`) rather than as a pre-uploaded file ID.
 *
 * `data:` URIs are checked explicitly: a base64 data URI usually contains no
 * dot, so the dot/`http` heuristic alone would misroute it as a file ID even
 * though `toDataUri` accepts it.
 *
 * @param input - Non-empty text the user typed at the interactive prompt.
 * @returns `true` when the input looks like a data URI, URL, or file path.
 */
function isImageSource(input: string): boolean {
  return (
    input.startsWith('data:') ||
    input.startsWith('http') ||
    input.includes('.')
  );
}

/**
 * `vision describe` command: sends a prompt plus exactly one image reference
 * (`--image`, converted to a data URI, or `--file-id`, passed through as-is)
 * to the VLM endpoint and prints the response.
 *
 * Throws `CLIError` with `ExitCode.USAGE` when both references are supplied,
 * or when neither is supplied in non-interactive mode.
 */
export default defineCommand({
  name: 'vision describe',
  description: 'Describe an image using MiniMax VLM',
  usage: 'minimax vision describe (--image <path-or-url> | --file-id <id>) [--prompt <text>]',
  options: [
    { flag: '--image <path-or-url>', description: 'Local image path or URL (base64 encoded automatically)' },
    { flag: '--file-id <id>', description: 'Pre-uploaded file ID (skips base64 conversion)' },
    { flag: '--prompt <text>', description: 'Question about the image (default: "Describe the image.")' },
  ],
  examples: [
    'minimax vision describe --image photo.jpg',
    'minimax vision describe --image https://example.com/photo.jpg --prompt "What breed is this dog?"',
    'minimax vision describe --file-id file-123456789 --prompt "Extract the text"',
  ],
  async run(config: Config, flags: GlobalFlags) {
    let image = flags.image as string | undefined;
    let fileId = flags.fileId as string | undefined;
    // `||` is deliberate: an empty --prompt falls back to the default question.
    const prompt = (flags.prompt as string) || 'Describe the image.';

    // --image and --file-id are mutually exclusive.
    if (image && fileId) {
      throw new CLIError(
        'Conflicting arguments: cannot provide both --image and --file-id.',
        ExitCode.USAGE,
      );
    }

    // Neither was given: prompt when interactive, otherwise fail with usage.
    if (!image && !fileId) {
      if (!isInteractive({ nonInteractive: config.nonInteractive })) {
        throw new CLIError(
          'Missing required argument. Must provide either --image or --file-id.',
          ExitCode.USAGE,
          'minimax vision describe --image <path> OR --file-id <id>',
        );
      }

      const hint = await promptText({
        message: 'Enter image path, URL, or File ID:',
      });
      if (!hint) {
        process.stderr.write('Vision describe cancelled.\n');
        process.exit(1);
      }

      if (isImageSource(hint)) {
        image = hint;
      } else {
        fileId = hint;
      }
    }

    const format = detectOutputFormat(config.output);

    if (config.dryRun) {
      process.stdout.write(formatOutput({ request: { prompt, image, fileId } }, format) + '\n');
      return;
    }

    const url = vlmEndpoint(config.baseUrl);
    const body: Record<string, unknown> = { prompt };

    if (fileId) {
      // A file ID is sent as-is; no download or base64 conversion happens.
      body.file_id = fileId;
    } else if (image) {
      // Local paths and URLs are converted to a data URI before sending.
      body.image_url = await toDataUri(image);
    }

    const response = await requestJson<VlmResponse>(config, {
      url,
      method: 'POST',
      body,
    });

    if (format !== 'text') {
      process.stdout.write(formatOutput(response, format) + '\n');
      return;
    }

    process.stdout.write(response.content + '\n');
  },
});
0 commit comments