@@ -7,7 +7,7 @@ use cid::Cid;
77use reqwest:: { Client , multipart} ;
88use serde:: Deserialize ;
99use std:: time:: Duration ;
10- use tracing:: { instrument , warn } ;
10+ use tracing:: { debug , instrument } ;
1111
1212/// Maximum size for a single IPFS block (slightly under 1MB to be safe)
1313/// Data larger than this MUST use UnixFS chunking via /api/v0/add
@@ -138,10 +138,23 @@ impl IpfsBlockStore {
138138 }
139139
140140 /// Get block by CID
141+ ///
142+ /// For raw blocks (codec 0x55), uses /api/v0/block/get directly.
143+ /// For UnixFS/dag-pb blocks (codec 0x70), uses /api/v0/cat to traverse
144+ /// the DAG and return the complete file content.
141145 #[ instrument( skip( self ) ) ]
142146 pub async fn get_block_raw ( & self , cid : & Cid ) -> Result < Bytes > {
147+ // DAG-PB codec (0x70) indicates a UnixFS file that may have been chunked.
148+ // We must use /api/v0/cat to traverse the DAG and get the full content.
149+ // Using block/get on a dag-pb CID returns only the protobuf metadata, not the file.
150+ const DAG_PB_CODEC : u64 = 0x70 ;
151+
152+ if cid. codec ( ) == DAG_PB_CODEC {
153+ return self . cat_unixfs ( cid) . await ;
154+ }
155+
143156 let url = format ! ( "{}/api/v0/block/get?arg={}" , self . config. api_url, cid) ;
144-
157+
145158 let response = self . client . post ( & url) . send ( ) . await ?;
146159
147160 if !response. status ( ) . is_success ( ) {
@@ -161,6 +174,33 @@ impl IpfsBlockStore {
161174 . map_err ( |e| BlockStoreError :: IpfsApi ( e. to_string ( ) ) )
162175 }
163176
177+ /// Retrieve UnixFS content by traversing the DAG
178+ ///
179+ /// Uses /api/v0/cat which automatically handles chunked files by
180+ /// following links in the DAG structure and reassembling the content.
181+ #[ instrument( skip( self ) ) ]
182+ async fn cat_unixfs ( & self , cid : & Cid ) -> Result < Bytes > {
183+ let url = format ! ( "{}/api/v0/cat?arg={}" , self . config. api_url, cid) ;
184+
185+ let response = self . client . post ( & url) . send ( ) . await ?;
186+
187+ if !response. status ( ) . is_success ( ) {
188+ if response. status ( ) . as_u16 ( ) == 404 {
189+ return Err ( BlockStoreError :: NotFound ( * cid) ) ;
190+ }
191+ let error = response. text ( ) . await . unwrap_or_default ( ) ;
192+ return Err ( BlockStoreError :: IpfsApi ( format ! (
193+ "Failed to cat UnixFS content: {}" ,
194+ error
195+ ) ) ) ;
196+ }
197+
198+ response
199+ . bytes ( )
200+ . await
201+ . map_err ( |e| BlockStoreError :: IpfsApi ( e. to_string ( ) ) )
202+ }
203+
164204 /// Put a raw block
165205 /// Note: Raw blocks (file chunks) are NOT pinned inline to avoid timeouts.
166206 /// They are protected by the bucket root's recursive pin after flush_forest.
@@ -169,9 +209,10 @@ impl IpfsBlockStore {
169209 /// stored using UnixFS chunking via /api/v0/add instead of /api/v0/block/put.
170210 #[ instrument( skip( self , data) , fields( size = data. len( ) ) ) ]
171211 pub async fn put_block_raw ( & self , data : & [ u8 ] ) -> Result < Cid > {
172- // CRITICAL: IPFS block/put has a 1MB limit. Use UnixFS add for larger data.
212+ // IPFS block/put has a 1MB limit. Use UnixFS add for larger data.
213+ // This is handled gracefully - retrieval uses /api/v0/cat for dag-pb CIDs.
173214 if data. len ( ) > MAX_BLOCK_SIZE {
174- warn ! (
215+ debug ! (
175216 size = data. len( ) ,
176217 max = MAX_BLOCK_SIZE ,
177218 "Data exceeds IPFS block limit, using UnixFS chunking"
@@ -552,4 +593,80 @@ mod tests {
552593
553594 add_mock. assert_hits_async ( 1 ) . await ;
554595 }
596+
597+ #[ tokio:: test]
598+ async fn get_block_uses_block_get_for_raw_cid ( ) {
599+ let server = MockServer :: start_async ( ) . await ;
600+
601+ // Mock /api/v0/id probe
602+ let _id_mock = server
603+ . mock_async ( |when, then| {
604+ when. method ( POST ) . path ( "/api/v0/id" ) ;
605+ then. status ( 200 ) . body ( "{}" ) ;
606+ } )
607+ . await ;
608+
609+ // Create a raw CID (codec 0x55)
610+ let raw_cid = "bafkreigh2akiscaildc6v5q2xg34x5sqo5djznnha64x4jn3fjvu3j6jci" ;
611+
612+ // Expect block/get to be called for raw CIDs
613+ let block_get = server
614+ . mock_async ( |when, then| {
615+ when. method ( POST )
616+ . path ( "/api/v0/block/get" )
617+ . query_param ( "arg" , raw_cid) ;
618+ then. status ( 200 ) . body ( "hello world" ) ;
619+ } )
620+ . await ;
621+
622+ let store = IpfsBlockStore :: new ( IpfsConfig :: with_url ( server. base_url ( ) ) )
623+ . await
624+ . expect ( "ipfs store init" ) ;
625+
626+ let cid: Cid = raw_cid. parse ( ) . expect ( "parse cid" ) ;
627+ let data = store. get_block ( & cid) . await . expect ( "get block" ) ;
628+
629+ assert_eq ! ( & data[ ..] , b"hello world" ) ;
630+ block_get. assert_hits_async ( 1 ) . await ;
631+ }
632+
633+ #[ tokio:: test]
634+ async fn get_block_uses_cat_for_dagpb_cid ( ) {
635+ let server = MockServer :: start_async ( ) . await ;
636+
637+ // Mock /api/v0/id probe
638+ let _id_mock = server
639+ . mock_async ( |when, then| {
640+ when. method ( POST ) . path ( "/api/v0/id" ) ;
641+ then. status ( 200 ) . body ( "{}" ) ;
642+ } )
643+ . await ;
644+
645+ // Create a dag-pb CID (codec 0x70)
646+ // This is a CIDv1 with dag-pb codec - represents UnixFS chunked file
647+ let dagpb_cid = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" ;
648+
649+ // Expect /api/v0/cat to be called for dag-pb CIDs (not block/get)
650+ let cat_mock = server
651+ . mock_async ( |when, then| {
652+ when. method ( POST )
653+ . path ( "/api/v0/cat" )
654+ . query_param ( "arg" , dagpb_cid) ;
655+ then. status ( 200 ) . body ( "large file content reassembled from chunks" ) ;
656+ } )
657+ . await ;
658+
659+ let store = IpfsBlockStore :: new ( IpfsConfig :: with_url ( server. base_url ( ) ) )
660+ . await
661+ . expect ( "ipfs store init" ) ;
662+
663+ let cid: Cid = dagpb_cid. parse ( ) . expect ( "parse cid" ) ;
664+ // Verify this is indeed a dag-pb CID
665+ assert_eq ! ( cid. codec( ) , 0x70 , "CID should have dag-pb codec" ) ;
666+
667+ let data = store. get_block ( & cid) . await . expect ( "get block via cat" ) ;
668+
669+ assert_eq ! ( & data[ ..] , b"large file content reassembled from chunks" ) ;
670+ cat_mock. assert_hits_async ( 1 ) . await ;
671+ }
555672}
0 commit comments