@@ -1344,7 +1344,7 @@ def read_parquet(
13441344 "The provided path contains a wildcard character (*), which is not "
13451345 "supported by the current engine. To read files from wildcard paths, "
13461346 "please use the 'bigquery' engine by setting `engine='bigquery'` in "
1347- "your configuration ."
1347+ "the function call ."
13481348 )
13491349
13501350 read_parquet_kwargs : Dict [str , Any ] = {}
@@ -1360,6 +1360,87 @@ def read_parquet(
13601360 )
13611361 return self ._read_pandas (pandas_obj , write_engine = write_engine )
13621362
def read_orc(
    self,
    path: str | IO["bytes"],
    *,
    engine: str = "auto",
    write_engine: constants.WriteEngineType = "default",
) -> dataframe.DataFrame:
    """Load an ORC file to a BigQuery DataFrames DataFrame.

    Args:
        path (str or IO):
            The path or buffer to the ORC file. Can be a local path or Google Cloud Storage URI.
            String paths containing a wildcard (``*``) are only accepted when
            ``engine="bigquery"``.
        engine (str, default "auto"):
            The engine used to read the file. Supported values: `auto`, `bigquery`, `pyarrow`.
            With `bigquery` the file is ingested through a load job; with `auto` or
            `pyarrow` it is read via ``pandas.read_orc`` and then uploaded.
        write_engine (str, default "default"):
            The write engine used to persist the data to BigQuery if needed.
            It is forwarded on the `auto`/`pyarrow` path only.

    Returns:
        bigframes.pandas.DataFrame:
            A new DataFrame representing the data from the ORC file.

    Raises:
        ValueError:
            If ``engine`` is not one of the supported values, or if ``path`` is a
            string containing ``*`` and the engine is `auto` or `pyarrow`.
    """
    # Validate the engine / write_engine pairing before doing any work.
    bigframes.session.validation.validate_engine_compatibility(
        engine=engine,
        write_engine=write_engine,
    )

    if engine == "bigquery":
        job_config = bigquery.LoadJobConfig()
        job_config.source_format = bigquery.SourceFormat.ORC
        job_config.labels = {"bigframes-api": "read_orc"}
        table_id = self._loader.load_file(path, job_config=job_config)
        return self._loader.read_gbq_table(table_id)

    if engine in ("auto", "pyarrow"):
        if isinstance(path, str) and "*" in path:
            # `engine` is an argument of this call, not a session/config option,
            # so point the user at the function call (same wording as read_parquet).
            raise ValueError(
                "The provided path contains a wildcard character (*), which is not "
                "supported by the current engine. To read files from wildcard paths, "
                "please use the 'bigquery' engine by setting `engine='bigquery'` in "
                "the function call."
            )

        read_orc_kwargs: Dict[str, Any] = {}
        # The `dtype_backend` argument is only passed for non-1.x pandas versions.
        if not pandas.__version__.startswith("1."):
            read_orc_kwargs["dtype_backend"] = "pyarrow"

        pandas_obj = pandas.read_orc(path, **read_orc_kwargs)
        return self._read_pandas(pandas_obj, write_engine=write_engine)

    raise ValueError(
        f"Unsupported engine: {repr(engine)}. Supported values: 'auto', 'bigquery', 'pyarrow'."
    )
1413+
def read_avro(
    self,
    path: str | IO["bytes"],
    *,
    engine: str = "auto",
) -> dataframe.DataFrame:
    """Read an Avro file into a BigQuery DataFrames DataFrame.

    The file is always ingested through a BigQuery load job configured for
    the Avro source format with Avro logical types enabled.

    Args:
        path (str or IO):
            Path or buffer of the Avro file; either a local path or a
            Google Cloud Storage URI.
        engine (str, default "auto"):
            Engine used to read the file. Both `auto` and `bigquery` take the
            BigQuery load-job path; no other engine is accepted for Avro.

    Returns:
        bigframes.pandas.DataFrame:
            A new DataFrame backed by the table the Avro data was loaded into.

    Raises:
        ValueError:
            If ``engine`` is neither `auto` nor `bigquery`.
    """
    accepted_engines = ("auto", "bigquery")
    if engine in accepted_engines:
        # Configure the load job: Avro input, logical types mapped, API label set.
        load_config = bigquery.LoadJobConfig()
        load_config.use_avro_logical_types = True
        load_config.source_format = bigquery.SourceFormat.AVRO
        load_config.labels = {"bigframes-api": "read_avro"}

        loaded_table = self._loader.load_file(path, job_config=load_config)
        return self._loader.read_gbq_table(loaded_table)

    raise ValueError(
        f"Unsupported engine: {engine!r}. Supported values: 'auto', 'bigquery'."
    )
1443+
13631444 def read_json (
13641445 self ,
13651446 path_or_buf : str | IO ["bytes" ],
0 commit comments