@@ -13,6 +13,7 @@ use gcp_bigquery_client::{
 use prost::Message;
 use std::fmt;
 use std::sync::Arc;
+use tonic::Code;
 use tracing::{debug, info};
 
 use crate::bigquery::encoding::BigQueryTableRow;
@@ -410,20 +411,17 @@ impl BigQueryClient {
         }
     }
 
-    /// Streams table batches to BigQuery using the concurrent Storage Write API.
+    /// Appends table batches to BigQuery using the concurrent Storage Write API.
     ///
-    /// Accepts pre-constructed TableBatch objects and processes them concurrently with
-    /// controlled parallelism. This allows streaming to multiple different tables efficiently
-    /// in a single call.
+    /// Accepts pre-constructed TableBatch objects wrapped in Arc and processes them concurrently
+    /// with controlled parallelism. This allows streaming to multiple different tables efficiently
+    /// in a single call. The Arc wrapping enables efficient retry operations without cloning data.
     ///
     /// If ordering is not required, you may split a table's data into multiple batches,
     /// which can be processed concurrently.
     /// If ordering guarantees are needed, all data for a given table must be included
     /// in a single batch.
-    ///
-    /// TODO: we might want to improve the detection of retriable errors by having a special error
-    /// type that we return for this.
-    pub async fn stream_table_batches_concurrent<I>(
+    pub async fn append_table_batches<I>(
         &self,
         table_batches: I,
         max_concurrent_streams: usize,
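
The rename also changes the calling convention: batches now arrive pre-wrapped in `Arc`, so retries clone a pointer rather than the row data. A minimal caller-side sketch of the new API, assuming a `Vec<TableBatch>` built elsewhere; the `EtlResult<()>` return type, the `flush_batches` helper, and the concurrency value of 4 are illustrative assumptions, not part of this diff.

```rust
use std::sync::Arc;

// Hypothetical caller; only append_table_batches comes from this diff.
async fn flush_batches(
    client: &BigQueryClient,
    batches: Vec<TableBatch>, // assumed to be constructed elsewhere
) -> EtlResult<()> {
    // Wrap each batch in Arc once up front; a retry clones the Arc
    // (a reference-count bump), never the underlying rows.
    let batches: Vec<Arc<TableBatch>> = batches.into_iter().map(Arc::new).collect();

    // Stream up to 4 tables concurrently in a single call.
    client.append_table_batches(batches, 4).await
}
```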
@@ -538,6 +536,15 @@ impl BigQueryClient {
         Ok(ResultSet::new_from_query_response(query_response))
     }
 
+    /// Releases all connections currently held in the connection pool.
+    ///
+    /// Removes all idle connections, forcing new requests to create fresh connections.
+    /// This is useful after DDL operations (e.g., ALTER TABLE) when BigQuery's Storage
+    /// Write API may have stale schema information cached in existing connections.
+    pub fn release_all_connections(&self) {
+        self.client.storage().release_all_connections();
+    }
+
     /// Sanitizes a BigQuery identifier for safe backtick quoting.
     ///
     /// Rejects empty identifiers and identifiers containing control characters. Internal backticks
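
A sketch of the intended call pattern for the new pool-flush helper, placed right after a DDL statement so the next append opens a connection that sees the post-DDL schema. The `add_column` wrapper and the `run_query` helper are hypothetical; only `release_all_connections()` comes from this diff.

```rust
// Hypothetical DDL flow; run_query is an assumed query helper.
async fn add_column(
    client: &BigQueryClient,
    table: &str,
    column_ddl: &str,
) -> EtlResult<()> {
    run_query(client, &format!("ALTER TABLE `{table}` ADD COLUMN {column_ddl}")).await?;

    // Drop idle pooled connections so subsequent Storage Write API appends do
    // not reuse a connection with the pre-DDL schema cached.
    client.release_all_connections();
    Ok(())
}
```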
@@ -873,19 +880,123 @@ fn bq_error_to_etl_error(err: BQError) -> EtlError {
             (ErrorKind::InvalidData, "BigQuery invalid metadata value")
         }
         BQError::TonicStatusError(status) => {
-            // Since we do not have access to the `Code` type from `tonic`, we just match on the description
-            // statically.
-            if status.code().description()
-                == "The caller does not have permission to execute the specified operation"
-            {
-                (ErrorKind::PermissionDenied, "BigQuery permission denied")
-            } else if is_retryable_streaming_message(status.message()) {
-                (
+            // First check for schema mismatch patterns in the message, as these can occur
+            // with various gRPC codes after DDL operations.
+            if is_retryable_streaming_message(status.message()) {
+                return etl_error!(
                     ErrorKind::DestinationSchemaMismatch,
                     "BigQuery schema mismatch",
-                )
-            } else {
-                (ErrorKind::DestinationError, "BigQuery gRPC status error")
+                    err.to_string()
+                );
+            }
+
+            match status.code() {
+                // Code::Unavailable (14) - "The service is currently unavailable."
+                // This is the primary retryable code per Google AIP-194. It indicates transient
+                // conditions like network hiccups or intentional throttling. BigQuery returns this
+                // with messages like "Task is overloaded (cpu-protection)" or "(memory-protection)"
+                // when the service is temporarily overwhelmed. Safe to retry with exponential backoff.
+                Code::Unavailable => (ErrorKind::DestinationThrottled, "BigQuery unavailable"),
+
+                // Code::ResourceExhausted (8) - "Some resource has been exhausted."
+                // Per Google AIP-194: "This code may be a signal that quota is exhausted. Retries
+                // therefore may not be expected to work for several hours; meanwhile the retries
+                // may have billing implications." We do NOT retry this to avoid wasting resources
+                // on quota exhaustion that won't recover quickly.
+                Code::ResourceExhausted => {
+                    (ErrorKind::DestinationError, "BigQuery resource exhausted")
+                }
+
+                // Code::PermissionDenied (7) - "The caller does not have permission."
+                // Authorization failure. The request will never succeed without configuration
+                // changes (e.g., granting IAM permissions). Never retry.
+                Code::PermissionDenied => {
+                    (ErrorKind::DestinationError, "BigQuery permission denied")
+                }
+
+                // Code::Unauthenticated (16) - "Missing or invalid authentication credentials."
+                // Authentication failure. Requires credential refresh or configuration fix.
+                // Never retry automatically.
+                Code::Unauthenticated => (
+                    ErrorKind::DestinationError,
+                    "BigQuery authentication failed",
+                ),
+
+                // Code::InvalidArgument (3) - "Client specified an invalid argument."
+                // Malformed request or invalid data. This is a client bug that won't be fixed
+                // by retrying. Never retry.
+                Code::InvalidArgument => (ErrorKind::DestinationError, "BigQuery invalid argument"),
+
+                // Code::NotFound (5) - "Some requested entity was not found."
+                // The resource (table, dataset, stream) doesn't exist. Requires creating the
+                // resource first. Never retry.
+                Code::NotFound => (
+                    ErrorKind::DestinationTableMissing,
+                    "BigQuery entity not found",
+                ),
+
+                // Code::AlreadyExists (6) - "The entity already exists."
+                // Conflict during creation. For streaming with offsets, this may indicate the
+                // row was already written (safe to ignore). Never retry.
+                Code::AlreadyExists => (
+                    ErrorKind::DestinationTableAlreadyExists,
+                    "BigQuery entity already exists",
+                ),
+
+                // Code::FailedPrecondition (9) - "System is not in required state."
+                // The operation can't proceed due to system state (e.g., non-empty table for
+                // certain operations). Requires explicit state change before retrying.
+                // Per gRPC spec: "Use FAILED_PRECONDITION if the client should not retry until
+                // the system state has been explicitly fixed." Never retry automatically.
+                Code::FailedPrecondition => {
+                    (ErrorKind::DestinationError, "BigQuery precondition failed")
+                }
+
+                // Code::OutOfRange (11) - "Operation attempted past the valid range."
+                // For streaming, this typically means the specified offset is beyond the current
+                // end of the stream, indicating a previous write failed. Requires application-level
+                // recovery (retry from last successful write). Never retry at this level.
+                Code::OutOfRange => (ErrorKind::DestinationError, "BigQuery offset out of range"),
+
+                // Code::Aborted (10) - "The operation was aborted."
+                // Typically due to concurrency issues (sequencer check failure, transaction abort).
+                // Per gRPC spec: "Use ABORTED if the client should retry at a higher level."
+                // This means retry the entire transaction, not just this request. We don't retry
+                // here; the caller should handle transaction-level retry if needed.
+                Code::Aborted => (ErrorKind::DestinationError, "BigQuery operation aborted"),
+
+                // Code::Internal (13) - "Internal server error."
+                // Per Google AIP-194: "This error must be surfaced to the application immediately;
+                // it usually means a bug should be filed against the system." While BigQuery docs
+                // suggest these can be retried, AIP-194 recommends surfacing them. The underlying
+                // client library may already retry these internally before surfacing to us.
+                Code::Internal => (ErrorKind::DestinationError, "BigQuery internal error"),
+
+                // Code::DeadlineExceeded (4) - "Deadline expired before operation could complete."
+                // Per Google AIP-194: "An application can set a deadline, which must be honored."
+                // Retrying could violate the application's timeout expectations. The caller should
+                // decide whether to retry with a new deadline.
+                Code::DeadlineExceeded => {
+                    (ErrorKind::DestinationError, "BigQuery deadline exceeded")
+                }
+
+                // Code::Cancelled (1) - "The operation was cancelled."
+                // Typically client-initiated cancellation. Never retry.
+                Code::Cancelled => (ErrorKind::DestinationError, "BigQuery operation cancelled"),
+
+                // Code::Unimplemented (12) - "Operation not implemented or supported."
+                // The requested operation is not available. Never retry.
+                Code::Unimplemented => (
+                    ErrorKind::DestinationError,
+                    "BigQuery operation not supported",
+                ),
+
+                // Code::DataLoss (15) - "Unrecoverable data loss or corruption."
+                // Severe error indicating data corruption. Never retry.
+                Code::DataLoss => (ErrorKind::DestinationError, "BigQuery data loss"),
+
+                // Catch-all for unexpected codes.
+                _ => (ErrorKind::DestinationError, "BigQuery gRPC error"),
             }
         }
 
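
Taken together, the classification gives callers a single transient-error signal: only `Code::Unavailable` maps to `ErrorKind::DestinationThrottled`, and post-DDL schema drift surfaces as `DestinationSchemaMismatch`. A sketch of a caller-side retry loop built on that contract follows; the `EtlError::kind()` accessor, the backoff schedule, and the attempt cap are assumptions for illustration, not part of this diff.

```rust
use std::{sync::Arc, time::Duration};

// Hypothetical retry wrapper; only the ErrorKind mapping comes from this diff.
async fn append_with_backoff(
    client: &BigQueryClient,
    batch: Arc<TableBatch>,
) -> EtlResult<()> {
    let mut delay = Duration::from_millis(200);
    let mut attempts = 0;
    loop {
        match client.append_table_batches([batch.clone()], 1).await {
            Ok(()) => return Ok(()),
            // Transient unavailability (gRPC UNAVAILABLE): back off and retry.
            Err(e) if e.kind() == ErrorKind::DestinationThrottled && attempts < 5 => {
                attempts += 1;
                tokio::time::sleep(delay).await;
                delay *= 2; // exponential backoff per AIP-194
            }
            // Stale schema after DDL: flush pooled connections, then retry so the
            // next append negotiates the post-DDL schema.
            Err(e) if e.kind() == ErrorKind::DestinationSchemaMismatch && attempts < 5 => {
                attempts += 1;
                client.release_all_connections();
                tokio::time::sleep(delay).await;
                delay *= 2;
            }
            // Everything else (ResourceExhausted, PermissionDenied, ...) surfaces
            // immediately; per the comments above, those should not be retried here.
            Err(e) => return Err(e),
        }
    }
}
```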