I am using protobuf to send rows to Bigquery to populate tables. One issue I am facing is with nested fields/messages. Indeed, I only know how to add a descriptor for one message, but in the case when they are nested, I need to add the descriptor of multiple messages. And I don't know how to do it.
In this example, I am trying to send a Bar message that itself contains a nested repeated Foo message.
Here is my code snippet
Python file
from google.cloud.bigquery_storage_v1 import types, writer, BigQueryWriteClient
from google.protobuf import descriptor_pb2
from test_pb2 import Bar
pb2_class=Bar
write_client = BigQueryWriteClient()
parent = write_client.table_path("my_project", "my_dataset", "my_table")
write_stream = types.WriteStream()
write_stream.type_ = types.WriteStream.Type.PENDING
write_stream = write_client.create_write_stream(
parent=parent, write_stream=write_stream
)
stream_name = write_stream.name
request_template = types.AppendRowsRequest()
request_template.write_stream = stream_name
proto_schema = types.ProtoSchema()
proto_descriptor = descriptor_pb2.DescriptorProto()
pb2_class.DESCRIPTOR.CopyToProto(proto_descriptor) # This only adds a descriptor for Bar ! Foo will be unknown when sending the messages to bigquery
proto_schema.proto_descriptor = proto_descriptor
proto_data = types.AppendRowsRequest.ProtoData()
proto_data.writer_schema = proto_schema
request_template.proto_rows = proto_data
append_rows_stream = writer.AppendRowsStream(write_client, request_template)
proto_rows = types.ProtoRows()
offset = 0
.proto file
syntax = "proto3";
message Foo {
string key = 1;
string value = 2;
}
message Bar {
repeated Foo foo = 1;
string key = 2;
}