I see. Thanks for clarifying @comaniac, I agree with your comments.
Addressing @merrymercy’s points:
- One possible solution to the redundancy of repeating items such as target string would be to encode something like this:
message AutoTVMLogs{ string target; repeated AutoTVMLog; ...}
where the inner AutoTVMLog no longer carries the target string. However, this change would make it harder to adhere to the “one record per line” JSON convention that AutoTVM currently follows. For simplicity I prefer keeping the redundancy, but since I haven’t worked very closely with the logs myself, I will defer to others’ opinions.
- The proposed implementation will allow manipulation of readable json.
- To accommodate the major differences you indicated, the proto can be modified as desired once Ansor is ready.
Here is an updated proposal of the protobuf given everyone’s feedback.
syntax = "proto3";
package autotvm.log;
import "google/protobuf/any.proto";
// Compilation target referenced by a log entry.
message Target {
  // Target in its string form, e.g. "llvm -mcpu=broadwell".
  // Interim representation only: it should be replaced once the
  // "TVM Target specification" RFC is finalized.
  string target_string = 1;
}
// One AutoTVM log entry: target, task, config and result, plus version
// information.
message AutoTVMLog {
  // Target that was compiled for.
  Target target = 1;

  // The tuning task.
  Task task = 2;

  // Configuration this task used.
  Config config = 3;

  // Results of tuning.
  Result result = 4;

  // Version of the AutoTVM log format, as a SemVer string.
  string version = 5;

  // SemVer string with any qualifiers appended as a suffix,
  // e.g. "0.7.dev1".
  string tvm_version = 6;
}
// A tuning task: its name and its keyword arguments.
message Task {
  // Name of the task, in human-readable form.
  string task_name = 1;

  // Keyword arguments, keyed by argument name.
  map<string, Argument> args = 2;
}
// A single task argument. At most one member of the oneof is set.
message Argument {
  oneof arg {
    Tensor tensor = 1;

    // The set of possible tuple values is not well specified yet and may
    // need more sorting out; see
    // https://github.com/apache/incubator-tvm/blob/master/python/tvm/autotvm/task/task.py#L43-L63
    Tuple tuple = 2;

    string value = 3;
  }
}
// Tensor argument, described by a shape and a dtype.
message Tensor {
  repeated uint32 shape = 1;

  // Names a numpy dtype.
  string dtype = 2;
}
// Tuple argument: a list of values packed as google.protobuf.Any.
// NOTE(review): Any can only wrap protobuf messages, so bare scalars
// (str/int/float) would need a wrapper message — confirm the intended
// encoding of scalar tuple members.
message Tuple {
  repeated google.protobuf.Any values = 1;
}
// Configuration as used by AutoTVM v1.
message Config_v1 {
  // Code hash.
  string code_hash = 1;

  repeated Entity entities = 2;

  uint32 index = 3;
}
// Wrapper selecting one configuration variant. Config_v1 is the only
// variant declared here.
message Config {
  oneof config {
    Config_v1 config_v1 = 1;
  }
}
// One config entity. Earlier output encoded entities as
// `[["tile_ow", "sp", [-1, 1]], <other_entities>]`; this encoding states
// the entity type in the schema itself (via the oneof) instead of as a
// string.
message Entity {
  string knob_name = 1;

  oneof entity {
    SplitEntity split = 2;
    ReorderEntity reorder = 3;
    AnnotateEntity annotate = 4;
    OtherOptionEntity other_option = 5;
  }
}
// Payload of the `split` variant of Entity.
message SplitEntity {
  repeated int32 size = 1;
}
// Payload of the `reorder` variant of Entity.
message ReorderEntity {
  repeated uint32 order = 1;
}
// Payload of the `annotate` variant of Entity.
message AnnotateEntity {
  repeated string annotations = 1;
}
// Payload of the `other_option` variant of Entity; the value is carried
// as a google.protobuf.Any.
message OtherOptionEntity {
  google.protobuf.Any value = 1;
}
// Tuning result recorded for a configuration.
message Result {
  // Runtime costs measured for this configuration.
  repeated float costs = 1;

  // Error type, as defined by MeasureErrorNo.
  int32 error_no = 2;

  // End-to-end benchmarking cost, covering rpc, compilation and test
  // runs.
  float all_cost = 3;

  // Timestamp in ISO-8601 format.
  string timestamp = 4;
}
One further question I have is regarding the `Tuple` argument. It is serialized arbitrarily in branches that include possible recursion here https://github.com/apache/incubator-tvm/blob/master/python/tvm/autotvm/task/task.py#L53-L54 and it’s unclear to me what logical structures these different serializations should map to. Could someone (perhaps @haichen) clarify what each branch is meant to represent? Everything that I’ve marked `Tuple` below represents a structure that is unclear to me.
if isinstance(x, tensor.Tensor): # message Tensor { shape, dtype }
return ('TENSOR', get_const_tuple(x.shape), x.dtype)
if isinstance(x, (tuple, list, container.Array)): # message Tuple { repeated Any }
return tuple([_encode(a) for a in x])
if isinstance(x, (str, int, float, np.int, np.float, expr.Var)): # message Tuple { repeated Any }
return x
if isinstance(x, (expr.StringImm, expr.IntImm, expr.FloatImm)): # message Tuple { repeated Any }
return x.value
if isinstance(x, runtime.container.String): # string value
return str(x)