# int8
# Forward-only runs (FWD_B, FWD_D) over the six listed s8/u8 configs with
# per_oc output scales and four comma-separated post-op entries; '+' chains
# several post-ops inside one entry.
# NOTE(review): the trailing '*' on the second scale value presumably selects
# the runtime-scale variant -- confirm against the driver docs.
--reset
--dir=FWD_B,FWD_D
--cfg=s8s8s32,s8s8s8,s8s8u8,u8s8s32,u8s8s8,u8s8u8
--attr-oscale=per_oc:2.25,per_oc:2.25*
--attr-post-ops=sum:0.5,add:s8:per_tensor,linear:0.5:1.5:2, \
                add:f32:per_oc+sum:0.25+relu:0.5
# The same option set is applied to two shape batches: set_gpu with mb=2,
# then shapes_0d_gpu with mb=0.
# NOTE(review): mb=0 presumably keeps the minibatch given by each shape --
# confirm.
--mb=2 --batch=set_gpu
--mb=0 --batch=shapes_0d_gpu

# int8, second pass: replaces the output scales with a common (single) scale
# and swaps in a different post-op list, then runs shapes_1d.
# NOTE(review): there is no --reset here, so --dir, --cfg and the last --mb
# value set above presumably remain in effect -- confirm.
--attr-oscale=common:2.25,common:2.25*
--attr-post-ops=sum:0.5,add:u8:per_tensor,linear:-0.5:0.5:0.5, \
                add:f32:per_oc+abs+sum:0.5
--batch=shapes_1d

# f32
# f32 config with mb=2 over the set_gpu batch, covering one forward and two
# backward directions (FWD_B, BWD_D, BWD_WB). No --attr-* option is set
# between the --reset and the --batch line.
--reset
--cfg=f32
--mb=2
--dir=FWD_B,BWD_D,BWD_WB
--batch=set_gpu

# f32 + post_ops
# Forward-only f32 (FWD_B, FWD_D) with a single chained post-op entry
# (sum:2 followed by relu:3), applied to set_gpu with mb=2 and then to
# shapes_0d_gpu with mb=0.
--reset
--cfg=f32
--dir=FWD_B,FWD_D
--attr-post-ops=sum:2+relu:3
--mb=2 --batch=set_gpu
--mb=0 --batch=shapes_0d_gpu

# Second f32 post-op list (four entries, the last one a three-op chain),
# run over shapes_1d.
# NOTE(review): no --reset precedes this, so --cfg, --dir and the last --mb
# value from the f32 + post_ops lines above presumably carry over -- confirm.
--attr-post-ops=sum:2,linear:3:2,add:f32, \
                sum+add:s8:per_tensor+linear:0.5:1.5:2
--batch=shapes_1d

# bf16
# cfg=bf16bf16bf16 over shapes_1d for one forward and two backward
# directions. Neither --mb nor any --attr-* option is set after the --reset.
--reset
--cfg=bf16bf16bf16
--dir=FWD_B,BWD_D,BWD_WB
--batch=shapes_1d

# f16
# Forward-only runs over shapes_1d with two --mb values (0 and 2), three
# f16-based configs and three post-op entries.
# NOTE(review): f16f16s8 / f16f16u8 presumably name the src/wei/dst data
# types, i.e. f16 inputs with an int8 destination -- confirm.
--reset
--mb=0,2
--cfg=f16,f16f16s8,f16f16u8
--dir=FWD_B,FWD_D
--attr-post-ops=linear:1:2,add:f16,add:s8:per_tensor+linear:2:3:0.5
--batch=shapes_1d

# bf16 + f32
# Mixed bf16/f32 configs, one direction per run over shapes_1d. Each
# direction is paired with a cfg that has f32 in a different position.
# The empty --attr-post-ops= replaces the post-op list set by the f16 lines
# above with an empty value.
# NOTE(review): no --reset is issued here, so --mb=0,2 from the f16 lines
# presumably still applies to all three runs -- confirm this is intended.
--attr-post-ops=

# Forward: f32 in the last cfg position.
--dir=FWD_B
--cfg=bf16bf16f32
--batch=shapes_1d

# Backward by data: f32 in the first cfg position.
--dir=BWD_D
--cfg=f32bf16bf16
--batch=shapes_1d

# Backward by weights (and bias, per the _WB suffix -- TODO confirm): f32 in
# the middle cfg position.
--dir=BWD_WB
--cfg=bf16f32bf16
--batch=shapes_1d

# bf16 + post_ops
# Forward (FWD_B) runs over shapes_1d for the two listed bf16 configs with
# four post-op entries; --mb is set back to 0 first.
# NOTE(review): this section starts without --reset, so any option not
# overridden here keeps its earlier value -- confirm nothing unintended leaks
# in.
--mb=0
--dir=FWD_B
--cfg=bf16bf16bf16,bf16bf16f32
--attr-post-ops=sum:0.5,tanh,add:bf16,add:f32:per_tensor+relu:0.5
--batch=shapes_1d

# Different memory tags
# Delegates to the harness_ip_tag_gpu batch file.
# NOTE(review): no --reset precedes this line, so the bf16 + post_ops options
# above are still set when the harness is entered; whether the harness resets
# them itself is not visible here -- confirm.
--batch=harness_ip_tag_gpu

# Test layers of some key GPU DL frameworks
# Options are reset first, so every setting used for this run comes from the
# option_set_fwks_key_gpu batch file.
--reset
--batch=option_set_fwks_key_gpu
