很多人在写C语言程序的时候，经常分不清signed char和unsigned char的区别，不确定到底该定义成signed char还是unsigned char，从而可能引入一些潜在风险。
#include <stdio.h>
/*
 * Compare a signed char and an unsigned char against the character
 * constant '\x85' and the integer constant 0x85, printing "equal" or
 * "not equal" for each of the four comparisons.
 *
 * Both operands of == are converted to int first, so sc is sign-extended
 * and uc is zero-extended. The "result" notes describe the output when
 * both arguments carry the bit pattern 0x85 and '\x85' evaluates to -123
 * (i.e. plain char is signed on the target).
 */
void foo(signed char sc, unsigned char uc) {
    if (sc == '\x85') printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: equal */
    if (uc == '\x85') printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: not equal */
    if (sc == 0x85) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: not equal */
    if (uc == 0x85) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: equal */
}
/*
 * Initialise one signed and one unsigned char from the same character
 * constant '\x85' and hand them to foo() for comparison.
 */
int main(int argc, char * argv[]) {
    signed char sc = '\x85';
    unsigned char uc = '\x85';
    /* Pass the arguments in the order foo(signed char sc, unsigned char uc) declares them. */
    foo(sc, uc);
    return 0;
}
equal
not equal
not equal
equal
执行环境是Darwin，gcc命令实际调用的是Apple LLVM 8.0.0 (clang)，C++头文件目录对应gcc-4.2.1：
$ uname -a
Darwin localhost 15.6.0 Darwin Kernel Version 15.6.0: Tue Apr 11 16:00:51 PDT 2017; root:xnu-3248. x86_64
$ gcc -v
Configured with: --prefix=/Applications/Xcode.app/Contents/Developer/usr --with-gxx-include-dir=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk/usr/include/c++/4.2.1
Apple LLVM version 8.0.0 (clang-800.0.42.1)
Target: x86_64-apple-darwin15.6.0
Thread model: posix
InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin
if (sc == '\x85'):
movsbl -1(%rbp), %esi
cmpl $-123, %esi
jne LBB0_2
if (uc == '\x85'):
movzbl -2(%rbp), %eax
cmpl $-123, %eax
jne LBB0_5
if (sc == 0x85):
movsbl -1(%rbp), %eax
cmpl $133, %eax
jne LBB0_8
if (uc == 0x85):
movzbl -2(%rbp), %eax
cmpl $133, %eax
jne LBB0_11
movzbl: Move Zero-Extended Byte to Long。
The low 8 bits of the destination are replaced by the source operand. The top 24 bits are set to 0.
movsbl: Move Sign-Extended Byte to Long。
The low 8 bits of the destination are replaced by the source operand. The top 24 bits are filled with copies of the source operand's sign bit.
cmpl: Logical comparison meaning it does not look at the sign and treats the operands as unsigned integers.
If arg1 is an immediate value it will be sign extended to the length of arg2。
- if (sc == '\x85'):
指令 | 描述 |
movsbl -1(%rbp), %esi | 把参数sc的值move到寄存器esi, 做符号扩展成0xffffff85 |
cmpl $-123, %esi | 这个指令有两部分操作: 1. 把immediate value($-123)符号扩展成0xffffff85 ,2. 做无符号比较 |
jne LBB0_2 | 比较结果是相同 |
- if (uc == '\x85'):
指令 | 描述 |
movzbl -2(%rbp), %eax | 把参数uc的值move到寄存器eax, 做0扩展成0x00000085 |
cmpl $-123, %eax | 这个指令有两部分操作: 1. 把immediate value($-123)符号扩展成0xffffff85 ,2. 做无符号比较 |
jne LBB0_5 | 比较结果是不相同 |
- if (sc == 0x85)
指令 | 描述 |
movsbl -1(%rbp), %eax | 把参数sc的值move到寄存器eax, 做符号扩展成0xffffff85 |
cmpl $133, %eax | 这个指令有两部分操作: 1. 把immediate value($133)符号扩展成0x00000085 ,2. 做无符号比较 |
jne LBB0_8 | 比较结果是不相同 |
- if (uc == 0x85)
指令 | 描述 |
movzbl -2(%rbp), %eax | 把参数uc的值move到寄存器eax, 做0扩展成0x00000085 |
cmpl $133, %eax | 这个指令有两部分操作: 1. 把immediate value($133)符号扩展成0x00000085 ,2. 做无符号比较 |
jne LBB0_11 | 比较结果是相同 |
因为在C语言层面，虽然'\x85'和0x85在很多场合可以通用，但两者还是有区别的：'\x85'是一个字符常量，其值等于把该字符按char类型存放后再转换成int的结果；char是否带符号由实现定义，在本文的x86环境下缺省是带符号的，所以尽管两者表示成16进制都是0x85，但表述成10进制的意义是不一样的，字符常量'\x85' = -123，而不是133(8*16+5)；而0x85是一个整数常量，不存在符号扩展的问题，其值表述成10进制就是133。
在本文的环境下char缺省等同于signed char(C标准规定char是否带符号由实现定义)，当把字符类型和其他类型的数据做比较的时候，要充分考虑扩展的问题：是符号扩展还是零扩展。
$ gcc test.c
test.c:5:12: warning: comparison of constant -123 with expression of type 'unsigned char' is always false
if (uc == '\x85') printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* not equal */
~~ ^ ~~~~~~
test.c:7:12: warning: comparison of constant 133 with expression of type 'signed char' is always false
if (sc == 0x85) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* not equal */
~~ ^ ~~~~
2 warnings generated.
在Linux x64 gcc环境下更神奇，编译时没有输出任何warning信息；在关闭优化开关(gcc -S -O0)的条件下，生成的汇编代码如下：
.section .rodata
.string "not equal"
.globl foo
.type foo, @function
pushq %rbp
.cfi_def_cfa_offset 16
movq %rsp, %rbp
.cfi_offset 6, -16
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, %edx
movl %esi, %eax
movb %dl, -4(%rbp)
movb %al, -8(%rbp)
movl $.LC0, %edi
call puts
movl $.LC0, %edi
call puts
.cfi_def_cfa 7, 8
.size foo, .-foo
可以看到在函数foo里面,比较指令已经被丢弃了,甚至字符串"equal"也被丢弃了,函数直接就打印出两条"not equal"($.LC0),不管输入参数的值是什么。这相当于foo函数等价于:
/*
 * Reduced form of foo() matching the two puts calls in the gcc listing:
 * it prints "not equal" twice and never reads sc or uc.
 */
void foo(signed char sc, unsigned char uc) {
    printf("%s\n", "not equal"); /* not equal */
    printf("%s\n", "not equal"); /* not equal */
}
void foo(signed char sc, unsigned char uc) {
if (sc == -123) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: equal */
if (uc == -123) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: not equal */
if (sc == 133) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: not equal */
if (uc == 133) printf("%s\n", "equal"); else printf("%s\n", "not equal"); /* result: equal */