前言
简书的markdown 怎么生成目录........., vscode里面生成的复制过来没法用..... 很烦.
还是 Notion 好用!
一些关于 io file 结构体的笔记, 仅供参考. 欢迎交流 :P
0. 基本数据结构和宏
_IO_FILE && _IO_FILE_plus
/* Stream object excerpt: flag word, the get/put/reserve area pointers, backup
   bookkeeping, list linkage, descriptor and lock. */
struct _IO_FILE {
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags
/* The following pointers correspond to the C++ streambuf protocol. */
/* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */
struct _IO_marker *_markers;
struct _IO_FILE *_chain;
int _fileno;
#if 0
int _blksize;
#else
int _flags2;
#endif
_IO_off_t _old_offset; /* This used to be _offset but it's too small. */
#define __HAVE_COLUMN /* temporary */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
/* One-byte buffer stored inside the object itself; _IO_default_setbuf points
   _IO_buf_base/_IO_buf_end at it when no usable buffer is supplied. */
char _shortbuf[1];
/* char* _save_gptr; char* _save_egptr; */
_IO_lock_t *_lock;
/* NOTE(review): the #endif matching the directive below is outside this
   excerpt. Other snippets in this file access fp->_offset and fp->_mode,
   which are not declared above, so the full definition presumably continues
   past this point when the macro is undefined - confirm against libio.h. */
#ifdef _IO_USE_OLD_IO_FILE
};
/* We always allocate an extra word following an _IO_FILE.
This contains a pointer to the function jump table used.
This is for compatibility with C++ streambuf; the word can
be used to smash to a pointer to a virtual function table. */
struct _IO_FILE_plus
{
/* The stream object proper. */
_IO_FILE file;
/* The extra trailing word: pointer to the function jump table for this stream. */
const struct _IO_jump_t *vtable;
};
io file结构体中 _flags 相关宏
#define _IO_MAGIC 0xFBAD0000 /* Magic number */
#define _OLD_STDIO_MAGIC 0xFABC0000 /* Emulate old stdio. */
#define _IO_MAGIC_MASK 0xFFFF0000
#define _IO_USER_BUF 1 /* User owns buffer; don't delete it on close. */
#define _IO_UNBUFFERED 2
#define _IO_NO_READS 4 /* Reading not allowed */
#define _IO_NO_WRITES 8 /* Writing not allowd */
#define _IO_EOF_SEEN 0x10
#define _IO_ERR_SEEN 0x20
#define _IO_DELETE_DONT_CLOSE 0x40 /* Don't call close(_fileno) on cleanup. */
#define _IO_LINKED 0x80 /* Set if linked (using _chain) to streambuf::_list_all.*/
#define _IO_IN_BACKUP 0x100
#define _IO_LINE_BUF 0x200
#define _IO_TIED_PUT_GET 0x400 /* Set if put and get pointer logicly tied. */
#define _IO_CURRENTLY_PUTTING 0x800
#define _IO_IS_APPENDING 0x1000
#define _IO_IS_FILEBUF 0x2000
#define _IO_BAD_SEEN 0x4000
#define _IO_USER_LOCK 0x8000
_IO_jump_t (vtable)
就是 cpp 中的虚表
const struct _IO_jump_t _IO_file_jumps =
{
**JUMP_INIT_DUMMY,
JUMP_INIT(finish, _IO_file_finish),
JUMP_INIT(overflow, _IO_file_overflow),
JUMP_INIT(underflow, _IO_file_underflow),
JUMP_INIT(uflow, _IO_default_uflow),
JUMP_INIT(pbackfail, _IO_default_pbackfail),
JUMP_INIT(xsputn, _IO_file_xsputn),
JUMP_INIT(xsgetn, _IO_file_xsgetn),
JUMP_INIT(seekoff, _IO_new_file_seekoff),
JUMP_INIT(seekpos, _IO_default_seekpos),
JUMP_INIT(setbuf, _IO_new_file_setbuf),
JUMP_INIT(sync, _IO_new_file_sync),
JUMP_INIT(doallocate, _IO_file_doallocate),
JUMP_INIT(read, _IO_file_read),
JUMP_INIT(write, _IO_new_file_write),
JUMP_INIT(seek, _IO_file_seek),
JUMP_INIT(close, _IO_file_close),
JUMP_INIT(stat, _IO_file_stat),
JUMP_INIT(showmanyc, _IO_default_showmanyc),
JUMP_INIT(imbue, _IO_default_imbue)
};
1. setvbuf
1.1 文档
-> man setvbuf
...
The setvbuf() function may be used on any open stream to change its buffer. The mode argument must be
one of the following three macros:
_IONBF unbuffered
_IOLBF line buffered
_IOFBF fully buffered
Except for unbuffered files, the buf argument should point to a buffer at least size bytes long; this
buffer will be used instead of the current buffer. If the argument buf is NULL, only the mode is
affected; a new buffer will be allocated on the next read or write operation. The setvbuf() function
may be used only after opening a stream and before any other operations have been performed on it.
1.2 实例
// iosetvbuf.c
#define _IOFBF 0 /* Fully buffered. */
#define _IOLBF 1 /* Line buffered. */
#define _IONBF 2 /* No buffering. */
// test.c
/*
 * Demo driver for observing how setvbuf() configures stdin's buffer.
 * Change the third argument to 0 (_IOFBF), 1 (_IOLBF) or 2 (_IONBF) and
 * inspect stdin in a debugger while the loop echoes whitespace-separated
 * words back to stdout.
 *
 * Returns 0 once input is exhausted or a read fails.
 */
int main(){
    char buf[0x100];

    setvbuf(stdin, 0, 2, 0x10); // 0, 1, 2

    /* The field width keeps scanf inside buf (255 characters plus the
       terminating NUL).  Checking the result ends the loop on EOF or input
       failure instead of printing an unwritten buffer forever. */
    while (scanf("%255s", buf) == 1) {
        puts(buf);
    }
    return 0;
}
分别设置 mode 为 0, 1, 2.
mode 为 0(Fully Buffered) :
经过调试发现 mode 为 0 时, 在setvbuf中会 malloc(0x400), 然后 stdin→_IO_buf_base 和 stdin→_IO_buf_end 分别指向chunk的开头和末尾
pwndbg> io_file 0x7ffff7dd18e0
$5 = {
file = {
...
_IO_buf_base = 0x602010,
_IO_buf_end = 0x602410,
...
},
vtable = 0x7ffff7dd06e0
}
pwndbg> heap
add heap to history
0x602000 PREV_INUSE $6 = {
prev_size = 0x0,
size = 0x411,
fd = 0x0,
bk = 0x0,
fd_nextsize = 0x0,
bk_nextsize = 0x0
}
0x602410 PREV_INUSE $7 = {
...
manpage 里面说 如果 setvbuf 的 buf 是 NULL 的话 a new buffer will be allocated on the next read or write operation
,貌似实现和文档描述并不一致...
mode 为 1 (Line buffered) :
setvbuf 中没有调用malloc, 而是在第一次调用 scanf的时候调用了 malloc(0x400), stdin→_IO_buf_base 和 stdin→_IO_buf_end 的值和之前一致.
这次实现和文档倒是一致了.
mode 为2 (No Buffer) :
都不会malloc, stdin→_IO_buf_base 和 stdin→_IO_buf_end 指向 stdin 结构体内部的 _shortbuf 字段 (0x7ffff7dd18e0 + 0x83 = 0x7ffff7dd1963), 即一块长度为1个字节的内存
pwndbg> io_file 0x7ffff7dd18e0
add io_file 0x7ffff7dd18e0 to history
$1 = {
file = {
...
_IO_buf_base = 0x7ffff7dd1963,
_IO_buf_end = 0x7ffff7dd1964,
...
1.3 源码
调用链
_IO_setvbuf (iosetvbuf.c) → _IO_new_file_setbuf / _IO_file_setbuf (fileops.c) → _IO_default_setbuf (genops.c)
源码
先贴一下用到的宏
#define _IOFBF 0 /* Fully buffered. */
#define _IOLBF 1 /* Line buffered. */
#define _IONBF 2 /* No buffering. */
#define _IO_LINE_BUF 0x200
#define _IO_UNBUFFERED 2
先看 _IO_setvbuf (省略部分无关代码)
/* Excerpt of _IO_setvbuf (some code omitted): selects the buffering mode of fp
   by updating _IO_LINE_BUF / _IO_UNBUFFERED in its flags and, when the buffer
   itself has to change, forwards to _IO_SETBUF.
   Returns 0 on success, EOF for an unknown mode or when _IO_SETBUF returns NULL. */
int _IO_setvbuf (_IO_FILE *fp, char *buf, int mode, _IO_size_t size) {
int result;
switch (mode) {
case _IOFBF:
/* Fully buffered: both mode bits are cleared. */
fp->_IO_file_flags &= ~(_IO_LINE_BUF|_IO_UNBUFFERED);
if (buf == NULL){
if (fp->_IO_buf_base == NULL){
/* There is no flag to distinguish between "fully buffered
mode has been explicitly set" as opposed to "line
buffering has not been explicitly set". In both
cases, _IO_LINE_BUF is off. If this is a tty, and
_IO_filedoalloc later gets called, it cannot know if
it should set the _IO_LINE_BUF flag (because that is
the default), or not (because we have explicitly asked
for fully buffered mode). So we make sure a buffer
gets allocated now, and explicitly turn off line
buffering.
A possibly cleaner alternative would be to add an
extra flag, but then flags are a finite resource. */
_IO_DOALLOCATE (fp);
fp->_IO_file_flags &= ~_IO_LINE_BUF;
}
/* No caller-supplied buffer: finished without calling _IO_SETBUF. */
result = 0;
goto unlock_return;
}
break;
case _IOLBF:
/* Line buffered: _IO_LINE_BUF set, _IO_UNBUFFERED cleared. */
fp->_IO_file_flags &= ~_IO_UNBUFFERED;
fp->_IO_file_flags |= _IO_LINE_BUF;
if (buf == NULL){
/* Only the mode changes; no buffer is installed or allocated here. */
result = 0;
goto unlock_return;
}
break;
case _IONBF:
/* Unbuffered: _IO_UNBUFFERED set, _IO_LINE_BUF cleared; the caller's buf
   and size are discarded before reaching _IO_SETBUF. */
fp->_IO_file_flags &= ~_IO_LINE_BUF;
fp->_IO_file_flags |= _IO_UNBUFFERED;
buf = NULL;
size = 0;
break;
default:
result = EOF;
goto unlock_return;
}
/* Reached for _IOFBF/_IOLBF with a non-NULL buf, and always for _IONBF.
   For file streams the setbuf slot of _IO_file_jumps is _IO_new_file_setbuf. */
result = _IO_SETBUF (fp, buf, size) == NULL ? EOF : 0;
unlock_return:
_IO_release_lock (fp);
return result;
}
通过源码以及注释可以明白 mode 为 0 (_IOFBF) 且 buf 为 NULL 的时候, 实现为何与文档不一致:
io file 结构体关于 缓冲有三种状态, fully buffered, line buffered, no buffer, 但是 io file 的 _flags 属性只使用了 2 位来表示这三种状态, 为了避免之后分配buffer ( 按照文档里说法应该是第一次读的时候再分配 buffer) 时函数不清楚该如何设置 _flags 所以才提前分配. 画个表格表示一下 _flags 的两位 和 三种状态之间的关系吧
mode | #define _IO_LINE_BUF 0x200 | #define _IO_UNBUFFERED 2 |
---|---|---|
#define _IOFBF 0 /* Fully buffered. */ | 0 | 0 |
#define _IOLBF 1 /* Line buffered. */ | 1 | 0 |
#define _IONBF 2 /* No buffering. */ | 0 | 1 |
通过源码可以看到仅当 (mode 为 _IOFBF 或 _IOLBF, 且 buf ≠ NULL) 或 mode 为 _IONBF 的时候, 函数才会继续调用 _IO_SETBUF(FP, BUFFER, LENGTH)
(即 _IO_new_file_setbuf), 其余情况就会立即返回.
pwn 题中最常见的就是 _IONBF , 我们继续往下看 _IO_new_file_setbuf 的源码:
/* setbuf implementation for file streams: lets _IO_default_setbuf install the
   buffer, then makes both the put area and the get area empty ranges located
   at _IO_buf_base.
   Returns fp, or NULL if _IO_default_setbuf returned NULL. */
_IO_FILE *_IO_new_file_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len) {
if (_IO_default_setbuf (fp, p, len) == NULL)
return NULL;
/* Put area: base == ptr == end == start of the buffer. */
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;
/* Get area: base == ptr == end == start of the buffer. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end
= fp->_IO_buf_base;
return fp;
}
libc_hidden_ver (_IO_new_file_setbuf, _IO_file_setbuf)
/* Installs [p, p+len) as fp's buffer, or switches fp to unbuffered mode when
   p is NULL or len is 0.  The put and get area pointers are all zeroed.
   Returns fp in this excerpt. */
_IO_FILE *_IO_default_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len) {
if (p == NULL || len == 0) {
fp->_flags |= _IO_UNBUFFERED;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0); // fall back to the 1-byte _shortbuf inside *fp
}
else {
fp->_flags &= ~_IO_UNBUFFERED;
_IO_setb (fp, p, p+len, 0);
}
/* Both areas are reset to null pointers here. */
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;
return fp;
}
/* Records [b, eb) as f's buffer (_IO_buf_base .. _IO_buf_end).
   A non-zero `a` clears _IO_USER_BUF in the flags; zero sets it, i.e. marks
   the buffer as user-owned ("don't delete it on close"). */
void _IO_setb (FILE *f, char *b, char *eb, int a){
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}
通过源码可以看到如果 buf=NULL 或者 size = 0 的话, f->_IO_buf_base 和 f->_IO_buf_end 会被指向 fp->_shortbuf 和 fp->_shortbuf+1, 虽然不知道为什么要这么实现, 但是这样就解释了为什么 可以通过 覆盖 _IO_buf_base 的低字节来leak 地址: 覆盖了低字节之后 _IO_buf_base 就会指向 io file 结构体 内部, 而io file 结构体内部就有很多指向 _shortbuf 的指针.
2. fread && scanf
调用链:
-
fread:
fread → _IO_file_xsgetn (fileops.c) (如果buffer够的话就直接返回) → __underflow (genops.c) → _IO_new_file_underflow (fileops.c) → _IO_SYSREAD
-
scanf:
scanf → _IO_vfscanf_internal (vfscanf.c) → __uflow (genops.c) → _IO_default_uflow (genops.c) → _IO_file_underflow (fileops.c) -> _IO_SYSREAD
注:
_IO_new_file_underflow 和 _IO_file_underflow 就是同一个函数
神奇的是 用两个 函数名在 gdb 里面都可以断下来
────────────────────────────────────────────────────────────[ DISASM ]────────────────────────────────────────────────────────────
► 0x7ffff7a874a0 <_IO_file_underflow> mov eax, dword ptr [rdi] <0x7ffff7dd18e0>
0x7ffff7a874a2 <_IO_file_underflow+2> test al, 4
0x7ffff7a874a4 <_IO_file_underflow+4> jne _IO_file_underflow+544 <0x7ffff7a876c0>
0x7ffff7a874aa <_IO_file_underflow+10> mov rdx, qword ptr [rdi + 8]
0x7ffff7a874ae <_IO_file_underflow+14> cmp rdx, qword ptr [rdi + 0x10]
0x7ffff7a874b2 <_IO_file_underflow+18> jb _IO_file_underflow+384 <0x7ffff7a87620>
0x7ffff7a874b8 <_IO_file_underflow+24> push rbp
0x7ffff7a874b9 <_IO_file_underflow+25> push rbx
0x7ffff7a874ba <_IO_file_underflow+26> mov rbx, rdi
0x7ffff7a874bd <_IO_file_underflow+29> sub rsp, 8
0x7ffff7a874c1 <_IO_file_underflow+33> cmp qword ptr [rdi + 0x38], 0
────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────
In file: /home/pu1p/glibcs/glibc-2.23/libio/fileops.c
526 return count;
527 }
528
529 int
530 _IO_new_file_underflow (_IO_FILE *fp)
► 531 {
532 _IO_ssize_t count;
533 #if 0
534 /* SysV does not make this test; take it out for compatibility */
535 if (fp->_flags & _IO_EOF_SEEN)
536 return (EOF);
源码显示 _IO_new_file_underflow , 汇编显示 _IO_file_underflow, 神奇
源码:
fread
简化了一下
/*
 * Simplified model of the buffered read path behind fread().
 *
 * Copies up to size * nmemb bytes from fp into data.  Bytes already sitting
 * in the get area [_IO_read_ptr, _IO_read_end) are consumed first; when the
 * area runs dry and the remaining request is smaller than the buffer, the
 * buffer is refilled through __underflow().
 *
 * Returns the number of complete items copied (nmemb on full success, 0 for
 * a zero-length request).
 */
size_t fread(void *data, size_t size, size_t nmemb, FILE *fp){
    _IO_size_t n = size*nmemb;
    _IO_size_t want, have;
    char *dst = data;   /* byte cursor: arithmetic on void * is not standard C */

    if (n == 0)
        return 0;       /* also keeps the final division away from size == 0 */
    want = n;
    if(fp->_IO_buf_base == NULL)
        // malloc buf and make _IO_buf_base and _IO_buf_end point to it
        _IO_doallocbuf (fp);
    while (want > 0){
        have = fp->_IO_read_end - fp->_IO_read_ptr;
        if (want <= have){
            /* Everything still needed is already buffered. */
            memcpy(dst, fp->_IO_read_ptr, want);
            fp->_IO_read_ptr += want;
            want = 0;
        } else{
            if (have > 0){
                /* Drain what is left in the get area first. */
                memcpy (dst, fp->_IO_read_ptr, have);
                dst += have;
                want -= have;
                fp->_IO_read_ptr += have;
            }
            if (fp->_IO_buf_base
                && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)) {
                /* Refill the buffer, then loop to copy from it. */
                if (__underflow (fp) == EOF)
                    break;
                continue;
            }
            /* NOTE(review): requests at least as large as the buffer are not
               modelled in this simplified version; stop here rather than
               spin forever.  Confirm the real handling against fileops.c. */
            break;
        }
    }
    return (n - want) / size;
}
/* Generic "get area is empty" entry point: switches fp out of put/backup mode
   where needed, returns the next byte if one is already available, and
   otherwise hands over to _IO_new_file_underflow to refill the buffer.
   Returns the next byte as an unsigned char value, or EOF. */
int __underflow (_IO_FILE *fp) {
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp)) // _flags & _IO_CURRENTLY_PUTTING
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
/* Data still pending in the get area: peek at it without advancing. */
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp)) {
// _flags & _IO_IN_BACKUP
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp)) {
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp)) // _IO_save_base != NULL
_IO_free_backup_area (fp);
/* Nothing buffered anywhere: refill from the underlying file. */
return _IO_new_file_underflow (fp);
}
/*
 * Simplified model of the file-stream underflow handler: refills the buffer
 * [_IO_buf_base, _IO_buf_end) with a single _IO_SYSREAD call and returns the
 * first newly read byte without consuming it.
 *
 * Returns the byte as an unsigned char value, or EOF when reading is disabled
 * (_IO_NO_READS; errno is set to EBADF) or when nothing could be read.
 */
int _IO_new_file_underflow (_IO_FILE *fp) {
    _IO_ssize_t count;

    if (fp->_flags & _IO_NO_READS) {
        fp->_flags |= _IO_ERR_SEEN;
        __set_errno (EBADF);
        return EOF;
    }

    /* Collapse the get and put areas to empty ranges at the start of the
       buffer before reading into it. */
    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
    fp->_IO_read_end = fp->_IO_buf_base;
    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
        = fp->_IO_buf_base;

    count = _IO_SYSREAD (fp, fp->_IO_buf_base,
                         fp->_IO_buf_end - fp->_IO_buf_base);
    if (count <= 0) {
        /* Nothing new in the buffer: record end-of-file or error and report
           EOF instead of moving _IO_read_end backwards or dereferencing an
           empty get area. */
        fp->_flags |= (count == 0) ? _IO_EOF_SEEN : _IO_ERR_SEEN;
        return EOF;
    }
    fp->_IO_read_end += count;
    return *(unsigned char *) fp->_IO_read_ptr;
}
fread的大致逻辑如下:
- 如果 _IO_buf_base 为空先malloc一个buf
- 先使用 fp->_IO_read_ptr 和 fp->_IO_read_end 之间的数据
- 如果不够的话再调用 sys_read 读取新的数据到_IO_buf_base中.
所以如果可以控制 io_file 结构体, 通过修改 _IO_buf_base 实现任意地址写的功能, 不过需要绕过以下限制.
- _IO_buf_base 和 _IO_buf_end 分别指向要写的地址的头和尾, 这个不必多说
- (fp->_flags & _IO_NO_READS) == 0 // #define _IO_NO_READS 4 /* Reading not allowed */
- fp->_IO_read_end ≥ fp->_IO_read_ptr, 满足这个条件就可以通过多次读把这段空间的数据消耗完, 之后的数据就会读进 _IO_buf_base
- (_IO_buf_end - _IO_buf_base) > want, 满足这个条件才会调用 __underflow
- 还有一些其它限制(_IO_have_backup (fp) 之类的)具体调试的时候具体情况具体对待就好
scanf
对 io file 的操作差不多, 利用方式也一样, 不再赘述
3. fwrite, puts, printf
调用链
-
fwrite:
_IO_fwrite (iofwrite.c) → _IO_new_file_xsputn (fileops.c) → _IO_new_file_overflow (fileops.c) → _IO_do_write → new_do_write (fileops.c) → _IO_SYSWRITE/_IO_new_file_write (fileops.c)
-
puts:
puts → _IO_new_file_xsputn (fileops.c) → ...
-
printf
// todo
源码
fwrite
省略了一些无关代码
/* Heavily abridged sketch of the write path; only the flush step is shown.
   NOTE(review): n and to_do are not declared in this excerpt - presumably n is
   the total byte count and to_do the bytes still unwritten; confirm against
   the full source. */
size_t fwrite(void *data, size_t size, size_t nmemb, FILE *f){
/* Flush through the overflow handler.  On failure the result is EOF when
   to_do is 0, otherwise n - to_do. */
if (_IO_OVERFLOW (f, EOF) == EOF) // _IO_new_file_overflow
return to_do == 0 ? EOF : n - to_do;
return n - to_do;
}
/* Overflow handler excerpt for file streams.  With ch == EOF it flushes the
   pending put area [_IO_write_base, _IO_write_ptr) through _IO_do_write;
   otherwise this excerpt just returns ch as an unsigned char value.
   Returns EOF with errno set to EBADF when _IO_NO_WRITES is set. */
int _IO_new_file_overflow (_IO_FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
// MUST AVOID !!! 1
// Taking this branch aborts the write before anything is flushed.
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
/* If currently reading or no buffer allocated. */
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL){
// reset buf, MUST AVOID !! 2
// (body omitted in this excerpt; per the note above it resets the buffer pointers)
}
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base); // new_do_write
return (unsigned char) ch;
}
/* Writes to_do bytes starting at data through _IO_SYSWRITE (part of the
   function is omitted).  If _IO_read_end differs from _IO_write_base it first
   seeks by that difference.
   Returns the value produced by _IO_SYSWRITE, or 0 if the seek fails. */
static _IO_size_t new_do_write (_IO_FILE *fp, const char *data, _IO_size_t to_do)
{
_IO_size_t count;
if (fp->_IO_read_end != fp->_IO_write_base){
// MUST AVOID !!! 3
// Relative seek (whence == 1) by the distance between the two pointers.
_IO_off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
// Author's observation: fseek(stdout, x, 1) always returns _IO_pos_BAD (-1)
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
count = _IO_SYSWRITE (fp, data, to_do); // TARGET !!!
// omitted
return count;
}
与 fread 类似, fwrite 也是优先将缓冲区中的数据写入文件. 写入文件时的缓冲区是由 _IO_write_base 和 _IO_write_ptr 表示的. 所以我们可以通过控制这两个缓冲区指针来实现任意地址读的目的. 同样需要满足以下条件
- _IO_write_base 和 _IO_write_ptr 指向想要泄露地址的地方
- (f->_flags & _IO_NO_WRITES) == 0 // #define _IO_NO_WRITES 8
- ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL) == 0 // #define _IO_CURRENTLY_PUTTING 0x800
- fp->_IO_read_end == fp->_IO_write_base // 否则 new_do_write 会先调用 _IO_SYSSEEK, 而 fseek(stdout, x, 1) (我试了好几次) 都会返回 _IO_pos_BAD (-1), 函数直接返回 0, 不会执行 _IO_SYSWRITE.
puts
流程和 fwrite 类似, 利用方式也差不多.
Appendix A : 参考
Appendix B: 赛题
TokyoWesterns CTF 2017 parrot (修改 io file 达到任意地址写的目的)
ByteCTF 2019 note_five (修改 stdout->_IO_buf_base 来 leak 地址)