一文弄懂printf函数从用户态到内核态的执行流程

一文弄懂printf函数从用户态到内核态的执行流程

目录

1.简介2.示例代码3.程序执行初探4.用户态处理流程5.内核态处理流程5.1. 软中断处理5.2 系统调用返回5.3 系统调用处理5.4 stdout重定向到console5.5 tty及sstar uart驱动5.6 sstar uart dma发送线程

6 问:为什么printf打印不会卡?7.参考文献

1.简介

我们经常使用C库的printf函数,花时间整理一下从用户态到内核态的整个流程,涉及libc、系统调用、tty驱动、console等多个方面(其中,跟踪的驱动部分代码是sigmastar的,具体以用户实际使用的平台为准)。文章略长,请耐心阅读哈~

由于作者水平有限,如有纰漏,请帮忙指正,谢谢~

2.示例代码

使用最简单的代码作为示例。

#include <stdio.h>

#include

int main()

{

printf("hello world!\n");

return 0;

}

3.程序执行初探

gcc编译上述程序后,使用strace命令可以跟踪程序的系统调用流程。可以看到,程序执行需要依赖C库。整个执行流程大致如下:Hello_world可执行程序通过execve加载到内存后,libc.so等动态库通过mmap加载到内存映射区,最终通过write系统调用将“hello world!”输出到屏幕,程序执行完成退出。

$ strace ./hello_world

execve("./hello_world", ["./hello_world"], [/* 44 vars */]) = 0

brk(0) = 0x12bd000

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfadc000

access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)

open("/etc/ld.so.cache", O_RDONLY) = 3

fstat(3, {st_mode=S_IFREG|0644, st_size=70625, ...}) = 0

mmap(NULL, 70625, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f48bfaca000

close(3) = 0

open("/lib64/libc.so.6", O_RDONLY) = 3

read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\356\1\0\0\0\0\0"..., 832) = 832

fstat(3, {st_mode=S_IFREG|0755, st_size=1924768, ...}) = 0

mmap(NULL, 3750184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f48bf529000

mprotect(0x7f48bf6b4000, 2093056, PROT_NONE) = 0

mmap(0x7f48bf8b3000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18a000) = 0x7f48bf8b3000

mmap(0x7f48bf8b9000, 14632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f48bf8b9000

close(3) = 0

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfac9000

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfac8000

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfac7000

arch_prctl(ARCH_SET_FS, 0x7f48bfac8700) = 0

mprotect(0x7f48bf8b3000, 16384, PROT_READ) = 0

mprotect(0x7f48bfadd000, 4096, PROT_READ) = 0

munmap(0x7f48bfaca000, 70625) = 0

fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfadb000

write(1, "hello world!\n", 13hello world!

) = 13

exit_group(0) = ?

+++ exited with 0 +++

4.用户态处理流程

printf的实现是在C库,通过stdout打印。

int printf(const char * __restrict format, ...)

{

va_list arg;

int rv;

va_start(arg, format);

rv = vfprintf(stdout, format, arg);

va_end(arg);

return rv;

}

vfprintf函数主要是处理和校验打印格式,调用关系如下:

vfprintf:

PUTC-> putc_unlocked-> __PUTC_UNLOCKED->…-> __PUTC_UNLOCKED_MACRO:

__fputc_unlocked

__fputc_unlocked函数部分实现如下:

//buffer还没满

if (__STDIO_STREAM_BUFFER_SIZE(stream)) {

//添加到缓冲区

__STDIO_STREAM_BUFFER_ADD(stream, ((unsigned char) c));

if (__STDIO_STREAM_IS_LBF(stream)) {

//遇到'\n'则直接进行commit buffer。

if ((((unsigned char) c) == '\n')

&& __STDIO_COMMIT_WRITE_BUFFER(stream)) {

/* Commit failed! */

__STDIO_STREAM_BUFFER_UNADD(stream); /* Undo the write! */

goto BAD;

}

}

} else {

//buffer满了,则直接进行write。

unsigned char uc = (unsigned char) c;

if (! __stdio_WRITE(stream, &uc, 1)) {

goto BAD;

}

}

上文的__STDIO_COMMIT_WRITE_BUFFER 和__stdio_WRITE最终都会调用到write系统调用陷入到内核态继续执行。

static inline ssize_t __WRITE(FILE *stream, const char *buf, size_t bufsize)

{

__STDIO_STREAM_CUSTOM_WRITE_FUNC(stream, buf, bufsize);

return write(stream->__filedes, buf, bufsize);

}

这里的write调用实际是__libc_write,各种宏定义展开如下:

PSEUDO (__libc_write, write, 3)

ret

PSEUDO_END (__libc_write)

#define PSEUDO(name, syscall_name, args) \

.text; \

ENTRY (name); \

DO_CALL (syscall_name, args); \

cmn r0, $4096;

#undef DO_CALL

#if defined(__ARM_EABI__)

#define DO_CALL(syscall_name, args) \

DOARGS_##args \

mov ip, r7; \

ldr r7, =SYS_ify (syscall_name); \ //r7记录系统调用号

swi 0x0; \ //产生软中断

mov r7, ip; \

UNDOARGS_##args

#else

#define DO_CALL(syscall_name, args) \

DOARGS_##args \

swi SYS_ify (syscall_name); \

UNDOARGS_##args

#endif

#define SYS_ify(syscall_name) (__NR_##syscall_name)

//在内核src\arch\arm\include\uapi\asm\unistd.h中的定义如下:

#if defined(__thumb__) || defined(__ARM_EABI__)

#define __NR_SYSCALL_BASE 0

#else

#define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE

#endif

#define __NR_write (__NR_SYSCALL_BASE+ 4)

调用时先处理参数,接着通过r7记录系统调用号(我使用的内核支持__ARM_EABI__(#define CONFIG_AEABI 1),write的系统调用号为4),执行swi 0x0从用户态陷入到内核态。 至此,用户态流程处理完成。

5.内核态处理流程

5.1. 软中断处理

上节说到产生软中断后,内核态会跳转到中断向量处执行。可以看到,通过指令ldrcc pc, [tbl, scno, lsl #2]执行系统调用,通过ret_fast_syscall来返回。

ENTRY(vector_swi)

//执行系统调用前先保存用户态18个寄存器,PT_REGS_SIZE = 72,即sizeof(struct pt_regs),分别是r0-r15、cpsr、old_r0(ORIG_r0)

sub sp, sp, #PT_REGS_SIZE

stmia sp, {r0 - r12} @ Calling r0 - r12

ARM( add r8, sp, #S_PC )

ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr

THUMB( mov r8, sp )

THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr

mrs r8, spsr @ called from non-FIQ mode, so ok.

str lr, [sp, #S_PC] @ Save calling PC

//进入内核态之前先保存CPSR,返回到用户态时从SPSR中恢复

str r8, [sp, #S_PSR] @ Save CPSR

str r0, [sp, #S_OLD_R0] @ Save OLD_R0

zero_fp

alignment_trap r10, ip, __cr_alignment

enable_irq

ct_user_exit

get_thread_info tsk

/*

* Get the system call number.

*/

#if defined(CONFIG_OABI_COMPAT)

...

#elif defined(CONFIG_AEABI)

/*

* Pure EABI user space always put syscall number into scno (r7).

*/

#elif defined(CONFIG_ARM_THUMB)

/* Legacy ABI only, possibly thumb mode. */

tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs

addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in

USER( ldreq scno, [lr, #-4] )

#else

#endif

uaccess_disable tbl

//加载系统调用表基地址

adr tbl, sys_call_table @ load syscall table pointer

#if defined(CONFIG_OABI_COMPAT)

/*

* If the swi argument is zero, this is an EABI call and we do nothing.

*

* If this is an old ABI call, get the syscall number into scno and

* get the old ABI syscall table address.

*/

#elif !defined(CONFIG_AEABI)

bic scno, scno, #0xff000000 @ mask off SWI op-code

eor scno, scno, #__NR_SYSCALL_BASE @ check OS number

#endif

local_restart:

ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing

stmdb sp!, {r4, r5} @ push fifth and sixth args

tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?

bne __sys_trace

cmp scno, #NR_syscalls @ check upper syscall limit

//通过__ret_fast_syscall返回

badr lr, __ret_fast_syscall @ return address

//通过系统调用表基地址tbl+系统调用号scno,执行系统调用函数

ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine

add r1, sp, #S_OFF

2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)

eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back

bcs arm_syscall

mov why, #0 @ no longer a real syscall

b sys_ni_syscall @ not private func

ENDPROC(vector_swi)

5.2 系统调用返回

上一小节看到,系统调用执行完成返回到__ret_fast_syscall:

ret_fast_syscall:

__ret_fast_syscall:

UNWIND(.fnstart )

UNWIND(.cantunwind )

disable_irq_notrace @ disable interrupts

ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing

tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK

bne fast_work_pending

/* perform architecture specific actions before user return */

arch_ret_to_user r1, lr

restore_user_regs fast = 1, offset = S_OFF

UNWIND(.fnend )

ENDPROC(ret_fast_syscall)

fast_work_pending:

str r0, [sp, #S_R0+S_OFF]! @ returned r0

/* fall through to work_pending */

slow_work_pending:

mov r0, sp @ 'regs'

mov r2, why @ 'syscall'

bl do_work_pending //见下

cmp r0, #0

beq no_work_pending

movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)

ldmia sp, {r0 - r6} @ have to reload r0 - r6

b local_restart @ ... and off we go

no_work_pending:

asm_trace_hardirqs_on save = 0

/* perform architecture specific actions before user return */

arch_ret_to_user r1, lr //恢复用户态的寄存器

ct_user_enter save = 0

restore_user_regs fast = 0, offset = 0

在返回用户态前,do_work_pending主要检查是否需要重新调度,以及是否有待处理(pending)的信号。

asmlinkage int

do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)

{

/*

* The assembly code enters us with IRQs off, but it hasn't

* informed the tracing code of that for efficiency reasons.

* Update the trace code with the current status.

*/

trace_hardirqs_off();

do {

//检查是否需要重新调度

if (likely(thread_flags & _TIF_NEED_RESCHED)) {

schedule();

} else {

if (unlikely(!user_mode(regs)))

return 0;

local_irq_enable();

//有未处理的信号

if (thread_flags & _TIF_SIGPENDING) {

int restart = do_signal(regs, syscall);

if (unlikely(restart)) {

/*

* Restart without handlers.

* Deal with it without leaving

* the kernel space.

*/

return restart;

}

syscall = 0;

}

}

local_irq_disable();

thread_flags = current_thread_info()->flags;

} while (thread_flags & _TIF_WORK_MASK);

return 0;

}

5.3 系统调用处理

系统调用write实际调用的是sys_write,在内核代码中无法直接搜到,因为它是通过宏定义拼接的,跟踪宏展开中name字段就可以看到最终是sys_write函数,在内核编译生成的System.map也可以搜到sys_write符号:

#define __NR_write 64

__SYSCALL(__NR_write, sys_write)

#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

#define SYSCALL_DEFINEx(x, sname, ...) \

SYSCALL_METADATA(sname, x, __VA_ARGS__) \

__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

#define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)

#define __SYSCALL_DEFINEx(x, name, ...) \

asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \

__attribute__((alias(__stringify(SyS##name)))); \

static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \

asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \

asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \

{ \

long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \

__MAP(x,__SC_TEST,__VA_ARGS__); \

__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \

return ret; \

} \

static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))

sys_write函数的具体实现如下:

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,

size_t, count)

{

struct fd f = fdget_pos(fd);

ssize_t ret = -EBADF;

if(f.file) {

loff_t pos = file_pos_read(f.file);

ret = vfs_write(f.file, buf, count, &pos);

if(ret >= 0)

file_pos_write(f.file, pos);

fdput_pos(f);

}

return ret;

}

vfs_write函数调用如下:

vfs_write

__vfs_write

file->f_op->write(file, p, count, pos);

//这里的实际执行函数是redirected_tty_write

5.4 stdout重定向到console

查看程序的fd,可以看到fd 0、1和2都是重定向到/dev/console。

# 679为程序pid

ls /proc/679/fd

lrwx------ 1 64 2 -> /dev/console

lrwx------ 1 64 1 -> /dev/console

lrwx------ 1 64 0 -> /dev/console

内核启动时创建init进程(pid=1):

start_kernel

rest_init

/*

* We need to spawn init first so that it obtains pid 1, however

* the init task will end up wanting to create kthreads, which, if

* we schedule it before we create kthreadd, will OOPS.

*/

kernel_thread(kernel_init, NULL, CLONE_FS);

init进程打开/dev/console作为标准输入输出。

kernel_init

kernel_init_freeable

/* Open the /dev/console on the rootfs, this should never fail */

if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) //stdin, fd = 0

pr_err("Warning: unable to open an initial console.\n");

(void) sys_dup(0); //stdout, fd = 1;

(void) sys_dup(0); //stderr, fd = 2;

Linux的所有进程都是由init进程创建的,继承fd 0、1和2。因此,打印都被重定向到/dev/console上,执行系统调用write函数,实际就是执行的console的file_operations的write函数。 在内核启动日志中,可以看到在打印:console [ttyS0] enabled。

5.5 tty及sstar uart驱动

tty驱动初始化流程如下,创建字符设备并注册到/dev/console:

__initcall_chr_dev_init5

chr_dev_init

tty_init

tty_init

cdev_init(&console_cdev, &console_fops);

if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||

register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)

panic("Couldn't register /dev/console driver\n");

struct file_operations console_fops结构体如下:

static const struct file_operations console_fops = {

.llseek = no_llseek,

.read = tty_read,

.write = redirected_tty_write,

.poll = tty_poll,

.unlocked_ioctl = tty_ioctl,

.compat_ioctl = tty_compat_ioctl,

.open = tty_open,

.release = tty_release,

.fasync = tty_fasync,

};

write系统调用最终会调用到redirected_tty_write

redirected_tty_write

tty_write

do_tty_write(ld->ops->write, tty, file, buf, count)

ld->ops->write();

//n_tty_init 中通过tty_register_ldisc(N_TTY, &n_tty_ops);注册

//(见下文注释1),因此此处write函数实际为n_tty_write。

n_tty_write

c = tty->ops->write(tty, b, nr);

//uart_register_driver中通过tty_set_operations(normal, &uart_ops); 注册

//(见下文注释2),write回调函数为uart_write

uart_write

//将数据送到xmit环形缓冲区(队列大小:PAGE_SIZE)中,若环形队列满

//则不再拷贝

__uart_start

port->ops->start_tx(port);

//sstar平台在_ms_uart_console_prepare中通过

//console_port.port.ops=&ms_uart_ops; 注册

//此处调用的是ms_uart_start_tx

ms_uart_start_tx

//将xmit环形队列的数据拷贝到驱动的dma的tx_buf中

URDMA_StartTx

至此,write系统调用返回。

注释1: n_tty_ops注册流程如下:

start_kernel

console_init

n_tty_init

tty_register_ldisc(N_TTY, &n_tty_ops);

static struct tty_ldisc_ops n_tty_ops = {

.magic = TTY_LDISC_MAGIC,

.name = "n_tty",

.open = n_tty_open,

.close = n_tty_close,

.flush_buffer = n_tty_flush_buffer,

.read = n_tty_read,

.write = n_tty_write,

.ioctl = n_tty_ioctl,

.set_termios = n_tty_set_termios,

.poll = n_tty_poll,

.receive_buf = n_tty_receive_buf,

.write_wakeup = n_tty_write_wakeup,

.receive_buf2 = n_tty_receive_buf2,

};

注释2: 我这里跟踪的是sigmastar平台的uart驱动。 内核启动时会调用uart驱动模块的init函数,即ms_uart_module_init。

ms_uart_module_init

uart_register_driver(&ms_uart_driver);

tty_set_operations(normal, &uart_ops);

platform_driver_register(&ms_uart_platform_driver);

static struct uart_driver ms_uart_driver = {

.owner = THIS_MODULE,

.driver_name = "ms_uart",

.dev_name = "ttyS",

.nr = 8,

.cons = &ms_uart_console,

};

static struct console ms_uart_console =

{

.name = MS_CONSOLE_DEV,

.write = ms_uart_console_write,

.setup = ms_uart_console_setup,

.flags = CON_PRINTBUFFER,

.device = uart_console_device,

.data = &ms_uart_driver,

.index = -1,

#if CONSOLE_DMA

.match = ms_uart_console_match,

#endif

};

static const struct tty_operations uart_ops = {

.open = uart_open,

.close = uart_close,

.write = uart_write,

.put_char = uart_put_char,

.flush_chars = uart_flush_chars,

.write_room = uart_write_room,

};

static struct platform_driver ms_uart_platform_driver = {

.remove = ms_uart_remove,

.probe = ms_uart_probe,

};

在platform_driver_register中,会调用probe函数ms_uart_probe注册console ttyS0,调用关系如下:

ms_uart_module_init

platform_driver_register—> __platform_driver_register

driver_register

bus_add_driver

driver_attach

bus_for_each_dev

__driver_attach

driver_probe_device

really_probe

ret = dev->bus->probe(dev);

//这里实际调用的就是上面注册的

ms_uart_probe

uart_add_one_port

uart_configure_port

register_console

print console [ttyS0] enabled

5.6 sstar uart dma发送线程

需要关注一个内核线程urdma_tx_thread:内核启动时初始化platform设备,会调用ms_uart_probe创建该tx线程。

ms_uart_probe

//设置DMA的tx和rx缓冲区(页对齐)

mp->urdma->rx_urdma_size = PAGE_ALIGN(UR2DMA_RX_BUF_LENGTH);

mp->urdma->tx_urdma_size = PAGE_ALIGN(UR2DMA_TX_BUF_LENGTH);

//启动一个内核线程输出打印

mp->urdma_task = kthread_run(urdma_tx_thread,(void *)&mp->port,"urdma_tx_thread");

ret = uart_add_one_port(&ms_uart_driver, &mp->port);

uart_configure_port

//boot参数dh_keyboard在此生效

register_console

ms_uart_console_setup //设置波特率等参数

urdma_tx_thread实现如下:

static int urdma_tx_thread(void *arg)

{

struct uart_port *p = (struct uart_port *)arg;

struct circ_buf *xmit;

while(!kthread_should_stop()){

//等待中断唤醒返回

wait_event_interruptible(urdma_wait, urdma_conditions);

urdma_conditions = 0;

xmit = &p->state->xmit;

if (uart_circ_empty(xmit) || uart_tx_stopped(p))

{

ms_uart_stop_tx(p);

}

if (uart_circ_chars_pending(xmit))

{

//环形缓冲区有数据,则将数据拷贝到驱动

URDMA_StartTx(p);

}else

{

//环形缓冲区中已没有待发送的数据(uart_circ_chars_pending返回0)

//调用n_tty_write_wakeup,发送SIGIO信号通知driver有output data

uart_write_wakeup(p);

}

}

return 0;

}

驱动加载的时候会调用uart_ops中的open接口,实现如下:

uart_open

tty_port_open

port->ops->activate(port, tty);

//实际为uart_port_activate

uart_port_activate

uart_startup

uart_port_startup

uport->ops->startup(uport)

//实际为ms_uart_startup

ms_uart_startup

//此处注册了uart的中断处理函数ms_uart_interrupt

request_irq(mp->urdma->urdma_irq, ms_uart_interrupt, IRQF_SHARED, "ms_serial_dma",p);

ms_uart_interrupt函数实现如下:

static irqreturn_t ms_uart_interrupt(s32 irq, void *dev_id)

{

if(mp->use_dma)

{

u8 status = URDMA_GetInterruptStatus(p);

if(status & URDMA_INTR_STATUS_RX)

{

}

else if(status & URDMA_INTR_STATUS_TX)

{

//有tx_mcu_intr中断,则wakeup

URDMA_TxClearInterrupt(p);

urdma_conditions = 1;

wake_up_interruptible(&urdma_wait);

}

}

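当TX中断(tx_mcu_intr)到来时,ms_uart_interrupt会唤醒在urdma_wait上等待的urdma_tx_thread:如果xmit环形缓冲区中还有待发送的数据,则调用URDMA_StartTx将其拷贝到DMA的tx_buf并输出到串口上;如果已无待发送数据,则调用uart_write_wakeup通知上层可以继续写入。 至此,整个printf打印流程完成。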

6 问:为什么printf打印不会卡?

答:printf打印不会卡最根本的原因在于printf打印是异步的。数据从用户态C库的缓冲区经write系统调用进入内核态,接着拷贝到tty的xmit环形缓冲区,这个过程是同步的,执行完成即返回。在此过程中,主要涉及内存拷贝动作,没有其他耗时的操作。 剩下的过程是异步执行:当有tx_mcu_intr中断时,内核线程urdma_tx_thread被唤醒,将tty环形缓冲区中的数据拷贝到驱动的DMA tx_buf,由DMA输出打印到串口中;当环形缓冲区中的数据取完后,再通过uart_write_wakeup(最终调用n_tty_write_wakeup,发送SIGIO信号)通知上层可以继续写入数据。

7.参考文献

http://blog.chinaunix.net/uid-29401328-id-4866781.html https://www.cnblogs.com/pengdonglin137/p/3878316.html https://www.cnblogs.com/cslunatic/p/3655970.html

相关推荐

鱼鳔是用来干嘛的?
365bet手机

鱼鳔是用来干嘛的?

📅 11-12 👁️ 9476
劳动模范
365bet亚洲版官网

劳动模范

📅 08-11 👁️ 9647
我的世界全怪物图鉴 我的世界全怪物掉落物品一览
365bet亚洲版官网

我的世界全怪物图鉴 我的世界全怪物掉落物品一览

📅 10-03 👁️ 7676