Skip to content

Commit

Permalink
Merge pull request #486 from zhangzihengya/develop
Browse files Browse the repository at this point in the history
迭代sleep的eBPF程序,完成进程on_cpu和off_cpu的时间信息采集
  • Loading branch information
chenamy2017 authored Aug 2, 2023
2 parents 7f0ea5b + 237b651 commit a7a9fbf
Show file tree
Hide file tree
Showing 13 changed files with 136,562 additions and 60 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/eBPF_proc_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ jobs:
sudo make proc_image
sudo ./proc_image -t 1
- name: Run proc_offcpu_time
run: |
cd eBPF_Supermarket/eBPF_proc_image/proc_offcpu_time
sudo make
sudo ./proc_offcpu_time -t 1
- name: Run mutex_image
run: |
cd eBPF_Supermarket/eBPF_proc_image
Expand Down
26 changes: 20 additions & 6 deletions eBPF_Supermarket/eBPF_proc_image/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,25 @@

proc_image便是Linux进程生命周期画像工具,该工具由多个子功能组成。

### 1. 进程的off_CPU_time
### 1. 进程上下CPU时间统计

目前该功能可由proc_image工具的-p参数去实现,需指定进程pid,便可以采集到该进程处于off_CPU的时间。该功能已经和加入sleep逻辑的用户态程序(./test/test_sleep.c)进行了时间上的比对,准确性满足要求。示例如下:
目前该功能可由proc_image工具的-p参数去实现,需指定进程pid,便可以采集到该进程在生命周期中上下CPU的时间信息。该功能已经和top进行了时间上的比对,准确性满足要求。示例如下:

在top页面按下“d”,可以看到top默认为每3秒更新一次:

<div align='center'><img src="./docs/images/top_delay.png"></div>

运行eBPF程序跟踪top进程,执行指令 sudo ./proc_image -p 5523,运行结果:

<div align='center'><img src="./docs/images/proc_cpu.png"></div>

结合top进程每3秒更新一次,从运行结果中可以看出该eBPF程序已经成功捕获到top进程上下cpu的时间信息。

在此基础上,通过该工具的-p和-C参数,能捕获到每个CPU所对应0号进程的上下cpu时间信息,进而也可以体现出0号进程所对应的CPU繁忙程度。

## 三、proc_offcpu_time工具

该工具可通过-p参数指定进程的pid,便可以采集到该进程处于off_CPU的时间。该功能已经和加入sleep逻辑的用户态程序(./test/test_sleep.c)进行了时间上的比对,准确性满足要求。示例如下:

终端1:./test_sleep

Expand Down Expand Up @@ -40,11 +56,9 @@ sleep结束时间:2023-07-24 16:58:31
pid:9063 comm:test_sleep offcpu_id:3 offcpu_time:5963882827916 oncpu_id:3 oncpu_time:5966883001411 sleeptime:3.000173
```

目前由于统计到的数据量过少,可视化的意义并不大,所以准备在下一步的迭代过程中进行可视化。

下一步迭代计划:在统计到的事件里加入int型的flag字段,1代表on_CPU,0代表off_CPU,即可获得进程生命周期的动态数据,在宏观上实现进程的生命周期,以便后期加入更多的进程信息。
目前该工具的功能已经合入proc_image。

## 三、mutex_image 工具
## 四、mutex_image 工具

mutex_image 工具目前只能完成下图情形1的进程互斥锁画像,后期会继续迭代。

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
198 changes: 158 additions & 40 deletions eBPF_Supermarket/eBPF_proc_image/proc_image.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,57 +25,175 @@
/* License string placed in the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

/*
 * Filter parameters. Both default to 0; the user-space loader overwrites them
 * through the skeleton's rodata before the program is loaded.
 */
const volatile pid_t target_pid = 0;
const volatile int target_cpu_id = 0;

/*
 * oncpu: start of the target's current on-CPU interval (CPU id + timestamp),
 * keyed by {pid, cpu_id}. Only one target is traced, hence a single entry.
 */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1);
    __type(key, struct proc_id);
    __type(value, struct proc_oncpu);
} oncpu SEC(".maps");

/*
 * offcpu: start of the target's current off-CPU interval (CPU id + timestamp),
 * keyed the same way as oncpu.
 */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1);
    __type(key, struct proc_id);
    __type(value, struct proc_offcpu);
} offcpu SEC(".maps");

/* Ring buffer carrying struct cpu_event records to user space. */
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries,256 * 1024);
} cpu_rb SEC(".maps");

/*
 * kprobe on finish_task_switch(): `prev` is taken from the probed function's
 * first argument and the task being switched in is read with
 * bpf_get_current_task().
 *
 * For the task selected by target_pid (and, when target_pid is 0, only on CPU
 * target_cpu_id) this tracks on-CPU and off-CPU intervals and emits one
 * cpu_event per completed interval:
 *   flag == 0: an off-CPU interval ended (the target was switched in);
 *   flag == 1: an on-CPU interval ended (the target was switched out, or
 *              appears as both prev and next).
 *
 * Always returns 0.
 */
SEC("kprobe/finish_task_switch.isra.0")
int kprobe__finish_task_switch(struct pt_regs *ctx)
{
    struct task_struct *prev = (struct task_struct *)PT_REGS_PARM1(ctx);
    struct task_struct *next = (struct task_struct *)bpf_get_current_task();
    pid_t tpid = target_pid;
    int tcpu = target_cpu_id;
    pid_t prev_pid = BPF_CORE_READ(prev,pid);
    pid_t next_pid = BPF_CORE_READ(next,pid);
    int cpu_id = bpf_get_smp_processor_id();
    int prev_is_target = (prev_pid == tpid);
    int next_is_target = (next_pid == tpid);
    struct proc_id proc_id = {};
    struct cpu_event *cpu_event;
    u64 now;

    /* This switch does not involve the target at all. */
    if (!prev_is_target && !next_is_target)
        return 0;

    /* pid 0 is selected per CPU: only follow it on the requested CPU. */
    if (tpid == 0 && cpu_id != tcpu)
        return 0;

    now = bpf_ktime_get_ns();
    proc_id.pid = tpid;
    proc_id.cpu_id = tcpu;

    if (!prev_is_target) {
        /* Case 1: the target goes from off-CPU to on-CPU. */
        struct proc_offcpu *proc_offcpu;
        struct proc_oncpu proc_oncpu = {};

        proc_offcpu = bpf_map_lookup_elem(&offcpu, &proc_id);
        if (proc_offcpu) {
            /* Report the off-CPU interval that just ended. */
            cpu_event = bpf_ringbuf_reserve(&cpu_rb, sizeof(*cpu_event), 0);
            if (cpu_event) {
                cpu_event->pid = tpid;
                cpu_event->flag = 0;
                /* The current task is the target here, so its comm is the right one. */
                bpf_get_current_comm(&cpu_event->comm, sizeof(cpu_event->comm));
                cpu_event->oncpu_id = cpu_id;
                cpu_event->oncpu_time = now;
                cpu_event->offcpu_id = proc_offcpu->offcpu_id;
                cpu_event->offcpu_time = proc_offcpu->offcpu_time;

                bpf_ringbuf_submit(cpu_event, 0);
            }
            /*
             * A full ring buffer only drops this one event; the state below
             * is still updated so the next interval is measured correctly.
             */
            bpf_map_delete_elem(&offcpu, &proc_id);
        }

        /* Remember when and where the new on-CPU interval started. */
        proc_oncpu.oncpu_id = cpu_id;
        proc_oncpu.oncpu_time = now;
        bpf_map_update_elem(&oncpu, &proc_id, &proc_oncpu, BPF_ANY);

        return 0;
    }

    /*
     * Cases 2 and 3: the target was prev, so an on-CPU interval ends now.
     * Case 2: another task is next  -> an off-CPU interval starts.
     * Case 3: the target is also next -> a new on-CPU interval starts.
     */
    {
        struct proc_oncpu *proc_oncpu;

        proc_oncpu = bpf_map_lookup_elem(&oncpu, &proc_id);
        if (proc_oncpu) {
            /* Report the on-CPU interval that just ended. */
            cpu_event = bpf_ringbuf_reserve(&cpu_rb, sizeof(*cpu_event), 0);
            if (cpu_event) {
                /*
                 * comm must come from prev (the current task may be a
                 * different process); each byte is read once and copying
                 * stops after the terminating NUL.
                 */
                for (int i = 0; i < TASK_COMM_LEN; i++) {
                    char c = BPF_CORE_READ(prev,comm[i]);

                    cpu_event->comm[i] = c;
                    if (c == '\0')
                        break;
                }

                cpu_event->pid = tpid;
                cpu_event->flag = 1;
                cpu_event->oncpu_id = proc_oncpu->oncpu_id;
                cpu_event->oncpu_time = proc_oncpu->oncpu_time;
                cpu_event->offcpu_id = cpu_id;
                cpu_event->offcpu_time = now;

                bpf_ringbuf_submit(cpu_event, 0);
            }
            bpf_map_delete_elem(&oncpu, &proc_id);
        }
    }

    if (next_is_target) {
        /* Case 3: start a fresh on-CPU interval. */
        struct proc_oncpu proc_re_oncpu = {};

        proc_re_oncpu.oncpu_id = cpu_id;
        proc_re_oncpu.oncpu_time = now;
        bpf_map_update_elem(&oncpu, &proc_id, &proc_re_oncpu, BPF_ANY);
    } else {
        /* Case 2: start an off-CPU interval, stored with the map's own value type. */
        struct proc_offcpu proc_offcpu = {};

        proc_offcpu.offcpu_id = cpu_id;
        proc_offcpu.offcpu_time = now;
        bpf_map_update_elem(&offcpu, &proc_id, &proc_offcpu, BPF_ANY);
    }

    return 0;
}
35 changes: 27 additions & 8 deletions eBPF_Supermarket/eBPF_proc_image/proc_image.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,42 @@

/* Exit flag. NOTE(review): presumably set by sig_handler and polled by the main loop; neither is visible in this view. */
static volatile bool exiting = false;
/* Value of -p; copied into the BPF program's rodata before it is loaded. */
static int target_pid = 0;
/* Value of -C; copied into the BPF program's rodata before it is loaded. */
static int target_cpu_id = 0;

/* NOTE(review): presumably handed to argp as the program description; the use site is not visible here. */
const char argp_program_doc[] ="Trace process to get process image.\n";

/*
 * Command-line options understood by the tool:
 *   -p / --pid    process to trace,
 *   -C / --cpuid  CPU to follow when tracing process 0,
 *   -t / --time   maximum running time in seconds.
 * The all-zero entry terminates the table.
 */
static const struct argp_option opts[] = {
    {
        .name  = "pid",
        .key   = 'p',
        .arg   = "PID",
        .flags = 0,
        .doc   = "Process ID to trace",
    },
    {
        .name  = "cpuid",
        .key   = 'C',
        .arg   = "CPUID",
        .flags = 0,
        .doc   = "Set For Tracing Process 0(other processes don't need to set this parameter)",
    },
    {
        .name  = "time",
        .key   = 't',
        .arg   = "TIME-SEC",
        .flags = 0,
        .doc   = "Max Running Time(0 for infinite)",
    },
    { 0 },
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
long pid;
long cpu_id;
long time;
switch (key) {
case 'p':
errno = 0;
pid = strtol(arg, NULL, 10);
if (errno || pid <= 0) {
if (errno || pid < 0) {
warn("Invalid PID: %s\n", arg);
// 调用argp_usage函数,用于打印用法信息并退出程序
argp_usage(state);
}
target_pid = pid;
break;
case 't':
case 'C':
cpu_id = strtol(arg, NULL, 10);
if(cpu_id < 0){
warn("Invalid CPUID: %s\n", arg);
argp_usage(state);
}
target_cpu_id = cpu_id;
break;
case 't':
time = strtol(arg, NULL, 10);
if(time) alarm(time);
break;
Expand All @@ -72,12 +83,19 @@ static void sig_handler(int sig)

/*
 * Ring-buffer callback: print one cpu_event record.
 *
 * flag == 1: an on-CPU interval; the duration is offcpu_time - oncpu_time.
 * flag == 0: an off-CPU interval; the duration is oncpu_time - offcpu_time.
 * Any other flag value prints nothing. Timestamps are in nanoseconds and the
 * duration is printed in seconds.
 *
 * ctx and data_sz are unused. Always returns 0.
 */
static int handle_event(void *ctx, void *data,unsigned long data_sz)
{
    const struct cpu_event *e = data;
    double seconds;

    /*
     * The duration is computed inside each branch only: the two timestamps
     * are unsigned, so subtracting them in the wrong order would wrap.
     */
    if(e->flag == 1){
        seconds = (e->offcpu_time - e->oncpu_time)*1.0/1000000000.0;
        printf("flag:%d pid:%d comm:%-16s oncpu_id :%d oncpu_time :%llu offcpu_id:%d offcpu_time:%llu time:%lf\n",
               e->flag,e->pid,e->comm,e->oncpu_id,e->oncpu_time,e->offcpu_id,e->offcpu_time,seconds);
    }else if(e->flag == 0){
        seconds = (e->oncpu_time - e->offcpu_time)*1.0/1000000000.0;
        printf("flag:%d pid:%d comm:%-16s offcpu_id:%d offcpu_time:%llu oncpu_id :%d oncpu_time :%llu time:%lf\n",
               e->flag,e->pid,e->comm,e->offcpu_id,e->offcpu_time,e->oncpu_id,e->oncpu_time,seconds);
    }

    return 0;
}

Expand Down Expand Up @@ -123,6 +141,7 @@ int main(int argc, char **argv)
}

skel->rodata->target_pid = target_pid;
skel->rodata->target_cpu_id = target_cpu_id;

/* 加载并验证BPF程序 */
err = proc_image_bpf__load(skel);
Expand All @@ -140,7 +159,7 @@ int main(int argc, char **argv)

/* 设置环形缓冲区轮询 */
//ring_buffer__new() API,允许在不使用额外选项数据结构下指定回调
rb = ring_buffer__new(bpf_map__fd(skel->maps.sleep_rb), handle_event, NULL, NULL);
rb = ring_buffer__new(bpf_map__fd(skel->maps.cpu_rb), handle_event, NULL, NULL);
if (!rb) {
err = -1;
fprintf(stderr, "Failed to create ring buffer\n");
Expand Down
23 changes: 17 additions & 6 deletions eBPF_Supermarket/eBPF_proc_image/proc_image.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,32 @@

#define TASK_COMM_LEN 16

/*
 * Map key identifying the traced task. cpu_id is part of the key so that
 * process 0, which exists once per CPU, can be profiled for one chosen CPU.
 */
struct proc_id{
    int pid;
    int cpu_id;
};

/* Start of an on-CPU interval: which CPU, and when (timestamp in ns). */
struct proc_oncpu {
    int pad;                        /* never assigned explicitly; zeroed by the `= {}` initialisers */
    int oncpu_id;                   /* CPU the task was switched in on */
    unsigned long long oncpu_time;  /* timestamp taken when it was switched in */
};

/* Start of an off-CPU interval: which CPU, and when (timestamp in ns). */
struct proc_offcpu {
    int pad;                         /* never assigned explicitly; zeroed by the `= {}` initialisers */
    int offcpu_id;                   /* CPU the task was switched out on */
    unsigned long long offcpu_time;  /* timestamp taken when it was switched out */
};

/*
 * Record sent through the ring buffer for one completed interval.
 *
 * flag == 1: on-CPU interval, running from oncpu_time to offcpu_time.
 * flag == 0: off-CPU interval, running from offcpu_time to oncpu_time.
 * Timestamps are in nanoseconds.
 */
struct cpu_event{
    int flag;
    int pid;
    char comm[TASK_COMM_LEN];
    int oncpu_id;
    long long unsigned int oncpu_time;
    int offcpu_id;
    long long unsigned int offcpu_time;
};



#endif /* __PROC_IMAGE_H */
Loading

0 comments on commit a7a9fbf

Please sign in to comment.