cpufreq学习笔记

20.11.26 cpufreq driver数据结构

//路径linux5.4/include/linux/cpufreq.h
/*
 * Callback table describing a cpufreq driver. As the accompanying notes put
 * it, the cpufreq core calls into the driver through these hooks; most of
 * them take the struct cpufreq_policy they operate on, a few take a CPU
 * number instead.
 */
struct cpufreq_driver {
	char		name[CPUFREQ_NAME_LEN];
	u8		flags;
	void		*driver_data;

	/* needed by all drivers */
	/*
	 * init:   driver entry point; per the notes it is called by the core
	 *         after a CPU device is added and sets up the policy.
	 * verify: validates a newly requested policy and corrects invalid
	 *         settings.
	 */
	int		(*init)(struct cpufreq_policy *policy);
	int		(*verify)(struct cpufreq_policy *policy);

	/* define one out of two */
	/*
	 * setpolicy: for CPUs that can scale frequency on their own; tells
	 * the CPU the allowed frequency range (per the notes).
	 */
	int		(*setpolicy)(struct cpufreq_policy *policy);

	/*
	 * On failure, should always restore frequency to policy->restore_freq
	 * (i.e. old freq).
	 */
	int		(*target)(struct cpufreq_policy *policy,
				  unsigned int target_freq,
				  unsigned int relation);	/* Deprecated */
	/*
	 * target_index: for CPUs that cannot scale on their own; 'index'
	 * selects an entry of the frequency table (per the notes).
	 */
	int		(*target_index)(struct cpufreq_policy *policy,
					unsigned int index);
	unsigned int	(*fast_switch)(struct cpufreq_policy *policy,
				       unsigned int target_freq);
	unsigned int	(*resolve_freq)(struct cpufreq_policy *policy,
					unsigned int target_freq);

	unsigned int	(*get_intermediate)(struct cpufreq_policy *policy,
					    unsigned int index);
	int		(*target_intermediate)(struct cpufreq_policy *policy,
					       unsigned int index);

	/* should be defined, if possible */
	/* get: returns the frequency of the given CPU (per the notes) */
	unsigned int	(*get)(unsigned int cpu);

	/* Called to update policy limits on firmware notifications. */
	void		(*update_limits)(unsigned int cpu);

	/* optional */
	int		(*bios_limit)(int cpu, unsigned int *limit);

	int		(*online)(struct cpufreq_policy *policy);
	int		(*offline)(struct cpufreq_policy *policy);
	/* exit: counterpart of init, called when the CPU device is removed */
	int		(*exit)(struct cpufreq_policy *policy);
	/* stop_cpu: called when the CPU is stopped */
	void		(*stop_cpu)(struct cpufreq_policy *policy);
	/*
	 * suspend/resume: per the notes, suspend puts the CPU at a fixed
	 * frequency before the clock and regulator drivers are suspended;
	 * resume sets the running frequency again after the system resumes.
	 */
	int		(*suspend)(struct cpufreq_policy *policy);
	int		(*resume)(struct cpufreq_policy *policy);

	/* Will be called after the driver is fully initialized */
	void		(*ready)(struct cpufreq_policy *policy);

	struct freq_attr **attr;

	/* platform specific boost support code */
	bool		boost_enabled;
	int		(*set_boost)(int state);
};
  1. init回调函数是cpufreq driver的入口,由cpufreq core在CPU device添加之后调用,其主要功能就是初始化policy数据结构(把它想象成cpufreq device数据结构)。它是一个per-CPU初始化函数指针,每当一个新的CPU被注册进系统的时候,该函数就被调用,该函数接受一个cpufreq_policy的指针参数。
  2. verify回调函数用于对用户的cpufreq策略设置进行有效性验证和数据修正。每当用户设定一个新策略时,该函数根据老的策略和新的策略,检验新策略设置的有效性并对无效设置进行必要的修正。
  3. setpolicy回调函数对于可以自动调频的CPU,driver需要提供该接口,通过该接口,将调频范围告知CPU。
  4. target_index回调函数对于不可以自动调频的CPU,该接口用于指定CPU的运行频率。index表示frequency table中的index。driver需要通过index,将频率值取出,通过clock framework提供的API,将CPU的频率设置为对应的值。同时,driver可以调用OPP interface,获取该频率对应的电压值,通过regulator framework提供的API,将CPU的电压设置为对应的值。
  5. get回调函数用于获取指定cpu的频率值。
  6. exit回调函数和init相对应,在cpu device被remove时调用。
  7. stop_cpu,在cpu被stop时调用。
  8. suspend在系统挂起时,在clock和regulator driver被suspend之前,将cpu频率设置为一个确定的值。这一设置也可以通过policy中的suspend_freq设置。
  9. resume与suspend相对应,在系统恢复后,设置cpu的运行频率。注意:policy中的restore_freq保存的是调频之前的旧频率,用于target调频失败时回退(见上面结构体中target回调上方的注释),并不是专门为resume准备的。
    (其他的后面慢慢补充)

这些回调函数的入口参数均为cpufreq_policy的结构体指针。cpufreq_policy:
cpufreq_policy数据结构源码
点击链接查看,这个结构体太庞大了。
重点理解cpuinfo、freq_table、governor这几个成员。
cpuinfo是一个cpufreq_cpuinfo类型的数据结构,该数据结构描述了cpu的最大频率、最小频率和频率转换延时(ns),其源码如下代码段所示:

//代码路径 kernel5.4/include/linux/cpufreq.h
/* Hardware frequency limits of a CPU, stored in the policy's cpuinfo member. */
struct cpufreq_cpuinfo {
	unsigned int		max_freq;	/* maximum CPU frequency */
	unsigned int		min_freq;	/* minimum CPU frequency */
	unsigned int		transition_latency;	/* frequency switch latency, in ns (per the notes) */
};

freq_table是一个cpufreq_frequency_table结构体指针,该结构体源码如下代码段所示:

/*
 * One entry of a CPU frequency table; the policy's freq_table member points
 * to entries of this type.
 */
struct cpufreq_frequency_table {
	unsigned int	flags;		/* NOTE(review): flag semantics not shown here - confirm against the kernel header */
	unsigned int	driver_data;	/* presumably driver-private data for this entry - TODO confirm */
	unsigned int	frequency; /* frequency value of this entry */
};

governor是一个cpufreq_governor结构体指针,该结构体源码如下代码段所示:

/*
 * Describes a cpufreq governor: its name plus the callbacks invoked on a
 * policy. The policy's governor member points to one of these.
 */
struct cpufreq_governor {
	char	name[CPUFREQ_NAME_LEN];
	int	(*init)(struct cpufreq_policy *policy);
	void	(*exit)(struct cpufreq_policy *policy);
	int	(*start)(struct cpufreq_policy *policy);
	void	(*stop)(struct cpufreq_policy *policy);
	void	(*limits)(struct cpufreq_policy *policy);
	ssize_t	(*show_setspeed)	(struct cpufreq_policy *policy,
					 char *buf);
	int	(*store_setspeed)	(struct cpufreq_policy *policy,
					 unsigned int freq);// show/store let user space read or modify the frequency value
	/* For governors which change frequency dynamically by themselves */
	bool			dynamic_switching;
	struct list_head	governor_list;// links this governor into a global governor list (cpufreq_governor_list)
	struct module		*owner;
};

我们来看一下cpufreq_driver在drivers/cpufreq/cpufreq-dt.c中的应用:

/*
 * The cpufreq_driver instance defined by drivers/cpufreq/cpufreq-dt.c.
 * It fills in .target_index (not .setpolicy), so frequencies are selected
 * by index; callbacks left out of this initializer stay NULL.
 */
static struct cpufreq_driver dt_cpufreq_driver = {
	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK |
		 CPUFREQ_IS_COOLING_DEV,
	.verify = cpufreq_generic_frequency_table_verify,
	.target_index = set_target,
	.get = cpufreq_generic_get,
	.init = cpufreq_init,
	.exit = cpufreq_exit,
	.online = cpufreq_online,
	.offline = cpufreq_offline,
	.name = "cpufreq-dt",
	.attr = cpufreq_dt_attr,
	.suspend = cpufreq_generic_suspend,
};

20.12.01 cpufreq.c源代码学习

1.cpufreq.c中通过core_initcall(cpufreq_core_init)来进行cpufreq core的初始化。
初始化的过程就是添加cpufreq子系统的过程。源代码如下:

//代码路径 kernel5.4/drivers/cpufreq/cpufreq.c
/*
 * Initializes the cpufreq core; per the notes it is registered with
 * core_initcall(). Returns -ENODEV when cpufreq is disabled, 0 otherwise.
 */
static int __init cpufreq_core_init(void)
{
	/* Nothing to set up when cpufreq has been disabled. */
	if (cpufreq_disabled())
		return -ENODEV;

	/* Create the "cpufreq" kobject with the cpu subsystem's root device kobject as parent. */
	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
	/* A NULL result from the creation call is treated as fatal. */
	BUG_ON(!cpufreq_global_kobject);

	return 0;
}

其中kobject_create_and_add请参考另一篇博文–>由kobject_create_and_add全面了解kobject
在这里,第一个参数是字符串"cpufreq",即要创建的kobject的名称;第二个参数是父kobject,这里传入的是cpu子系统根设备(cpu_subsys.dev_root)的kobj,新建的cpufreq kobject会挂在它下面。
2.下面我们来了解一下cpu_subsys数据结构。在cpu.h中可以看到cpu_subsys的变量声明:

extern struct bus_type cpu_subsys;

可以看出它是一个bus_type类型的数据结构。在device.h中可以看到bus_type的定义:

//代码路径 kernel5.4/include/linux/device.h
 /* Describes a bus type (cpu_subsys is declared as one): identity, attribute groups and per-device callbacks. */
 struct bus_type {
	const char		*name;// bus name
	const char		*dev_name;// used by the subsystem to enumerate devices
	struct device		*dev_root;// default device, used as the parent device
	const struct attribute_group **bus_groups;// attribute groups of the bus
	const struct attribute_group **dev_groups;// attribute groups of the devices on the bus
	const struct attribute_group **drv_groups;// attribute groups of the drivers on the bus

	int (*match)(struct device *dev, struct device_driver *drv);
	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
	int (*probe)(struct device *dev);
	int (*remove)(struct device *dev);
	void (*shutdown)(struct device *dev);

	int (*online)(struct device *dev);
	int (*offline)(struct device *dev);

	int (*suspend)(struct device *dev, pm_message_t state);
	int (*resume)(struct device *dev);

	int (*num_vf)(struct device *dev);

	int (*dma_configure)(struct device *dev);

	const struct dev_pm_ops *pm;

	const struct iommu_ops *iommu_ops;

	struct subsys_private *p;
	struct lock_class_key lock_key;

	bool need_parent_lock;
};

注意到dev_root变量是一个指向device类型数据结构的指针。

20.12.01 cpufreq governor的理解

1.cpufreq governor提供调频策略。cpufreq存在两种调频思路:①在[min,max]范围内自动调节;②在[min, max]范围内指定几组OPP,按需选择合适的frequency。
下面是源码文档给出的这两种思路:

Basically, it's the following flow graph:

CPU can be set to switch independently  |  CPU can only be set
      within specific "limits"          |  to specific frequencies

                                "CPUfreq policy"
                consists of frequency limits (policy->{min,max})
                         and CPUfreq governor to be used
                                  /     |     \
                                 /      |      \
                                /       |  the cpufreq governor decides
                               /        |  (dynamically or statically)
                              /         |  what target_freq to set within
                             /          |  the limits of policy->{min,max}
                            /           |              \
                           /            |               \
        Using the ->setpolicy call,     |  Using the ->target/target_index/fast_switch call,
            the limits and the          |  the frequency closest
             "policy" is set.           |  to target_freq is set.
                                        |  It is assured that it
                                        |  is within policy->{min,max}

2.几种governor介绍
Schedutil
这种governor是为了更好地配合kernel的调度器(scheduler)而生的。通过调度器的PELT(Per-Entity Load Tracking)机制获得负载预估(load estimation),这种机制同时也可以提供最近一次负载信息。这个governor现在只适用于CFS管理的任务(基于DVFS)。这一部分的代码放在kernel/sched/目录下。

Performance
频率越高,性能当然越好。performance就是一种优先考虑性能的governor,它将CPU频率静态地设置为调频范围内的最高频。

Powersave
顾名思义,这种governor是想节省功耗,因此将CPU频率静态地设置为调频范围内的最低频。

Userspace
用户空间可通过scaling_setspeed设置指定的频率。

Ondemand
顾名思义,按需的。这种governor是根据系统当前负载来调频的。通过调度器的update_util_data->func hook来触发负载预估;当负载预估被触发后,cpufreq检查最后一个时间段的cpu使用率(CPU-usage)来调频,触发后立即响应,反应比较快。

Conservative
类似Ondemand,不过频率调节得会平滑一些,不会忽然调整为最大值,又忽然调整为最小值。

参考资料:kernel cpufreq governor文档