在描述idr之前,需要读者对linux 内核的radix-tree有一定的了解。关于radix-tree(基数树)的描述,作者在之前有专门的文章对其进行描述,这里不再赘述。
A common problem to solve is allocating identifiers (IDs); generally small numbers which identify a thing. Examples include file descriptors(文件描述符), process IDs(进程ID), packet identifiers in networking protocols, SCSI tags and device instance numbers.
The IDR and the IDA provide a reasonable solution to the problem to avoid everybody inventing(发明/创造) their own.
The IDR provides the ability to map an ID to a pointer, while the IDA provides only ID allocation, and as a result is much more memory-efficient(存储效率).
idr的内部实现在很多地方与radix-tree的API高度耦合。idr和ida都是在[start,end)范围内分配id;根据radix-tree中已有id的占用情况,最终分配出来的id不一定等于start,而是满足 start <= id < end。
1、数据结构
/* IDR handle: a radix tree root plus one counter. */
struct idr {
/* Radix tree that backs the IDR. */
struct radix_tree_root idr_rt;
/* Set to 0 by idr_init(); presumably a "next id" hint (e.g. for cyclic allocation) - TODO confirm, it is not used by the code shown here. */
unsigned int idr_next;
};
/**
* struct radix_tree_iter - radix tree iterator state
*
* @index: index of current slot
* @next_index: one beyond the last index for this chunk
* @tags: bit-mask for tag-iterating
* @node: node that contains current slot
* @shift: shift for the node that holds our slots
*
* This radix tree iterator works in terms of "chunks" of slots. A chunk is a
* subinterval of slots contained within one radix tree leaf node. It is
* described by a pointer to its first slot and a struct radix_tree_iter
* which holds the chunk's position in the tree and its size. For tagged
* iteration radix_tree_iter also holds the slots' bit-mask for one chosen
* radix tree tag.
*/
struct radix_tree_iter {
unsigned long index;
unsigned long next_index;
unsigned long tags;
struct radix_tree_node *node;
/* @shift only exists when multi-order entries are configured. */
#ifdef CONFIG_RADIX_TREE_MULTIORDER
unsigned int shift;
#endif
};
/*
* The IDR API does not expose the tagging functionality of the radix tree
* to users. Use tag 0 to track whether a node has free space below it.
*/
/*
* For a node, bit `offset` of node->tags[IDR_FREE] means:
*   set   - there is still free space under node->slots[offset];
*   clear - nothing is free under node->slots[offset].
* The bit a node occupies in its parent is cleared only once every IDR_FREE
* bit of that node is clear, i.e. once the whole subtree is in use.
*/
#define IDR_FREE 0
/* Set the IDR flag and the IDR_FREE tag */
/* i.e. mark the radix tree root as being used as an IDR and as having free space. */
#define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT)) // NOTE(review): the original annotation reads "3<<25", which assumes __GFP_BITS_SHIFT == 25 - confirm for your kernel config
2、接口说明
2.1、idr初始化
/* DEFINE_IDR(name): define a struct idr called @name, statically initialised with IDR_INIT. */
#define DEFINE_IDR(name) struct idr name = IDR_INIT
/* IDR_INIT: initialiser that builds the radix tree root with IDR_RT_MARKER; members not named here get default (zero) initialisation. */
#define IDR_INIT \
{ \
.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER) \
}
/*
 * idr_init - run-time initialisation of an already declared idr.
 * @idr: idr handle to set up
 *
 * Resets the idr_next counter to 0 and initialises the embedded radix tree
 * root with IDR_RT_MARKER (IDR flag plus the IDR_FREE tag).
 */
static inline void idr_init(struct idr *idr)
{
	idr->idr_next = 0;
	/* Mark the tree as an IDR tree that currently has free space. */
	INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
}
2.2、idr分配id,idr分配满足范围[start,end)的index,同时存储index关联的ptr到radix-tree中
/**
 * idr_alloc - allocate an id
 * @idr: idr handle
 * @ptr: pointer to be associated with the new id
 * @start: the minimum id (inclusive)
 * @end: the maximum id (exclusive)
 * @gfp: memory allocation flags
 *
 * Allocates an unused ID in the range [start, end) and stores @ptr under it.
 * A negative @start is rejected with -EINVAL (and a one-time warning).
 * Any non-zero result of idr_alloc_cmn() is returned unchanged; -ENOSPC
 * means there are no unused IDs in the requested range.
 *
 * Note that @end is treated as max when <= 0. This is to always allow
 * using @start + N as @end as long as N is inside integer range.
 *
 * Simultaneous modifications to the @idr are not allowed and should be
 * prevented by the user, usually with a lock. idr_alloc() may be called
 * concurrently with read-only accesses to the @idr, such as idr_find() and
 * idr_for_each_entry().
 *
 * Returns: the newly allocated id on success, a negative errno otherwise.
 */
static inline int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
{
	unsigned long new_id;
	int err;

	if (WARN_ON_ONCE(start < 0))
		return -EINVAL;

	/* Look for a free index in the requested range of the radix tree. */
	err = idr_alloc_cmn(idr, ptr, &new_id, start, end, gfp, false);

	return err ? err : (int)new_id;
}
/*
 * idr_alloc_cmn - common back end of the id allocators.
 * @idr: idr handle
 * @ptr: pointer to store under the new id
 * @index: if non-NULL, receives the allocated id
 * @start: lowest acceptable id
 * @end: upper bound, interpreted by idr_get_free() or idr_get_free_ext()
 * @gfp: memory allocation flags
 * @ext: true selects idr_get_free_ext(), false selects idr_get_free()
 *
 * Returns 0 on success, -EINVAL if @ptr looks like an internal radix tree
 * node, or the error encoded in the slot pointer by the free-slot search.
 */
int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index,unsigned long start, unsigned long end, gfp_t gfp,bool ext)
{
	struct radix_tree_root *rt = &idr->idr_rt;
	struct radix_tree_iter cursor;
	void __rcu **free_slot;

	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
		return -EINVAL;

	/* Seed the iterator so that the search begins at @start. */
	radix_tree_iter_init(&cursor, start);

	/* Find a free slot within the requested range. */
	free_slot = ext ? idr_get_free_ext(rt, &cursor, gfp, end)
			: idr_get_free(rt, &cursor, gfp, end);
	if (IS_ERR(free_slot))
		return PTR_ERR(free_slot);

	/* Store @ptr in the slot that was found. */
	radix_tree_iter_replace(rt, &cursor, free_slot, ptr);

	/*
	 * The slot is no longer free: clear its IDR_FREE bit.  With a node
	 * this walks upwards, clearing a parent's bit only when the child has
	 * no IDR_FREE bit left; without a node the root's tag is cleared.
	 */
	radix_tree_iter_tag_clear(rt, &cursor, IDR_FREE);

	if (index != NULL)
		*index = cursor.index;	/* hand back the allocated id */
	return 0;
}
/**
 * radix_tree_iter_init - initialize radix tree iterator
 *
 * @iter: pointer to iterator state
 * @start: iteration starting index
 * Returns: NULL
 */
static __always_inline void __rcu ** radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
{
	/*
	 * iter->tags is deliberately left untouched here:
	 * radix_tree_next_chunk() fills it in after a successful tagged chunk
	 * lookup, and nobody reads it after a failed or non-tagged lookup.
	 */

	/* The search starts from next_index. */
	iter->next_index = start;

	/*
	 * A zero index bypasses the next_index overflow protection; see the
	 * comment in radix_tree_next_chunk() for details.
	 */
	iter->index = 0;

	return NULL;
}
/*
 * idr_get_free - find a free slot for the int-based allocator.
 * @end is exclusive; a value <= 0 means "no limit", which is mapped to an
 * inclusive maximum of INT_MAX.  Otherwise the inclusive maximum is end - 1.
 */
static inline void __rcu **idr_get_free(struct radix_tree_root *root,struct radix_tree_iter *iter,gfp_t gfp,int end)
{
	unsigned long last;

	if (end > 0)
		last = end - 1;
	else
		last = INT_MAX;

	return idr_get_free_cmn(root, iter, gfp, last);
}
/*
 * idr_get_free_ext - find a free slot for the unsigned long allocator.
 * Unlike idr_get_free() there is no "end > 0" test: the inclusive maximum
 * is always end - 1, so end == 0 wraps around to ULONG_MAX.
 */
static inline void __rcu **idr_get_free_ext(struct radix_tree_root *root,struct radix_tree_iter *iter,gfp_t gfp,unsigned long end)
{
	unsigned long last = end - 1;

	return idr_get_free_cmn(root, iter, gfp, last);
}
/*
 * idr_get_free_cmn - locate a slot whose IDR_FREE tag is set.
 * @root: radix tree root of the idr
 * @iter: iterator; iter->next_index is the first index to try, and on
 *        success index, next_index, node, shift and tags are filled in
 * @gfp: allocation flags handed to radix_tree_extend() and radix_tree_node_alloc()
 * @max: highest index that may be returned (inclusive)
 *
 * Returns the address of the slot for the chosen index, ERR_PTR(-ENOSPC)
 * when the search moves past @max, ERR_PTR(-ENOMEM) when a node cannot be
 * allocated, or the error reported by radix_tree_extend().
 */
void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
struct radix_tree_iter *iter, gfp_t gfp,
unsigned long max)
{
struct radix_tree_node *node = NULL, *child;
void __rcu **slot = (void __rcu **)&root->rnode;
unsigned long maxindex, start = iter->next_index;
unsigned int shift, offset = 0;
grow:
shift = radix_tree_load_root(root, &child, &maxindex);
if (!radix_tree_tagged(root, IDR_FREE)) /* root has no IDR_FREE tag: nothing is free in the current tree, so skip past it */
start = max(start, maxindex + 1);
/* NOTE(review): on the very first allocation with start == 0 neither test below fires; assuming radix_tree_load_root() reports shift 0 for an empty tree, the loop is skipped and slot stays &root->rnode - confirm. */
if (start > max)
return ERR_PTR(-ENOSPC);
if (start > maxindex) {
/* start does not fit in the current tree: extend the tree so it can hold larger indices */
int error = radix_tree_extend(root, gfp, start, shift);
if (error < 0)
return ERR_PTR(error);
/* a non-negative result is used as the new root shift */
shift = error;
child = rcu_dereference_raw(root->rnode);
}
while (shift) { /* walk down from the root one level at a time, following IDR_FREE bits */
shift -= RADIX_TREE_MAP_SHIFT;
if (child == NULL) { /* no node exists at this position yet */
/* Have to add a child node. */
child = radix_tree_node_alloc(gfp, node, root, shift,offset, 0, 0);
if (!child)
return ERR_PTR(-ENOMEM);
all_tag_set(child, IDR_FREE); /* a fresh node is entirely free: set every IDR_FREE bit */
rcu_assign_pointer(*slot, node_to_entry(child));
if (node)
node->count++;
} else if (!radix_tree_is_internal_node(child))
break;
node = entry_to_node(child);
/* offset of start within this node; child becomes the entry found at that offset */
offset = radix_tree_descend(node, &child, start);
if (!tag_get(node, IDR_FREE, offset)) { /* nothing free under node->slots[offset] */
/* look for the next set IDR_FREE bit after offset, i.e. another slot of this node that still has room */
offset = radix_tree_find_next_bit(node, IDR_FREE,offset + 1);
/* move start forward to the index that corresponds to the new offset */
start = next_index(start, node, offset);
if (start > max) /* important: the adjusted start may have left the allowed range */
return ERR_PTR(-ENOSPC);
while (offset == RADIX_TREE_MAP_SIZE) { /* no set bit was found in this node: go up and continue with the next slot of the parent */
offset = node->offset + 1; /* position right after this node in its parent */
node = node->parent;
if (!node)
goto grow;
shift = node->shift;
}
child = rcu_dereference_raw(node->slots[offset]);
}
slot = &node->slots[offset];
}
/* Here node is the lowest-level node that holds the chosen index (NULL if the slot is the root pointer), offset is its position in node->slots[] and slot is the address of that entry. */
iter->index = start; /* the index that was chosen */
if (node)
iter->next_index = 1 + min(max, (start | node_maxindex(node))); /* one past the smaller of max and the last index covered by node */
else
iter->next_index = 1;
iter->node = node; /* lowest-level node of the chosen index */
__set_iter_shift(iter, shift);
/* fill in iter->tags for the IDR_FREE tag */
set_iter_tags(iter, node, offset, IDR_FREE);
return slot;
}
/**
 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
 * @root: radix tree root
 * @iter: iterator state
 * @tag: tag to clear
 *
 * Delegates to node_tag_clear() using the iterator's node and the offset
 * computed by iter_offset().
 */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,const struct radix_tree_iter *iter, unsigned int tag)
{
	struct radix_tree_node *leaf = iter->node;
	unsigned int pos = iter_offset(iter);

	/*
	 * With a node, the bit is cleared and the clearing propagates upwards
	 * for as long as a node ends up with no bit of @tag set; without a
	 * node only the root's tag is cleared.
	 */
	node_tag_clear(root, leaf, tag, pos);
}
/*
node!=NULL情况下,递归清除node->tags[tag] offset位,如果node->tags[tag][1]为0即全部node->slots[]都处于清除情况下才继续清除父节点tags[tag] offset位
node==NULL情况下, 清除root->gfp_mask中tag位
*/
static void node_tag_clear(struct radix_tree_root *root,
struct radix_tree_node *node,
unsigned int tag, unsigned int offset)
{
while (node) {
if (!tag_get(node, tag, offset)) /*node->tags[tag] offset未置位*/
return;
tag_clear(node, tag, offset);
if (any_tag_set(node, tag))
return;
/*只有子节点的tags[tag][1]为0即全部slots[]都清除情况下才继续清除父节点tags[tag] offset位*/
offset = node->offset;
node = node->parent;
}
/* clear the root's tag bit */
if (root_tag_get(root, tag))
root_tag_clear(root, tag);
}
2.3、idr搜索id:查询id是否在idr中,如果存在则返回id关联的item
/*
 * idr_find - look up the pointer stored under an int id.
 * @idr: idr handle
 * @id: lookup key; converted to unsigned long for idr_find_ext()
 *
 * Returns whatever idr_find_ext() returns for that key.
 */
static inline void *idr_find(const struct idr *idr, int id)
{
	void *item = idr_find_ext(idr, id);

	return item;
}
/**
 * idr_find_ext - return pointer for given id
 * @idr: idr handle
 * @id: lookup key
 *
 * Return the pointer given the id it has been registered with. A %NULL
 * return indicates that @id is not valid or that %NULL was the pointer
 * registered for it.
 *
 * This function can be called under rcu_read_lock(), given that the leaf
 * pointers lifetimes are correctly managed.
 */
static inline void *idr_find_ext(const struct idr *idr, unsigned long id)
{
	const struct radix_tree_root *rt = &idr->idr_rt;

	return radix_tree_lookup(rt, id);
}
2.4、idr删除id,从idr中删除id
/*
 * idr_remove - remove an int id from the idr.
 * @idr: idr handle
 * @id: id to remove; converted to unsigned long for idr_remove_ext()
 *
 * Returns whatever idr_remove_ext() returns for that key.
 */
static inline void *idr_remove(struct idr *idr, int id)
{
	void *removed = idr_remove_ext(idr, id);

	return removed;
}
/*
 * idr_remove_ext - remove an unsigned long id from the idr.
 * @idr: idr handle
 * @id: id to remove
 *
 * Calls radix_tree_delete_item() on the idr's tree with a NULL item
 * argument and returns its result.
 */
static inline void *idr_remove_ext(struct idr *idr, unsigned long id)
{
	struct radix_tree_root *rt = &idr->idr_rt;

	return radix_tree_delete_item(rt, id, NULL);
}
2.5、idr修改id关联的item值
/**
 * idr_replace - replace pointer for given id
 * @idr: idr handle
 * @ptr: New pointer to associate with the ID
 * @id: Lookup key
 *
 * Replace the pointer registered with an ID and return the old value.
 * This function can be called under the RCU read lock concurrently with
 * idr_alloc() and idr_remove() (as long as the ID being removed is not
 * the one being replaced!).
 *
 * A negative @id is rejected here; every other id is handed to
 * idr_replace_ext().
 *
 * Returns: the old value on success. %-ENOENT indicates that @id was not
 * found. %-EINVAL indicates that @id or @ptr were not valid.
 */
void *idr_replace(struct idr *idr, void *ptr, int id)
{
	if (id >= 0)
		return idr_replace_ext(idr, ptr, id);

	return ERR_PTR(-EINVAL);
}
void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
{
struct radix_tree_node *node;
void __rcu **slot = NULL;
void *entry;
if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
return ERR_PTR(-EINVAL);
/*查找id在radix-tree中对应的最低层的node以及slots[]*/
entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
/*slot为NULL或则id对应的所有层级上node->tags[IDR_FREE] offset都置位(如果id存在旧值那么最底层节点的tags[IDR_FREE] offset位应该被清除)返回-ENOENT*/
if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
return ERR_PTR(-ENOENT);
/*替换操作*/
__radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL);
return entry;
}
2.6、释放idr
/**
* idr_destroy - release all internal memory from an IDR
* @idr: idr handle
*
* After this function is called, the IDR is empty, and may be reused or
* the data structure containing it may be freed.
*
* A typical clean-up sequence for objects stored in an idr tree will use
* idr_for_each() to free all objects, if necessary, then idr_destroy() to
* free the memory used to keep track of those objects.
*/
void idr_destroy(struct idr *idr)
{
struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
if (radix_tree_is_internal_node(node))
radix_tree_free_nodes(node); /*简单粗暴直接删除node下的所有节点*/
idr->idr_rt.rnode = NULL;
root_tag_set(&idr->idr_rt, IDR_FREE); /*标记为free*/
}
2.7、遍历idr中的index
/**
* idr_for_each_entry - iterate over an idr's elements of a given type
* @idr: idr handle
* @entry: the type * to use as cursor
* @id: id entry's key
*
* @entry and @id do not need to be initialized before the loop, and
* after normal termination @entry is left with the value NULL. This
* is convenient for a "not found" value.
*/
/*
* Both macros always start the walk at id = 0.  On every pass the lookup is
* given &id, so the loop body sees @entry together with the index it was
* found at in @id; the loop ends on the first NULL result.  After each pass
* id is incremented (++id) so the next lookup starts just past the entry
* that was just visited.  The _ext form calls idr_get_next_ext(), presumably
* the unsigned long variant of idr_get_next() - it is not shown here.
*/
#define idr_for_each_entry(idr, entry, id) \
for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
#define idr_for_each_entry_ext(idr, entry, id) \
for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id)
/**
* idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
* @idr: idr handle
* @entry: the type * to use as cursor
* @id: id entry's key
*
* Continue to iterate over list of given type, continuing after
* the current position.
*/
/* Same walk as idr_for_each_entry, except that @id is not reset to 0: iteration starts from the value the caller already placed in @id. */
#define idr_for_each_entry_continue(idr, entry, id) \
for ((entry) = idr_get_next((idr), &(id)); \
entry; \
++id, (entry) = idr_get_next((idr), &(id)))
/**
* idr_get_next - Find next populated entry
* @idr: idr handle
* @nextid: Pointer to lowest possible ID to return
*
* Returns the next populated entry in the tree with an ID greater than
* or equal to the value pointed to by @nextid. On exit, @nextid is updated
* to the ID of the found value. To use in a loop, the value pointed to by
* nextid must be incremented by the user.
* 返回树中下一个填充条目,其ID大于或等于@nextid所指向的值。
* 退出时,@nextid更新为找到的值的ID。要在循环中使用,nextid所指向的值必须由用户递增。
*/
void *idr_get_next(struct idr *idr, int *nextid)
{
struct radix_tree_iter iter;
void __rcu **slot;
/*在root中寻找满足flags条件的有效节点(slots[offset]!=NULL),最终找到的index允许 >= *nextid*/
slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
if (!slot)
return NULL;
*nextid = iter.index;
return rcu_dereference_raw(*slot);
}
/**
 * radix_tree_iter_find - find a present entry
 * @root: radix tree root
 * @iter: iterator state
 * @index: start location
 *
 * This function returns the slot containing the entry with the lowest index
 * which is at least @index. If @index is larger than any present entry, this
 * function returns NULL. The @iter is updated to describe the entry found.
 */
static inline void __rcu ** radix_tree_iter_find(const struct radix_tree_root *root,struct radix_tree_iter *iter, unsigned long index)
{
	void __rcu **hit;

	/* Point the iterator at @index ... */
	(void)radix_tree_iter_init(iter, index);

	/* ... and search with flags == 0: untagged and not contiguous-only. */
	hit = radix_tree_next_chunk(root, iter, 0);

	return hit;
}
/**
* radix_tree_next_chunk - find next chunk of slots for iteration
*
* @root: radix tree root
* @iter: iterator state
* @flags: RADIX_TREE_ITER_* flags and tag index
* Returns: pointer to chunk first slot, or NULL if there no more left
*
* This function looks up the next chunk in the radix tree starting from @iter->next_index.
* It returns a pointer to the chunk's first slot.
* Also it fills @iter with data about chunk: position in the tree (index),
* its end (next_index), and constructs a bit mask for tagged iterating (tags).
*/
/* Searches for a slot that satisfies @flags (tag set for tagged lookups, non-NULL entry otherwise); the index found may be greater than or equal to iter->next_index. */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
struct radix_tree_iter *iter, unsigned flags)
{
unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; /* tag index in lower nybble */
struct radix_tree_node *node, *child;
unsigned long index, offset, maxindex;
if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
return NULL;
/*
* Catch next_index overflow after ~0UL. iter->index never overflows
* during iterating; it can be zero only at the beginning.
* And we cannot overflow iter->next_index in a single step,
* because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
*
* This condition also used by radix_tree_next_slot() to stop
* contiguous iterating, and forbid switching to the next chunk.
*/
index = iter->next_index;
if (!index && iter->index)
return NULL;
restart:
radix_tree_load_root(root, &child, &maxindex);
if (index > maxindex)
return NULL;
if (!child)
return NULL;
if (!radix_tree_is_internal_node(child)) { /* the root holds a single entry directly, there is no internal node */
/* Single-slot tree */
iter->index = index;
iter->next_index = maxindex + 1;
iter->tags = 1;
iter->node = NULL;
__set_iter_shift(iter, 0);
return (void __rcu **)&root->rnode;
}
/* Walk down level by level looking for an index that satisfies flags; the index found is >= the index we started with. */
do {
node = entry_to_node(child);
/* offset of index within this level's slots[]; child becomes the entry at that offset */
offset = radix_tree_descend(node, &child, index);
/*
* A "hole" is:
*  - tagged lookup: the tag bit for offset is not set in node;
*  - untagged lookup: child is NULL.
* In both cases the search moves on to a later slot of this node.
*/
if ((flags & RADIX_TREE_ITER_TAGGED) ? !tag_get(node, tag, offset) : !child) {
/* Hole detected */
if (flags & RADIX_TREE_ITER_CONTIG) /* stop at first hole */
return NULL;
if (flags & RADIX_TREE_ITER_TAGGED) /* tagged lookup whose tag bit at offset is clear */
offset = radix_tree_find_next_bit(node, tag,offset + 1); /* find the next set tag bit after offset, i.e. the next tagged slot of this node */
else
while (++offset < RADIX_TREE_MAP_SIZE) { /* untagged lookup with a NULL child: scan the following slots of this node */
void *slot = rcu_dereference_raw(node->slots[offset]);
if (is_sibling_entry(node, slot))
continue;
if (slot) /* found a non-NULL slot */
break;
}
/* offset moved, so index has to be recomputed for this level */
index &= ~node_maxindex(node);
index += offset << node->shift; /* index now corresponds to the new offset */
/* Overflow after ~0UL */
if (!index)
return NULL;
if (offset == RADIX_TREE_MAP_SIZE) /* no suitable slot in this node */
goto restart; /* re-descend from the root with the advanced index; the search ends there if index now exceeds maxindex */
child = rcu_dereference_raw(node->slots[offset]);
}
if (!child)
goto restart;
if (child == RADIX_TREE_RETRY)
break;
} while (radix_tree_is_internal_node(child));
/* Update the iterator state */
/* a matching index has been found */
iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); /* index of slot offset within the current node */
iter->next_index = (index | node_maxindex(node)) + 1; /* one past the last index covered by the current node */
iter->node = node; /* lowest-level node that holds the matching index */
__set_iter_shift(iter, node->shift);
if (flags & RADIX_TREE_ITER_TAGGED) /* lookup tagged slots */
set_iter_tags(iter, node, offset, tag); /* fill in iter->tags */
return node->slots + offset; /* address of the slot for iter->index */
}