From cb8c52ef8cf2e80fa2a43e729ad26a6e7449d94a Mon Sep 17 00:00:00 2001 From: moonman Date: Sun, 10 Aug 2014 22:23:49 -0600 Subject: [PATCH] core/linux-kirkwood-dt to 3.16-1 --- ...kconfig-build-bits-for-BFQ-v7r5-3.15.patch | 104 - ...duce-the-BFQ-v7r5-I-O-sched-for-3.15.patch | 6635 ----------------- ...eue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch | 1188 --- core/linux-kirkwood-dt/PKGBUILD | 21 +- core/linux-kirkwood-dt/config | 146 +- core/linux-kirkwood-dt/goflexhome_dtb.patch | 17 +- .../linux-kirkwood-dt.install | 2 +- core/linux-kirkwood-dt/pogo_e02_dtb.patch | 23 +- 8 files changed, 136 insertions(+), 8000 deletions(-) delete mode 100644 core/linux-kirkwood-dt/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch delete mode 100644 core/linux-kirkwood-dt/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch delete mode 100644 core/linux-kirkwood-dt/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch diff --git a/core/linux-kirkwood-dt/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch b/core/linux-kirkwood-dt/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch deleted file mode 100644 index bcc900fc1..000000000 --- a/core/linux-kirkwood-dt/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 2220cb873d952611ace9c36105c23b0c8425bd0a Mon Sep 17 00:00:00 2001 -From: Paolo Valente -Date: Thu, 22 May 2014 11:59:35 +0200 -Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v7r5-3.15 - -Update Kconfig.iosched and do the related Makefile changes to include -kernel configuration options for BFQ. Also add the bfqio controller -to the cgroups subsystem. - -Signed-off-by: Paolo Valente -Signed-off-by: Arianna Avanzini ---- - block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++ - block/Makefile | 1 + - include/linux/cgroup_subsys.h | 4 ++++ - 3 files changed, 37 insertions(+) - -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched -index 421bef9..0ee5f0f 100644 ---- a/block/Kconfig.iosched -+++ b/block/Kconfig.iosched -@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED - ---help--- - Enable group IO scheduling in CFQ. - -+config IOSCHED_BFQ -+ tristate "BFQ I/O scheduler" -+ default n -+ ---help--- -+ The BFQ I/O scheduler tries to distribute bandwidth among -+ all processes according to their weights. -+ It aims at distributing the bandwidth as desired, independently of -+ the disk parameters and with any workload. It also tries to -+ guarantee low latency to interactive and soft real-time -+ applications. If compiled built-in (saying Y here), BFQ can -+ be configured to support hierarchical scheduling. -+ -+config CGROUP_BFQIO -+ bool "BFQ hierarchical scheduling support" -+ depends on CGROUPS && IOSCHED_BFQ=y -+ default n -+ ---help--- -+ Enable hierarchical scheduling in BFQ, using the cgroups -+ filesystem interface. The name of the subsystem will be -+ bfqio. -+ - choice - prompt "Default I/O scheduler" - default DEFAULT_CFQ -@@ -52,6 +73,16 @@ choice - config DEFAULT_CFQ - bool "CFQ" if IOSCHED_CFQ=y - -+ config DEFAULT_BFQ -+ bool "BFQ" if IOSCHED_BFQ=y -+ help -+ Selects BFQ as the default I/O scheduler which will be -+ used by default for all block devices. -+ The BFQ I/O scheduler aims at distributing the bandwidth -+ as desired, independently of the disk parameters and with -+ any workload. It also tries to guarantee low latency to -+ interactive and soft real-time applications. -+ - config DEFAULT_NOOP - bool "No-op" - -@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED - string - default "deadline" if DEFAULT_DEADLINE - default "cfq" if DEFAULT_CFQ -+ default "bfq" if DEFAULT_BFQ - default "noop" if DEFAULT_NOOP - - endmenu -diff --git a/block/Makefile b/block/Makefile -index 20645e8..cbd83fb 100644 ---- a/block/Makefile -+++ b/block/Makefile -@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o - obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o - obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o - obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o - - obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o - obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o -diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h -index 768fe44..cdd2528 100644 ---- a/include/linux/cgroup_subsys.h -+++ b/include/linux/cgroup_subsys.h -@@ -39,6 +39,10 @@ SUBSYS(net_cls) - SUBSYS(blkio) - #endif - -+#if IS_ENABLED(CONFIG_CGROUP_BFQIO) -+SUBSYS(bfqio) -+#endif -+ - #if IS_ENABLED(CONFIG_CGROUP_PERF) - SUBSYS(perf_event) - #endif --- -1.9.3 - diff --git a/core/linux-kirkwood-dt/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch b/core/linux-kirkwood-dt/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch deleted file mode 100644 index f5dc0dfde..000000000 --- a/core/linux-kirkwood-dt/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch +++ /dev/null @@ -1,6635 +0,0 @@ -From 2d7017409acf7cba1065409819581239379098be Mon Sep 17 00:00:00 2001 -From: Paolo Valente -Date: Thu, 9 May 2013 19:10:02 +0200 -Subject: [PATCH 2/3] block: introduce the BFQ-v7r5 I/O sched for 3.15 - -Add the BFQ-v7r5 I/O scheduler to 3.15. -The general structure is borrowed from CFQ, as much of the code for -handling I/O contexts. Over time, several useful features have been -ported from CFQ as well (details in the changelog in README.BFQ). A -(bfq_)queue is associated to each task doing I/O on a device, and each -time a scheduling decision has to be made a queue is selected and served -until it expires. - - - Slices are given in the service domain: tasks are assigned - budgets, measured in number of sectors. Once got the disk, a task - must however consume its assigned budget within a configurable - maximum time (by default, the maximum possible value of the - budgets is automatically computed to comply with this timeout). - This allows the desired latency vs "throughput boosting" tradeoff - to be set. - - - Budgets are scheduled according to a variant of WF2Q+, implemented - using an augmented rb-tree to take eligibility into account while - preserving an O(log N) overall complexity. - - - A low-latency tunable is provided; if enabled, both interactive - and soft real-time applications are guaranteed a very low latency. - - - Latency guarantees are preserved also in the presence of NCQ. - - - Also with flash-based devices, a high throughput is achieved - while still preserving latency guarantees. - - - BFQ features Early Queue Merge (EQM), a sort of fusion of the - cooperating-queue-merging and the preemption mechanisms present - in CFQ. EQM is in fact a unified mechanism that tries to get a - sequential read pattern, and hence a high throughput, with any - set of processes performing interleaved I/O over a contiguous - sequence of sectors. - - - BFQ supports full hierarchical scheduling, exporting a cgroups - interface. Since each node has a full scheduler, each group can - be assigned its own weight. - - - If the cgroups interface is not used, only I/O priorities can be - assigned to processes, with ioprio values mapped to weights - with the relation weight = IOPRIO_BE_NR - ioprio. - - - ioprio classes are served in strict priority order, i.e., lower - priority queues are not served as long as there are higher - priority queues. Among queues in the same class the bandwidth is - distributed in proportion to the weight of each queue. A very - thin extra bandwidth is however guaranteed to the Idle class, to - prevent it from starving. - -Signed-off-by: Paolo Valente -Signed-off-by: Arianna Avanzini ---- - block/bfq-cgroup.c | 930 +++++++++++++ - block/bfq-ioc.c | 36 + - block/bfq-iosched.c | 3617 +++++++++++++++++++++++++++++++++++++++++++++++++++ - block/bfq-sched.c | 1207 +++++++++++++++++ - block/bfq.h | 742 +++++++++++ - 5 files changed, 6532 insertions(+) - create mode 100644 block/bfq-cgroup.c - create mode 100644 block/bfq-ioc.c - create mode 100644 block/bfq-iosched.c - create mode 100644 block/bfq-sched.c - create mode 100644 block/bfq.h - -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -new file mode 100644 -index 0000000..f742806 ---- /dev/null -+++ b/block/bfq-cgroup.c -@@ -0,0 +1,930 @@ -+/* -+ * BFQ: CGROUPS support. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ * -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ -+ * file. -+ */ -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ -+static DEFINE_MUTEX(bfqio_mutex); -+ -+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp) -+{ -+ return bgrp ? !bgrp->online : false; -+} -+ -+static struct bfqio_cgroup bfqio_root_cgroup = { -+ .weight = BFQ_DEFAULT_GRP_WEIGHT, -+ .ioprio = BFQ_DEFAULT_GRP_IOPRIO, -+ .ioprio_class = BFQ_DEFAULT_GRP_CLASS, -+}; -+ -+static inline void bfq_init_entity(struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+ entity->weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; -+ entity->ioprio = entity->new_ioprio; -+ entity->ioprio_class = entity->new_ioprio_class; -+ entity->parent = bfqg->my_entity; -+ entity->sched_data = &bfqg->sched_data; -+} -+ -+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct bfqio_cgroup, css) : NULL; -+} -+ -+/* -+ * Search the bfq_group for bfqd into the hash table (by now only a list) -+ * of bgrp. Must be called under rcu_read_lock(). -+ */ -+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, -+ struct bfq_data *bfqd) -+{ -+ struct bfq_group *bfqg; -+ void *key; -+ -+ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) { -+ key = rcu_dereference(bfqg->bfqd); -+ if (key == bfqd) -+ return bfqg; -+ } -+ -+ return NULL; -+} -+ -+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, -+ struct bfq_group *bfqg) -+{ -+ struct bfq_entity *entity = &bfqg->entity; -+ -+ /* -+ * If the weight of the entity has never been set via the sysfs -+ * interface, then bgrp->weight == 0. In this case we initialize -+ * the weight from the current ioprio value. Otherwise, the group -+ * weight, if set, has priority over the ioprio value. -+ */ -+ if (bgrp->weight == 0) { -+ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); -+ entity->new_ioprio = bgrp->ioprio; -+ } else { -+ entity->new_weight = bgrp->weight; -+ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); -+ } -+ entity->orig_weight = entity->weight = entity->new_weight; -+ entity->ioprio = entity->new_ioprio; -+ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; -+ entity->my_sched_data = &bfqg->sched_data; -+ bfqg->active_entities = 0; -+} -+ -+static inline void bfq_group_set_parent(struct bfq_group *bfqg, -+ struct bfq_group *parent) -+{ -+ struct bfq_entity *entity; -+ -+ BUG_ON(parent == NULL); -+ BUG_ON(bfqg == NULL); -+ -+ entity = &bfqg->entity; -+ entity->parent = parent->my_entity; -+ entity->sched_data = &parent->sched_data; -+} -+ -+/** -+ * bfq_group_chain_alloc - allocate a chain of groups. -+ * @bfqd: queue descriptor. -+ * @css: the leaf cgroup_subsys_state this chain starts from. -+ * -+ * Allocate a chain of groups starting from the one belonging to -+ * @cgroup up to the root cgroup. Stop if a cgroup on the chain -+ * to the root has already an allocated group on @bfqd. -+ */ -+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp; -+ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; -+ -+ for (; css != NULL; css = css->parent) { -+ bgrp = css_to_bfqio(css); -+ -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ if (bfqg != NULL) { -+ /* -+ * All the cgroups in the path from there to the -+ * root must have a bfq_group for bfqd, so we don't -+ * need any more allocations. -+ */ -+ break; -+ } -+ -+ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); -+ if (bfqg == NULL) -+ goto cleanup; -+ -+ bfq_group_init_entity(bgrp, bfqg); -+ bfqg->my_entity = &bfqg->entity; -+ -+ if (leaf == NULL) { -+ leaf = bfqg; -+ prev = leaf; -+ } else { -+ bfq_group_set_parent(prev, bfqg); -+ /* -+ * Build a list of allocated nodes using the bfqd -+ * filed, that is still unused and will be -+ * initialized only after the node will be -+ * connected. -+ */ -+ prev->bfqd = bfqg; -+ prev = bfqg; -+ } -+ } -+ -+ return leaf; -+ -+cleanup: -+ while (leaf != NULL) { -+ prev = leaf; -+ leaf = leaf->bfqd; -+ kfree(prev); -+ } -+ -+ return NULL; -+} -+ -+/** -+ * bfq_group_chain_link - link an allocated group chain to a cgroup -+ * hierarchy. -+ * @bfqd: the queue descriptor. -+ * @css: the leaf cgroup_subsys_state to start from. -+ * @leaf: the leaf group (to be associated to @cgroup). -+ * -+ * Try to link a chain of groups to a cgroup hierarchy, connecting the -+ * nodes bottom-up, so we can be sure that when we find a cgroup in the -+ * hierarchy that already as a group associated to @bfqd all the nodes -+ * in the path to the root cgroup have one too. -+ * -+ * On locking: the queue lock protects the hierarchy (there is a hierarchy -+ * per device) while the bfqio_cgroup lock protects the list of groups -+ * belonging to the same cgroup. -+ */ -+static void bfq_group_chain_link(struct bfq_data *bfqd, -+ struct cgroup_subsys_state *css, -+ struct bfq_group *leaf) -+{ -+ struct bfqio_cgroup *bgrp; -+ struct bfq_group *bfqg, *next, *prev = NULL; -+ unsigned long flags; -+ -+ assert_spin_locked(bfqd->queue->queue_lock); -+ -+ for (; css != NULL && leaf != NULL; css = css->parent) { -+ bgrp = css_to_bfqio(css); -+ next = leaf->bfqd; -+ -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ BUG_ON(bfqg != NULL); -+ -+ spin_lock_irqsave(&bgrp->lock, flags); -+ -+ rcu_assign_pointer(leaf->bfqd, bfqd); -+ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); -+ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); -+ -+ spin_unlock_irqrestore(&bgrp->lock, flags); -+ -+ prev = leaf; -+ leaf = next; -+ } -+ -+ BUG_ON(css == NULL && leaf != NULL); -+ if (css != NULL && prev != NULL) { -+ bgrp = css_to_bfqio(css); -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ bfq_group_set_parent(prev, bfqg); -+ } -+} -+ -+/** -+ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. -+ * @bfqd: queue descriptor. -+ * @cgroup: cgroup being searched for. -+ * -+ * Return a group associated to @bfqd in @cgroup, allocating one if -+ * necessary. When a group is returned all the cgroups in the path -+ * to the root have a group associated to @bfqd. -+ * -+ * If the allocation fails, return the root group: this breaks guarantees -+ * but is a safe fallback. If this loss becomes a problem it can be -+ * mitigated using the equivalent weight (given by the product of the -+ * weights of the groups in the path from @group to the root) in the -+ * root scheduler. -+ * -+ * We allocate all the missing nodes in the path from the leaf cgroup -+ * to the root and we connect the nodes only after all the allocations -+ * have been successful. -+ */ -+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ struct bfq_group *bfqg; -+ -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ if (bfqg != NULL) -+ return bfqg; -+ -+ bfqg = bfq_group_chain_alloc(bfqd, css); -+ if (bfqg != NULL) -+ bfq_group_chain_link(bfqd, css, bfqg); -+ else -+ bfqg = bfqd->root_group; -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_bfqq_move - migrate @bfqq to @bfqg. -+ * @bfqd: queue descriptor. -+ * @bfqq: the queue to move. -+ * @entity: @bfqq's entity. -+ * @bfqg: the group to move to. -+ * -+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating -+ * it on the new one. Avoid putting the entity on the old group idle tree. -+ * -+ * Must be called under the queue lock; the cgroup owning @bfqg must -+ * not disappear (by now this just means that we are called under -+ * rcu_read_lock()). -+ */ -+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct bfq_entity *entity, struct bfq_group *bfqg) -+{ -+ int busy, resume; -+ -+ busy = bfq_bfqq_busy(bfqq); -+ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); -+ -+ BUG_ON(resume && !entity->on_st); -+ BUG_ON(busy && !resume && entity->on_st && -+ bfqq != bfqd->in_service_queue); -+ -+ if (busy) { -+ BUG_ON(atomic_read(&bfqq->ref) < 2); -+ -+ if (!resume) -+ bfq_del_bfqq_busy(bfqd, bfqq, 0); -+ else -+ bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ } else if (entity->on_st) -+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); -+ -+ /* -+ * Here we use a reference to bfqg. We don't need a refcounter -+ * as the cgroup reference will not be dropped, so that its -+ * destroy() callback will not be invoked. -+ */ -+ entity->parent = bfqg->my_entity; -+ entity->sched_data = &bfqg->sched_data; -+ -+ if (busy && resume) -+ bfq_activate_bfqq(bfqd, bfqq); -+ -+ if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+} -+ -+/** -+ * __bfq_bic_change_cgroup - move @bic to @cgroup. -+ * @bfqd: the queue descriptor. -+ * @bic: the bic to move. -+ * @cgroup: the cgroup to move to. -+ * -+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller -+ * has to make sure that the reference to cgroup is valid across the call. -+ * -+ * NOTE: an alternative approach might have been to store the current -+ * cgroup in bfqq and getting a reference to it, reducing the lookup -+ * time here, at the price of slightly more complex code. -+ */ -+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, -+ struct bfq_io_cq *bic, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); -+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); -+ struct bfq_entity *entity; -+ struct bfq_group *bfqg; -+ struct bfqio_cgroup *bgrp; -+ -+ bgrp = css_to_bfqio(css); -+ -+ bfqg = bfq_find_alloc_group(bfqd, css); -+ if (async_bfqq != NULL) { -+ entity = &async_bfqq->entity; -+ -+ if (entity->sched_data != &bfqg->sched_data) { -+ bic_set_bfqq(bic, NULL, 0); -+ bfq_log_bfqq(bfqd, async_bfqq, -+ "bic_change_group: %p %d", -+ async_bfqq, atomic_read(&async_bfqq->ref)); -+ bfq_put_queue(async_bfqq); -+ } -+ } -+ -+ if (sync_bfqq != NULL) { -+ entity = &sync_bfqq->entity; -+ if (entity->sched_data != &bfqg->sched_data) -+ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); -+ } -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_bic_change_cgroup - move @bic to @cgroup. -+ * @bic: the bic being migrated. -+ * @cgroup: the destination cgroup. -+ * -+ * When the task owning @bic is moved to @cgroup, @bic is immediately -+ * moved into its new parent group. -+ */ -+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfq_data *bfqd; -+ unsigned long uninitialized_var(flags); -+ -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), -+ &flags); -+ if (bfqd != NULL) { -+ __bfq_bic_change_cgroup(bfqd, bic, css); -+ bfq_put_bfqd_unlock(bfqd, &flags); -+ } -+} -+ -+/** -+ * bfq_bic_update_cgroup - update the cgroup of @bic. -+ * @bic: the @bic to update. -+ * -+ * Make sure that @bic is enqueued in the cgroup of the current task. -+ * We need this in addition to moving bics during the cgroup attach -+ * phase because the task owning @bic could be at its first disk -+ * access or we may end up in the root cgroup as the result of a -+ * memory allocation failure and here we try to move to the right -+ * group. -+ * -+ * Must be called under the queue lock. It is safe to use the returned -+ * value even after the rcu_read_unlock() as the migration/destruction -+ * paths act under the queue lock too. IOW it is impossible to race with -+ * group migration/destruction and end up with an invalid group as: -+ * a) here cgroup has not yet been destroyed, nor its destroy callback -+ * has started execution, as current holds a reference to it, -+ * b) if it is destroyed after rcu_read_unlock() [after current is -+ * migrated to a different cgroup] its attach() callback will have -+ * taken care of remove all the references to the old cgroup data. -+ */ -+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ struct bfq_group *bfqg; -+ struct cgroup_subsys_state *css; -+ -+ BUG_ON(bfqd == NULL); -+ -+ rcu_read_lock(); -+ css = task_css(current, bfqio_cgrp_id); -+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, css); -+ rcu_read_unlock(); -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. -+ * @st: the service tree being flushed. -+ */ -+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entity = st->first_idle; -+ -+ for (; entity != NULL; entity = st->first_idle) -+ __bfq_deactivate_entity(entity, 0); -+} -+ -+/** -+ * bfq_reparent_leaf_entity - move leaf entity to the root_group. -+ * @bfqd: the device data structure with the root group. -+ * @entity: the entity to move. -+ */ -+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ BUG_ON(bfqq == NULL); -+ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); -+ return; -+} -+ -+/** -+ * bfq_reparent_active_entities - move to the root group all active -+ * entities. -+ * @bfqd: the device data structure with the root group. -+ * @bfqg: the group to move from. -+ * @st: the service tree with the entities. -+ * -+ * Needs queue_lock to be taken and reference to be valid over the call. -+ */ -+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ struct bfq_service_tree *st) -+{ -+ struct rb_root *active = &st->active; -+ struct bfq_entity *entity = NULL; -+ -+ if (!RB_EMPTY_ROOT(&st->active)) -+ entity = bfq_entity_of(rb_first(active)); -+ -+ for (; entity != NULL; entity = bfq_entity_of(rb_first(active))) -+ bfq_reparent_leaf_entity(bfqd, entity); -+ -+ if (bfqg->sched_data.in_service_entity != NULL) -+ bfq_reparent_leaf_entity(bfqd, -+ bfqg->sched_data.in_service_entity); -+ -+ return; -+} -+ -+/** -+ * bfq_destroy_group - destroy @bfqg. -+ * @bgrp: the bfqio_cgroup containing @bfqg. -+ * @bfqg: the group being destroyed. -+ * -+ * Destroy @bfqg, making sure that it is not referenced from its parent. -+ */ -+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) -+{ -+ struct bfq_data *bfqd; -+ struct bfq_service_tree *st; -+ struct bfq_entity *entity = bfqg->my_entity; -+ unsigned long uninitialized_var(flags); -+ int i; -+ -+ hlist_del(&bfqg->group_node); -+ -+ /* -+ * Empty all service_trees belonging to this group before -+ * deactivating the group itself. -+ */ -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { -+ st = bfqg->sched_data.service_tree + i; -+ -+ /* -+ * The idle tree may still contain bfq_queues belonging -+ * to exited task because they never migrated to a different -+ * cgroup from the one being destroyed now. No one else -+ * can access them so it's safe to act without any lock. -+ */ -+ bfq_flush_idle_tree(st); -+ -+ /* -+ * It may happen that some queues are still active -+ * (busy) upon group destruction (if the corresponding -+ * processes have been forced to terminate). We move -+ * all the leaf entities corresponding to these queues -+ * to the root_group. -+ * Also, it may happen that the group has an entity -+ * in service, which is disconnected from the active -+ * tree: it must be moved, too. -+ * There is no need to put the sync queues, as the -+ * scheduler has taken no reference. -+ */ -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); -+ if (bfqd != NULL) { -+ bfq_reparent_active_entities(bfqd, bfqg, st); -+ bfq_put_bfqd_unlock(bfqd, &flags); -+ } -+ BUG_ON(!RB_EMPTY_ROOT(&st->active)); -+ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); -+ } -+ BUG_ON(bfqg->sched_data.next_in_service != NULL); -+ BUG_ON(bfqg->sched_data.in_service_entity != NULL); -+ -+ /* -+ * We may race with device destruction, take extra care when -+ * dereferencing bfqg->bfqd. -+ */ -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); -+ if (bfqd != NULL) { -+ hlist_del(&bfqg->bfqd_node); -+ __bfq_deactivate_entity(entity, 0); -+ bfq_put_async_queues(bfqd, bfqg); -+ bfq_put_bfqd_unlock(bfqd, &flags); -+ } -+ BUG_ON(entity->tree != NULL); -+ -+ /* -+ * No need to defer the kfree() to the end of the RCU grace -+ * period: we are called from the destroy() callback of our -+ * cgroup, so we can be sure that no one is a) still using -+ * this cgroup or b) doing lookups in it. -+ */ -+ kfree(bfqg); -+} -+ -+static void bfq_end_wr_async(struct bfq_data *bfqd) -+{ -+ struct hlist_node *tmp; -+ struct bfq_group *bfqg; -+ -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) -+ bfq_end_wr_async_queues(bfqd, bfqg); -+ bfq_end_wr_async_queues(bfqd, bfqd->root_group); -+} -+ -+/** -+ * bfq_disconnect_groups - disconnect @bfqd from all its groups. -+ * @bfqd: the device descriptor being exited. -+ * -+ * When the device exits we just make sure that no lookup can return -+ * the now unused group structures. They will be deallocated on cgroup -+ * destruction. -+ */ -+static void bfq_disconnect_groups(struct bfq_data *bfqd) -+{ -+ struct hlist_node *tmp; -+ struct bfq_group *bfqg; -+ -+ bfq_log(bfqd, "disconnect_groups beginning"); -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) { -+ hlist_del(&bfqg->bfqd_node); -+ -+ __bfq_deactivate_entity(bfqg->my_entity, 0); -+ -+ /* -+ * Don't remove from the group hash, just set an -+ * invalid key. No lookups can race with the -+ * assignment as bfqd is being destroyed; this -+ * implies also that new elements cannot be added -+ * to the list. -+ */ -+ rcu_assign_pointer(bfqg->bfqd, NULL); -+ -+ bfq_log(bfqd, "disconnect_groups: put async for group %p", -+ bfqg); -+ bfq_put_async_queues(bfqd, bfqg); -+ } -+} -+ -+static inline void bfq_free_root_group(struct bfq_data *bfqd) -+{ -+ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; -+ struct bfq_group *bfqg = bfqd->root_group; -+ -+ bfq_put_async_queues(bfqd, bfqg); -+ -+ spin_lock_irq(&bgrp->lock); -+ hlist_del_rcu(&bfqg->group_node); -+ spin_unlock_irq(&bgrp->lock); -+ -+ /* -+ * No need to synchronize_rcu() here: since the device is gone -+ * there cannot be any read-side access to its root_group. -+ */ -+ kfree(bfqg); -+} -+ -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -+{ -+ struct bfq_group *bfqg; -+ struct bfqio_cgroup *bgrp; -+ int i; -+ -+ bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); -+ if (bfqg == NULL) -+ return NULL; -+ -+ bfqg->entity.parent = NULL; -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+ -+ bgrp = &bfqio_root_cgroup; -+ spin_lock_irq(&bgrp->lock); -+ rcu_assign_pointer(bfqg->bfqd, bfqd); -+ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); -+ spin_unlock_irq(&bgrp->lock); -+ -+ return bfqg; -+} -+ -+#define SHOW_FUNCTION(__VAR) \ -+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \ -+ struct cftype *cftype) \ -+{ \ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \ -+ u64 ret = -ENODEV; \ -+ \ -+ mutex_lock(&bfqio_mutex); \ -+ if (bfqio_is_removed(bgrp)) \ -+ goto out_unlock; \ -+ \ -+ spin_lock_irq(&bgrp->lock); \ -+ ret = bgrp->__VAR; \ -+ spin_unlock_irq(&bgrp->lock); \ -+ \ -+out_unlock: \ -+ mutex_unlock(&bfqio_mutex); \ -+ return ret; \ -+} -+ -+SHOW_FUNCTION(weight); -+SHOW_FUNCTION(ioprio); -+SHOW_FUNCTION(ioprio_class); -+#undef SHOW_FUNCTION -+ -+#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ -+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\ -+ struct cftype *cftype, \ -+ u64 val) \ -+{ \ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \ -+ struct bfq_group *bfqg; \ -+ int ret = -EINVAL; \ -+ \ -+ if (val < (__MIN) || val > (__MAX)) \ -+ return ret; \ -+ \ -+ ret = -ENODEV; \ -+ mutex_lock(&bfqio_mutex); \ -+ if (bfqio_is_removed(bgrp)) \ -+ goto out_unlock; \ -+ ret = 0; \ -+ \ -+ spin_lock_irq(&bgrp->lock); \ -+ bgrp->__VAR = (unsigned short)val; \ -+ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \ -+ /* \ -+ * Setting the ioprio_changed flag of the entity \ -+ * to 1 with new_##__VAR == ##__VAR would re-set \ -+ * the value of the weight to its ioprio mapping. \ -+ * Set the flag only if necessary. \ -+ */ \ -+ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ -+ bfqg->entity.new_##__VAR = (unsigned short)val; \ -+ /* \ -+ * Make sure that the above new value has been \ -+ * stored in bfqg->entity.new_##__VAR before \ -+ * setting the ioprio_changed flag. In fact, \ -+ * this flag may be read asynchronously (in \ -+ * critical sections protected by a different \ -+ * lock than that held here), and finding this \ -+ * flag set may cause the execution of the code \ -+ * for updating parameters whose value may \ -+ * depend also on bfqg->entity.new_##__VAR (in \ -+ * __bfq_entity_update_weight_prio). \ -+ * This barrier makes sure that the new value \ -+ * of bfqg->entity.new_##__VAR is correctly \ -+ * seen in that code. \ -+ */ \ -+ smp_wmb(); \ -+ bfqg->entity.ioprio_changed = 1; \ -+ } \ -+ } \ -+ spin_unlock_irq(&bgrp->lock); \ -+ \ -+out_unlock: \ -+ mutex_unlock(&bfqio_mutex); \ -+ return ret; \ -+} -+ -+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); -+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); -+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); -+#undef STORE_FUNCTION -+ -+static struct cftype bfqio_files[] = { -+ { -+ .name = "weight", -+ .read_u64 = bfqio_cgroup_weight_read, -+ .write_u64 = bfqio_cgroup_weight_write, -+ }, -+ { -+ .name = "ioprio", -+ .read_u64 = bfqio_cgroup_ioprio_read, -+ .write_u64 = bfqio_cgroup_ioprio_write, -+ }, -+ { -+ .name = "ioprio_class", -+ .read_u64 = bfqio_cgroup_ioprio_class_read, -+ .write_u64 = bfqio_cgroup_ioprio_class_write, -+ }, -+ { }, /* terminate */ -+}; -+ -+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state -+ *parent_css) -+{ -+ struct bfqio_cgroup *bgrp; -+ -+ if (parent_css != NULL) { -+ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); -+ if (bgrp == NULL) -+ return ERR_PTR(-ENOMEM); -+ } else -+ bgrp = &bfqio_root_cgroup; -+ -+ spin_lock_init(&bgrp->lock); -+ INIT_HLIST_HEAD(&bgrp->group_data); -+ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; -+ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; -+ -+ return &bgrp->css; -+} -+ -+/* -+ * We cannot support shared io contexts, as we have no means to support -+ * two tasks with the same ioc in two different groups without major rework -+ * of the main bic/bfqq data structures. By now we allow a task to change -+ * its cgroup only if it's the only owner of its ioc; the drawback of this -+ * behavior is that a group containing a task that forked using CLONE_IO -+ * will not be destroyed until the tasks sharing the ioc die. -+ */ -+static int bfqio_can_attach(struct cgroup_subsys_state *css, -+ struct cgroup_taskset *tset) -+{ -+ struct task_struct *task; -+ struct io_context *ioc; -+ int ret = 0; -+ -+ cgroup_taskset_for_each(task, tset) { -+ /* -+ * task_lock() is needed to avoid races with -+ * exit_io_context() -+ */ -+ task_lock(task); -+ ioc = task->io_context; -+ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) -+ /* -+ * ioc == NULL means that the task is either too -+ * young or exiting: if it has still no ioc the -+ * ioc can't be shared, if the task is exiting the -+ * attach will fail anyway, no matter what we -+ * return here. -+ */ -+ ret = -EINVAL; -+ task_unlock(task); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+static void bfqio_attach(struct cgroup_subsys_state *css, -+ struct cgroup_taskset *tset) -+{ -+ struct task_struct *task; -+ struct io_context *ioc; -+ struct io_cq *icq; -+ -+ /* -+ * IMPORTANT NOTE: The move of more than one process at a time to a -+ * new group has not yet been tested. -+ */ -+ cgroup_taskset_for_each(task, tset) { -+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); -+ if (ioc) { -+ /* -+ * Handle cgroup change here. -+ */ -+ rcu_read_lock(); -+ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node) -+ if (!strncmp( -+ icq->q->elevator->type->elevator_name, -+ "bfq", ELV_NAME_MAX)) -+ bfq_bic_change_cgroup(icq_to_bic(icq), -+ css); -+ rcu_read_unlock(); -+ put_io_context(ioc); -+ } -+ } -+} -+ -+static void bfqio_destroy(struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ struct hlist_node *tmp; -+ struct bfq_group *bfqg; -+ -+ /* -+ * Since we are destroying the cgroup, there are no more tasks -+ * referencing it, and all the RCU grace periods that may have -+ * referenced it are ended (as the destruction of the parent -+ * cgroup is RCU-safe); bgrp->group_data will not be accessed by -+ * anything else and we don't need any synchronization. -+ */ -+ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node) -+ bfq_destroy_group(bgrp, bfqg); -+ -+ BUG_ON(!hlist_empty(&bgrp->group_data)); -+ -+ kfree(bgrp); -+} -+ -+static int bfqio_css_online(struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ -+ mutex_lock(&bfqio_mutex); -+ bgrp->online = true; -+ mutex_unlock(&bfqio_mutex); -+ -+ return 0; -+} -+ -+static void bfqio_css_offline(struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ -+ mutex_lock(&bfqio_mutex); -+ bgrp->online = false; -+ mutex_unlock(&bfqio_mutex); -+} -+ -+struct cgroup_subsys bfqio_cgrp_subsys = { -+ .css_alloc = bfqio_create, -+ .css_online = bfqio_css_online, -+ .css_offline = bfqio_css_offline, -+ .can_attach = bfqio_can_attach, -+ .attach = bfqio_attach, -+ .css_free = bfqio_destroy, -+ .base_cftypes = bfqio_files, -+}; -+#else -+static inline void bfq_init_entity(struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+ entity->weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; -+ entity->ioprio = entity->new_ioprio; -+ entity->ioprio_class = entity->new_ioprio_class; -+ entity->sched_data = &bfqg->sched_data; -+} -+ -+static inline struct bfq_group * -+bfq_bic_update_cgroup(struct bfq_io_cq *bic) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ return bfqd->root_group; -+} -+ -+static inline void bfq_bfqq_move(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+} -+ -+static void bfq_end_wr_async(struct bfq_data *bfqd) -+{ -+ bfq_end_wr_async_queues(bfqd, bfqd->root_group); -+} -+ -+static inline void bfq_disconnect_groups(struct bfq_data *bfqd) -+{ -+ bfq_put_async_queues(bfqd, bfqd->root_group); -+} -+ -+static inline void bfq_free_root_group(struct bfq_data *bfqd) -+{ -+ kfree(bfqd->root_group); -+} -+ -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -+{ -+ struct bfq_group *bfqg; -+ int i; -+ -+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); -+ if (bfqg == NULL) -+ return NULL; -+ -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+ -+ return bfqg; -+} -+#endif -diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c -new file mode 100644 -index 0000000..7f6b000 ---- /dev/null -+++ b/block/bfq-ioc.c -@@ -0,0 +1,36 @@ -+/* -+ * BFQ: I/O context handling. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ */ -+ -+/** -+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq. -+ * @icq: the iocontext queue. -+ */ -+static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq) -+{ -+ /* bic->icq is the first member, %NULL will convert to %NULL */ -+ return container_of(icq, struct bfq_io_cq, icq); -+} -+ -+/** -+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd. -+ * @bfqd: the lookup key. -+ * @ioc: the io_context of the process doing I/O. -+ * -+ * Queue lock must be held. -+ */ -+static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, -+ struct io_context *ioc) -+{ -+ if (ioc) -+ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue)); -+ return NULL; -+} -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -new file mode 100644 -index 0000000..6cf7bca ---- /dev/null -+++ b/block/bfq-iosched.c -@@ -0,0 +1,3617 @@ -+/* -+ * Budget Fair Queueing (BFQ) disk scheduler. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ * -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ -+ * file. -+ * -+ * BFQ is a proportional-share storage-I/O scheduling algorithm based on -+ * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, -+ * measured in number of sectors, to processes instead of time slices. The -+ * device is not granted to the in-service process for a given time slice, -+ * but until it has exhausted its assigned budget. This change from the time -+ * to the service domain allows BFQ to distribute the device throughput -+ * among processes as desired, without any distortion due to ZBR, workload -+ * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, -+ * called B-WF2Q+, to schedule processes according to their budgets. More -+ * precisely, BFQ schedules queues associated to processes. Thanks to the -+ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to -+ * I/O-bound processes issuing sequential requests (to boost the -+ * throughput), and yet guarantee a low latency to interactive and soft -+ * real-time applications. -+ * -+ * BFQ is described in [1], where also a reference to the initial, more -+ * theoretical paper on BFQ can be found. The interested reader can find -+ * in the latter paper full details on the main algorithm, as well as -+ * formulas of the guarantees and formal proofs of all the properties. -+ * With respect to the version of BFQ presented in these papers, this -+ * implementation adds a few more heuristics, such as the one that -+ * guarantees a low latency to soft real-time applications, and a -+ * hierarchical extension based on H-WF2Q+. -+ * -+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with -+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) -+ * complexity derives from the one introduced with EEVDF in [3]. -+ * -+ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness -+ * with the BFQ Disk I/O Scheduler'', -+ * Proceedings of the 5th Annual International Systems and Storage -+ * Conference (SYSTOR '12), June 2012. -+ * -+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf -+ * -+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing -+ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, -+ * Oct 1997. -+ * -+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz -+ * -+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline -+ * First: A Flexible and Accurate Mechanism for Proportional Share -+ * Resource Allocation,'' technical report. -+ * -+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "bfq.h" -+#include "blk.h" -+ -+/* Max number of dispatches in one round of service. */ -+static const int bfq_quantum = 4; -+ -+/* Expiration time of sync (0) and async (1) requests, in jiffies. */ -+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; -+ -+/* Maximum backwards seek, in KiB. */ -+static const int bfq_back_max = 16 * 1024; -+ -+/* Penalty of a backwards seek, in number of sectors. */ -+static const int bfq_back_penalty = 2; -+ -+/* Idling period duration, in jiffies. */ -+static int bfq_slice_idle = HZ / 125; -+ -+/* Default maximum budget values, in sectors and number of requests. */ -+static const int bfq_default_max_budget = 16 * 1024; -+static const int bfq_max_budget_async_rq = 4; -+ -+/* -+ * Async to sync throughput distribution is controlled as follows: -+ * when an async request is served, the entity is charged the number -+ * of sectors of the request, multiplied by the factor below -+ */ -+static const int bfq_async_charge_factor = 10; -+ -+/* Default timeout values, in jiffies, approximating CFQ defaults. */ -+static const int bfq_timeout_sync = HZ / 8; -+static int bfq_timeout_async = HZ / 25; -+ -+struct kmem_cache *bfq_pool; -+ -+/* Below this threshold (in ms), we consider thinktime immediate. */ -+#define BFQ_MIN_TT 2 -+ -+/* hw_tag detection: parallel requests threshold and min samples needed. */ -+#define BFQ_HW_QUEUE_THRESHOLD 4 -+#define BFQ_HW_QUEUE_SAMPLES 32 -+ -+#define BFQQ_SEEK_THR (sector_t)(8 * 1024) -+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) -+ -+/* Min samples used for peak rate estimation (for autotuning). */ -+#define BFQ_PEAK_RATE_SAMPLES 32 -+ -+/* Shift used for peak rate fixed precision calculations. */ -+#define BFQ_RATE_SHIFT 16 -+ -+/* -+ * By default, BFQ computes the duration of the weight raising for -+ * interactive applications automatically, using the following formula: -+ * duration = (R / r) * T, where r is the peak rate of the device, and -+ * R and T are two reference parameters. -+ * In particular, R is the peak rate of the reference device (see below), -+ * and T is a reference time: given the systems that are likely to be -+ * installed on the reference device according to its speed class, T is -+ * about the maximum time needed, under BFQ and while reading two files in -+ * parallel, to load typical large applications on these systems. -+ * In practice, the slower/faster the device at hand is, the more/less it -+ * takes to load applications with respect to the reference device. -+ * Accordingly, the longer/shorter BFQ grants weight raising to interactive -+ * applications. -+ * -+ * BFQ uses four different reference pairs (R, T), depending on: -+ * . whether the device is rotational or non-rotational; -+ * . whether the device is slow, such as old or portable HDDs, as well as -+ * SD cards, or fast, such as newer HDDs and SSDs. -+ * -+ * The device's speed class is dynamically (re)detected in -+ * bfq_update_peak_rate() every time the estimated peak rate is updated. -+ * -+ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] -+ * are the reference values for a slow/fast rotational device, whereas -+ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for -+ * a slow/fast non-rotational device. Finally, device_speed_thresh are the -+ * thresholds used to switch between speed classes. -+ * Both the reference peak rates and the thresholds are measured in -+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT. -+ */ -+static int R_slow[2] = {1536, 10752}; -+static int R_fast[2] = {17415, 34791}; -+/* -+ * To improve readability, a conversion function is used to initialize the -+ * following arrays, which entails that they can be initialized only in a -+ * function. -+ */ -+static int T_slow[2]; -+static int T_fast[2]; -+static int device_speed_thresh[2]; -+ -+#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ -+ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) -+ -+#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) -+#define RQ_BFQQ(rq) ((rq)->elv.priv[1]) -+ -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); -+ -+#include "bfq-ioc.c" -+#include "bfq-sched.c" -+#include "bfq-cgroup.c" -+ -+#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ -+ IOPRIO_CLASS_IDLE) -+#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ -+ IOPRIO_CLASS_RT) -+ -+#define bfq_sample_valid(samples) ((samples) > 80) -+ -+/* -+ * We regard a request as SYNC, if either it's a read or has the SYNC bit -+ * set (in which case it could also be a direct WRITE). -+ */ -+static inline int bfq_bio_sync(struct bio *bio) -+{ -+ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) -+ return 1; -+ -+ return 0; -+} -+ -+/* -+ * Scheduler run of queue, if there are requests pending and no one in the -+ * driver that will restart queueing. -+ */ -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) -+{ -+ if (bfqd->queued != 0) { -+ bfq_log(bfqd, "schedule dispatch"); -+ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); -+ } -+} -+ -+/* -+ * Lifted from AS - choose which of rq1 and rq2 that is best served now. -+ * We choose the request that is closesr to the head right now. Distance -+ * behind the head is penalized and only allowed to a certain extent. -+ */ -+static struct request *bfq_choose_req(struct bfq_data *bfqd, -+ struct request *rq1, -+ struct request *rq2, -+ sector_t last) -+{ -+ sector_t s1, s2, d1 = 0, d2 = 0; -+ unsigned long back_max; -+#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ -+#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ -+ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ -+ -+ if (rq1 == NULL || rq1 == rq2) -+ return rq2; -+ if (rq2 == NULL) -+ return rq1; -+ -+ if (rq_is_sync(rq1) && !rq_is_sync(rq2)) -+ return rq1; -+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) -+ return rq2; -+ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) -+ return rq1; -+ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) -+ return rq2; -+ -+ s1 = blk_rq_pos(rq1); -+ s2 = blk_rq_pos(rq2); -+ -+ /* -+ * By definition, 1KiB is 2 sectors. -+ */ -+ back_max = bfqd->bfq_back_max * 2; -+ -+ /* -+ * Strict one way elevator _except_ in the case where we allow -+ * short backward seeks which are biased as twice the cost of a -+ * similar forward seek. -+ */ -+ if (s1 >= last) -+ d1 = s1 - last; -+ else if (s1 + back_max >= last) -+ d1 = (last - s1) * bfqd->bfq_back_penalty; -+ else -+ wrap |= BFQ_RQ1_WRAP; -+ -+ if (s2 >= last) -+ d2 = s2 - last; -+ else if (s2 + back_max >= last) -+ d2 = (last - s2) * bfqd->bfq_back_penalty; -+ else -+ wrap |= BFQ_RQ2_WRAP; -+ -+ /* Found required data */ -+ -+ /* -+ * By doing switch() on the bit mask "wrap" we avoid having to -+ * check two variables for all permutations: --> faster! -+ */ -+ switch (wrap) { -+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ -+ if (d1 < d2) -+ return rq1; -+ else if (d2 < d1) -+ return rq2; -+ else { -+ if (s1 >= s2) -+ return rq1; -+ else -+ return rq2; -+ } -+ -+ case BFQ_RQ2_WRAP: -+ return rq1; -+ case BFQ_RQ1_WRAP: -+ return rq2; -+ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ -+ default: -+ /* -+ * Since both rqs are wrapped, -+ * start with the one that's further behind head -+ * (--> only *one* back seek required), -+ * since back seek takes more time than forward. -+ */ -+ if (s1 <= s2) -+ return rq1; -+ else -+ return rq2; -+ } -+} -+ -+static struct bfq_queue * -+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, -+ sector_t sector, struct rb_node **ret_parent, -+ struct rb_node ***rb_link) -+{ -+ struct rb_node **p, *parent; -+ struct bfq_queue *bfqq = NULL; -+ -+ parent = NULL; -+ p = &root->rb_node; -+ while (*p) { -+ struct rb_node **n; -+ -+ parent = *p; -+ bfqq = rb_entry(parent, struct bfq_queue, pos_node); -+ -+ /* -+ * Sort strictly based on sector. Smallest to the left, -+ * largest to the right. -+ */ -+ if (sector > blk_rq_pos(bfqq->next_rq)) -+ n = &(*p)->rb_right; -+ else if (sector < blk_rq_pos(bfqq->next_rq)) -+ n = &(*p)->rb_left; -+ else -+ break; -+ p = n; -+ bfqq = NULL; -+ } -+ -+ *ret_parent = parent; -+ if (rb_link) -+ *rb_link = p; -+ -+ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", -+ (long long unsigned)sector, -+ bfqq != NULL ? bfqq->pid : 0); -+ -+ return bfqq; -+} -+ -+static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct rb_node **p, *parent; -+ struct bfq_queue *__bfqq; -+ -+ if (bfqq->pos_root != NULL) { -+ rb_erase(&bfqq->pos_node, bfqq->pos_root); -+ bfqq->pos_root = NULL; -+ } -+ -+ if (bfq_class_idle(bfqq)) -+ return; -+ if (!bfqq->next_rq) -+ return; -+ -+ bfqq->pos_root = &bfqd->rq_pos_tree; -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, -+ blk_rq_pos(bfqq->next_rq), &parent, &p); -+ if (__bfqq == NULL) { -+ rb_link_node(&bfqq->pos_node, parent, p); -+ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); -+ } else -+ bfqq->pos_root = NULL; -+} -+ -+/* -+ * Tell whether there are active queues or groups with differentiated weights. -+ */ -+static inline bool bfq_differentiated_weights(struct bfq_data *bfqd) -+{ -+ BUG_ON(!bfqd->hw_tag); -+ /* -+ * For weights to differ, at least one of the trees must contain -+ * at least two nodes. -+ */ -+ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && -+ (bfqd->queue_weights_tree.rb_node->rb_left || -+ bfqd->queue_weights_tree.rb_node->rb_right) -+#ifdef CONFIG_CGROUP_BFQIO -+ ) || -+ (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && -+ (bfqd->group_weights_tree.rb_node->rb_left || -+ bfqd->group_weights_tree.rb_node->rb_right) -+#endif -+ ); -+} -+ -+/* -+ * If the weight-counter tree passed as input contains no counter for -+ * the weight of the input entity, then add that counter; otherwise just -+ * increment the existing counter. -+ * -+ * Note that weight-counter trees contain few nodes in mostly symmetric -+ * scenarios. For example, if all queues have the same weight, then the -+ * weight-counter tree for the queues may contain at most one node. -+ * This holds even if low_latency is on, because weight-raised queues -+ * are not inserted in the tree. -+ * In most scenarios, the rate at which nodes are created/destroyed -+ * should be low too. -+ */ -+static void bfq_weights_tree_add(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root) -+{ -+ struct rb_node **new = &(root->rb_node), *parent = NULL; -+ -+ /* -+ * Do not insert if: -+ * - the device does not support queueing; -+ * - the entity is already associated with a counter, which happens if: -+ * 1) the entity is associated with a queue, 2) a request arrival -+ * has caused the queue to become both non-weight-raised, and hence -+ * change its weight, and backlogged; in this respect, each -+ * of the two events causes an invocation of this function, -+ * 3) this is the invocation of this function caused by the second -+ * event. This second invocation is actually useless, and we handle -+ * this fact by exiting immediately. More efficient or clearer -+ * solutions might possibly be adopted. -+ */ -+ if (!bfqd->hw_tag || entity->weight_counter) -+ return; -+ -+ while (*new) { -+ struct bfq_weight_counter *__counter = container_of(*new, -+ struct bfq_weight_counter, -+ weights_node); -+ parent = *new; -+ -+ if (entity->weight == __counter->weight) { -+ entity->weight_counter = __counter; -+ goto inc_counter; -+ } -+ if (entity->weight < __counter->weight) -+ new = &((*new)->rb_left); -+ else -+ new = &((*new)->rb_right); -+ } -+ -+ entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), -+ GFP_ATOMIC); -+ entity->weight_counter->weight = entity->weight; -+ rb_link_node(&entity->weight_counter->weights_node, parent, new); -+ rb_insert_color(&entity->weight_counter->weights_node, root); -+ -+inc_counter: -+ entity->weight_counter->num_active++; -+} -+ -+/* -+ * Decrement the weight counter associated with the entity, and, if the -+ * counter reaches 0, remove the counter from the tree. -+ * See the comments to the function bfq_weights_tree_add() for considerations -+ * about overhead. -+ */ -+static void bfq_weights_tree_remove(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root) -+{ -+ /* -+ * Check whether the entity is actually associated with a counter. -+ * In fact, the device may not be considered NCQ-capable for a while, -+ * which implies that no insertion in the weight trees is performed, -+ * after which the device may start to be deemed NCQ-capable, and hence -+ * this function may start to be invoked. This may cause the function -+ * to be invoked for entities that are not associated with any counter. -+ */ -+ if (!entity->weight_counter) -+ return; -+ -+ BUG_ON(RB_EMPTY_ROOT(root)); -+ BUG_ON(entity->weight_counter->weight != entity->weight); -+ -+ BUG_ON(!entity->weight_counter->num_active); -+ entity->weight_counter->num_active--; -+ if (entity->weight_counter->num_active > 0) -+ goto reset_entity_pointer; -+ -+ rb_erase(&entity->weight_counter->weights_node, root); -+ kfree(entity->weight_counter); -+ -+reset_entity_pointer: -+ entity->weight_counter = NULL; -+} -+ -+static struct request *bfq_find_next_rq(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct request *last) -+{ -+ struct rb_node *rbnext = rb_next(&last->rb_node); -+ struct rb_node *rbprev = rb_prev(&last->rb_node); -+ struct request *next = NULL, *prev = NULL; -+ -+ BUG_ON(RB_EMPTY_NODE(&last->rb_node)); -+ -+ if (rbprev != NULL) -+ prev = rb_entry_rq(rbprev); -+ -+ if (rbnext != NULL) -+ next = rb_entry_rq(rbnext); -+ else { -+ rbnext = rb_first(&bfqq->sort_list); -+ if (rbnext && rbnext != &last->rb_node) -+ next = rb_entry_rq(rbnext); -+ } -+ -+ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); -+} -+ -+/* see the definition of bfq_async_charge_factor for details */ -+static inline unsigned long bfq_serv_to_charge(struct request *rq, -+ struct bfq_queue *bfqq) -+{ -+ return blk_rq_sectors(rq) * -+ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) * -+ bfq_async_charge_factor)); -+} -+ -+/** -+ * bfq_updated_next_req - update the queue after a new next_rq selection. -+ * @bfqd: the device data the queue belongs to. -+ * @bfqq: the queue to update. -+ * -+ * If the first request of a queue changes we make sure that the queue -+ * has enough budget to serve at least its first request (if the -+ * request has grown). We do this because if the queue has not enough -+ * budget for its first request, it has to go through two dispatch -+ * rounds to actually get it dispatched. -+ */ -+static void bfq_updated_next_req(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ struct request *next_rq = bfqq->next_rq; -+ unsigned long new_budget; -+ -+ if (next_rq == NULL) -+ return; -+ -+ if (bfqq == bfqd->in_service_queue) -+ /* -+ * In order not to break guarantees, budgets cannot be -+ * changed after an entity has been selected. -+ */ -+ return; -+ -+ BUG_ON(entity->tree != &st->active); -+ BUG_ON(entity == entity->sched_data->in_service_entity); -+ -+ new_budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ if (entity->budget != new_budget) { -+ entity->budget = new_budget; -+ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", -+ new_budget); -+ bfq_activate_bfqq(bfqd, bfqq); -+ } -+} -+ -+static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) -+{ -+ u64 dur; -+ -+ if (bfqd->bfq_wr_max_time > 0) -+ return bfqd->bfq_wr_max_time; -+ -+ dur = bfqd->RT_prod; -+ do_div(dur, bfqd->peak_rate); -+ -+ return dur; -+} -+ -+static void bfq_add_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_data *bfqd = bfqq->bfqd; -+ struct request *next_rq, *prev; -+ unsigned long old_wr_coeff = bfqq->wr_coeff; -+ int idle_for_long_time = 0; -+ -+ bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); -+ bfqq->queued[rq_is_sync(rq)]++; -+ bfqd->queued++; -+ -+ elv_rb_add(&bfqq->sort_list, rq); -+ -+ /* -+ * Check if this request is a better next-serve candidate. -+ */ -+ prev = bfqq->next_rq; -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); -+ BUG_ON(next_rq == NULL); -+ bfqq->next_rq = next_rq; -+ -+ /* -+ * Adjust priority tree position, if next_rq changes. -+ */ -+ if (prev != bfqq->next_rq) -+ bfq_rq_pos_tree_add(bfqd, bfqq); -+ -+ if (!bfq_bfqq_busy(bfqq)) { -+ int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ time_is_before_jiffies(bfqq->soft_rt_next_start); -+ idle_for_long_time = time_is_before_jiffies( -+ bfqq->budget_timeout + -+ bfqd->bfq_wr_min_idle_time); -+ entity->budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ -+ if (!bfq_bfqq_IO_bound(bfqq)) { -+ if (time_before(jiffies, -+ RQ_BIC(rq)->ttime.last_end_request + -+ bfqd->bfq_slice_idle)) { -+ bfqq->requests_within_timer++; -+ if (bfqq->requests_within_timer >= -+ bfqd->bfq_requests_within_timer) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ } else -+ bfqq->requests_within_timer = 0; -+ } -+ -+ if (!bfqd->low_latency) -+ goto add_bfqq_busy; -+ -+ /* -+ * If the queue is not being boosted and has been idle -+ * for enough time, start a weight-raising period -+ */ -+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ if (idle_for_long_time) -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } else if (old_wr_coeff > 1) { -+ if (idle_for_long_time) -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else if (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt) { -+ bfqq->wr_coeff = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq-> -+ wr_cur_max_time)); -+ } else if (time_before( -+ bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time, -+ jiffies + -+ bfqd->bfq_wr_rt_max_time) && -+ soft_rt) { -+ /* -+ * -+ * The remaining weight-raising time is lower -+ * than bfqd->bfq_wr_rt_max_time, which -+ * means that the application is enjoying -+ * weight raising either because deemed soft- -+ * rt in the near past, or because deemed -+ * interactive a long ago. In both cases, -+ * resetting now the current remaining weight- -+ * raising time for the application to the -+ * weight-raising duration for soft rt -+ * applications would not cause any latency -+ * increase for the application (as the new -+ * duration would be higher than the remaining -+ * time). -+ * -+ * In addition, the application is now meeting -+ * the requirements for being deemed soft rt. -+ * In the end we can correctly and safely -+ * (re)charge the weight-raising duration for -+ * the application with the weight-raising -+ * duration for soft rt applications. -+ * -+ * In particular, doing this recharge now, i.e., -+ * before the weight-raising period for the -+ * application finishes, reduces the probability -+ * of the following negative scenario: -+ * 1) the weight of a soft rt application is -+ * raised at startup (as for any newly -+ * created application), -+ * 2) since the application is not interactive, -+ * at a certain time weight-raising is -+ * stopped for the application, -+ * 3) at that time the application happens to -+ * still have pending requests, and hence -+ * is destined to not have a chance to be -+ * deemed soft rt before these requests are -+ * completed (see the comments to the -+ * function bfq_bfqq_softrt_next_start() -+ * for details on soft rt detection), -+ * 4) these pending requests experience a high -+ * latency because the application is not -+ * weight-raised while they are pending. -+ */ -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ } -+ } -+ if (old_wr_coeff != bfqq->wr_coeff) -+ entity->ioprio_changed = 1; -+add_bfqq_busy: -+ bfqq->last_idle_bklogged = jiffies; -+ bfqq->service_from_backlogged = 0; -+ bfq_clear_bfqq_softrt_update(bfqq); -+ bfq_add_bfqq_busy(bfqd, bfqq); -+ } else { -+ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && -+ time_is_before_jiffies( -+ bfqq->last_wr_start_finish + -+ bfqd->bfq_wr_min_inter_arr_async)) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ -+ bfqd->wr_busy_queues++; -+ entity->ioprio_changed = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "non-idle wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ if (prev != bfqq->next_rq) -+ bfq_updated_next_req(bfqd, bfqq); -+ } -+ -+ if (bfqd->low_latency && -+ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || -+ idle_for_long_time)) -+ bfqq->last_wr_start_finish = jiffies; -+} -+ -+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, -+ struct bio *bio) -+{ -+ struct task_struct *tsk = current; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ bic = bfq_bic_lookup(bfqd, tsk->io_context); -+ if (bic == NULL) -+ return NULL; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ if (bfqq != NULL) -+ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio)); -+ -+ return NULL; -+} -+ -+static void bfq_activate_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ -+ bfqd->rq_in_driver++; -+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", -+ (long long unsigned)bfqd->last_position); -+} -+ -+static inline void bfq_deactivate_request(struct request_queue *q, -+ struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ -+ BUG_ON(bfqd->rq_in_driver == 0); -+ bfqd->rq_in_driver--; -+} -+ -+static void bfq_remove_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ const int sync = rq_is_sync(rq); -+ -+ if (bfqq->next_rq == rq) { -+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); -+ bfq_updated_next_req(bfqd, bfqq); -+ } -+ -+ list_del_init(&rq->queuelist); -+ BUG_ON(bfqq->queued[sync] == 0); -+ bfqq->queued[sync]--; -+ bfqd->queued--; -+ elv_rb_del(&bfqq->sort_list, rq); -+ -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) -+ bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ /* -+ * Remove queue from request-position tree as it is empty. -+ */ -+ if (bfqq->pos_root != NULL) { -+ rb_erase(&bfqq->pos_node, bfqq->pos_root); -+ bfqq->pos_root = NULL; -+ } -+ } -+ -+ if (rq->cmd_flags & REQ_META) { -+ BUG_ON(bfqq->meta_pending == 0); -+ bfqq->meta_pending--; -+ } -+} -+ -+static int bfq_merge(struct request_queue *q, struct request **req, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct request *__rq; -+ -+ __rq = bfq_find_rq_fmerge(bfqd, bio); -+ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { -+ *req = __rq; -+ return ELEVATOR_FRONT_MERGE; -+ } -+ -+ return ELEVATOR_NO_MERGE; -+} -+ -+static void bfq_merged_request(struct request_queue *q, struct request *req, -+ int type) -+{ -+ if (type == ELEVATOR_FRONT_MERGE && -+ rb_prev(&req->rb_node) && -+ blk_rq_pos(req) < -+ blk_rq_pos(container_of(rb_prev(&req->rb_node), -+ struct request, rb_node))) { -+ struct bfq_queue *bfqq = RQ_BFQQ(req); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ struct request *prev, *next_rq; -+ -+ /* Reposition request in its sort_list */ -+ elv_rb_del(&bfqq->sort_list, req); -+ elv_rb_add(&bfqq->sort_list, req); -+ /* Choose next request to be served for bfqq */ -+ prev = bfqq->next_rq; -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, -+ bfqd->last_position); -+ BUG_ON(next_rq == NULL); -+ bfqq->next_rq = next_rq; -+ /* -+ * If next_rq changes, update both the queue's budget to -+ * fit the new request and the queue's position in its -+ * rq_pos_tree. -+ */ -+ if (prev != bfqq->next_rq) { -+ bfq_updated_next_req(bfqd, bfqq); -+ bfq_rq_pos_tree_add(bfqd, bfqq); -+ } -+ } -+} -+ -+static void bfq_merged_requests(struct request_queue *q, struct request *rq, -+ struct request *next) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ /* -+ * Reposition in fifo if next is older than rq. -+ */ -+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && -+ time_before(next->fifo_time, rq->fifo_time)) { -+ list_move(&rq->queuelist, &next->queuelist); -+ rq->fifo_time = next->fifo_time; -+ } -+ -+ if (bfqq->next_rq == next) -+ bfqq->next_rq = rq; -+ -+ bfq_remove_request(next); -+} -+ -+/* Must be called with bfqq != NULL */ -+static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfqq == NULL); -+ if (bfq_bfqq_busy(bfqq)) -+ bfqq->bfqd->wr_busy_queues--; -+ bfqq->wr_coeff = 1; -+ bfqq->wr_cur_max_time = 0; -+ /* Trigger a weight change on the next activation of the queue */ -+ bfqq->entity.ioprio_changed = 1; -+} -+ -+static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -+ struct bfq_group *bfqg) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < IOPRIO_BE_NR; j++) -+ if (bfqg->async_bfqq[i][j] != NULL) -+ bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); -+ if (bfqg->async_idle_bfqq != NULL) -+ bfq_bfqq_end_wr(bfqg->async_idle_bfqq); -+} -+ -+static void bfq_end_wr(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq; -+ -+ spin_lock_irq(bfqd->queue->queue_lock); -+ -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) -+ bfq_bfqq_end_wr(bfqq); -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) -+ bfq_bfqq_end_wr(bfqq); -+ bfq_end_wr_async(bfqd); -+ -+ spin_unlock_irq(bfqd->queue->queue_lock); -+} -+ -+static int bfq_allow_merge(struct request_queue *q, struct request *rq, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ /* -+ * Disallow merge of a sync bio into an async request. -+ */ -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -+ return 0; -+ -+ /* -+ * Lookup the bfqq that this bio will be queued with. Allow -+ * merge only if rq is queued there. -+ * Queue lock is held here. -+ */ -+ bic = bfq_bic_lookup(bfqd, current->io_context); -+ if (bic == NULL) -+ return 0; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ return bfqq == RQ_BFQQ(rq); -+} -+ -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq != NULL) { -+ bfq_mark_bfqq_must_alloc(bfqq); -+ bfq_mark_bfqq_budget_new(bfqq); -+ bfq_clear_bfqq_fifo_expire(bfqq); -+ -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_in_service_queue, cur-budget = %lu", -+ bfqq->entity.budget); -+ } -+ -+ bfqd->in_service_queue = bfqq; -+} -+ -+/* -+ * Get and set a new queue for service. -+ */ -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (!bfqq) -+ bfqq = bfq_get_next_queue(bfqd); -+ else -+ bfq_get_next_queue_forced(bfqd, bfqq); -+ -+ __bfq_set_in_service_queue(bfqd, bfqq); -+ return bfqq; -+} -+ -+static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, -+ struct request *rq) -+{ -+ if (blk_rq_pos(rq) >= bfqd->last_position) -+ return blk_rq_pos(rq) - bfqd->last_position; -+ else -+ return bfqd->last_position - blk_rq_pos(rq); -+} -+ -+/* -+ * Return true if bfqq has no request pending and rq is close enough to -+ * bfqd->last_position, or if rq is closer to bfqd->last_position than -+ * bfqq->next_rq -+ */ -+static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) -+{ -+ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; -+} -+ -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) -+{ -+ struct rb_root *root = &bfqd->rq_pos_tree; -+ struct rb_node *parent, *node; -+ struct bfq_queue *__bfqq; -+ sector_t sector = bfqd->last_position; -+ -+ if (RB_EMPTY_ROOT(root)) -+ return NULL; -+ -+ /* -+ * First, if we find a request starting at the end of the last -+ * request, choose it. -+ */ -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); -+ if (__bfqq != NULL) -+ return __bfqq; -+ -+ /* -+ * If the exact sector wasn't found, the parent of the NULL leaf -+ * will contain the closest sector (rq_pos_tree sorted by -+ * next_request position). -+ */ -+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); -+ if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ return __bfqq; -+ -+ if (blk_rq_pos(__bfqq->next_rq) < sector) -+ node = rb_next(&__bfqq->pos_node); -+ else -+ node = rb_prev(&__bfqq->pos_node); -+ if (node == NULL) -+ return NULL; -+ -+ __bfqq = rb_entry(node, struct bfq_queue, pos_node); -+ if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ return __bfqq; -+ -+ return NULL; -+} -+ -+/* -+ * bfqd - obvious -+ * cur_bfqq - passed in so that we don't decide that the current queue -+ * is closely cooperating with itself. -+ * -+ * We are assuming that cur_bfqq has dispatched at least one request, -+ * and that bfqd->last_position reflects a position on the disk associated -+ * with the I/O issued by cur_bfqq. -+ */ -+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, -+ struct bfq_queue *cur_bfqq) -+{ -+ struct bfq_queue *bfqq; -+ -+ if (bfq_class_idle(cur_bfqq)) -+ return NULL; -+ if (!bfq_bfqq_sync(cur_bfqq)) -+ return NULL; -+ if (BFQQ_SEEKY(cur_bfqq)) -+ return NULL; -+ -+ /* If device has only one backlogged bfq_queue, don't search. */ -+ if (bfqd->busy_queues == 1) -+ return NULL; -+ -+ /* -+ * We should notice if some of the queues are cooperating, e.g. -+ * working closely on the same area of the disk. In that case, -+ * we can group them together and don't waste time idling. -+ */ -+ bfqq = bfqq_close(bfqd); -+ if (bfqq == NULL || bfqq == cur_bfqq) -+ return NULL; -+ -+ /* -+ * Do not merge queues from different bfq_groups. -+ */ -+ if (bfqq->entity.parent != cur_bfqq->entity.parent) -+ return NULL; -+ -+ /* -+ * It only makes sense to merge sync queues. -+ */ -+ if (!bfq_bfqq_sync(bfqq)) -+ return NULL; -+ if (BFQQ_SEEKY(bfqq)) -+ return NULL; -+ -+ /* -+ * Do not merge queues of different priority classes. -+ */ -+ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) -+ return NULL; -+ -+ return bfqq; -+} -+ -+/* -+ * If enough samples have been computed, return the current max budget -+ * stored in bfqd, which is dynamically updated according to the -+ * estimated disk peak rate; otherwise return the default max budget -+ */ -+static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < 194) -+ return bfq_default_max_budget; -+ else -+ return bfqd->bfq_max_budget; -+} -+ -+/* -+ * Return min budget, which is a fraction of the current or default -+ * max budget (trying with 1/32) -+ */ -+static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < 194) -+ return bfq_default_max_budget / 32; -+ else -+ return bfqd->bfq_max_budget / 32; -+} -+ -+static void bfq_arm_slice_timer(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfqd->in_service_queue; -+ struct bfq_io_cq *bic; -+ unsigned long sl; -+ -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Processes have exited, don't wait. */ -+ bic = bfqd->in_service_bic; -+ if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0) -+ return; -+ -+ bfq_mark_bfqq_wait_request(bfqq); -+ -+ /* -+ * We don't want to idle for seeks, but we do want to allow -+ * fair distribution of slice time for a process doing back-to-back -+ * seeks. So allow a little bit of time for him to submit a new rq. -+ * -+ * To prevent processes with (partly) seeky workloads from -+ * being too ill-treated, grant them a small fraction of the -+ * assigned budget before reducing the waiting time to -+ * BFQ_MIN_TT. This happened to help reduce latency. -+ */ -+ sl = bfqd->bfq_slice_idle; -+ /* -+ * Unless the queue is being weight-raised, grant only minimum idle -+ * time if the queue either has been seeky for long enough or has -+ * already proved to be constantly seeky. -+ */ -+ if (bfq_sample_valid(bfqq->seek_samples) && -+ ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > -+ bfq_max_budget(bfqq->bfqd) / 8) || -+ bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1) -+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); -+ else if (bfqq->wr_coeff > 1) -+ sl = sl * 3; -+ bfqd->last_idling_start = ktime_get(); -+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); -+ bfq_log(bfqd, "arm idle: %u/%u ms", -+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); -+} -+ -+/* -+ * Set the maximum time for the in-service queue to consume its -+ * budget. This prevents seeky processes from lowering the disk -+ * throughput (always guaranteed with a time slice scheme as in CFQ). -+ */ -+static void bfq_set_budget_timeout(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfqd->in_service_queue; -+ unsigned int timeout_coeff; -+ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) -+ timeout_coeff = 1; -+ else -+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; -+ -+ bfqd->last_budget_start = ktime_get(); -+ -+ bfq_clear_bfqq_budget_new(bfqq); -+ bfqq->budget_timeout = jiffies + -+ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; -+ -+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", -+ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * -+ timeout_coeff)); -+} -+ -+/* -+ * Move request from internal lists to the request queue dispatch list. -+ */ -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ /* -+ * For consistency, the next instruction should have been executed -+ * after removing the request from the queue and dispatching it. -+ * We execute instead this instruction before bfq_remove_request() -+ * (and hence introduce a temporary inconsistency), for efficiency. -+ * In fact, in a forced_dispatch, this prevents two counters related -+ * to bfqq->dispatched to risk to be uselessly decremented if bfqq -+ * is not in service, and then to be incremented again after -+ * incrementing bfqq->dispatched. -+ */ -+ bfqq->dispatched++; -+ bfq_remove_request(rq); -+ elv_dispatch_sort(q, rq); -+ -+ if (bfq_bfqq_sync(bfqq)) -+ bfqd->sync_flight++; -+} -+ -+/* -+ * Return expired entry, or NULL to just start from scratch in rbtree. -+ */ -+static struct request *bfq_check_fifo(struct bfq_queue *bfqq) -+{ -+ struct request *rq = NULL; -+ -+ if (bfq_bfqq_fifo_expire(bfqq)) -+ return NULL; -+ -+ bfq_mark_bfqq_fifo_expire(bfqq); -+ -+ if (list_empty(&bfqq->fifo)) -+ return NULL; -+ -+ rq = rq_entry_fifo(bfqq->fifo.next); -+ -+ if (time_before(jiffies, rq->fifo_time)) -+ return NULL; -+ -+ return rq; -+} -+ -+/* -+ * Must be called with the queue_lock held. -+ */ -+static int bfqq_process_refs(struct bfq_queue *bfqq) -+{ -+ int process_refs, io_refs; -+ -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -+ BUG_ON(process_refs < 0); -+ return process_refs; -+} -+ -+static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ int process_refs, new_process_refs; -+ struct bfq_queue *__bfqq; -+ -+ /* -+ * If there are no process references on the new_bfqq, then it is -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -+ * may have dropped their last reference (not just their last process -+ * reference). -+ */ -+ if (!bfqq_process_refs(new_bfqq)) -+ return; -+ -+ /* Avoid a circular list and skip interim queue merges. */ -+ while ((__bfqq = new_bfqq->new_bfqq)) { -+ if (__bfqq == bfqq) -+ return; -+ new_bfqq = __bfqq; -+ } -+ -+ process_refs = bfqq_process_refs(bfqq); -+ new_process_refs = bfqq_process_refs(new_bfqq); -+ /* -+ * If the process for the bfqq has gone away, there is no -+ * sense in merging the queues. -+ */ -+ if (process_refs == 0 || new_process_refs == 0) -+ return; -+ -+ /* -+ * Merge in the direction of the lesser amount of work. -+ */ -+ if (new_process_refs >= process_refs) { -+ bfqq->new_bfqq = new_bfqq; -+ atomic_add(process_refs, &new_bfqq->ref); -+ } else { -+ new_bfqq->new_bfqq = bfqq; -+ atomic_add(new_process_refs, &bfqq->ref); -+ } -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -+ new_bfqq->pid); -+} -+ -+static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ return entity->budget - entity->service; -+} -+ -+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ __bfq_bfqd_reset_in_service(bfqd); -+ -+ /* -+ * If this bfqq is shared between multiple processes, check -+ * to make sure that those processes are still issuing I/Os -+ * within the mean seek distance. If not, it may be time to -+ * break the queues apart again. -+ */ -+ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) -+ bfq_mark_bfqq_split_coop(bfqq); -+ -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ /* -+ * Overloading budget_timeout field to store the time -+ * at which the queue remains with no backlog; used by -+ * the weight-raising mechanism. -+ */ -+ bfqq->budget_timeout = jiffies; -+ bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ } else { -+ bfq_activate_bfqq(bfqd, bfqq); -+ /* -+ * Resort priority tree of potential close cooperators. -+ */ -+ bfq_rq_pos_tree_add(bfqd, bfqq); -+ } -+} -+ -+/** -+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. -+ * @bfqd: device data. -+ * @bfqq: queue to update. -+ * @reason: reason for expiration. -+ * -+ * Handle the feedback on @bfqq budget. See the body for detailed -+ * comments. -+ */ -+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ enum bfqq_expiration reason) -+{ -+ struct request *next_rq; -+ unsigned long budget, min_budget; -+ -+ budget = bfqq->max_budget; -+ min_budget = bfq_min_budget(bfqd); -+ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", -+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", -+ budget, bfq_min_budget(bfqd)); -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", -+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); -+ -+ if (bfq_bfqq_sync(bfqq)) { -+ switch (reason) { -+ /* -+ * Caveat: in all the following cases we trade latency -+ * for throughput. -+ */ -+ case BFQ_BFQQ_TOO_IDLE: -+ /* -+ * This is the only case where we may reduce -+ * the budget: if there is no request of the -+ * process still waiting for completion, then -+ * we assume (tentatively) that the timer has -+ * expired because the batch of requests of -+ * the process could have been served with a -+ * smaller budget. Hence, betting that -+ * process will behave in the same way when it -+ * becomes backlogged again, we reduce its -+ * next budget. As long as we guess right, -+ * this budget cut reduces the latency -+ * experienced by the process. -+ * -+ * However, if there are still outstanding -+ * requests, then the process may have not yet -+ * issued its next request just because it is -+ * still waiting for the completion of some of -+ * the still outstanding ones. So in this -+ * subcase we do not reduce its budget, on the -+ * contrary we increase it to possibly boost -+ * the throughput, as discussed in the -+ * comments to the BUDGET_TIMEOUT case. -+ */ -+ if (bfqq->dispatched > 0) /* still outstanding reqs */ -+ budget = min(budget * 2, bfqd->bfq_max_budget); -+ else { -+ if (budget > 5 * min_budget) -+ budget -= 4 * min_budget; -+ else -+ budget = min_budget; -+ } -+ break; -+ case BFQ_BFQQ_BUDGET_TIMEOUT: -+ /* -+ * We double the budget here because: 1) it -+ * gives the chance to boost the throughput if -+ * this is not a seeky process (which may have -+ * bumped into this timeout because of, e.g., -+ * ZBR), 2) together with charge_full_budget -+ * it helps give seeky processes higher -+ * timestamps, and hence be served less -+ * frequently. -+ */ -+ budget = min(budget * 2, bfqd->bfq_max_budget); -+ break; -+ case BFQ_BFQQ_BUDGET_EXHAUSTED: -+ /* -+ * The process still has backlog, and did not -+ * let either the budget timeout or the disk -+ * idling timeout expire. Hence it is not -+ * seeky, has a short thinktime and may be -+ * happy with a higher budget too. So -+ * definitely increase the budget of this good -+ * candidate to boost the disk throughput. -+ */ -+ budget = min(budget * 4, bfqd->bfq_max_budget); -+ break; -+ case BFQ_BFQQ_NO_MORE_REQUESTS: -+ /* -+ * Leave the budget unchanged. -+ */ -+ default: -+ return; -+ } -+ } else /* async queue */ -+ /* async queues get always the maximum possible budget -+ * (their ability to dispatch is limited by -+ * @bfqd->bfq_max_budget_async_rq). -+ */ -+ budget = bfqd->bfq_max_budget; -+ -+ bfqq->max_budget = budget; -+ -+ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && -+ bfqq->max_budget > bfqd->bfq_max_budget) -+ bfqq->max_budget = bfqd->bfq_max_budget; -+ -+ /* -+ * Make sure that we have enough budget for the next request. -+ * Since the finish time of the bfqq must be kept in sync with -+ * the budget, be sure to call __bfq_bfqq_expire() after the -+ * update. -+ */ -+ next_rq = bfqq->next_rq; -+ if (next_rq != NULL) -+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ else -+ bfqq->entity.budget = bfqq->max_budget; -+ -+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", -+ next_rq != NULL ? blk_rq_sectors(next_rq) : 0, -+ bfqq->entity.budget); -+} -+ -+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) -+{ -+ unsigned long max_budget; -+ -+ /* -+ * The max_budget calculated when autotuning is equal to the -+ * amount of sectors transfered in timeout_sync at the -+ * estimated peak rate. -+ */ -+ max_budget = (unsigned long)(peak_rate * 1000 * -+ timeout >> BFQ_RATE_SHIFT); -+ -+ return max_budget; -+} -+ -+/* -+ * In addition to updating the peak rate, checks whether the process -+ * is "slow", and returns 1 if so. This slow flag is used, in addition -+ * to the budget timeout, to reduce the amount of service provided to -+ * seeky processes, and hence reduce their chances to lower the -+ * throughput. See the code for more details. -+ */ -+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int compensate, enum bfqq_expiration reason) -+{ -+ u64 bw, usecs, expected, timeout; -+ ktime_t delta; -+ int update = 0; -+ -+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) -+ return 0; -+ -+ if (compensate) -+ delta = bfqd->last_idling_start; -+ else -+ delta = ktime_get(); -+ delta = ktime_sub(delta, bfqd->last_budget_start); -+ usecs = ktime_to_us(delta); -+ -+ /* Don't trust short/unrealistic values. */ -+ if (usecs < 100 || usecs >= LONG_MAX) -+ return 0; -+ -+ /* -+ * Calculate the bandwidth for the last slice. We use a 64 bit -+ * value to store the peak rate, in sectors per usec in fixed -+ * point math. We do so to have enough precision in the estimate -+ * and to avoid overflows. -+ */ -+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; -+ do_div(bw, (unsigned long)usecs); -+ -+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ -+ /* -+ * Use only long (> 20ms) intervals to filter out spikes for -+ * the peak rate estimation. -+ */ -+ if (usecs > 20000) { -+ if (bw > bfqd->peak_rate || -+ (!BFQQ_SEEKY(bfqq) && -+ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { -+ bfq_log(bfqd, "measured bw =%llu", bw); -+ /* -+ * To smooth oscillations use a low-pass filter with -+ * alpha=7/8, i.e., -+ * new_rate = (7/8) * old_rate + (1/8) * bw -+ */ -+ do_div(bw, 8); -+ if (bw == 0) -+ return 0; -+ bfqd->peak_rate *= 7; -+ do_div(bfqd->peak_rate, 8); -+ bfqd->peak_rate += bw; -+ update = 1; -+ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); -+ } -+ -+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; -+ -+ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) -+ bfqd->peak_rate_samples++; -+ -+ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && -+ update) { -+ int dev_type = blk_queue_nonrot(bfqd->queue); -+ if (bfqd->bfq_user_max_budget == 0) { -+ bfqd->bfq_max_budget = -+ bfq_calc_max_budget(bfqd->peak_rate, -+ timeout); -+ bfq_log(bfqd, "new max_budget=%lu", -+ bfqd->bfq_max_budget); -+ } -+ if (bfqd->device_speed == BFQ_BFQD_FAST && -+ bfqd->peak_rate < device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_SLOW; -+ bfqd->RT_prod = R_slow[dev_type] * -+ T_slow[dev_type]; -+ } else if (bfqd->device_speed == BFQ_BFQD_SLOW && -+ bfqd->peak_rate > device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ bfqd->RT_prod = R_fast[dev_type] * -+ T_fast[dev_type]; -+ } -+ } -+ } -+ -+ /* -+ * If the process has been served for a too short time -+ * interval to let its possible sequential accesses prevail on -+ * the initial seek time needed to move the disk head on the -+ * first sector it requested, then give the process a chance -+ * and for the moment return false. -+ */ -+ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) -+ return 0; -+ -+ /* -+ * A process is considered ``slow'' (i.e., seeky, so that we -+ * cannot treat it fairly in the service domain, as it would -+ * slow down too much the other processes) if, when a slice -+ * ends for whatever reason, it has received service at a -+ * rate that would not be high enough to complete the budget -+ * before the budget timeout expiration. -+ */ -+ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; -+ -+ /* -+ * Caveat: processes doing IO in the slower disk zones will -+ * tend to be slow(er) even if not seeky. And the estimated -+ * peak rate will actually be an average over the disk -+ * surface. Hence, to not be too harsh with unlucky processes, -+ * we keep a budget/3 margin of safety before declaring a -+ * process slow. -+ */ -+ return expected > (4 * bfqq->entity.budget) / 3; -+} -+ -+/* -+ * To be deemed as soft real-time, an application must meet two -+ * requirements. First, the application must not require an average -+ * bandwidth higher than the approximate bandwidth required to playback or -+ * record a compressed high-definition video. -+ * The next function is invoked on the completion of the last request of a -+ * batch, to compute the next-start time instant, soft_rt_next_start, such -+ * that, if the next request of the application does not arrive before -+ * soft_rt_next_start, then the above requirement on the bandwidth is met. -+ * -+ * The second requirement is that the request pattern of the application is -+ * isochronous, i.e., that, after issuing a request or a batch of requests, -+ * the application stops issuing new requests until all its pending requests -+ * have been completed. After that, the application may issue a new batch, -+ * and so on. -+ * For this reason the next function is invoked to compute -+ * soft_rt_next_start only for applications that meet this requirement, -+ * whereas soft_rt_next_start is set to infinity for applications that do -+ * not. -+ * -+ * Unfortunately, even a greedy application may happen to behave in an -+ * isochronous way if the CPU load is high. In fact, the application may -+ * stop issuing requests while the CPUs are busy serving other processes, -+ * then restart, then stop again for a while, and so on. In addition, if -+ * the disk achieves a low enough throughput with the request pattern -+ * issued by the application (e.g., because the request pattern is random -+ * and/or the device is slow), then the application may meet the above -+ * bandwidth requirement too. To prevent such a greedy application to be -+ * deemed as soft real-time, a further rule is used in the computation of -+ * soft_rt_next_start: soft_rt_next_start must be higher than the current -+ * time plus the maximum time for which the arrival of a request is waited -+ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. -+ * This filters out greedy applications, as the latter issue instead their -+ * next request as soon as possible after the last one has been completed -+ * (in contrast, when a batch of requests is completed, a soft real-time -+ * application spends some time processing data). -+ * -+ * Unfortunately, the last filter may easily generate false positives if -+ * only bfqd->bfq_slice_idle is used as a reference time interval and one -+ * or both the following cases occur: -+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher -+ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with -+ * HZ=100. -+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing -+ * for a while, then suddenly 'jump' by several units to recover the lost -+ * increments. This seems to happen, e.g., inside virtual machines. -+ * To address this issue, we do not use as a reference time interval just -+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In -+ * particular we add the minimum number of jiffies for which the filter -+ * seems to be quite precise also in embedded systems and KVM/QEMU virtual -+ * machines. -+ */ -+static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ return max(bfqq->last_idle_bklogged + -+ HZ * bfqq->service_from_backlogged / -+ bfqd->bfq_wr_max_softrt_rate, -+ jiffies + bfqq->bfqd->bfq_slice_idle + 4); -+} -+ -+/* -+ * Return the largest-possible time instant such that, for as long as possible, -+ * the current time will be lower than this time instant according to the macro -+ * time_is_before_jiffies(). -+ */ -+static inline unsigned long bfq_infinity_from_now(unsigned long now) -+{ -+ return now + ULONG_MAX / 2; -+} -+ -+/** -+ * bfq_bfqq_expire - expire a queue. -+ * @bfqd: device owning the queue. -+ * @bfqq: the queue to expire. -+ * @compensate: if true, compensate for the time spent idling. -+ * @reason: the reason causing the expiration. -+ * -+ * -+ * If the process associated to the queue is slow (i.e., seeky), or in -+ * case of budget timeout, or, finally, if it is async, we -+ * artificially charge it an entire budget (independently of the -+ * actual service it received). As a consequence, the queue will get -+ * higher timestamps than the correct ones upon reactivation, and -+ * hence it will be rescheduled as if it had received more service -+ * than what it actually received. In the end, this class of processes -+ * will receive less service in proportion to how slowly they consume -+ * their budgets (and hence how seriously they tend to lower the -+ * throughput). -+ * -+ * In contrast, when a queue expires because it has been idling for -+ * too much or because it exhausted its budget, we do not touch the -+ * amount of service it has received. Hence when the queue will be -+ * reactivated and its timestamps updated, the latter will be in sync -+ * with the actual service received by the queue until expiration. -+ * -+ * Charging a full budget to the first type of queues and the exact -+ * service to the others has the effect of using the WF2Q+ policy to -+ * schedule the former on a timeslice basis, without violating the -+ * service domain guarantees of the latter. -+ */ -+static void bfq_bfqq_expire(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ int compensate, -+ enum bfqq_expiration reason) -+{ -+ int slow; -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ /* Update disk peak rate for autotuning and check whether the -+ * process is slow (see bfq_update_peak_rate). -+ */ -+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); -+ -+ /* -+ * As above explained, 'punish' slow (i.e., seeky), timed-out -+ * and async queues, to favor sequential sync workloads. -+ * -+ * Processes doing I/O in the slower disk zones will tend to be -+ * slow(er) even if not seeky. Hence, since the estimated peak -+ * rate is actually an average over the disk surface, these -+ * processes may timeout just for bad luck. To avoid punishing -+ * them we do not charge a full budget to a process that -+ * succeeded in consuming at least 2/3 of its budget. -+ */ -+ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) -+ bfq_bfqq_charge_full_budget(bfqq); -+ -+ bfqq->service_from_backlogged += bfqq->entity.service; -+ -+ if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ !bfq_bfqq_constantly_seeky(bfqq)) { -+ bfq_mark_bfqq_constantly_seeky(bfqq); -+ if (!blk_queue_nonrot(bfqd->queue)) -+ bfqd->const_seeky_busy_in_flight_queues++; -+ } -+ -+ if (reason == BFQ_BFQQ_TOO_IDLE && -+ bfqq->entity.service <= 2 * bfqq->entity.budget / 10 ) -+ bfq_clear_bfqq_IO_bound(bfqq); -+ -+ if (bfqd->low_latency && bfqq->wr_coeff == 1) -+ bfqq->last_wr_start_finish = jiffies; -+ -+ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && -+ RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ /* -+ * If we get here, and there are no outstanding requests, -+ * then the request pattern is isochronous (see the comments -+ * to the function bfq_bfqq_softrt_next_start()). Hence we -+ * can compute soft_rt_next_start. If, instead, the queue -+ * still has outstanding requests, then we have to wait -+ * for the completion of all the outstanding requests to -+ * discover whether the request pattern is actually -+ * isochronous. -+ */ -+ if (bfqq->dispatched == 0) -+ bfqq->soft_rt_next_start = -+ bfq_bfqq_softrt_next_start(bfqd, bfqq); -+ else { -+ /* -+ * The application is still waiting for the -+ * completion of one or more requests: -+ * prevent it from possibly being incorrectly -+ * deemed as soft real-time by setting its -+ * soft_rt_next_start to infinity. In fact, -+ * without this assignment, the application -+ * would be incorrectly deemed as soft -+ * real-time if: -+ * 1) it issued a new request before the -+ * completion of all its in-flight -+ * requests, and -+ * 2) at that time, its soft_rt_next_start -+ * happened to be in the past. -+ */ -+ bfqq->soft_rt_next_start = -+ bfq_infinity_from_now(jiffies); -+ /* -+ * Schedule an update of soft_rt_next_start to when -+ * the task may be discovered to be isochronous. -+ */ -+ bfq_mark_bfqq_softrt_update(bfqq); -+ } -+ } -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, -+ slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); -+ -+ /* -+ * Increase, decrease or leave budget unchanged according to -+ * reason. -+ */ -+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); -+ __bfq_bfqq_expire(bfqd, bfqq); -+} -+ -+/* -+ * Budget timeout is not implemented through a dedicated timer, but -+ * just checked on request arrivals and completions, as well as on -+ * idle timer expirations. -+ */ -+static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) -+{ -+ if (bfq_bfqq_budget_new(bfqq) || -+ time_before(jiffies, bfqq->budget_timeout)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * If we expire a queue that is waiting for the arrival of a new -+ * request, we may prevent the fictitious timestamp back-shifting that -+ * allows the guarantees of the queue to be preserved (see [1] for -+ * this tricky aspect). Hence we return true only if this condition -+ * does not hold, or if the queue is slow enough to deserve only to be -+ * kicked off for preserving a high throughput. -+*/ -+static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "may_budget_timeout: wait_request %d left %d timeout %d", -+ bfq_bfqq_wait_request(bfqq), -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, -+ bfq_bfqq_budget_timeout(bfqq)); -+ -+ return (!bfq_bfqq_wait_request(bfqq) || -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) -+ && -+ bfq_bfqq_budget_timeout(bfqq); -+} -+ -+/* -+ * Device idling is allowed only for the queues for which this function -+ * returns true. For this reason, the return value of this function plays a -+ * critical role for both throughput boosting and service guarantees. The -+ * return value is computed through a logical expression. In this rather -+ * long comment, we try to briefly describe all the details and motivations -+ * behind the components of this logical expression. -+ * -+ * First, the expression may be true only for sync queues. Besides, if -+ * bfqq is also being weight-raised, then the expression always evaluates -+ * to true, as device idling is instrumental for preserving low-latency -+ * guarantees (see [1]). Otherwise, the expression evaluates to true only -+ * if bfqq has a non-null idle window and at least one of the following -+ * two conditions holds. The first condition is that the device is not -+ * performing NCQ, because idling the device most certainly boosts the -+ * throughput if this condition holds and bfqq has been granted a non-null -+ * idle window. The second compound condition is made of the logical AND of -+ * two components. -+ * -+ * The first component is true only if there is no weight-raised busy -+ * queue. This guarantees that the device is not idled for a sync non- -+ * weight-raised queue when there are busy weight-raised queues. The former -+ * is then expired immediately if empty. Combined with the timestamping -+ * rules of BFQ (see [1] for details), this causes sync non-weight-raised -+ * queues to get a lower number of requests served, and hence to ask for a -+ * lower number of requests from the request pool, before the busy weight- -+ * raised queues get served again. -+ * -+ * This is beneficial for the processes associated with weight-raised -+ * queues, when the request pool is saturated (e.g., in the presence of -+ * write hogs). In fact, if the processes associated with the other queues -+ * ask for requests at a lower rate, then weight-raised processes have a -+ * higher probability to get a request from the pool immediately (or at -+ * least soon) when they need one. Hence they have a higher probability to -+ * actually get a fraction of the disk throughput proportional to their -+ * high weight. This is especially true with NCQ-capable drives, which -+ * enqueue several requests in advance and further reorder internally- -+ * queued requests. -+ * -+ * In the end, mistreating non-weight-raised queues when there are busy -+ * weight-raised queues seems to mitigate starvation problems in the -+ * presence of heavy write workloads and NCQ, and hence to guarantee a -+ * higher application and system responsiveness in these hostile scenarios. -+ * -+ * If the first component of the compound condition is instead true, i.e., -+ * there is no weight-raised busy queue, then the second component of the -+ * compound condition takes into account service-guarantee and throughput -+ * issues related to NCQ (recall that the compound condition is evaluated -+ * only if the device is detected as supporting NCQ). -+ * -+ * As for service guarantees, allowing the drive to enqueue more than one -+ * request at a time, and hence delegating de facto final scheduling -+ * decisions to the drive's internal scheduler, causes loss of control on -+ * the actual request service order. In this respect, when the drive is -+ * allowed to enqueue more than one request at a time, the service -+ * distribution enforced by the drive's internal scheduler is likely to -+ * coincide with the desired device-throughput distribution only in the -+ * following, perfectly symmetric, scenario: -+ * 1) all active queues have the same weight, -+ * 2) all active groups at the same level in the groups tree have the same -+ * weight, -+ * 3) all active groups at the same level in the groups tree have the same -+ * number of children. -+ * -+ * Even in such a scenario, sequential I/O may still receive a preferential -+ * treatment, but this is not likely to be a big issue with flash-based -+ * devices, because of their non-dramatic loss of throughput with random -+ * I/O. Things do differ with HDDs, for which additional care is taken, as -+ * explained after completing the discussion for flash-based devices. -+ * -+ * Unfortunately, keeping the necessary state for evaluating exactly the -+ * above symmetry conditions would be quite complex and time-consuming. -+ * Therefore BFQ evaluates instead the following stronger sub-conditions, -+ * for which it is much easier to maintain the needed state: -+ * 1) all active queues have the same weight, -+ * 2) all active groups have the same weight, -+ * 3) all active groups have at most one active child each. -+ * In particular, the last two conditions are always true if hierarchical -+ * support and the cgroups interface are not enabled, hence no state needs -+ * to be maintained in this case. -+ * -+ * According to the above considerations, the second component of the -+ * compound condition evaluates to true if any of the above symmetry -+ * sub-condition does not hold, or the device is not flash-based. Therefore, -+ * if also the first component is true, then idling is allowed for a sync -+ * queue. These are the only sub-conditions considered if the device is -+ * flash-based, as, for such a device, it is sensible to force idling only -+ * for service-guarantee issues. In fact, as for throughput, idling -+ * NCQ-capable flash-based devices would not boost the throughput even -+ * with sequential I/O; rather it would lower the throughput in proportion -+ * to how fast the device is. In the end, (only) if all the three -+ * sub-conditions hold and the device is flash-based, the compound -+ * condition evaluates to false and therefore no idling is performed. -+ * -+ * As already said, things change with a rotational device, where idling -+ * boosts the throughput with sequential I/O (even with NCQ). Hence, for -+ * such a device the second component of the compound condition evaluates -+ * to true also if the following additional sub-condition does not hold: -+ * the queue is constantly seeky. Unfortunately, this different behavior -+ * with respect to flash-based devices causes an additional asymmetry: if -+ * some sync queues enjoy idling and some other sync queues do not, then -+ * the latter get a low share of the device throughput, simply because the -+ * former get many requests served after being set as in service, whereas -+ * the latter do not. As a consequence, to guarantee the desired throughput -+ * distribution, on HDDs the compound expression evaluates to true (and -+ * hence device idling is performed) also if the following last symmetry -+ * condition does not hold: no other queue is benefiting from idling. Also -+ * this last condition is actually replaced with a simpler-to-maintain and -+ * stronger condition: there is no busy queue which is not constantly seeky -+ * (and hence may also benefit from idling). -+ * -+ * To sum up, when all the required symmetry and throughput-boosting -+ * sub-conditions hold, the second component of the compound condition -+ * evaluates to false, and hence no idling is performed. This helps to -+ * keep the drives' internal queues full on NCQ-capable devices, and hence -+ * to boost the throughput, without causing 'almost' any loss of service -+ * guarantees. The 'almost' follows from the fact that, if the internal -+ * queue of one such device is filled while all the sub-conditions hold, -+ * but at some point in time some sub-condition stops to hold, then it may -+ * become impossible to let requests be served in the new desired order -+ * until all the requests already queued in the device have been served. -+ */ -+static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+#ifdef CONFIG_CGROUP_BFQIO -+#define symmetric_scenario (!bfqd->active_numerous_groups && \ -+ !bfq_differentiated_weights(bfqd)) -+#else -+#define symmetric_scenario (!bfq_differentiated_weights(bfqd)) -+#endif -+#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \ -+ bfqd->busy_in_flight_queues == \ -+ bfqd->const_seeky_busy_in_flight_queues) -+/* -+ * Condition for expiring a non-weight-raised queue (and hence not idling -+ * the device). -+ */ -+#define cond_for_expiring_non_wr (bfqd->hw_tag && \ -+ (bfqd->wr_busy_queues > 0 || \ -+ (symmetric_scenario && \ -+ (blk_queue_nonrot(bfqd->queue) || \ -+ cond_for_seeky_on_ncq_hdd)))) -+ -+ return bfq_bfqq_sync(bfqq) && -+ (bfq_bfqq_IO_bound(bfqq) || bfqq->wr_coeff > 1) && -+ (bfqq->wr_coeff > 1 || -+ (bfq_bfqq_idle_window(bfqq) && -+ !cond_for_expiring_non_wr) -+ ); -+} -+ -+/* -+ * If the in-service queue is empty but sync, and the function -+ * bfq_bfqq_must_not_expire returns true, then: -+ * 1) the queue must remain in service and cannot be expired, and -+ * 2) the disk must be idled to wait for the possible arrival of a new -+ * request for the queue. -+ * See the comments to the function bfq_bfqq_must_not_expire for the reasons -+ * why performing device idling is the best choice to boost the throughput -+ * and preserve service guarantees when bfq_bfqq_must_not_expire itself -+ * returns true. -+ */ -+static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 && -+ bfq_bfqq_must_not_expire(bfqq); -+} -+ -+/* -+ * Select a queue for service. If we have a current queue in service, -+ * check whether to continue servicing it, or retrieve and set a new one. -+ */ -+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq, *new_bfqq = NULL; -+ struct request *next_rq; -+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; -+ -+ bfqq = bfqd->in_service_queue; -+ if (bfqq == NULL) -+ goto new_queue; -+ -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); -+ -+ /* -+ * If another queue has a request waiting within our mean seek -+ * distance, let it run. The expire code will check for close -+ * cooperators and put the close queue at the front of the -+ * service tree. If possible, merge the expiring queue with the -+ * new bfqq. -+ */ -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq); -+ if (new_bfqq != NULL && bfqq->new_bfqq == NULL) -+ bfq_setup_merge(bfqq, new_bfqq); -+ -+ if (bfq_may_expire_for_budg_timeout(bfqq) && -+ !timer_pending(&bfqd->idle_slice_timer) && -+ !bfq_bfqq_must_idle(bfqq)) -+ goto expire; -+ -+ next_rq = bfqq->next_rq; -+ /* -+ * If bfqq has requests queued and it has enough budget left to -+ * serve them, keep the queue, otherwise expire it. -+ */ -+ if (next_rq != NULL) { -+ if (bfq_serv_to_charge(next_rq, bfqq) > -+ bfq_bfqq_budget_left(bfqq)) { -+ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; -+ goto expire; -+ } else { -+ /* -+ * The idle timer may be pending because we may -+ * not disable disk idling even when a new request -+ * arrives. -+ */ -+ if (timer_pending(&bfqd->idle_slice_timer)) { -+ /* -+ * If we get here: 1) at least a new request -+ * has arrived but we have not disabled the -+ * timer because the request was too small, -+ * 2) then the block layer has unplugged -+ * the device, causing the dispatch to be -+ * invoked. -+ * -+ * Since the device is unplugged, now the -+ * requests are probably large enough to -+ * provide a reasonable throughput. -+ * So we disable idling. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+ } -+ if (new_bfqq == NULL) -+ goto keep_queue; -+ else -+ goto expire; -+ } -+ } -+ -+ /* -+ * No requests pending. If the in-service queue still has requests -+ * in flight (possibly waiting for a completion) or is idling for a -+ * new request, then keep it. -+ */ -+ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) { -+ bfqq = NULL; -+ goto keep_queue; -+ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { -+ /* -+ * Expiring the queue because there is a close cooperator, -+ * cancel timer. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+ } -+ -+ reason = BFQ_BFQQ_NO_MORE_REQUESTS; -+expire: -+ bfq_bfqq_expire(bfqd, bfqq, 0, reason); -+new_queue: -+ bfqq = bfq_set_in_service_queue(bfqd, new_bfqq); -+ bfq_log(bfqd, "select_queue: new queue %d returned", -+ bfqq != NULL ? bfqq->pid : 0); -+keep_queue: -+ return bfqq; -+} -+ -+static void bfq_update_wr_data(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq->wr_coeff > 1) { /* queue is being boosted */ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "raising period dur %u/%u msec, old coeff %u, w %d(%d)", -+ jiffies_to_msecs(jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time), -+ bfqq->wr_coeff, -+ bfqq->entity.weight, bfqq->entity.orig_weight); -+ -+ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != -+ entity->orig_weight * bfqq->wr_coeff); -+ if (entity->ioprio_changed) -+ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); -+ /* -+ * If too much time has elapsed from the beginning -+ * of this weight-raising, stop it. -+ */ -+ if (time_is_before_jiffies(bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time)) { -+ bfqq->last_wr_start_finish = jiffies; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ bfqq->last_wr_start_finish, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ bfq_bfqq_end_wr(bfqq); -+ __bfq_entity_update_weight_prio( -+ bfq_entity_service_tree(entity), -+ entity); -+ } -+ } -+} -+ -+/* -+ * Dispatch one request from bfqq, moving it to the request queue -+ * dispatch list. -+ */ -+static int bfq_dispatch_request(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ int dispatched = 0; -+ struct request *rq; -+ unsigned long service_to_charge; -+ -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Follow expired path, else get first next available. */ -+ rq = bfq_check_fifo(bfqq); -+ if (rq == NULL) -+ rq = bfqq->next_rq; -+ service_to_charge = bfq_serv_to_charge(rq, bfqq); -+ -+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { -+ /* -+ * This may happen if the next rq is chosen in fifo order -+ * instead of sector order. The budget is properly -+ * dimensioned to be always sufficient to serve the next -+ * request only if it is chosen in sector order. The reason -+ * is that it would be quite inefficient and little useful -+ * to always make sure that the budget is large enough to -+ * serve even the possible next rq in fifo order. -+ * In fact, requests are seldom served in fifo order. -+ * -+ * Expire the queue for budget exhaustion, and make sure -+ * that the next act_budget is enough to serve the next -+ * request, even if it comes from the fifo expired path. -+ */ -+ bfqq->next_rq = rq; -+ /* -+ * Since this dispatch is failed, make sure that -+ * a new one will be performed -+ */ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ goto expire; -+ } -+ -+ /* Finally, insert request into driver dispatch list. */ -+ bfq_bfqq_served(bfqq, service_to_charge); -+ bfq_dispatch_insert(bfqd->queue, rq); -+ -+ bfq_update_wr_data(bfqd, bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "dispatched %u sec req (%llu), budg left %lu", -+ blk_rq_sectors(rq), -+ (long long unsigned)blk_rq_pos(rq), -+ bfq_bfqq_budget_left(bfqq)); -+ -+ dispatched++; -+ -+ if (bfqd->in_service_bic == NULL) { -+ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); -+ bfqd->in_service_bic = RQ_BIC(rq); -+ } -+ -+ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && -+ dispatched >= bfqd->bfq_max_budget_async_rq) || -+ bfq_class_idle(bfqq))) -+ goto expire; -+ -+ return dispatched; -+ -+expire: -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); -+ return dispatched; -+} -+ -+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) -+{ -+ int dispatched = 0; -+ -+ while (bfqq->next_rq != NULL) { -+ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); -+ dispatched++; -+ } -+ -+ BUG_ON(!list_empty(&bfqq->fifo)); -+ return dispatched; -+} -+ -+/* -+ * Drain our current requests. -+ * Used for barriers and when switching io schedulers on-the-fly. -+ */ -+static int bfq_forced_dispatch(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq, *n; -+ struct bfq_service_tree *st; -+ int dispatched = 0; -+ -+ bfqq = bfqd->in_service_queue; -+ if (bfqq != NULL) -+ __bfq_bfqq_expire(bfqd, bfqq); -+ -+ /* -+ * Loop through classes, and be careful to leave the scheduler -+ * in a consistent state, as feedback mechanisms and vtime -+ * updates cannot be disabled during the process. -+ */ -+ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { -+ st = bfq_entity_service_tree(&bfqq->entity); -+ -+ dispatched += __bfq_forced_dispatch_bfqq(bfqq); -+ bfqq->max_budget = bfq_max_budget(bfqd); -+ -+ bfq_forget_idle(st); -+ } -+ -+ BUG_ON(bfqd->busy_queues != 0); -+ -+ return dispatched; -+} -+ -+static int bfq_dispatch_requests(struct request_queue *q, int force) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq; -+ int max_dispatch; -+ -+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); -+ if (bfqd->busy_queues == 0) -+ return 0; -+ -+ if (unlikely(force)) -+ return bfq_forced_dispatch(bfqd); -+ -+ bfqq = bfq_select_queue(bfqd); -+ if (bfqq == NULL) -+ return 0; -+ -+ max_dispatch = bfqd->bfq_quantum; -+ if (bfq_class_idle(bfqq)) -+ max_dispatch = 1; -+ -+ if (!bfq_bfqq_sync(bfqq)) -+ max_dispatch = bfqd->bfq_max_budget_async_rq; -+ -+ if (bfqq->dispatched >= max_dispatch) { -+ if (bfqd->busy_queues > 1) -+ return 0; -+ if (bfqq->dispatched >= 4 * max_dispatch) -+ return 0; -+ } -+ -+ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) -+ return 0; -+ -+ bfq_clear_bfqq_wait_request(bfqq); -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ -+ if (!bfq_dispatch_request(bfqd, bfqq)) -+ return 0; -+ -+ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)", -+ bfqq->pid, max_dispatch); -+ -+ return 1; -+} -+ -+/* -+ * Task holds one reference to the queue, dropped when task exits. Each rq -+ * in-flight on this queue also holds a reference, dropped when rq is freed. -+ * -+ * Queue lock must be held here. -+ */ -+static void bfq_put_queue(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ BUG_ON(atomic_read(&bfqq->ref) <= 0); -+ -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ if (!atomic_dec_and_test(&bfqq->ref)) -+ return; -+ -+ BUG_ON(rb_first(&bfqq->sort_list) != NULL); -+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); -+ BUG_ON(bfqq->entity.tree != NULL); -+ BUG_ON(bfq_bfqq_busy(bfqq)); -+ BUG_ON(bfqd->in_service_queue == bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); -+ -+ kmem_cache_free(bfq_pool, bfqq); -+} -+ -+static void bfq_put_cooperator(struct bfq_queue *bfqq) -+{ -+ struct bfq_queue *__bfqq, *next; -+ -+ /* -+ * If this queue was scheduled to merge with another queue, be -+ * sure to drop the reference taken on that queue (and others in -+ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. -+ */ -+ __bfqq = bfqq->new_bfqq; -+ while (__bfqq) { -+ if (__bfqq == bfqq) -+ break; -+ next = __bfqq->new_bfqq; -+ bfq_put_queue(__bfqq); -+ __bfqq = next; -+ } -+} -+ -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ if (bfqq == bfqd->in_service_queue) { -+ __bfq_bfqq_expire(bfqd, bfqq); -+ bfq_schedule_dispatch(bfqd); -+ } -+ -+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ -+ bfq_put_cooperator(bfqq); -+ -+ bfq_put_queue(bfqq); -+} -+ -+static inline void bfq_init_icq(struct io_cq *icq) -+{ -+ struct bfq_io_cq *bic = icq_to_bic(icq); -+ -+ bic->ttime.last_end_request = jiffies; -+} -+ -+static void bfq_exit_icq(struct io_cq *icq) -+{ -+ struct bfq_io_cq *bic = icq_to_bic(icq); -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ -+ if (bic->bfqq[BLK_RW_ASYNC]) { -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]); -+ bic->bfqq[BLK_RW_ASYNC] = NULL; -+ } -+ -+ if (bic->bfqq[BLK_RW_SYNC]) { -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); -+ bic->bfqq[BLK_RW_SYNC] = NULL; -+ } -+} -+ -+/* -+ * Update the entity prio values; note that the new values will not -+ * be used until the next (re)activation. -+ */ -+static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+{ -+ struct task_struct *tsk = current; -+ int ioprio_class; -+ -+ if (!bfq_bfqq_prio_changed(bfqq)) -+ return; -+ -+ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); -+ switch (ioprio_class) { -+ default: -+ dev_err(bfqq->bfqd->queue->backing_dev_info.dev, -+ "bfq: bad prio %x\n", ioprio_class); -+ case IOPRIO_CLASS_NONE: -+ /* -+ * No prio set, inherit CPU scheduling settings. -+ */ -+ bfqq->entity.new_ioprio = task_nice_ioprio(tsk); -+ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); -+ break; -+ case IOPRIO_CLASS_RT: -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; -+ break; -+ case IOPRIO_CLASS_BE: -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; -+ break; -+ case IOPRIO_CLASS_IDLE: -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; -+ bfqq->entity.new_ioprio = 7; -+ bfq_clear_bfqq_idle_window(bfqq); -+ break; -+ } -+ -+ bfqq->entity.ioprio_changed = 1; -+ -+ bfq_clear_bfqq_prio_changed(bfqq); -+} -+ -+static void bfq_changed_ioprio(struct bfq_io_cq *bic) -+{ -+ struct bfq_data *bfqd; -+ struct bfq_queue *bfqq, *new_bfqq; -+ struct bfq_group *bfqg; -+ unsigned long uninitialized_var(flags); -+ int ioprio = bic->icq.ioc->ioprio; -+ -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), -+ &flags); -+ /* -+ * This condition may trigger on a newly created bic, be sure to -+ * drop the lock before returning. -+ */ -+ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio)) -+ goto out; -+ -+ bfqq = bic->bfqq[BLK_RW_ASYNC]; -+ if (bfqq != NULL) { -+ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, -+ sched_data); -+ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic, -+ GFP_ATOMIC); -+ if (new_bfqq != NULL) { -+ bic->bfqq[BLK_RW_ASYNC] = new_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, -+ "changed_ioprio: bfqq %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+ } -+ -+ bfqq = bic->bfqq[BLK_RW_SYNC]; -+ if (bfqq != NULL) -+ bfq_mark_bfqq_prio_changed(bfqq); -+ -+ bic->ioprio = ioprio; -+ -+out: -+ bfq_put_bfqd_unlock(bfqd, &flags); -+} -+ -+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ pid_t pid, int is_sync) -+{ -+ RB_CLEAR_NODE(&bfqq->entity.rb_node); -+ INIT_LIST_HEAD(&bfqq->fifo); -+ -+ atomic_set(&bfqq->ref, 0); -+ bfqq->bfqd = bfqd; -+ -+ bfq_mark_bfqq_prio_changed(bfqq); -+ -+ if (is_sync) { -+ if (!bfq_class_idle(bfqq)) -+ bfq_mark_bfqq_idle_window(bfqq); -+ bfq_mark_bfqq_sync(bfqq); -+ } -+ bfq_mark_bfqq_IO_bound(bfqq); -+ -+ /* Tentative initial value to trade off between thr and lat */ -+ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; -+ bfqq->pid = pid; -+ -+ bfqq->wr_coeff = 1; -+ bfqq->last_wr_start_finish = 0; -+ /* -+ * Set to the value for which bfqq will not be deemed as -+ * soft rt when it becomes backlogged. -+ */ -+ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); -+} -+ -+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ int is_sync, -+ struct bfq_io_cq *bic, -+ gfp_t gfp_mask) -+{ -+ struct bfq_queue *bfqq, *new_bfqq = NULL; -+ -+retry: -+ /* bic always exists here */ -+ bfqq = bic_to_bfqq(bic, is_sync); -+ -+ /* -+ * Always try a new alloc if we fall back to the OOM bfqq -+ * originally, since it should just be a temporary situation. -+ */ -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { -+ bfqq = NULL; -+ if (new_bfqq != NULL) { -+ bfqq = new_bfqq; -+ new_bfqq = NULL; -+ } else if (gfp_mask & __GFP_WAIT) { -+ spin_unlock_irq(bfqd->queue->queue_lock); -+ new_bfqq = kmem_cache_alloc_node(bfq_pool, -+ gfp_mask | __GFP_ZERO, -+ bfqd->queue->node); -+ spin_lock_irq(bfqd->queue->queue_lock); -+ if (new_bfqq != NULL) -+ goto retry; -+ } else { -+ bfqq = kmem_cache_alloc_node(bfq_pool, -+ gfp_mask | __GFP_ZERO, -+ bfqd->queue->node); -+ } -+ -+ if (bfqq != NULL) { -+ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); -+ bfq_log_bfqq(bfqd, bfqq, "allocated"); -+ } else { -+ bfqq = &bfqd->oom_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); -+ } -+ -+ bfq_init_prio_data(bfqq, bic); -+ bfq_init_entity(&bfqq->entity, bfqg); -+ } -+ -+ if (new_bfqq != NULL) -+ kmem_cache_free(bfq_pool, new_bfqq); -+ -+ return bfqq; -+} -+ -+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ int ioprio_class, int ioprio) -+{ -+ switch (ioprio_class) { -+ case IOPRIO_CLASS_RT: -+ return &bfqg->async_bfqq[0][ioprio]; -+ case IOPRIO_CLASS_NONE: -+ ioprio = IOPRIO_NORM; -+ /* fall through */ -+ case IOPRIO_CLASS_BE: -+ return &bfqg->async_bfqq[1][ioprio]; -+ case IOPRIO_CLASS_IDLE: -+ return &bfqg->async_idle_bfqq; -+ default: -+ BUG(); -+ } -+} -+ -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, int is_sync, -+ struct bfq_io_cq *bic, gfp_t gfp_mask) -+{ -+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); -+ struct bfq_queue **async_bfqq = NULL; -+ struct bfq_queue *bfqq = NULL; -+ -+ if (!is_sync) { -+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, -+ ioprio); -+ bfqq = *async_bfqq; -+ } -+ -+ if (bfqq == NULL) -+ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask); -+ -+ /* -+ * Pin the queue now that it's allocated, scheduler exit will -+ * prune it. -+ */ -+ if (!is_sync && *async_bfqq == NULL) { -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ *async_bfqq = bfqq; -+ } -+ -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ return bfqq; -+} -+ -+static void bfq_update_io_thinktime(struct bfq_data *bfqd, -+ struct bfq_io_cq *bic) -+{ -+ unsigned long elapsed = jiffies - bic->ttime.last_end_request; -+ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); -+ -+ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; -+ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; -+ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / -+ bic->ttime.ttime_samples; -+} -+ -+static void bfq_update_io_seektime(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ sector_t sdist; -+ u64 total; -+ -+ if (bfqq->last_request_pos < blk_rq_pos(rq)) -+ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; -+ else -+ sdist = bfqq->last_request_pos - blk_rq_pos(rq); -+ -+ /* -+ * Don't allow the seek distance to get too large from the -+ * odd fragment, pagein, etc. -+ */ -+ if (bfqq->seek_samples == 0) /* first request, not really a seek */ -+ sdist = 0; -+ else if (bfqq->seek_samples <= 60) /* second & third seek */ -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); -+ else -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); -+ -+ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; -+ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; -+ total = bfqq->seek_total + (bfqq->seek_samples/2); -+ do_div(total, bfqq->seek_samples); -+ bfqq->seek_mean = (sector_t)total; -+ -+ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, -+ (u64)bfqq->seek_mean); -+} -+ -+/* -+ * Disable idle window if the process thinks too long or seeks so much that -+ * it doesn't matter. -+ */ -+static void bfq_update_idle_window(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_io_cq *bic) -+{ -+ int enable_idle; -+ -+ /* Don't idle for async or idle io prio class. */ -+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) -+ return; -+ -+ enable_idle = bfq_bfqq_idle_window(bfqq); -+ -+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -+ bfqd->bfq_slice_idle == 0 || -+ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && -+ bfqq->wr_coeff == 1)) -+ enable_idle = 0; -+ else if (bfq_sample_valid(bic->ttime.ttime_samples)) { -+ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && -+ bfqq->wr_coeff == 1) -+ enable_idle = 0; -+ else -+ enable_idle = 1; -+ } -+ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", -+ enable_idle); -+ -+ if (enable_idle) -+ bfq_mark_bfqq_idle_window(bfqq); -+ else -+ bfq_clear_bfqq_idle_window(bfqq); -+} -+ -+/* -+ * Called when a new fs request (rq) is added to bfqq. Check if there's -+ * something we should do about it. -+ */ -+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ struct bfq_io_cq *bic = RQ_BIC(rq); -+ -+ if (rq->cmd_flags & REQ_META) -+ bfqq->meta_pending++; -+ -+ bfq_update_io_thinktime(bfqd, bic); -+ bfq_update_io_seektime(bfqd, bfqq, rq); -+ if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { -+ bfq_clear_bfqq_constantly_seeky(bfqq); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || -+ !BFQQ_SEEKY(bfqq)) -+ bfq_update_idle_window(bfqd, bfqq, bic); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), -+ (long long unsigned)bfqq->seek_mean); -+ -+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ -+ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { -+ int small_req = bfqq->queued[rq_is_sync(rq)] == 1 && -+ blk_rq_sectors(rq) < 32; -+ int budget_timeout = bfq_bfqq_budget_timeout(bfqq); -+ -+ /* -+ * There is just this request queued: if the request -+ * is small and the queue is not to be expired, then -+ * just exit. -+ * -+ * In this way, if the disk is being idled to wait for -+ * a new request from the in-service queue, we avoid -+ * unplugging the device and committing the disk to serve -+ * just a small request. On the contrary, we wait for -+ * the block layer to decide when to unplug the device: -+ * hopefully, new requests will be merged to this one -+ * quickly, then the device will be unplugged and -+ * larger requests will be dispatched. -+ */ -+ if (small_req && !budget_timeout) -+ return; -+ -+ /* -+ * A large enough request arrived, or the queue is to -+ * be expired: in both cases disk idling is to be -+ * stopped, so clear wait_request flag and reset -+ * timer. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+ -+ /* -+ * The queue is not empty, because a new request just -+ * arrived. Hence we can safely expire the queue, in -+ * case of budget timeout, without risking that the -+ * timestamps of the queue are not updated correctly. -+ * See [1] for more details. -+ */ -+ if (budget_timeout) -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); -+ -+ /* -+ * Let the request rip immediately, or let a new queue be -+ * selected if bfqq has just been expired. -+ */ -+ __blk_run_queue(bfqd->queue); -+ } -+} -+ -+static void bfq_insert_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ assert_spin_locked(bfqd->queue->queue_lock); -+ bfq_init_prio_data(bfqq, RQ_BIC(rq)); -+ -+ bfq_add_request(rq); -+ -+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; -+ list_add_tail(&rq->queuelist, &bfqq->fifo); -+ -+ bfq_rq_enqueued(bfqd, bfqq, rq); -+} -+ -+static void bfq_update_hw_tag(struct bfq_data *bfqd) -+{ -+ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, -+ bfqd->rq_in_driver); -+ -+ if (bfqd->hw_tag == 1) -+ return; -+ -+ /* -+ * This sample is valid if the number of outstanding requests -+ * is large enough to allow a queueing behavior. Note that the -+ * sum is not exact, as it's not taking into account deactivated -+ * requests. -+ */ -+ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) -+ return; -+ -+ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) -+ return; -+ -+ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; -+ bfqd->max_rq_in_driver = 0; -+ bfqd->hw_tag_samples = 0; -+} -+ -+static void bfq_completed_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ bool sync = bfq_bfqq_sync(bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", -+ blk_rq_sectors(rq), sync); -+ -+ bfq_update_hw_tag(bfqd); -+ -+ BUG_ON(!bfqd->rq_in_driver); -+ BUG_ON(!bfqq->dispatched); -+ bfqd->rq_in_driver--; -+ bfqq->dispatched--; -+ -+ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { -+ bfq_weights_tree_remove(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->busy_in_flight_queues); -+ bfqd->busy_in_flight_queues--; -+ if (bfq_bfqq_constantly_seeky(bfqq)) { -+ BUG_ON(!bfqd-> -+ const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ } -+ -+ if (sync) { -+ bfqd->sync_flight--; -+ RQ_BIC(rq)->ttime.last_end_request = jiffies; -+ } -+ -+ /* -+ * If we are waiting to discover whether the request pattern of the -+ * task associated with the queue is actually isochronous, and -+ * both requisites for this condition to hold are satisfied, then -+ * compute soft_rt_next_start (see the comments to the function -+ * bfq_bfqq_softrt_next_start()). -+ */ -+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && -+ RB_EMPTY_ROOT(&bfqq->sort_list)) -+ bfqq->soft_rt_next_start = -+ bfq_bfqq_softrt_next_start(bfqd, bfqq); -+ -+ /* -+ * If this is the in-service queue, check if it needs to be expired, -+ * or if we want to idle in case it has no pending requests. -+ */ -+ if (bfqd->in_service_queue == bfqq) { -+ if (bfq_bfqq_budget_new(bfqq)) -+ bfq_set_budget_timeout(bfqd); -+ -+ if (bfq_bfqq_must_idle(bfqq)) { -+ bfq_arm_slice_timer(bfqd); -+ goto out; -+ } else if (bfq_may_expire_for_budg_timeout(bfqq)) -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); -+ else if (RB_EMPTY_ROOT(&bfqq->sort_list) && -+ (bfqq->dispatched == 0 || -+ !bfq_bfqq_must_not_expire(bfqq))) -+ bfq_bfqq_expire(bfqd, bfqq, 0, -+ BFQ_BFQQ_NO_MORE_REQUESTS); -+ } -+ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ -+out: -+ return; -+} -+ -+static inline int __bfq_may_queue(struct bfq_queue *bfqq) -+{ -+ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { -+ bfq_clear_bfqq_must_alloc(bfqq); -+ return ELV_MQUEUE_MUST; -+ } -+ -+ return ELV_MQUEUE_MAY; -+} -+ -+static int bfq_may_queue(struct request_queue *q, int rw) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct task_struct *tsk = current; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ /* -+ * Don't force setup of a queue from here, as a call to may_queue -+ * does not necessarily imply that a request actually will be -+ * queued. So just lookup a possibly existing queue, or return -+ * 'may queue' if that fails. -+ */ -+ bic = bfq_bic_lookup(bfqd, tsk->io_context); -+ if (bic == NULL) -+ return ELV_MQUEUE_MAY; -+ -+ bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); -+ if (bfqq != NULL) { -+ bfq_init_prio_data(bfqq, bic); -+ -+ return __bfq_may_queue(bfqq); -+ } -+ -+ return ELV_MQUEUE_MAY; -+} -+ -+/* -+ * Queue lock held here. -+ */ -+static void bfq_put_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ if (bfqq != NULL) { -+ const int rw = rq_data_dir(rq); -+ -+ BUG_ON(!bfqq->allocated[rw]); -+ bfqq->allocated[rw]--; -+ -+ rq->elv.priv[0] = NULL; -+ rq->elv.priv[1] = NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+} -+ -+static struct bfq_queue * -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -+ (long unsigned)bfqq->new_bfqq->pid); -+ bic_set_bfqq(bic, bfqq->new_bfqq, 1); -+ bfq_mark_bfqq_coop(bfqq->new_bfqq); -+ bfq_put_queue(bfqq); -+ return bic_to_bfqq(bic, 1); -+} -+ -+/* -+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this -+ * was the last process referring to said bfqq. -+ */ -+static struct bfq_queue * -+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); -+ if (bfqq_process_refs(bfqq) == 1) { -+ bfqq->pid = current->pid; -+ bfq_clear_bfqq_coop(bfqq); -+ bfq_clear_bfqq_split_coop(bfqq); -+ return bfqq; -+ } -+ -+ bic_set_bfqq(bic, NULL, 1); -+ -+ bfq_put_cooperator(bfqq); -+ -+ bfq_put_queue(bfqq); -+ return NULL; -+} -+ -+/* -+ * Allocate bfq data structures associated with this request. -+ */ -+static int bfq_set_request(struct request_queue *q, struct request *rq, -+ struct bio *bio, gfp_t gfp_mask) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); -+ const int rw = rq_data_dir(rq); -+ const int is_sync = rq_is_sync(rq); -+ struct bfq_queue *bfqq; -+ struct bfq_group *bfqg; -+ unsigned long flags; -+ -+ might_sleep_if(gfp_mask & __GFP_WAIT); -+ -+ bfq_changed_ioprio(bic); -+ -+ spin_lock_irqsave(q->queue_lock, flags); -+ -+ if (bic == NULL) -+ goto queue_fail; -+ -+ bfqg = bfq_bic_update_cgroup(bic); -+ -+new_queue: -+ bfqq = bic_to_bfqq(bic, is_sync); -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { -+ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); -+ bic_set_bfqq(bic, bfqq, is_sync); -+ } else { -+ /* -+ * If the queue was seeky for too long, break it apart. -+ */ -+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { -+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); -+ bfqq = bfq_split_bfqq(bic, bfqq); -+ if (!bfqq) -+ goto new_queue; -+ } -+ -+ /* -+ * Check to see if this queue is scheduled to merge with -+ * another closely cooperating queue. The merging of queues -+ * happens here as it must be done in process context. -+ * The reference on new_bfqq was taken in merge_bfqqs. -+ */ -+ if (bfqq->new_bfqq != NULL) -+ bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); -+ } -+ -+ bfqq->allocated[rw]++; -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ -+ rq->elv.priv[0] = bic; -+ rq->elv.priv[1] = bfqq; -+ -+ spin_unlock_irqrestore(q->queue_lock, flags); -+ -+ return 0; -+ -+queue_fail: -+ bfq_schedule_dispatch(bfqd); -+ spin_unlock_irqrestore(q->queue_lock, flags); -+ -+ return 1; -+} -+ -+static void bfq_kick_queue(struct work_struct *work) -+{ -+ struct bfq_data *bfqd = -+ container_of(work, struct bfq_data, unplug_work); -+ struct request_queue *q = bfqd->queue; -+ -+ spin_lock_irq(q->queue_lock); -+ __blk_run_queue(q); -+ spin_unlock_irq(q->queue_lock); -+} -+ -+/* -+ * Handler of the expiration of the timer running if the in-service queue -+ * is idling inside its time slice. -+ */ -+static void bfq_idle_slice_timer(unsigned long data) -+{ -+ struct bfq_data *bfqd = (struct bfq_data *)data; -+ struct bfq_queue *bfqq; -+ unsigned long flags; -+ enum bfqq_expiration reason; -+ -+ spin_lock_irqsave(bfqd->queue->queue_lock, flags); -+ -+ bfqq = bfqd->in_service_queue; -+ /* -+ * Theoretical race here: the in-service queue can be NULL or -+ * different from the queue that was idling if the timer handler -+ * spins on the queue_lock and a new request arrives for the -+ * current queue and there is a full dispatch cycle that changes -+ * the in-service queue. This can hardly happen, but in the worst -+ * case we just expire a queue too early. -+ */ -+ if (bfqq != NULL) { -+ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); -+ if (bfq_bfqq_budget_timeout(bfqq)) -+ /* -+ * Also here the queue can be safely expired -+ * for budget timeout without wasting -+ * guarantees -+ */ -+ reason = BFQ_BFQQ_BUDGET_TIMEOUT; -+ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) -+ /* -+ * The queue may not be empty upon timer expiration, -+ * because we may not disable the timer when the -+ * first request of the in-service queue arrives -+ * during disk idling. -+ */ -+ reason = BFQ_BFQQ_TOO_IDLE; -+ else -+ goto schedule_dispatch; -+ -+ bfq_bfqq_expire(bfqd, bfqq, 1, reason); -+ } -+ -+schedule_dispatch: -+ bfq_schedule_dispatch(bfqd); -+ -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); -+} -+ -+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) -+{ -+ del_timer_sync(&bfqd->idle_slice_timer); -+ cancel_work_sync(&bfqd->unplug_work); -+} -+ -+static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, -+ struct bfq_queue **bfqq_ptr) -+{ -+ struct bfq_group *root_group = bfqd->root_group; -+ struct bfq_queue *bfqq = *bfqq_ptr; -+ -+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); -+ if (bfqq != NULL) { -+ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); -+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ *bfqq_ptr = NULL; -+ } -+} -+ -+/* -+ * Release all the bfqg references to its async queues. If we are -+ * deallocating the group these queues may still contain requests, so -+ * we reparent them to the root cgroup (i.e., the only one that will -+ * exist for sure until all the requests on a device are gone). -+ */ -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < IOPRIO_BE_NR; j++) -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); -+ -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); -+} -+ -+static void bfq_exit_queue(struct elevator_queue *e) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ struct request_queue *q = bfqd->queue; -+ struct bfq_queue *bfqq, *n; -+ -+ bfq_shutdown_timer_wq(bfqd); -+ -+ spin_lock_irq(q->queue_lock); -+ -+ BUG_ON(bfqd->in_service_queue != NULL); -+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) -+ bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ -+ bfq_disconnect_groups(bfqd); -+ spin_unlock_irq(q->queue_lock); -+ -+ bfq_shutdown_timer_wq(bfqd); -+ -+ synchronize_rcu(); -+ -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ -+ bfq_free_root_group(bfqd); -+ kfree(bfqd); -+} -+ -+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) -+{ -+ struct bfq_group *bfqg; -+ struct bfq_data *bfqd; -+ struct elevator_queue *eq; -+ -+ eq = elevator_alloc(q, e); -+ if (eq == NULL) -+ return -ENOMEM; -+ -+ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); -+ if (bfqd == NULL) { -+ kobject_put(&eq->kobj); -+ return -ENOMEM; -+ } -+ eq->elevator_data = bfqd; -+ -+ /* -+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. -+ * Grab a permanent reference to it, so that the normal code flow -+ * will not attempt to free it. -+ */ -+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); -+ atomic_inc(&bfqd->oom_bfqq.ref); -+ -+ bfqd->queue = q; -+ -+ spin_lock_irq(q->queue_lock); -+ q->elevator = eq; -+ spin_unlock_irq(q->queue_lock); -+ -+ bfqg = bfq_alloc_root_group(bfqd, q->node); -+ if (bfqg == NULL) { -+ kfree(bfqd); -+ kobject_put(&eq->kobj); -+ return -ENOMEM; -+ } -+ -+ bfqd->root_group = bfqg; -+#ifdef CONFIG_CGROUP_BFQIO -+ bfqd->active_numerous_groups = 0; -+#endif -+ -+ init_timer(&bfqd->idle_slice_timer); -+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; -+ bfqd->idle_slice_timer.data = (unsigned long)bfqd; -+ -+ bfqd->rq_pos_tree = RB_ROOT; -+ bfqd->queue_weights_tree = RB_ROOT; -+ bfqd->group_weights_tree = RB_ROOT; -+ -+ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); -+ -+ INIT_LIST_HEAD(&bfqd->active_list); -+ INIT_LIST_HEAD(&bfqd->idle_list); -+ -+ bfqd->hw_tag = -1; -+ -+ bfqd->bfq_max_budget = bfq_default_max_budget; -+ -+ bfqd->bfq_quantum = bfq_quantum; -+ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; -+ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; -+ bfqd->bfq_back_max = bfq_back_max; -+ bfqd->bfq_back_penalty = bfq_back_penalty; -+ bfqd->bfq_slice_idle = bfq_slice_idle; -+ bfqd->bfq_class_idle_last_service = 0; -+ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; -+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; -+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; -+ -+ bfqd->bfq_coop_thresh = 2; -+ bfqd->bfq_failed_cooperations = 7000; -+ bfqd->bfq_requests_within_timer = 120; -+ -+ bfqd->low_latency = true; -+ -+ bfqd->bfq_wr_coeff = 20; -+ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); -+ bfqd->bfq_wr_max_time = 0; -+ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); -+ bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); -+ bfqd->bfq_wr_max_softrt_rate = 7000; /* -+ * Approximate rate required -+ * to playback or record a -+ * high-definition compressed -+ * video. -+ */ -+ bfqd->wr_busy_queues = 0; -+ bfqd->busy_in_flight_queues = 0; -+ bfqd->const_seeky_busy_in_flight_queues = 0; -+ -+ /* -+ * Begin by assuming, optimistically, that the device peak rate is -+ * equal to the highest reference rate. -+ */ -+ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * -+ T_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ -+ return 0; -+} -+ -+static void bfq_slab_kill(void) -+{ -+ if (bfq_pool != NULL) -+ kmem_cache_destroy(bfq_pool); -+} -+ -+static int __init bfq_slab_setup(void) -+{ -+ bfq_pool = KMEM_CACHE(bfq_queue, 0); -+ if (bfq_pool == NULL) -+ return -ENOMEM; -+ return 0; -+} -+ -+static ssize_t bfq_var_show(unsigned int var, char *page) -+{ -+ return sprintf(page, "%d\n", var); -+} -+ -+static ssize_t bfq_var_store(unsigned long *var, const char *page, -+ size_t count) -+{ -+ unsigned long new_val; -+ int ret = kstrtoul(page, 10, &new_val); -+ -+ if (ret == 0) -+ *var = new_val; -+ -+ return count; -+} -+ -+static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? -+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : -+ jiffies_to_msecs(bfq_wr_duration(bfqd))); -+} -+ -+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) -+{ -+ struct bfq_queue *bfqq; -+ struct bfq_data *bfqd = e->elevator_data; -+ ssize_t num_char = 0; -+ -+ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n", -+ bfqd->queued); -+ -+ spin_lock_irq(bfqd->queue->queue_lock); -+ -+ num_char += sprintf(page + num_char, "Active:\n"); -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ bfqq->queued[0], -+ bfqq->queued[1], -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ num_char += sprintf(page + num_char, "Idle:\n"); -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ jiffies_to_msecs(jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ spin_unlock_irq(bfqd->queue->queue_lock); -+ -+ return num_char; -+} -+ -+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -+static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned int __data = __VAR; \ -+ if (__CONV) \ -+ __data = jiffies_to_msecs(__data); \ -+ return bfq_var_show(__data, (page)); \ -+} -+SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); -+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); -+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); -+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); -+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); -+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); -+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); -+SHOW_FUNCTION(bfq_max_budget_async_rq_show, -+ bfqd->bfq_max_budget_async_rq, 0); -+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); -+SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); -+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); -+SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); -+SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); -+SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1); -+SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, -+ 1); -+SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); -+#undef SHOW_FUNCTION -+ -+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -+static ssize_t \ -+__FUNC(struct elevator_queue *e, const char *page, size_t count) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned long uninitialized_var(__data); \ -+ int ret = bfq_var_store(&__data, (page), count); \ -+ if (__data < (MIN)) \ -+ __data = (MIN); \ -+ else if (__data > (MAX)) \ -+ __data = (MAX); \ -+ if (__CONV) \ -+ *(__PTR) = msecs_to_jiffies(__data); \ -+ else \ -+ *(__PTR) = __data; \ -+ return ret; \ -+} -+STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); -+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, -+ INT_MAX, 0); -+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, -+ 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, -+ 1); -+STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, -+ &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, -+ INT_MAX, 0); -+#undef STORE_FUNCTION -+ -+/* do nothing for the moment */ -+static ssize_t bfq_weights_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ return count; -+} -+ -+static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) -+{ -+ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ -+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) -+ return bfq_calc_max_budget(bfqd->peak_rate, timeout); -+ else -+ return bfq_default_max_budget; -+} -+ -+static ssize_t bfq_max_budget_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data == 0) -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ else { -+ if (__data > INT_MAX) -+ __data = INT_MAX; -+ bfqd->bfq_max_budget = __data; -+ } -+ -+ bfqd->bfq_user_max_budget = __data; -+ -+ return ret; -+} -+ -+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data < 1) -+ __data = 1; -+ else if (__data > INT_MAX) -+ __data = INT_MAX; -+ -+ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); -+ if (bfqd->bfq_user_max_budget == 0) -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ -+ return ret; -+} -+ -+static ssize_t bfq_low_latency_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data > 1) -+ __data = 1; -+ if (__data == 0 && bfqd->low_latency != 0) -+ bfq_end_wr(bfqd); -+ bfqd->low_latency = __data; -+ -+ return ret; -+} -+ -+#define BFQ_ATTR(name) \ -+ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) -+ -+static struct elv_fs_entry bfq_attrs[] = { -+ BFQ_ATTR(quantum), -+ BFQ_ATTR(fifo_expire_sync), -+ BFQ_ATTR(fifo_expire_async), -+ BFQ_ATTR(back_seek_max), -+ BFQ_ATTR(back_seek_penalty), -+ BFQ_ATTR(slice_idle), -+ BFQ_ATTR(max_budget), -+ BFQ_ATTR(max_budget_async_rq), -+ BFQ_ATTR(timeout_sync), -+ BFQ_ATTR(timeout_async), -+ BFQ_ATTR(low_latency), -+ BFQ_ATTR(wr_coeff), -+ BFQ_ATTR(wr_max_time), -+ BFQ_ATTR(wr_rt_max_time), -+ BFQ_ATTR(wr_min_idle_time), -+ BFQ_ATTR(wr_min_inter_arr_async), -+ BFQ_ATTR(wr_max_softrt_rate), -+ BFQ_ATTR(weights), -+ __ATTR_NULL -+}; -+ -+static struct elevator_type iosched_bfq = { -+ .ops = { -+ .elevator_merge_fn = bfq_merge, -+ .elevator_merged_fn = bfq_merged_request, -+ .elevator_merge_req_fn = bfq_merged_requests, -+ .elevator_allow_merge_fn = bfq_allow_merge, -+ .elevator_dispatch_fn = bfq_dispatch_requests, -+ .elevator_add_req_fn = bfq_insert_request, -+ .elevator_activate_req_fn = bfq_activate_request, -+ .elevator_deactivate_req_fn = bfq_deactivate_request, -+ .elevator_completed_req_fn = bfq_completed_request, -+ .elevator_former_req_fn = elv_rb_former_request, -+ .elevator_latter_req_fn = elv_rb_latter_request, -+ .elevator_init_icq_fn = bfq_init_icq, -+ .elevator_exit_icq_fn = bfq_exit_icq, -+ .elevator_set_req_fn = bfq_set_request, -+ .elevator_put_req_fn = bfq_put_request, -+ .elevator_may_queue_fn = bfq_may_queue, -+ .elevator_init_fn = bfq_init_queue, -+ .elevator_exit_fn = bfq_exit_queue, -+ }, -+ .icq_size = sizeof(struct bfq_io_cq), -+ .icq_align = __alignof__(struct bfq_io_cq), -+ .elevator_attrs = bfq_attrs, -+ .elevator_name = "bfq", -+ .elevator_owner = THIS_MODULE, -+}; -+ -+static int __init bfq_init(void) -+{ -+ /* -+ * Can be 0 on HZ < 1000 setups. -+ */ -+ if (bfq_slice_idle == 0) -+ bfq_slice_idle = 1; -+ -+ if (bfq_timeout_async == 0) -+ bfq_timeout_async = 1; -+ -+ if (bfq_slab_setup()) -+ return -ENOMEM; -+ -+ /* -+ * Times to load large popular applications for the typical systems -+ * installed on the reference devices (see the comments before the -+ * definitions of the two arrays). -+ */ -+ T_slow[0] = msecs_to_jiffies(2600); -+ T_slow[1] = msecs_to_jiffies(1000); -+ T_fast[0] = msecs_to_jiffies(5500); -+ T_fast[1] = msecs_to_jiffies(2000); -+ -+ /* -+ * Thresholds that determine the switch between speed classes (see -+ * the comments before the definition of the array). -+ */ -+ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; -+ device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; -+ -+ elv_register(&iosched_bfq); -+ pr_info("BFQ I/O-scheduler version: v7r5"); -+ -+ return 0; -+} -+ -+static void __exit bfq_exit(void) -+{ -+ elv_unregister(&iosched_bfq); -+ bfq_slab_kill(); -+} -+ -+module_init(bfq_init); -+module_exit(bfq_exit); -+ -+MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); -+MODULE_LICENSE("GPL"); -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -new file mode 100644 -index 0000000..c4831b7 ---- /dev/null -+++ b/block/bfq-sched.c -@@ -0,0 +1,1207 @@ -+/* -+ * BFQ: Hierarchical B-WF2Q+ scheduler. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ */ -+ -+#ifdef CONFIG_CGROUP_BFQIO -+#define for_each_entity(entity) \ -+ for (; entity != NULL; entity = entity->parent) -+ -+#define for_each_entity_safe(entity, parent) \ -+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) -+ -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -+ int extract, -+ struct bfq_data *bfqd); -+ -+static inline void bfq_update_budget(struct bfq_entity *next_in_service) -+{ -+ struct bfq_entity *bfqg_entity; -+ struct bfq_group *bfqg; -+ struct bfq_sched_data *group_sd; -+ -+ BUG_ON(next_in_service == NULL); -+ -+ group_sd = next_in_service->sched_data; -+ -+ bfqg = container_of(group_sd, struct bfq_group, sched_data); -+ /* -+ * bfq_group's my_entity field is not NULL only if the group -+ * is not the root group. We must not touch the root entity -+ * as it must never become an in-service entity. -+ */ -+ bfqg_entity = bfqg->my_entity; -+ if (bfqg_entity != NULL) -+ bfqg_entity->budget = next_in_service->budget; -+} -+ -+static int bfq_update_next_in_service(struct bfq_sched_data *sd) -+{ -+ struct bfq_entity *next_in_service; -+ -+ if (sd->in_service_entity != NULL) -+ /* will update/requeue at the end of service */ -+ return 0; -+ -+ /* -+ * NOTE: this can be improved in many ways, such as returning -+ * 1 (and thus propagating upwards the update) only when the -+ * budget changes, or caching the bfqq that will be scheduled -+ * next from this subtree. By now we worry more about -+ * correctness than about performance... -+ */ -+ next_in_service = bfq_lookup_next_entity(sd, 0, NULL); -+ sd->next_in_service = next_in_service; -+ -+ if (next_in_service != NULL) -+ bfq_update_budget(next_in_service); -+ -+ return 1; -+} -+ -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *entity) -+{ -+ BUG_ON(sd->next_in_service != entity); -+} -+#else -+#define for_each_entity(entity) \ -+ for (; entity != NULL; entity = NULL) -+ -+#define for_each_entity_safe(entity, parent) \ -+ for (parent = NULL; entity != NULL; entity = parent) -+ -+static inline int bfq_update_next_in_service(struct bfq_sched_data *sd) -+{ -+ return 0; -+} -+ -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *entity) -+{ -+} -+ -+static inline void bfq_update_budget(struct bfq_entity *next_in_service) -+{ -+} -+#endif -+ -+/* -+ * Shift for timestamp calculations. This actually limits the maximum -+ * service allowed in one timestamp delta (small shift values increase it), -+ * the maximum total weight that can be used for the queues in the system -+ * (big shift values increase it), and the period of virtual time -+ * wraparounds. -+ */ -+#define WFQ_SERVICE_SHIFT 22 -+ -+/** -+ * bfq_gt - compare two timestamps. -+ * @a: first ts. -+ * @b: second ts. -+ * -+ * Return @a > @b, dealing with wrapping correctly. -+ */ -+static inline int bfq_gt(u64 a, u64 b) -+{ -+ return (s64)(a - b) > 0; -+} -+ -+static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = NULL; -+ -+ BUG_ON(entity == NULL); -+ -+ if (entity->my_sched_data == NULL) -+ bfqq = container_of(entity, struct bfq_queue, entity); -+ -+ return bfqq; -+} -+ -+ -+/** -+ * bfq_delta - map service into the virtual time domain. -+ * @service: amount of service. -+ * @weight: scale factor (weight of an entity or weight sum). -+ */ -+static inline u64 bfq_delta(unsigned long service, -+ unsigned long weight) -+{ -+ u64 d = (u64)service << WFQ_SERVICE_SHIFT; -+ -+ do_div(d, weight); -+ return d; -+} -+ -+/** -+ * bfq_calc_finish - assign the finish time to an entity. -+ * @entity: the entity to act upon. -+ * @service: the service to be charged to the entity. -+ */ -+static inline void bfq_calc_finish(struct bfq_entity *entity, -+ unsigned long service) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ BUG_ON(entity->weight == 0); -+ -+ entity->finish = entity->start + -+ bfq_delta(service, entity->weight); -+ -+ if (bfqq != NULL) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_finish: serv %lu, w %d", -+ service, entity->weight); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_finish: start %llu, finish %llu, delta %llu", -+ entity->start, entity->finish, -+ bfq_delta(service, entity->weight)); -+ } -+} -+ -+/** -+ * bfq_entity_of - get an entity from a node. -+ * @node: the node field of the entity. -+ * -+ * Convert a node pointer to the relative entity. This is used only -+ * to simplify the logic of some functions and not as the generic -+ * conversion mechanism because, e.g., in the tree walking functions, -+ * the check for a %NULL value would be redundant. -+ */ -+static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) -+{ -+ struct bfq_entity *entity = NULL; -+ -+ if (node != NULL) -+ entity = rb_entry(node, struct bfq_entity, rb_node); -+ -+ return entity; -+} -+ -+/** -+ * bfq_extract - remove an entity from a tree. -+ * @root: the tree root. -+ * @entity: the entity to remove. -+ */ -+static inline void bfq_extract(struct rb_root *root, -+ struct bfq_entity *entity) -+{ -+ BUG_ON(entity->tree != root); -+ -+ entity->tree = NULL; -+ rb_erase(&entity->rb_node, root); -+} -+ -+/** -+ * bfq_idle_extract - extract an entity from the idle tree. -+ * @st: the service tree of the owning @entity. -+ * @entity: the entity being removed. -+ */ -+static void bfq_idle_extract(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *next; -+ -+ BUG_ON(entity->tree != &st->idle); -+ -+ if (entity == st->first_idle) { -+ next = rb_next(&entity->rb_node); -+ st->first_idle = bfq_entity_of(next); -+ } -+ -+ if (entity == st->last_idle) { -+ next = rb_prev(&entity->rb_node); -+ st->last_idle = bfq_entity_of(next); -+ } -+ -+ bfq_extract(&st->idle, entity); -+ -+ if (bfqq != NULL) -+ list_del(&bfqq->bfqq_list); -+} -+ -+/** -+ * bfq_insert - generic tree insertion. -+ * @root: tree root. -+ * @entity: entity to insert. -+ * -+ * This is used for the idle and the active tree, since they are both -+ * ordered by finish time. -+ */ -+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) -+{ -+ struct bfq_entity *entry; -+ struct rb_node **node = &root->rb_node; -+ struct rb_node *parent = NULL; -+ -+ BUG_ON(entity->tree != NULL); -+ -+ while (*node != NULL) { -+ parent = *node; -+ entry = rb_entry(parent, struct bfq_entity, rb_node); -+ -+ if (bfq_gt(entry->finish, entity->finish)) -+ node = &parent->rb_left; -+ else -+ node = &parent->rb_right; -+ } -+ -+ rb_link_node(&entity->rb_node, parent, node); -+ rb_insert_color(&entity->rb_node, root); -+ -+ entity->tree = root; -+} -+ -+/** -+ * bfq_update_min - update the min_start field of a entity. -+ * @entity: the entity to update. -+ * @node: one of its children. -+ * -+ * This function is called when @entity may store an invalid value for -+ * min_start due to updates to the active tree. The function assumes -+ * that the subtree rooted at @node (which may be its left or its right -+ * child) has a valid min_start value. -+ */ -+static inline void bfq_update_min(struct bfq_entity *entity, -+ struct rb_node *node) -+{ -+ struct bfq_entity *child; -+ -+ if (node != NULL) { -+ child = rb_entry(node, struct bfq_entity, rb_node); -+ if (bfq_gt(entity->min_start, child->min_start)) -+ entity->min_start = child->min_start; -+ } -+} -+ -+/** -+ * bfq_update_active_node - recalculate min_start. -+ * @node: the node to update. -+ * -+ * @node may have changed position or one of its children may have moved, -+ * this function updates its min_start value. The left and right subtrees -+ * are assumed to hold a correct min_start value. -+ */ -+static inline void bfq_update_active_node(struct rb_node *node) -+{ -+ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); -+ -+ entity->min_start = entity->start; -+ bfq_update_min(entity, node->rb_right); -+ bfq_update_min(entity, node->rb_left); -+} -+ -+/** -+ * bfq_update_active_tree - update min_start for the whole active tree. -+ * @node: the starting node. -+ * -+ * @node must be the deepest modified node after an update. This function -+ * updates its min_start using the values held by its children, assuming -+ * that they did not change, and then updates all the nodes that may have -+ * changed in the path to the root. The only nodes that may have changed -+ * are the ones in the path or their siblings. -+ */ -+static void bfq_update_active_tree(struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+up: -+ bfq_update_active_node(node); -+ -+ parent = rb_parent(node); -+ if (parent == NULL) -+ return; -+ -+ if (node == parent->rb_left && parent->rb_right != NULL) -+ bfq_update_active_node(parent->rb_right); -+ else if (parent->rb_left != NULL) -+ bfq_update_active_node(parent->rb_left); -+ -+ node = parent; -+ goto up; -+} -+ -+static void bfq_weights_tree_add(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root); -+ -+static void bfq_weights_tree_remove(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root); -+ -+ -+/** -+ * bfq_active_insert - insert an entity in the active tree of its -+ * group/device. -+ * @st: the service tree of the entity. -+ * @entity: the entity being inserted. -+ * -+ * The active tree is ordered by finish time, but an extra key is kept -+ * per each node, containing the minimum value for the start times of -+ * its children (and the node itself), so it's possible to search for -+ * the eligible node with the lowest finish time in logarithmic time. -+ */ -+static void bfq_active_insert(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *node = &entity->rb_node; -+#ifdef CONFIG_CGROUP_BFQIO -+ struct bfq_sched_data *sd = NULL; -+ struct bfq_group *bfqg = NULL; -+ struct bfq_data *bfqd = NULL; -+#endif -+ -+ bfq_insert(&st->active, entity); -+ -+ if (node->rb_left != NULL) -+ node = node->rb_left; -+ else if (node->rb_right != NULL) -+ node = node->rb_right; -+ -+ bfq_update_active_tree(node); -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ sd = entity->sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+#endif -+ if (bfqq != NULL) -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); -+#ifdef CONFIG_CGROUP_BFQIO -+ else { /* bfq_group */ -+ BUG_ON(!bfqd); -+ bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree); -+ } -+ if (bfqg != bfqd->root_group) { -+ BUG_ON(!bfqg); -+ BUG_ON(!bfqd); -+ bfqg->active_entities++; -+ if (bfqg->active_entities == 2) -+ bfqd->active_numerous_groups++; -+ } -+#endif -+} -+ -+/** -+ * bfq_ioprio_to_weight - calc a weight from an ioprio. -+ * @ioprio: the ioprio value to convert. -+ */ -+static inline unsigned short bfq_ioprio_to_weight(int ioprio) -+{ -+ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); -+ return IOPRIO_BE_NR - ioprio; -+} -+ -+/** -+ * bfq_weight_to_ioprio - calc an ioprio from a weight. -+ * @weight: the weight value to convert. -+ * -+ * To preserve as mush as possible the old only-ioprio user interface, -+ * 0 is used as an escape ioprio value for weights (numerically) equal or -+ * larger than IOPRIO_BE_NR -+ */ -+static inline unsigned short bfq_weight_to_ioprio(int weight) -+{ -+ BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); -+ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; -+} -+ -+static inline void bfq_get_entity(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ if (bfqq != NULL) { -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ } -+} -+ -+/** -+ * bfq_find_deepest - find the deepest node that an extraction can modify. -+ * @node: the node being removed. -+ * -+ * Do the first step of an extraction in an rb tree, looking for the -+ * node that will replace @node, and returning the deepest node that -+ * the following modifications to the tree can touch. If @node is the -+ * last node in the tree return %NULL. -+ */ -+static struct rb_node *bfq_find_deepest(struct rb_node *node) -+{ -+ struct rb_node *deepest; -+ -+ if (node->rb_right == NULL && node->rb_left == NULL) -+ deepest = rb_parent(node); -+ else if (node->rb_right == NULL) -+ deepest = node->rb_left; -+ else if (node->rb_left == NULL) -+ deepest = node->rb_right; -+ else { -+ deepest = rb_next(node); -+ if (deepest->rb_right != NULL) -+ deepest = deepest->rb_right; -+ else if (rb_parent(deepest) != node) -+ deepest = rb_parent(deepest); -+ } -+ -+ return deepest; -+} -+ -+/** -+ * bfq_active_extract - remove an entity from the active tree. -+ * @st: the service_tree containing the tree. -+ * @entity: the entity being removed. -+ */ -+static void bfq_active_extract(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *node; -+#ifdef CONFIG_CGROUP_BFQIO -+ struct bfq_sched_data *sd = NULL; -+ struct bfq_group *bfqg = NULL; -+ struct bfq_data *bfqd = NULL; -+#endif -+ -+ node = bfq_find_deepest(&entity->rb_node); -+ bfq_extract(&st->active, entity); -+ -+ if (node != NULL) -+ bfq_update_active_tree(node); -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ sd = entity->sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+#endif -+ if (bfqq != NULL) -+ list_del(&bfqq->bfqq_list); -+#ifdef CONFIG_CGROUP_BFQIO -+ else { /* bfq_group */ -+ BUG_ON(!bfqd); -+ bfq_weights_tree_remove(bfqd, entity, -+ &bfqd->group_weights_tree); -+ } -+ if (bfqg != bfqd->root_group) { -+ BUG_ON(!bfqg); -+ BUG_ON(!bfqd); -+ BUG_ON(!bfqg->active_entities); -+ bfqg->active_entities--; -+ if (bfqg->active_entities == 1) { -+ BUG_ON(!bfqd->active_numerous_groups); -+ bfqd->active_numerous_groups--; -+ } -+ } -+#endif -+} -+ -+/** -+ * bfq_idle_insert - insert an entity into the idle tree. -+ * @st: the service tree containing the tree. -+ * @entity: the entity to insert. -+ */ -+static void bfq_idle_insert(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_entity *first_idle = st->first_idle; -+ struct bfq_entity *last_idle = st->last_idle; -+ -+ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) -+ st->first_idle = entity; -+ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) -+ st->last_idle = entity; -+ -+ bfq_insert(&st->idle, entity); -+ -+ if (bfqq != NULL) -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); -+} -+ -+/** -+ * bfq_forget_entity - remove an entity from the wfq trees. -+ * @st: the service tree. -+ * @entity: the entity being removed. -+ * -+ * Update the device status and forget everything about @entity, putting -+ * the device reference to it, if it is a queue. Entities belonging to -+ * groups are not refcounted. -+ */ -+static void bfq_forget_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_sched_data *sd; -+ -+ BUG_ON(!entity->on_st); -+ -+ entity->on_st = 0; -+ st->wsum -= entity->weight; -+ if (bfqq != NULL) { -+ sd = entity->sched_data; -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+} -+ -+/** -+ * bfq_put_idle_entity - release the idle tree ref of an entity. -+ * @st: service tree for the entity. -+ * @entity: the entity being released. -+ */ -+static void bfq_put_idle_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ bfq_idle_extract(st, entity); -+ bfq_forget_entity(st, entity); -+} -+ -+/** -+ * bfq_forget_idle - update the idle tree if necessary. -+ * @st: the service tree to act upon. -+ * -+ * To preserve the global O(log N) complexity we only remove one entry here; -+ * as the idle tree will not grow indefinitely this can be done safely. -+ */ -+static void bfq_forget_idle(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *first_idle = st->first_idle; -+ struct bfq_entity *last_idle = st->last_idle; -+ -+ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && -+ !bfq_gt(last_idle->finish, st->vtime)) { -+ /* -+ * Forget the whole idle tree, increasing the vtime past -+ * the last finish time of idle entities. -+ */ -+ st->vtime = last_idle->finish; -+ } -+ -+ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) -+ bfq_put_idle_entity(st, first_idle); -+} -+ -+static struct bfq_service_tree * -+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_service_tree *new_st = old_st; -+ -+ if (entity->ioprio_changed) { -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ unsigned short prev_weight, new_weight; -+ struct bfq_data *bfqd = NULL; -+ struct rb_root *root; -+#ifdef CONFIG_CGROUP_BFQIO -+ struct bfq_sched_data *sd; -+ struct bfq_group *bfqg; -+#endif -+ -+ if (bfqq != NULL) -+ bfqd = bfqq->bfqd; -+#ifdef CONFIG_CGROUP_BFQIO -+ else { -+ sd = entity->my_sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+ BUG_ON(!bfqd); -+ } -+#endif -+ -+ BUG_ON(old_st->wsum < entity->weight); -+ old_st->wsum -= entity->weight; -+ -+ if (entity->new_weight != entity->orig_weight) { -+ entity->orig_weight = entity->new_weight; -+ entity->ioprio = -+ bfq_weight_to_ioprio(entity->orig_weight); -+ } else if (entity->new_ioprio != entity->ioprio) { -+ entity->ioprio = entity->new_ioprio; -+ entity->orig_weight = -+ bfq_ioprio_to_weight(entity->ioprio); -+ } else -+ entity->new_weight = entity->orig_weight = -+ bfq_ioprio_to_weight(entity->ioprio); -+ -+ entity->ioprio_class = entity->new_ioprio_class; -+ entity->ioprio_changed = 0; -+ -+ /* -+ * NOTE: here we may be changing the weight too early, -+ * this will cause unfairness. The correct approach -+ * would have required additional complexity to defer -+ * weight changes to the proper time instants (i.e., -+ * when entity->finish <= old_st->vtime). -+ */ -+ new_st = bfq_entity_service_tree(entity); -+ -+ prev_weight = entity->weight; -+ new_weight = entity->orig_weight * -+ (bfqq != NULL ? bfqq->wr_coeff : 1); -+ /* -+ * If the weight of the entity changes, remove the entity -+ * from its old weight counter (if there is a counter -+ * associated with the entity), and add it to the counter -+ * associated with its new weight. -+ */ -+ if (prev_weight != new_weight) { -+ root = bfqq ? &bfqd->queue_weights_tree : -+ &bfqd->group_weights_tree; -+ bfq_weights_tree_remove(bfqd, entity, root); -+ } -+ entity->weight = new_weight; -+ /* -+ * Add the entity to its weights tree only if it is -+ * not associated with a weight-raised queue. -+ */ -+ if (prev_weight != new_weight && -+ (bfqq ? bfqq->wr_coeff == 1 : 1)) -+ /* If we get here, root has been initialized. */ -+ bfq_weights_tree_add(bfqd, entity, root); -+ -+ new_st->wsum += entity->weight; -+ -+ if (new_st != old_st) -+ entity->start = new_st->vtime; -+ } -+ -+ return new_st; -+} -+ -+/** -+ * bfq_bfqq_served - update the scheduler status after selection for -+ * service. -+ * @bfqq: the queue being served. -+ * @served: bytes to transfer. -+ * -+ * NOTE: this can be optimized, as the timestamps of upper level entities -+ * are synchronized every time a new bfqq is selected for service. By now, -+ * we keep it to better check consistency. -+ */ -+static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st; -+ -+ for_each_entity(entity) { -+ st = bfq_entity_service_tree(entity); -+ -+ entity->service += served; -+ BUG_ON(entity->service > entity->budget); -+ BUG_ON(st->wsum == 0); -+ -+ st->vtime += bfq_delta(served, st->wsum); -+ bfq_forget_idle(st); -+ } -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); -+} -+ -+/** -+ * bfq_bfqq_charge_full_budget - set the service to the entity budget. -+ * @bfqq: the queue that needs a service update. -+ * -+ * When it's not possible to be fair in the service domain, because -+ * a queue is not consuming its budget fast enough (the meaning of -+ * fast depends on the timeout parameter), we charge it a full -+ * budget. In this way we should obtain a sort of time-domain -+ * fairness among all the seeky/slow queues. -+ */ -+static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); -+ -+ bfq_bfqq_served(bfqq, entity->budget - entity->service); -+} -+ -+/** -+ * __bfq_activate_entity - activate an entity. -+ * @entity: the entity being activated. -+ * -+ * Called whenever an entity is activated, i.e., it is not active and one -+ * of its children receives a new request, or has to be reactivated due to -+ * budget exhaustion. It uses the current budget of the entity (and the -+ * service received if @entity is active) of the queue to calculate its -+ * timestamps. -+ */ -+static void __bfq_activate_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ -+ if (entity == sd->in_service_entity) { -+ BUG_ON(entity->tree != NULL); -+ /* -+ * If we are requeueing the current entity we have -+ * to take care of not charging to it service it has -+ * not received. -+ */ -+ bfq_calc_finish(entity, entity->service); -+ entity->start = entity->finish; -+ sd->in_service_entity = NULL; -+ } else if (entity->tree == &st->active) { -+ /* -+ * Requeueing an entity due to a change of some -+ * next_in_service entity below it. We reuse the -+ * old start time. -+ */ -+ bfq_active_extract(st, entity); -+ } else if (entity->tree == &st->idle) { -+ /* -+ * Must be on the idle tree, bfq_idle_extract() will -+ * check for that. -+ */ -+ bfq_idle_extract(st, entity); -+ entity->start = bfq_gt(st->vtime, entity->finish) ? -+ st->vtime : entity->finish; -+ } else { -+ /* -+ * The finish time of the entity may be invalid, and -+ * it is in the past for sure, otherwise the queue -+ * would have been on the idle tree. -+ */ -+ entity->start = st->vtime; -+ st->wsum += entity->weight; -+ bfq_get_entity(entity); -+ -+ BUG_ON(entity->on_st); -+ entity->on_st = 1; -+ } -+ -+ st = __bfq_entity_update_weight_prio(st, entity); -+ bfq_calc_finish(entity, entity->budget); -+ bfq_active_insert(st, entity); -+} -+ -+/** -+ * bfq_activate_entity - activate an entity and its ancestors if necessary. -+ * @entity: the entity to activate. -+ * -+ * Activate @entity and all the entities on the path from it to the root. -+ */ -+static void bfq_activate_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd; -+ -+ for_each_entity(entity) { -+ __bfq_activate_entity(entity); -+ -+ sd = entity->sched_data; -+ if (!bfq_update_next_in_service(sd)) -+ /* -+ * No need to propagate the activation to the -+ * upper entities, as they will be updated when -+ * the in-service entity is rescheduled. -+ */ -+ break; -+ } -+} -+ -+/** -+ * __bfq_deactivate_entity - deactivate an entity from its service tree. -+ * @entity: the entity to deactivate. -+ * @requeue: if false, the entity will not be put into the idle tree. -+ * -+ * Deactivate an entity, independently from its previous state. If the -+ * entity was not on a service tree just return, otherwise if it is on -+ * any scheduler tree, extract it from that tree, and if necessary -+ * and if the caller did not specify @requeue, put it on the idle tree. -+ * -+ * Return %1 if the caller should update the entity hierarchy, i.e., -+ * if the entity was in service or if it was the next_in_service for -+ * its sched_data; return %0 otherwise. -+ */ -+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ int was_in_service = entity == sd->in_service_entity; -+ int ret = 0; -+ -+ if (!entity->on_st) -+ return 0; -+ -+ BUG_ON(was_in_service && entity->tree != NULL); -+ -+ if (was_in_service) { -+ bfq_calc_finish(entity, entity->service); -+ sd->in_service_entity = NULL; -+ } else if (entity->tree == &st->active) -+ bfq_active_extract(st, entity); -+ else if (entity->tree == &st->idle) -+ bfq_idle_extract(st, entity); -+ else if (entity->tree != NULL) -+ BUG(); -+ -+ if (was_in_service || sd->next_in_service == entity) -+ ret = bfq_update_next_in_service(sd); -+ -+ if (!requeue || !bfq_gt(entity->finish, st->vtime)) -+ bfq_forget_entity(st, entity); -+ else -+ bfq_idle_insert(st, entity); -+ -+ BUG_ON(sd->in_service_entity == entity); -+ BUG_ON(sd->next_in_service == entity); -+ -+ return ret; -+} -+ -+/** -+ * bfq_deactivate_entity - deactivate an entity. -+ * @entity: the entity to deactivate. -+ * @requeue: true if the entity can be put on the idle tree -+ */ -+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+{ -+ struct bfq_sched_data *sd; -+ struct bfq_entity *parent; -+ -+ for_each_entity_safe(entity, parent) { -+ sd = entity->sched_data; -+ -+ if (!__bfq_deactivate_entity(entity, requeue)) -+ /* -+ * The parent entity is still backlogged, and -+ * we don't need to update it as it is still -+ * in service. -+ */ -+ break; -+ -+ if (sd->next_in_service != NULL) -+ /* -+ * The parent entity is still backlogged and -+ * the budgets on the path towards the root -+ * need to be updated. -+ */ -+ goto update; -+ -+ /* -+ * If we reach there the parent is no more backlogged and -+ * we want to propagate the dequeue upwards. -+ */ -+ requeue = 1; -+ } -+ -+ return; -+ -+update: -+ entity = parent; -+ for_each_entity(entity) { -+ __bfq_activate_entity(entity); -+ -+ sd = entity->sched_data; -+ if (!bfq_update_next_in_service(sd)) -+ break; -+ } -+} -+ -+/** -+ * bfq_update_vtime - update vtime if necessary. -+ * @st: the service tree to act upon. -+ * -+ * If necessary update the service tree vtime to have at least one -+ * eligible entity, skipping to its start time. Assumes that the -+ * active tree of the device is not empty. -+ * -+ * NOTE: this hierarchical implementation updates vtimes quite often, -+ * we may end up with reactivated processes getting timestamps after a -+ * vtime skip done because we needed a ->first_active entity on some -+ * intermediate node. -+ */ -+static void bfq_update_vtime(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entry; -+ struct rb_node *node = st->active.rb_node; -+ -+ entry = rb_entry(node, struct bfq_entity, rb_node); -+ if (bfq_gt(entry->min_start, st->vtime)) { -+ st->vtime = entry->min_start; -+ bfq_forget_idle(st); -+ } -+} -+ -+/** -+ * bfq_first_active_entity - find the eligible entity with -+ * the smallest finish time -+ * @st: the service tree to select from. -+ * -+ * This function searches the first schedulable entity, starting from the -+ * root of the tree and going on the left every time on this side there is -+ * a subtree with at least one eligible (start >= vtime) entity. The path on -+ * the right is followed only if a) the left subtree contains no eligible -+ * entities and b) no eligible entity has been found yet. -+ */ -+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entry, *first = NULL; -+ struct rb_node *node = st->active.rb_node; -+ -+ while (node != NULL) { -+ entry = rb_entry(node, struct bfq_entity, rb_node); -+left: -+ if (!bfq_gt(entry->start, st->vtime)) -+ first = entry; -+ -+ BUG_ON(bfq_gt(entry->min_start, st->vtime)); -+ -+ if (node->rb_left != NULL) { -+ entry = rb_entry(node->rb_left, -+ struct bfq_entity, rb_node); -+ if (!bfq_gt(entry->min_start, st->vtime)) { -+ node = node->rb_left; -+ goto left; -+ } -+ } -+ if (first != NULL) -+ break; -+ node = node->rb_right; -+ } -+ -+ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); -+ return first; -+} -+ -+/** -+ * __bfq_lookup_next_entity - return the first eligible entity in @st. -+ * @st: the service tree. -+ * -+ * Update the virtual time in @st and return the first eligible entity -+ * it contains. -+ */ -+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, -+ bool force) -+{ -+ struct bfq_entity *entity, *new_next_in_service = NULL; -+ -+ if (RB_EMPTY_ROOT(&st->active)) -+ return NULL; -+ -+ bfq_update_vtime(st); -+ entity = bfq_first_active_entity(st); -+ BUG_ON(bfq_gt(entity->start, st->vtime)); -+ -+ /* -+ * If the chosen entity does not match with the sched_data's -+ * next_in_service and we are forcedly serving the IDLE priority -+ * class tree, bubble up budget update. -+ */ -+ if (unlikely(force && entity != entity->sched_data->next_in_service)) { -+ new_next_in_service = entity; -+ for_each_entity(new_next_in_service) -+ bfq_update_budget(new_next_in_service); -+ } -+ -+ return entity; -+} -+ -+/** -+ * bfq_lookup_next_entity - return the first eligible entity in @sd. -+ * @sd: the sched_data. -+ * @extract: if true the returned entity will be also extracted from @sd. -+ * -+ * NOTE: since we cache the next_in_service entity at each level of the -+ * hierarchy, the complexity of the lookup can be decreased with -+ * absolutely no effort just returning the cached next_in_service value; -+ * we prefer to do full lookups to test the consistency of * the data -+ * structures. -+ */ -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -+ int extract, -+ struct bfq_data *bfqd) -+{ -+ struct bfq_service_tree *st = sd->service_tree; -+ struct bfq_entity *entity; -+ int i = 0; -+ -+ BUG_ON(sd->in_service_entity != NULL); -+ -+ if (bfqd != NULL && -+ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { -+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, -+ true); -+ if (entity != NULL) { -+ i = BFQ_IOPRIO_CLASSES - 1; -+ bfqd->bfq_class_idle_last_service = jiffies; -+ sd->next_in_service = entity; -+ } -+ } -+ for (; i < BFQ_IOPRIO_CLASSES; i++) { -+ entity = __bfq_lookup_next_entity(st + i, false); -+ if (entity != NULL) { -+ if (extract) { -+ bfq_check_next_in_service(sd, entity); -+ bfq_active_extract(st + i, entity); -+ sd->in_service_entity = entity; -+ sd->next_in_service = NULL; -+ } -+ break; -+ } -+ } -+ -+ return entity; -+} -+ -+/* -+ * Get next queue for service. -+ */ -+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_entity *entity = NULL; -+ struct bfq_sched_data *sd; -+ struct bfq_queue *bfqq; -+ -+ BUG_ON(bfqd->in_service_queue != NULL); -+ -+ if (bfqd->busy_queues == 0) -+ return NULL; -+ -+ sd = &bfqd->root_group->sched_data; -+ for (; sd != NULL; sd = entity->my_sched_data) { -+ entity = bfq_lookup_next_entity(sd, 1, bfqd); -+ BUG_ON(entity == NULL); -+ entity->service = 0; -+ } -+ -+ bfqq = bfq_entity_to_bfqq(entity); -+ BUG_ON(bfqq == NULL); -+ -+ return bfqq; -+} -+ -+/* -+ * Forced extraction of the given queue. -+ */ -+static void bfq_get_next_queue_forced(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity; -+ struct bfq_sched_data *sd; -+ -+ BUG_ON(bfqd->in_service_queue != NULL); -+ -+ entity = &bfqq->entity; -+ /* -+ * Bubble up extraction/update from the leaf to the root. -+ */ -+ for_each_entity(entity) { -+ sd = entity->sched_data; -+ bfq_update_budget(entity); -+ bfq_update_vtime(bfq_entity_service_tree(entity)); -+ bfq_active_extract(bfq_entity_service_tree(entity), entity); -+ sd->in_service_entity = entity; -+ sd->next_in_service = NULL; -+ entity->service = 0; -+ } -+ -+ return; -+} -+ -+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) -+{ -+ if (bfqd->in_service_bic != NULL) { -+ put_io_context(bfqd->in_service_bic->icq.ioc); -+ bfqd->in_service_bic = NULL; -+ } -+ -+ bfqd->in_service_queue = NULL; -+ del_timer(&bfqd->idle_slice_timer); -+} -+ -+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int requeue) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ if (bfqq == bfqd->in_service_queue) -+ __bfq_bfqd_reset_in_service(bfqd); -+ -+ bfq_deactivate_entity(entity, requeue); -+} -+ -+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_activate_entity(entity); -+} -+ -+/* -+ * Called when the bfqq no longer has requests pending, remove it from -+ * the service tree. -+ */ -+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int requeue) -+{ -+ BUG_ON(!bfq_bfqq_busy(bfqq)); -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ bfq_log_bfqq(bfqd, bfqq, "del from busy"); -+ -+ bfq_clear_bfqq_busy(bfqq); -+ -+ BUG_ON(bfqd->busy_queues == 0); -+ bfqd->busy_queues--; -+ -+ if (!bfqq->dispatched) { -+ bfq_weights_tree_remove(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->busy_in_flight_queues); -+ bfqd->busy_in_flight_queues--; -+ if (bfq_bfqq_constantly_seeky(bfqq)) { -+ BUG_ON(!bfqd-> -+ const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ } -+ if (bfqq->wr_coeff > 1) -+ bfqd->wr_busy_queues--; -+ -+ bfq_deactivate_bfqq(bfqd, bfqq, requeue); -+} -+ -+/* -+ * Called when an inactive queue receives a new request. -+ */ -+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfq_bfqq_busy(bfqq)); -+ BUG_ON(bfqq == bfqd->in_service_queue); -+ -+ bfq_log_bfqq(bfqd, bfqq, "add to busy"); -+ -+ bfq_activate_bfqq(bfqd, bfqq); -+ -+ bfq_mark_bfqq_busy(bfqq); -+ bfqd->busy_queues++; -+ -+ if (!bfqq->dispatched) { -+ if (bfqq->wr_coeff == 1) -+ bfq_weights_tree_add(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ bfqd->busy_in_flight_queues++; -+ if (bfq_bfqq_constantly_seeky(bfqq)) -+ bfqd->const_seeky_busy_in_flight_queues++; -+ } -+ } -+ if (bfqq->wr_coeff > 1) -+ bfqd->wr_busy_queues++; -+} -diff --git a/block/bfq.h b/block/bfq.h -new file mode 100644 -index 0000000..aeca08e ---- /dev/null -+++ b/block/bfq.h -@@ -0,0 +1,742 @@ -+/* -+ * BFQ-v7r5 for 3.15.0: data structures and common functions prototypes. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ */ -+ -+#ifndef _BFQ_H -+#define _BFQ_H -+ -+#include -+#include -+#include -+#include -+ -+#define BFQ_IOPRIO_CLASSES 3 -+#define BFQ_CL_IDLE_TIMEOUT (HZ/5) -+ -+#define BFQ_MIN_WEIGHT 1 -+#define BFQ_MAX_WEIGHT 1000 -+ -+#define BFQ_DEFAULT_GRP_WEIGHT 10 -+#define BFQ_DEFAULT_GRP_IOPRIO 0 -+#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE -+ -+struct bfq_entity; -+ -+/** -+ * struct bfq_service_tree - per ioprio_class service tree. -+ * @active: tree for active entities (i.e., those backlogged). -+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). -+ * @first_idle: idle entity with minimum F_i. -+ * @last_idle: idle entity with maximum F_i. -+ * @vtime: scheduler virtual time. -+ * @wsum: scheduler weight sum; active and idle entities contribute to it. -+ * -+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each -+ * ioprio_class has its own independent scheduler, and so its own -+ * bfq_service_tree. All the fields are protected by the queue lock -+ * of the containing bfqd. -+ */ -+struct bfq_service_tree { -+ struct rb_root active; -+ struct rb_root idle; -+ -+ struct bfq_entity *first_idle; -+ struct bfq_entity *last_idle; -+ -+ u64 vtime; -+ unsigned long wsum; -+}; -+ -+/** -+ * struct bfq_sched_data - multi-class scheduler. -+ * @in_service_entity: entity in service. -+ * @next_in_service: head-of-the-line entity in the scheduler. -+ * @service_tree: array of service trees, one per ioprio_class. -+ * -+ * bfq_sched_data is the basic scheduler queue. It supports three -+ * ioprio_classes, and can be used either as a toplevel queue or as -+ * an intermediate queue on a hierarchical setup. -+ * @next_in_service points to the active entity of the sched_data -+ * service trees that will be scheduled next. -+ * -+ * The supported ioprio_classes are the same as in CFQ, in descending -+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. -+ * Requests from higher priority queues are served before all the -+ * requests from lower priority queues; among requests of the same -+ * queue requests are served according to B-WF2Q+. -+ * All the fields are protected by the queue lock of the containing bfqd. -+ */ -+struct bfq_sched_data { -+ struct bfq_entity *in_service_entity; -+ struct bfq_entity *next_in_service; -+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; -+}; -+ -+/** -+ * struct bfq_weight_counter - counter of the number of all active entities -+ * with a given weight. -+ * @weight: weight of the entities that this counter refers to. -+ * @num_active: number of active entities with this weight. -+ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree -+ * and @group_weights_tree). -+ */ -+struct bfq_weight_counter { -+ short int weight; -+ unsigned int num_active; -+ struct rb_node weights_node; -+}; -+ -+/** -+ * struct bfq_entity - schedulable entity. -+ * @rb_node: service_tree member. -+ * @weight_counter: pointer to the weight counter associated with this entity. -+ * @on_st: flag, true if the entity is on a tree (either the active or -+ * the idle one of its service_tree). -+ * @finish: B-WF2Q+ finish timestamp (aka F_i). -+ * @start: B-WF2Q+ start timestamp (aka S_i). -+ * @tree: tree the entity is enqueued into; %NULL if not on a tree. -+ * @min_start: minimum start time of the (active) subtree rooted at -+ * this entity; used for O(log N) lookups into active trees. -+ * @service: service received during the last round of service. -+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. -+ * @weight: weight of the queue -+ * @parent: parent entity, for hierarchical scheduling. -+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the -+ * associated scheduler queue, %NULL on leaf nodes. -+ * @sched_data: the scheduler queue this entity belongs to. -+ * @ioprio: the ioprio in use. -+ * @new_weight: when a weight change is requested, the new weight value. -+ * @orig_weight: original weight, used to implement weight boosting -+ * @new_ioprio: when an ioprio change is requested, the new ioprio value. -+ * @ioprio_class: the ioprio_class in use. -+ * @new_ioprio_class: when an ioprio_class change is requested, the new -+ * ioprio_class value. -+ * @ioprio_changed: flag, true when the user requested a weight, ioprio or -+ * ioprio_class change. -+ * -+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the -+ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each -+ * entity belongs to the sched_data of the parent group in the cgroup -+ * hierarchy. Non-leaf entities have also their own sched_data, stored -+ * in @my_sched_data. -+ * -+ * Each entity stores independently its priority values; this would -+ * allow different weights on different devices, but this -+ * functionality is not exported to userspace by now. Priorities and -+ * weights are updated lazily, first storing the new values into the -+ * new_* fields, then setting the @ioprio_changed flag. As soon as -+ * there is a transition in the entity state that allows the priority -+ * update to take place the effective and the requested priority -+ * values are synchronized. -+ * -+ * Unless cgroups are used, the weight value is calculated from the -+ * ioprio to export the same interface as CFQ. When dealing with -+ * ``well-behaved'' queues (i.e., queues that do not spend too much -+ * time to consume their budget and have true sequential behavior, and -+ * when there are no external factors breaking anticipation) the -+ * relative weights at each level of the cgroups hierarchy should be -+ * guaranteed. All the fields are protected by the queue lock of the -+ * containing bfqd. -+ */ -+struct bfq_entity { -+ struct rb_node rb_node; -+ struct bfq_weight_counter *weight_counter; -+ -+ int on_st; -+ -+ u64 finish; -+ u64 start; -+ -+ struct rb_root *tree; -+ -+ u64 min_start; -+ -+ unsigned long service, budget; -+ unsigned short weight, new_weight; -+ unsigned short orig_weight; -+ -+ struct bfq_entity *parent; -+ -+ struct bfq_sched_data *my_sched_data; -+ struct bfq_sched_data *sched_data; -+ -+ unsigned short ioprio, new_ioprio; -+ unsigned short ioprio_class, new_ioprio_class; -+ -+ int ioprio_changed; -+}; -+ -+struct bfq_group; -+ -+/** -+ * struct bfq_queue - leaf schedulable entity. -+ * @ref: reference counter. -+ * @bfqd: parent bfq_data. -+ * @new_bfqq: shared bfq_queue if queue is cooperating with -+ * one or more other queues. -+ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). -+ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). -+ * @sort_list: sorted list of pending requests. -+ * @next_rq: if fifo isn't expired, next request to serve. -+ * @queued: nr of requests queued in @sort_list. -+ * @allocated: currently allocated requests. -+ * @meta_pending: pending metadata requests. -+ * @fifo: fifo list of requests in sort_list. -+ * @entity: entity representing this queue in the scheduler. -+ * @max_budget: maximum budget allowed from the feedback mechanism. -+ * @budget_timeout: budget expiration (in jiffies). -+ * @dispatched: number of requests on the dispatch list or inside driver. -+ * @flags: status flags. -+ * @bfqq_list: node for active/idle bfqq list inside our bfqd. -+ * @seek_samples: number of seeks sampled -+ * @seek_total: sum of the distances of the seeks sampled -+ * @seek_mean: mean seek distance -+ * @last_request_pos: position of the last request enqueued -+ * @requests_within_timer: number of consecutive pairs of request completion -+ * and arrival, such that the queue becomes idle -+ * after the completion, but the next request arrives -+ * within an idle time slice; used only if the queue's -+ * IO_bound has been cleared. -+ * @pid: pid of the process owning the queue, used for logging purposes. -+ * @last_wr_start_finish: start time of the current weight-raising period if -+ * the @bfq-queue is being weight-raised, otherwise -+ * finish time of the last weight-raising period -+ * @wr_cur_max_time: current max raising time for this queue -+ * @soft_rt_next_start: minimum time instant such that, only if a new -+ * request is enqueued after this time instant in an -+ * idle @bfq_queue with no outstanding requests, then -+ * the task associated with the queue it is deemed as -+ * soft real-time (see the comments to the function -+ * bfq_bfqq_softrt_next_start()). -+ * @last_idle_bklogged: time of the last transition of the @bfq_queue from -+ * idle to backlogged -+ * @service_from_backlogged: cumulative service received from the @bfq_queue -+ * since the last transition from idle to -+ * backlogged -+ * -+ * A bfq_queue is a leaf request queue; it can be associated with an io_context -+ * or more, if it is async or shared between cooperating processes. @cgroup -+ * holds a reference to the cgroup, to be sure that it does not disappear while -+ * a bfqq still references it (mostly to avoid races between request issuing and -+ * task migration followed by cgroup destruction). -+ * All the fields are protected by the queue lock of the containing bfqd. -+ */ -+struct bfq_queue { -+ atomic_t ref; -+ struct bfq_data *bfqd; -+ -+ /* fields for cooperating queues handling */ -+ struct bfq_queue *new_bfqq; -+ struct rb_node pos_node; -+ struct rb_root *pos_root; -+ -+ struct rb_root sort_list; -+ struct request *next_rq; -+ int queued[2]; -+ int allocated[2]; -+ int meta_pending; -+ struct list_head fifo; -+ -+ struct bfq_entity entity; -+ -+ unsigned long max_budget; -+ unsigned long budget_timeout; -+ -+ int dispatched; -+ -+ unsigned int flags; -+ -+ struct list_head bfqq_list; -+ -+ unsigned int seek_samples; -+ u64 seek_total; -+ sector_t seek_mean; -+ sector_t last_request_pos; -+ -+ unsigned int requests_within_timer; -+ -+ pid_t pid; -+ -+ /* weight-raising fields */ -+ unsigned long wr_cur_max_time; -+ unsigned long soft_rt_next_start; -+ unsigned long last_wr_start_finish; -+ unsigned int wr_coeff; -+ unsigned long last_idle_bklogged; -+ unsigned long service_from_backlogged; -+}; -+ -+/** -+ * struct bfq_ttime - per process thinktime stats. -+ * @ttime_total: total process thinktime -+ * @ttime_samples: number of thinktime samples -+ * @ttime_mean: average process thinktime -+ */ -+struct bfq_ttime { -+ unsigned long last_end_request; -+ -+ unsigned long ttime_total; -+ unsigned long ttime_samples; -+ unsigned long ttime_mean; -+}; -+ -+/** -+ * struct bfq_io_cq - per (request_queue, io_context) structure. -+ * @icq: associated io_cq structure -+ * @bfqq: array of two process queues, the sync and the async -+ * @ttime: associated @bfq_ttime struct -+ */ -+struct bfq_io_cq { -+ struct io_cq icq; /* must be the first member */ -+ struct bfq_queue *bfqq[2]; -+ struct bfq_ttime ttime; -+ int ioprio; -+}; -+ -+enum bfq_device_speed { -+ BFQ_BFQD_FAST, -+ BFQ_BFQD_SLOW, -+}; -+ -+/** -+ * struct bfq_data - per device data structure. -+ * @queue: request queue for the managed device. -+ * @root_group: root bfq_group for the device. -+ * @rq_pos_tree: rbtree sorted by next_request position, used when -+ * determining if two or more queues have interleaving -+ * requests (see bfq_close_cooperator()). -+ * @active_numerous_groups: number of bfq_groups containing more than one -+ * active @bfq_entity. -+ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by -+ * weight. Used to keep track of whether all @bfq_queues -+ * have the same weight. The tree contains one counter -+ * for each distinct weight associated to some active -+ * and not weight-raised @bfq_queue (see the comments to -+ * the functions bfq_weights_tree_[add|remove] for -+ * further details). -+ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted -+ * by weight. Used to keep track of whether all -+ * @bfq_groups have the same weight. The tree contains -+ * one counter for each distinct weight associated to -+ * some active @bfq_group (see the comments to the -+ * functions bfq_weights_tree_[add|remove] for further -+ * details). -+ * @busy_queues: number of bfq_queues containing requests (including the -+ * queue in service, even if it is idling). -+ * @busy_in_flight_queues: number of @bfq_queues containing pending or -+ * in-flight requests, plus the @bfq_queue in -+ * service, even if idle but waiting for the -+ * possible arrival of its next sync request. This -+ * field is updated only if the device is rotational, -+ * but used only if the device is also NCQ-capable. -+ * The reason why the field is updated also for non- -+ * NCQ-capable rotational devices is related to the -+ * fact that the value of @hw_tag may be set also -+ * later than when busy_in_flight_queues may need to -+ * be incremented for the first time(s). Taking also -+ * this possibility into account, to avoid unbalanced -+ * increments/decrements, would imply more overhead -+ * than just updating busy_in_flight_queues -+ * regardless of the value of @hw_tag. -+ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues -+ * (that is, seeky queues that expired -+ * for budget timeout at least once) -+ * containing pending or in-flight -+ * requests, including the in-service -+ * @bfq_queue if constantly seeky. This -+ * field is updated only if the device -+ * is rotational, but used only if the -+ * device is also NCQ-capable (see the -+ * comments to @busy_in_flight_queues). -+ * @wr_busy_queues: number of weight-raised busy @bfq_queues. -+ * @queued: number of queued requests. -+ * @rq_in_driver: number of requests dispatched and waiting for completion. -+ * @sync_flight: number of sync requests in the driver. -+ * @max_rq_in_driver: max number of reqs in driver in the last -+ * @hw_tag_samples completed requests. -+ * @hw_tag_samples: nr of samples used to calculate hw_tag. -+ * @hw_tag: flag set to one if the driver is showing a queueing behavior. -+ * @budgets_assigned: number of budgets assigned. -+ * @idle_slice_timer: timer set when idling for the next sequential request -+ * from the queue in service. -+ * @unplug_work: delayed work to restart dispatching on the request queue. -+ * @in_service_queue: bfq_queue in service. -+ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue. -+ * @last_position: on-disk position of the last served request. -+ * @last_budget_start: beginning of the last budget. -+ * @last_idling_start: beginning of the last idle slice. -+ * @peak_rate: peak transfer rate observed for a budget. -+ * @peak_rate_samples: number of samples used to calculate @peak_rate. -+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before -+ * rescheduling. -+ * @group_list: list of all the bfq_groups active on the device. -+ * @active_list: list of all the bfq_queues active on the device. -+ * @idle_list: list of all the bfq_queues idle on the device. -+ * @bfq_quantum: max number of requests dispatched per dispatch round. -+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires -+ * requests are served in fifo order. -+ * @bfq_back_penalty: weight of backward seeks wrt forward ones. -+ * @bfq_back_max: maximum allowed backward seek. -+ * @bfq_slice_idle: maximum idling time. -+ * @bfq_user_max_budget: user-configured max budget value -+ * (0 for auto-tuning). -+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to -+ * async queues. -+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to -+ * to prevent seeky queues to impose long latencies to well -+ * behaved ones (this also implies that seeky queues cannot -+ * receive guarantees in the service domain; after a timeout -+ * they are charged for the whole allocated budget, to try -+ * to preserve a behavior reasonably fair among them, but -+ * without service-domain guarantees). -+ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is -+ * no more granted any weight-raising. -+ * @bfq_failed_cooperations: number of consecutive failed cooperation -+ * chances after which weight-raising is restored -+ * to a queue subject to more than bfq_coop_thresh -+ * queue merges. -+ * @bfq_requests_within_timer: number of consecutive requests that must be -+ * issued within the idle time slice to set -+ * again idling to a queue which was marked as -+ * non-I/O-bound (see the definition of the -+ * IO_bound flag for further details). -+ * @bfq_wr_coeff: Maximum factor by which the weight of a weight-raised -+ * queue is multiplied -+ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies) -+ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes -+ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising -+ * may be reactivated for a queue (in jiffies) -+ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals -+ * after which weight-raising may be -+ * reactivated for an already busy queue -+ * (in jiffies) -+ * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, -+ * sectors per seconds -+ * @RT_prod: cached value of the product R*T used for computing the maximum -+ * duration of the weight raising automatically -+ * @device_speed: device-speed class for the low-latency heuristic -+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions -+ * -+ * All the fields are protected by the @queue lock. -+ */ -+struct bfq_data { -+ struct request_queue *queue; -+ -+ struct bfq_group *root_group; -+ struct rb_root rq_pos_tree; -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ int active_numerous_groups; -+#endif -+ -+ struct rb_root queue_weights_tree; -+ struct rb_root group_weights_tree; -+ -+ int busy_queues; -+ int busy_in_flight_queues; -+ int const_seeky_busy_in_flight_queues; -+ int wr_busy_queues; -+ int queued; -+ int rq_in_driver; -+ int sync_flight; -+ -+ int max_rq_in_driver; -+ int hw_tag_samples; -+ int hw_tag; -+ -+ int budgets_assigned; -+ -+ struct timer_list idle_slice_timer; -+ struct work_struct unplug_work; -+ -+ struct bfq_queue *in_service_queue; -+ struct bfq_io_cq *in_service_bic; -+ -+ sector_t last_position; -+ -+ ktime_t last_budget_start; -+ ktime_t last_idling_start; -+ int peak_rate_samples; -+ u64 peak_rate; -+ unsigned long bfq_max_budget; -+ -+ struct hlist_head group_list; -+ struct list_head active_list; -+ struct list_head idle_list; -+ -+ unsigned int bfq_quantum; -+ unsigned int bfq_fifo_expire[2]; -+ unsigned int bfq_back_penalty; -+ unsigned int bfq_back_max; -+ unsigned int bfq_slice_idle; -+ u64 bfq_class_idle_last_service; -+ -+ unsigned int bfq_user_max_budget; -+ unsigned int bfq_max_budget_async_rq; -+ unsigned int bfq_timeout[2]; -+ -+ unsigned int bfq_coop_thresh; -+ unsigned int bfq_failed_cooperations; -+ unsigned int bfq_requests_within_timer; -+ -+ bool low_latency; -+ -+ /* parameters of the low_latency heuristics */ -+ unsigned int bfq_wr_coeff; -+ unsigned int bfq_wr_max_time; -+ unsigned int bfq_wr_rt_max_time; -+ unsigned int bfq_wr_min_idle_time; -+ unsigned long bfq_wr_min_inter_arr_async; -+ unsigned int bfq_wr_max_softrt_rate; -+ u64 RT_prod; -+ enum bfq_device_speed device_speed; -+ -+ struct bfq_queue oom_bfqq; -+}; -+ -+enum bfqq_state_flags { -+ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ -+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ -+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ -+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ -+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ -+ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ -+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ -+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ -+ BFQ_BFQQ_FLAG_IO_bound, /* -+ * bfqq has timed-out at least once -+ * having consumed at most 2/10 of -+ * its budget -+ */ -+ BFQ_BFQQ_FLAG_constantly_seeky, /* -+ * bfqq has proved to be slow and -+ * seeky until budget timeout -+ */ -+ BFQ_BFQQ_FLAG_softrt_update, /* -+ * may need softrt-next-start -+ * update -+ */ -+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ -+}; -+ -+#define BFQ_BFQQ_FNS(name) \ -+static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ -+{ \ -+ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ -+} \ -+static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ -+{ \ -+ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ -+} \ -+static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ -+{ \ -+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ -+} -+ -+BFQ_BFQQ_FNS(busy); -+BFQ_BFQQ_FNS(wait_request); -+BFQ_BFQQ_FNS(must_alloc); -+BFQ_BFQQ_FNS(fifo_expire); -+BFQ_BFQQ_FNS(idle_window); -+BFQ_BFQQ_FNS(prio_changed); -+BFQ_BFQQ_FNS(sync); -+BFQ_BFQQ_FNS(budget_new); -+BFQ_BFQQ_FNS(IO_bound); -+BFQ_BFQQ_FNS(constantly_seeky); -+BFQ_BFQQ_FNS(coop); -+BFQ_BFQQ_FNS(split_coop); -+BFQ_BFQQ_FNS(softrt_update); -+#undef BFQ_BFQQ_FNS -+ -+/* Logging facilities. */ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) -+ -+#define bfq_log(bfqd, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) -+ -+/* Expiration reasons. */ -+enum bfqq_expiration { -+ BFQ_BFQQ_TOO_IDLE = 0, /* -+ * queue has been idling for -+ * too long -+ */ -+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ -+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ -+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ -+}; -+ -+#ifdef CONFIG_CGROUP_BFQIO -+/** -+ * struct bfq_group - per (device, cgroup) data structure. -+ * @entity: schedulable entity to insert into the parent group sched_data. -+ * @sched_data: own sched_data, to contain child entities (they may be -+ * both bfq_queues and bfq_groups). -+ * @group_node: node to be inserted into the bfqio_cgroup->group_data -+ * list of the containing cgroup's bfqio_cgroup. -+ * @bfqd_node: node to be inserted into the @bfqd->group_list list -+ * of the groups active on the same device; used for cleanup. -+ * @bfqd: the bfq_data for the device this group acts upon. -+ * @async_bfqq: array of async queues for all the tasks belonging to -+ * the group, one queue per ioprio value per ioprio_class, -+ * except for the idle class that has only one queue. -+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). -+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used -+ * to avoid too many special cases during group creation/ -+ * migration. -+ * @active_entities: number of active entities belonging to the group; -+ * unused for the root group. Used to know whether there -+ * are groups with more than one active @bfq_entity -+ * (see the comments to the function -+ * bfq_bfqq_must_not_expire()). -+ * -+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup -+ * there is a set of bfq_groups, each one collecting the lower-level -+ * entities belonging to the group that are acting on the same device. -+ * -+ * Locking works as follows: -+ * o @group_node is protected by the bfqio_cgroup lock, and is accessed -+ * via RCU from its readers. -+ * o @bfqd is protected by the queue lock, RCU is used to access it -+ * from the readers. -+ * o All the other fields are protected by the @bfqd queue lock. -+ */ -+struct bfq_group { -+ struct bfq_entity entity; -+ struct bfq_sched_data sched_data; -+ -+ struct hlist_node group_node; -+ struct hlist_node bfqd_node; -+ -+ void *bfqd; -+ -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; -+ struct bfq_queue *async_idle_bfqq; -+ -+ struct bfq_entity *my_entity; -+ -+ int active_entities; -+}; -+ -+/** -+ * struct bfqio_cgroup - bfq cgroup data structure. -+ * @css: subsystem state for bfq in the containing cgroup. -+ * @online: flag marked when the subsystem is inserted. -+ * @weight: cgroup weight. -+ * @ioprio: cgroup ioprio. -+ * @ioprio_class: cgroup ioprio_class. -+ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. -+ * @group_data: list containing the bfq_group belonging to this cgroup. -+ * -+ * @group_data is accessed using RCU, with @lock protecting the updates, -+ * @ioprio and @ioprio_class are protected by @lock. -+ */ -+struct bfqio_cgroup { -+ struct cgroup_subsys_state css; -+ bool online; -+ -+ unsigned short weight, ioprio, ioprio_class; -+ -+ spinlock_t lock; -+ struct hlist_head group_data; -+}; -+#else -+struct bfq_group { -+ struct bfq_sched_data sched_data; -+ -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; -+ struct bfq_queue *async_idle_bfqq; -+}; -+#endif -+ -+static inline struct bfq_service_tree * -+bfq_entity_service_tree(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sched_data = entity->sched_data; -+ unsigned int idx = entity->ioprio_class - 1; -+ -+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); -+ BUG_ON(sched_data == NULL); -+ -+ return sched_data->service_tree + idx; -+} -+ -+static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, -+ int is_sync) -+{ -+ return bic->bfqq[!!is_sync]; -+} -+ -+static inline void bic_set_bfqq(struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq, int is_sync) -+{ -+ bic->bfqq[!!is_sync] = bfqq; -+} -+ -+static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) -+{ -+ return bic->icq.q->elevator->elevator_data; -+} -+ -+/** -+ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. -+ * @ptr: a pointer to a bfqd. -+ * @flags: storage for the flags to be saved. -+ * -+ * This function allows bfqg->bfqd to be protected by the -+ * queue lock of the bfqd they reference; the pointer is dereferenced -+ * under RCU, so the storage for bfqd is assured to be safe as long -+ * as the RCU read side critical section does not end. After the -+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be -+ * sure that no other writer accessed it. If we raced with a writer, -+ * the function returns NULL, with the queue unlocked, otherwise it -+ * returns the dereferenced pointer, with the queue locked. -+ */ -+static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, -+ unsigned long *flags) -+{ -+ struct bfq_data *bfqd; -+ -+ rcu_read_lock(); -+ bfqd = rcu_dereference(*(struct bfq_data **)ptr); -+ -+ if (bfqd != NULL) { -+ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); -+ if (*ptr == bfqd) -+ goto out; -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -+ } -+ -+ bfqd = NULL; -+out: -+ rcu_read_unlock(); -+ return bfqd; -+} -+ -+static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, -+ unsigned long *flags) -+{ -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -+} -+ -+static void bfq_changed_ioprio(struct bfq_io_cq *bic); -+static void bfq_put_queue(struct bfq_queue *bfqq); -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, int is_sync, -+ struct bfq_io_cq *bic, gfp_t gfp_mask); -+static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -+ struct bfq_group *bfqg); -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); -+ -+#endif /* _BFQ_H */ --- -1.9.3 - diff --git a/core/linux-kirkwood-dt/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch b/core/linux-kirkwood-dt/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch deleted file mode 100644 index db90163e3..000000000 --- a/core/linux-kirkwood-dt/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch +++ /dev/null @@ -1,1188 +0,0 @@ -From e07cf5701607dd66a8e360c7037ac29b0df4e279 Mon Sep 17 00:00:00 2001 -From: Mauro Andreolini -Date: Wed, 18 Jun 2014 17:38:07 +0200 -Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r5 for - 3.15.0 - -A set of processes may happen to perform interleaved reads, i.e.,requests -whose union would give rise to a sequential read pattern. There are two -typical cases: in the first case, processes read fixed-size chunks of -data at a fixed distance from each other, while in the second case processes -may read variable-size chunks at variable distances. The latter case occurs -for example with QEMU, which splits the I/O generated by the guest into -multiple chunks, and lets these chunks be served by a pool of cooperating -processes, iteratively assigning the next chunk of I/O to the first -available process. CFQ uses actual queue merging for the first type of -rocesses, whereas it uses preemption to get a sequential read pattern out -of the read requests performed by the second type of processes. In the end -it uses two different mechanisms to achieve the same goal: boosting the -throughput with interleaved I/O. - -This patch introduces Early Queue Merge (EQM), a unified mechanism to get a -sequential read pattern with both types of processes. The main idea is -checking newly arrived requests against the next request of the active queue -both in case of actual request insert and in case of request merge. By doing -so, both the types of processes can be handled by just merging their queues. -EQM is then simpler and more compact than the pair of mechanisms used in -CFQ. - -Finally, EQM also preserves the typical low-latency properties of BFQ, by -properly restoring the weight-raising state of a queue when it gets back to -a non-merged state. - -Signed-off-by: Mauro Andreolini -Signed-off-by: Arianna Avanzini -Signed-off-by: Paolo Valente ---- - block/bfq-iosched.c | 736 ++++++++++++++++++++++++++++++++++++---------------- - block/bfq-sched.c | 28 -- - block/bfq.h | 46 +++- - 3 files changed, 556 insertions(+), 254 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 6cf7bca..4579c6d 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -571,6 +571,57 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) - return dur; - } - -+static inline unsigned -+bfq_bfqq_cooperations(struct bfq_queue *bfqq) -+{ -+ return bfqq->bic ? bfqq->bic->cooperations : 0; -+} -+ -+static inline void -+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+{ -+ if (bic->saved_idle_window) -+ bfq_mark_bfqq_idle_window(bfqq); -+ else -+ bfq_clear_bfqq_idle_window(bfqq); -+ if (bic->saved_IO_bound) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ else -+ bfq_clear_bfqq_IO_bound(bfqq); -+ if (bic->wr_time_left && bfqq->bfqd->low_latency && -+ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { -+ /* -+ * Start a weight raising period with the duration given by -+ * the raising_time_left snapshot. -+ */ -+ if (bfq_bfqq_busy(bfqq)) -+ bfqq->bfqd->wr_busy_queues++; -+ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bic->wr_time_left; -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->entity.ioprio_changed = 1; -+ } -+ /* -+ * Clear wr_time_left to prevent bfq_bfqq_save_state() from -+ * getting confused about the queue's need of a weight-raising -+ * period. -+ */ -+ bic->wr_time_left = 0; -+} -+ -+/* -+ * Must be called with the queue_lock held. -+ */ -+static int bfqq_process_refs(struct bfq_queue *bfqq) -+{ -+ int process_refs, io_refs; -+ -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -+ BUG_ON(process_refs < 0); -+ return process_refs; -+} -+ - static void bfq_add_request(struct request *rq) - { - struct bfq_queue *bfqq = RQ_BFQQ(rq); -@@ -602,8 +653,11 @@ static void bfq_add_request(struct request *rq) - - if (!bfq_bfqq_busy(bfqq)) { - int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh && - time_is_before_jiffies(bfqq->soft_rt_next_start); -- idle_for_long_time = time_is_before_jiffies( -+ idle_for_long_time = bfq_bfqq_cooperations(bfqq) < -+ bfqd->bfq_coop_thresh && -+ time_is_before_jiffies( - bfqq->budget_timeout + - bfqd->bfq_wr_min_idle_time); - entity->budget = max_t(unsigned long, bfqq->max_budget, -@@ -624,11 +678,20 @@ static void bfq_add_request(struct request *rq) - if (!bfqd->low_latency) - goto add_bfqq_busy; - -+ if (bfq_bfqq_just_split(bfqq)) -+ goto set_ioprio_changed; -+ - /* -- * If the queue is not being boosted and has been idle -- * for enough time, start a weight-raising period -+ * If the queue: -+ * - is not being boosted, -+ * - has been idle for enough time, -+ * - is not a sync queue or is linked to a bfq_io_cq (it is -+ * shared "for its nature" or it is not shared and its -+ * requests have not been redirected to a shared queue) -+ * start a weight-raising period. - */ -- if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) { -+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt) && -+ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) { - bfqq->wr_coeff = bfqd->bfq_wr_coeff; - if (idle_for_long_time) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -@@ -642,9 +705,11 @@ static void bfq_add_request(struct request *rq) - } else if (old_wr_coeff > 1) { - if (idle_for_long_time) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -- else if (bfqq->wr_cur_max_time == -- bfqd->bfq_wr_rt_max_time && -- !soft_rt) { -+ else if (bfq_bfqq_cooperations(bfqq) >= -+ bfqd->bfq_coop_thresh || -+ (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt)) { - bfqq->wr_coeff = 1; - bfq_log_bfqq(bfqd, bfqq, - "wrais ending at %lu, rais_max_time %u", -@@ -660,18 +725,18 @@ static void bfq_add_request(struct request *rq) - /* - * - * The remaining weight-raising time is lower -- * than bfqd->bfq_wr_rt_max_time, which -- * means that the application is enjoying -- * weight raising either because deemed soft- -- * rt in the near past, or because deemed -- * interactive a long ago. In both cases, -- * resetting now the current remaining weight- -- * raising time for the application to the -- * weight-raising duration for soft rt -- * applications would not cause any latency -- * increase for the application (as the new -- * duration would be higher than the remaining -- * time). -+ * than bfqd->bfq_wr_rt_max_time, which means -+ * that the application is enjoying weight -+ * raising either because deemed soft-rt in -+ * the near past, or because deemed interactive -+ * a long ago. -+ * In both cases, resetting now the current -+ * remaining weight-raising time for the -+ * application to the weight-raising duration -+ * for soft rt applications would not cause any -+ * latency increase for the application (as the -+ * new duration would be higher than the -+ * remaining time). - * - * In addition, the application is now meeting - * the requirements for being deemed soft rt. -@@ -706,6 +771,7 @@ static void bfq_add_request(struct request *rq) - bfqd->bfq_wr_rt_max_time; - } - } -+set_ioprio_changed: - if (old_wr_coeff != bfqq->wr_coeff) - entity->ioprio_changed = 1; - add_bfqq_busy: -@@ -918,90 +984,35 @@ static void bfq_end_wr(struct bfq_data *bfqd) - spin_unlock_irq(bfqd->queue->queue_lock); - } - --static int bfq_allow_merge(struct request_queue *q, struct request *rq, -- struct bio *bio) -+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) - { -- struct bfq_data *bfqd = q->elevator->elevator_data; -- struct bfq_io_cq *bic; -- struct bfq_queue *bfqq; -- -- /* -- * Disallow merge of a sync bio into an async request. -- */ -- if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -- return 0; -- -- /* -- * Lookup the bfqq that this bio will be queued with. Allow -- * merge only if rq is queued there. -- * Queue lock is held here. -- */ -- bic = bfq_bic_lookup(bfqd, current->io_context); -- if (bic == NULL) -- return 0; -- -- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -- return bfqq == RQ_BFQQ(rq); --} -- --static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) --{ -- if (bfqq != NULL) { -- bfq_mark_bfqq_must_alloc(bfqq); -- bfq_mark_bfqq_budget_new(bfqq); -- bfq_clear_bfqq_fifo_expire(bfqq); -- -- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -- -- bfq_log_bfqq(bfqd, bfqq, -- "set_in_service_queue, cur-budget = %lu", -- bfqq->entity.budget); -- } -- -- bfqd->in_service_queue = bfqq; --} -- --/* -- * Get and set a new queue for service. -- */ --static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) --{ -- if (!bfqq) -- bfqq = bfq_get_next_queue(bfqd); -+ if (request) -+ return blk_rq_pos(io_struct); - else -- bfq_get_next_queue_forced(bfqd, bfqq); -- -- __bfq_set_in_service_queue(bfqd, bfqq); -- return bfqq; -+ return ((struct bio *)io_struct)->bi_iter.bi_sector; - } - --static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, -- struct request *rq) -+static inline sector_t bfq_dist_from(sector_t pos1, -+ sector_t pos2) - { -- if (blk_rq_pos(rq) >= bfqd->last_position) -- return blk_rq_pos(rq) - bfqd->last_position; -+ if (pos1 >= pos2) -+ return pos1 - pos2; - else -- return bfqd->last_position - blk_rq_pos(rq); -+ return pos2 - pos1; - } - --/* -- * Return true if bfqq has no request pending and rq is close enough to -- * bfqd->last_position, or if rq is closer to bfqd->last_position than -- * bfqq->next_rq -- */ --static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) -+static inline int bfq_rq_close_to_sector(void *io_struct, bool request, -+ sector_t sector) - { -- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; -+ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= -+ BFQQ_SEEK_THR; - } - --static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) - { - struct rb_root *root = &bfqd->rq_pos_tree; - struct rb_node *parent, *node; - struct bfq_queue *__bfqq; -- sector_t sector = bfqd->last_position; - - if (RB_EMPTY_ROOT(root)) - return NULL; -@@ -1020,7 +1031,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) - * next_request position). - */ - __bfqq = rb_entry(parent, struct bfq_queue, pos_node); -- if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) - return __bfqq; - - if (blk_rq_pos(__bfqq->next_rq) < sector) -@@ -1031,7 +1042,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) - return NULL; - - __bfqq = rb_entry(node, struct bfq_queue, pos_node); -- if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) - return __bfqq; - - return NULL; -@@ -1040,14 +1051,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) - /* - * bfqd - obvious - * cur_bfqq - passed in so that we don't decide that the current queue -- * is closely cooperating with itself. -- * -- * We are assuming that cur_bfqq has dispatched at least one request, -- * and that bfqd->last_position reflects a position on the disk associated -- * with the I/O issued by cur_bfqq. -+ * is closely cooperating with itself -+ * sector - used as a reference point to search for a close queue - */ - static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, -- struct bfq_queue *cur_bfqq) -+ struct bfq_queue *cur_bfqq, -+ sector_t sector) - { - struct bfq_queue *bfqq; - -@@ -1067,7 +1076,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, - * working closely on the same area of the disk. In that case, - * we can group them together and don't waste time idling. - */ -- bfqq = bfqq_close(bfqd); -+ bfqq = bfqq_close(bfqd, sector); - if (bfqq == NULL || bfqq == cur_bfqq) - return NULL; - -@@ -1094,6 +1103,305 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, - return bfqq; - } - -+static struct bfq_queue * -+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ int process_refs, new_process_refs; -+ struct bfq_queue *__bfqq; -+ -+ /* -+ * If there are no process references on the new_bfqq, then it is -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -+ * may have dropped their last reference (not just their last process -+ * reference). -+ */ -+ if (!bfqq_process_refs(new_bfqq)) -+ return NULL; -+ -+ /* Avoid a circular list and skip interim queue merges. */ -+ while ((__bfqq = new_bfqq->new_bfqq)) { -+ if (__bfqq == bfqq) -+ return NULL; -+ new_bfqq = __bfqq; -+ } -+ -+ process_refs = bfqq_process_refs(bfqq); -+ new_process_refs = bfqq_process_refs(new_bfqq); -+ /* -+ * If the process for the bfqq has gone away, there is no -+ * sense in merging the queues. -+ */ -+ if (process_refs == 0 || new_process_refs == 0) -+ return NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -+ new_bfqq->pid); -+ -+ /* -+ * Merging is just a redirection: the requests of the process -+ * owning one of the two queues are redirected to the other queue. -+ * The latter queue, in its turn, is set as shared if this is the -+ * first time that the requests of some process are redirected to -+ * it. -+ * -+ * We redirect bfqq to new_bfqq and not the opposite, because we -+ * are in the context of the process owning bfqq, hence we have -+ * the io_cq of this process. So we can immediately configure this -+ * io_cq to redirect the requests of the process to new_bfqq. -+ * -+ * NOTE, even if new_bfqq coincides with the in-service queue, the -+ * io_cq of new_bfqq is not available, because, if the in-service -+ * queue is shared, bfqd->in_service_bic may not point to the -+ * io_cq of the in-service queue. -+ * Redirecting the requests of the process owning bfqq to the -+ * currently in-service queue is in any case the best option, as -+ * we feed the in-service queue with new requests close to the -+ * last request served and, by doing so, hopefully increase the -+ * throughput. -+ */ -+ bfqq->new_bfqq = new_bfqq; -+ atomic_add(process_refs, &new_bfqq->ref); -+ return new_bfqq; -+} -+ -+/* -+ * Attempt to schedule a merge of bfqq with the currently in-service queue -+ * or with a close queue among the scheduled queues. -+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue -+ * structure otherwise. -+ */ -+static struct bfq_queue * -+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ void *io_struct, bool request) -+{ -+ struct bfq_queue *in_service_bfqq, *new_bfqq; -+ -+ if (bfqq->new_bfqq) -+ return bfqq->new_bfqq; -+ -+ if (!io_struct) -+ return NULL; -+ -+ in_service_bfqq = bfqd->in_service_queue; -+ -+ if (in_service_bfqq == NULL || in_service_bfqq == bfqq || -+ !bfqd->in_service_bic) -+ goto check_scheduled; -+ -+ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq)) -+ goto check_scheduled; -+ -+ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq)) -+ goto check_scheduled; -+ -+ if (in_service_bfqq->entity.parent != bfqq->entity.parent) -+ goto check_scheduled; -+ -+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && -+ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) { -+ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); -+ if (new_bfqq != NULL) -+ return new_bfqq; /* Merge with in-service queue */ -+ } -+ -+ /* -+ * Check whether there is a cooperator among currently scheduled -+ * queues. The only thing we need is that the bio/request is not -+ * NULL, as we need it to establish whether a cooperator exists. -+ */ -+check_scheduled: -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq, -+ bfq_io_struct_pos(io_struct, request)); -+ if (new_bfqq) -+ return bfq_setup_merge(bfqq, new_bfqq); -+ -+ return NULL; -+} -+ -+static inline void -+bfq_bfqq_save_state(struct bfq_queue *bfqq) -+{ -+ /* -+ * If bfqq->bic == NULL, the queue is already shared or its requests -+ * have already been redirected to a shared queue; both idle window -+ * and weight raising state have already been saved. Do nothing. -+ */ -+ if (bfqq->bic == NULL) -+ return; -+ if (bfqq->bic->wr_time_left) -+ /* -+ * This is the queue of a just-started process, and would -+ * deserve weight raising: we set wr_time_left to the full -+ * weight-raising duration to trigger weight-raising when -+ * and if the queue is split and the first request of the -+ * queue is enqueued. -+ */ -+ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd); -+ else if (bfqq->wr_coeff > 1) { -+ unsigned long wr_duration = -+ jiffies - bfqq->last_wr_start_finish; -+ /* -+ * It may happen that a queue's weight raising period lasts -+ * longer than its wr_cur_max_time, as weight raising is -+ * handled only when a request is enqueued or dispatched (it -+ * does not use any timer). If the weight raising period is -+ * about to end, don't save it. -+ */ -+ if (bfqq->wr_cur_max_time <= wr_duration) -+ bfqq->bic->wr_time_left = 0; -+ else -+ bfqq->bic->wr_time_left = -+ bfqq->wr_cur_max_time - wr_duration; -+ /* -+ * The bfq_queue is becoming shared or the requests of the -+ * process owning the queue are being redirected to a shared -+ * queue. Stop the weight raising period of the queue, as in -+ * both cases it should not be owned by an interactive or -+ * soft real-time application. -+ */ -+ bfq_bfqq_end_wr(bfqq); -+ } else -+ bfqq->bic->wr_time_left = 0; -+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -+ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); -+ bfqq->bic->cooperations++; -+ bfqq->bic->failed_cooperations = 0; -+} -+ -+static inline void -+bfq_get_bic_reference(struct bfq_queue *bfqq) -+{ -+ /* -+ * If bfqq->bic has a non-NULL value, the bic to which it belongs -+ * is about to begin using a shared bfq_queue. -+ */ -+ if (bfqq->bic) -+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount); -+} -+ -+static void -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -+ (long unsigned)new_bfqq->pid); -+ /* Save weight raising and idle window of the merged queues */ -+ bfq_bfqq_save_state(bfqq); -+ bfq_bfqq_save_state(new_bfqq); -+ if (bfq_bfqq_IO_bound(bfqq)) -+ bfq_mark_bfqq_IO_bound(new_bfqq); -+ bfq_clear_bfqq_IO_bound(bfqq); -+ /* -+ * Grab a reference to the bic, to prevent it from being destroyed -+ * before being possibly touched by a bfq_split_bfqq(). -+ */ -+ bfq_get_bic_reference(bfqq); -+ bfq_get_bic_reference(new_bfqq); -+ /* -+ * Merge queues (that is, let bic redirect its requests to new_bfqq) -+ */ -+ bic_set_bfqq(bic, new_bfqq, 1); -+ bfq_mark_bfqq_coop(new_bfqq); -+ /* -+ * new_bfqq now belongs to at least two bics (it is a shared queue): -+ * set new_bfqq->bic to NULL. bfqq either: -+ * - does not belong to any bic any more, and hence bfqq->bic must -+ * be set to NULL, or -+ * - is a queue whose owning bics have already been redirected to a -+ * different queue, hence the queue is destined to not belong to -+ * any bic soon and bfqq->bic is already NULL (therefore the next -+ * assignment causes no harm). -+ */ -+ new_bfqq->bic = NULL; -+ bfqq->bic = NULL; -+ bfq_put_queue(bfqq); -+} -+ -+static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) -+{ -+ struct bfq_io_cq *bic = bfqq->bic; -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { -+ bic->failed_cooperations++; -+ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations) -+ bic->cooperations = 0; -+ } -+} -+ -+static int bfq_allow_merge(struct request_queue *q, struct request *rq, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq, *new_bfqq; -+ -+ /* -+ * Disallow merge of a sync bio into an async request. -+ */ -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -+ return 0; -+ -+ /* -+ * Lookup the bfqq that this bio will be queued with. Allow -+ * merge only if rq is queued there. -+ * Queue lock is held here. -+ */ -+ bic = bfq_bic_lookup(bfqd, current->io_context); -+ if (bic == NULL) -+ return 0; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ /* -+ * We take advantage of this function to perform an early merge -+ * of the queues of possible cooperating processes. -+ */ -+ if (bfqq != NULL) { -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); -+ if (new_bfqq != NULL) { -+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); -+ /* -+ * If we get here, the bio will be queued in the -+ * shared queue, i.e., new_bfqq, so use new_bfqq -+ * to decide whether bio and rq can be merged. -+ */ -+ bfqq = new_bfqq; -+ } else -+ bfq_bfqq_increase_failed_cooperations(bfqq); -+ } -+ -+ return bfqq == RQ_BFQQ(rq); -+} -+ -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq != NULL) { -+ bfq_mark_bfqq_must_alloc(bfqq); -+ bfq_mark_bfqq_budget_new(bfqq); -+ bfq_clear_bfqq_fifo_expire(bfqq); -+ -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_in_service_queue, cur-budget = %lu", -+ bfqq->entity.budget); -+ } -+ -+ bfqd->in_service_queue = bfqq; -+} -+ -+/* -+ * Get and set a new queue for service. -+ */ -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); -+ -+ __bfq_set_in_service_queue(bfqd, bfqq); -+ return bfqq; -+} -+ - /* - * If enough samples have been computed, return the current max budget - * stored in bfqd, which is dynamically updated according to the -@@ -1237,63 +1545,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) - return rq; - } - --/* -- * Must be called with the queue_lock held. -- */ --static int bfqq_process_refs(struct bfq_queue *bfqq) --{ -- int process_refs, io_refs; -- -- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -- BUG_ON(process_refs < 0); -- return process_refs; --} -- --static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) --{ -- int process_refs, new_process_refs; -- struct bfq_queue *__bfqq; -- -- /* -- * If there are no process references on the new_bfqq, then it is -- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -- * may have dropped their last reference (not just their last process -- * reference). -- */ -- if (!bfqq_process_refs(new_bfqq)) -- return; -- -- /* Avoid a circular list and skip interim queue merges. */ -- while ((__bfqq = new_bfqq->new_bfqq)) { -- if (__bfqq == bfqq) -- return; -- new_bfqq = __bfqq; -- } -- -- process_refs = bfqq_process_refs(bfqq); -- new_process_refs = bfqq_process_refs(new_bfqq); -- /* -- * If the process for the bfqq has gone away, there is no -- * sense in merging the queues. -- */ -- if (process_refs == 0 || new_process_refs == 0) -- return; -- -- /* -- * Merge in the direction of the lesser amount of work. -- */ -- if (new_process_refs >= process_refs) { -- bfqq->new_bfqq = new_bfqq; -- atomic_add(process_refs, &new_bfqq->ref); -- } else { -- new_bfqq->new_bfqq = bfqq; -- atomic_add(new_process_refs, &bfqq->ref); -- } -- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -- new_bfqq->pid); --} -- - static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) - { - struct bfq_entity *entity = &bfqq->entity; -@@ -2011,7 +2262,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) - */ - static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - { -- struct bfq_queue *bfqq, *new_bfqq = NULL; -+ struct bfq_queue *bfqq; - struct request *next_rq; - enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; - -@@ -2021,17 +2272,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); - -- /* -- * If another queue has a request waiting within our mean seek -- * distance, let it run. The expire code will check for close -- * cooperators and put the close queue at the front of the -- * service tree. If possible, merge the expiring queue with the -- * new bfqq. -- */ -- new_bfqq = bfq_close_cooperator(bfqd, bfqq); -- if (new_bfqq != NULL && bfqq->new_bfqq == NULL) -- bfq_setup_merge(bfqq, new_bfqq); -- - if (bfq_may_expire_for_budg_timeout(bfqq) && - !timer_pending(&bfqd->idle_slice_timer) && - !bfq_bfqq_must_idle(bfqq)) -@@ -2070,10 +2310,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); - } -- if (new_bfqq == NULL) -- goto keep_queue; -- else -- goto expire; -+ goto keep_queue; - } - } - -@@ -2082,40 +2319,30 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - * in flight (possibly waiting for a completion) or is idling for a - * new request, then keep it. - */ -- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || -- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) { -+ if (timer_pending(&bfqd->idle_slice_timer) || -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { - bfqq = NULL; - goto keep_queue; -- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { -- /* -- * Expiring the queue because there is a close cooperator, -- * cancel timer. -- */ -- bfq_clear_bfqq_wait_request(bfqq); -- del_timer(&bfqd->idle_slice_timer); - } - - reason = BFQ_BFQQ_NO_MORE_REQUESTS; - expire: - bfq_bfqq_expire(bfqd, bfqq, 0, reason); - new_queue: -- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq); -+ bfqq = bfq_set_in_service_queue(bfqd); - bfq_log(bfqd, "select_queue: new queue %d returned", - bfqq != NULL ? bfqq->pid : 0); - keep_queue: - return bfqq; - } - --static void bfq_update_wr_data(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) -+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { -- if (bfqq->wr_coeff > 1) { /* queue is being boosted */ -- struct bfq_entity *entity = &bfqq->entity; -- -+ struct bfq_entity *entity = &bfqq->entity; -+ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ - bfq_log_bfqq(bfqd, bfqq, - "raising period dur %u/%u msec, old coeff %u, w %d(%d)", -- jiffies_to_msecs(jiffies - -- bfqq->last_wr_start_finish), -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), - jiffies_to_msecs(bfqq->wr_cur_max_time), - bfqq->wr_coeff, - bfqq->entity.weight, bfqq->entity.orig_weight); -@@ -2124,11 +2351,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, - entity->orig_weight * bfqq->wr_coeff); - if (entity->ioprio_changed) - bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); -+ - /* - * If too much time has elapsed from the beginning -- * of this weight-raising, stop it. -+ * of this weight-raising period, or the queue has -+ * exceeded the acceptable number of cooperations, -+ * stop it. - */ -- if (time_is_before_jiffies(bfqq->last_wr_start_finish + -+ if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || -+ time_is_before_jiffies(bfqq->last_wr_start_finish + - bfqq->wr_cur_max_time)) { - bfqq->last_wr_start_finish = jiffies; - bfq_log_bfqq(bfqd, bfqq, -@@ -2136,11 +2367,13 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, - bfqq->last_wr_start_finish, - jiffies_to_msecs(bfqq->wr_cur_max_time)); - bfq_bfqq_end_wr(bfqq); -- __bfq_entity_update_weight_prio( -- bfq_entity_service_tree(entity), -- entity); - } - } -+ /* Update weight both if it must be raised and if it must be lowered */ -+ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) -+ __bfq_entity_update_weight_prio( -+ bfq_entity_service_tree(entity), -+ entity); - } - - /* -@@ -2377,6 +2610,25 @@ static inline void bfq_init_icq(struct io_cq *icq) - struct bfq_io_cq *bic = icq_to_bic(icq); - - bic->ttime.last_end_request = jiffies; -+ /* -+ * A newly created bic indicates that the process has just -+ * started doing I/O, and is probably mapping into memory its -+ * executable and libraries: it definitely needs weight raising. -+ * There is however the possibility that the process performs, -+ * for a while, I/O close to some other process. EQM intercepts -+ * this behavior and may merge the queue corresponding to the -+ * process with some other queue, BEFORE the weight of the queue -+ * is raised. Merged queues are not weight-raised (they are assumed -+ * to belong to processes that benefit only from high throughput). -+ * If the merge is basically the consequence of an accident, then -+ * the queue will be split soon and will get back its old weight. -+ * It is then important to write down somewhere that this queue -+ * does need weight raising, even if it did not make it to get its -+ * weight raised before being merged. To this purpose, we overload -+ * the field raising_time_left and assign 1 to it, to mark the queue -+ * as needing weight raising. -+ */ -+ bic->wr_time_left = 1; - } - - static void bfq_exit_icq(struct io_cq *icq) -@@ -2390,6 +2642,13 @@ static void bfq_exit_icq(struct io_cq *icq) - } - - if (bic->bfqq[BLK_RW_SYNC]) { -+ /* -+ * If the bic is using a shared queue, put the reference -+ * taken on the io_context when the bic started using a -+ * shared bfq_queue. -+ */ -+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC])) -+ put_io_context(icq->ioc); - bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); - bic->bfqq[BLK_RW_SYNC] = NULL; - } -@@ -2678,6 +2937,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) - return; - -+ /* Idle window just restored, statistics are meaningless. */ -+ if (bfq_bfqq_just_split(bfqq)) -+ return; -+ - enable_idle = bfq_bfqq_idle_window(bfqq); - - if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -@@ -2725,6 +2988,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || - !BFQQ_SEEKY(bfqq)) - bfq_update_idle_window(bfqd, bfqq, bic); -+ bfq_clear_bfqq_just_split(bfqq); - - bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -@@ -2785,13 +3049,49 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - static void bfq_insert_request(struct request_queue *q, struct request *rq) - { - struct bfq_data *bfqd = q->elevator->elevator_data; -- struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; - - assert_spin_locked(bfqd->queue->queue_lock); -+ -+ /* -+ * An unplug may trigger a requeue of a request from the device -+ * driver: make sure we are in process context while trying to -+ * merge two bfq_queues. -+ */ -+ if (!in_interrupt()) { -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); -+ if (new_bfqq != NULL) { -+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) -+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1); -+ /* -+ * Release the request's reference to the old bfqq -+ * and make sure one is taken to the shared queue. -+ */ -+ new_bfqq->allocated[rq_data_dir(rq)]++; -+ bfqq->allocated[rq_data_dir(rq)]--; -+ atomic_inc(&new_bfqq->ref); -+ bfq_put_queue(bfqq); -+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) -+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq), -+ bfqq, new_bfqq); -+ rq->elv.priv[1] = new_bfqq; -+ bfqq = new_bfqq; -+ } else -+ bfq_bfqq_increase_failed_cooperations(bfqq); -+ } -+ - bfq_init_prio_data(bfqq, RQ_BIC(rq)); - - bfq_add_request(rq); - -+ /* -+ * Here a newly-created bfq_queue has already started a weight-raising -+ * period: clear raising_time_left to prevent bfq_bfqq_save_state() -+ * from assigning it a full weight-raising period. See the detailed -+ * comments about this field in bfq_init_icq(). -+ */ -+ if (bfqq->bic != NULL) -+ bfqq->bic->wr_time_left = 0; - rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; - list_add_tail(&rq->queuelist, &bfqq->fifo); - -@@ -2956,18 +3256,6 @@ static void bfq_put_request(struct request *rq) - } - } - --static struct bfq_queue * --bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -- struct bfq_queue *bfqq) --{ -- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -- (long unsigned)bfqq->new_bfqq->pid); -- bic_set_bfqq(bic, bfqq->new_bfqq, 1); -- bfq_mark_bfqq_coop(bfqq->new_bfqq); -- bfq_put_queue(bfqq); -- return bic_to_bfqq(bic, 1); --} -- - /* - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this - * was the last process referring to said bfqq. -@@ -2976,6 +3264,9 @@ static struct bfq_queue * - bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) - { - bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); -+ -+ put_io_context(bic->icq.ioc); -+ - if (bfqq_process_refs(bfqq) == 1) { - bfqq->pid = current->pid; - bfq_clear_bfqq_coop(bfqq); -@@ -3004,6 +3295,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - struct bfq_queue *bfqq; - struct bfq_group *bfqg; - unsigned long flags; -+ bool split = false; - - might_sleep_if(gfp_mask & __GFP_WAIT); - -@@ -3022,24 +3314,14 @@ new_queue: - bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); - bic_set_bfqq(bic, bfqq, is_sync); - } else { -- /* -- * If the queue was seeky for too long, break it apart. -- */ -+ /* If the queue was seeky for too long, break it apart. */ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); - bfqq = bfq_split_bfqq(bic, bfqq); -+ split = true; - if (!bfqq) - goto new_queue; - } -- -- /* -- * Check to see if this queue is scheduled to merge with -- * another closely cooperating queue. The merging of queues -- * happens here as it must be done in process context. -- * The reference on new_bfqq was taken in merge_bfqqs. -- */ -- if (bfqq->new_bfqq != NULL) -- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); - } - - bfqq->allocated[rw]++; -@@ -3050,6 +3332,26 @@ new_queue: - rq->elv.priv[0] = bic; - rq->elv.priv[1] = bfqq; - -+ /* -+ * If a bfq_queue has only one process reference, it is owned -+ * by only one bfq_io_cq: we can set the bic field of the -+ * bfq_queue to the address of that structure. Also, if the -+ * queue has just been split, mark a flag so that the -+ * information is available to the other scheduler hooks. -+ */ -+ if (bfqq_process_refs(bfqq) == 1) { -+ bfqq->bic = bic; -+ if (split) { -+ bfq_mark_bfqq_just_split(bfqq); -+ /* -+ * If the queue has just been split from a shared -+ * queue, restore the idle window and the possible -+ * weight raising period. -+ */ -+ bfq_bfqq_resume_state(bfqq, bic); -+ } -+ } -+ - spin_unlock_irqrestore(q->queue_lock, flags); - - return 0; -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index c4831b7..546a254 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -1084,34 +1084,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) - return bfqq; - } - --/* -- * Forced extraction of the given queue. -- */ --static void bfq_get_next_queue_forced(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) --{ -- struct bfq_entity *entity; -- struct bfq_sched_data *sd; -- -- BUG_ON(bfqd->in_service_queue != NULL); -- -- entity = &bfqq->entity; -- /* -- * Bubble up extraction/update from the leaf to the root. -- */ -- for_each_entity(entity) { -- sd = entity->sched_data; -- bfq_update_budget(entity); -- bfq_update_vtime(bfq_entity_service_tree(entity)); -- bfq_active_extract(bfq_entity_service_tree(entity), entity); -- sd->in_service_entity = entity; -- sd->next_in_service = NULL; -- entity->service = 0; -- } -- -- return; --} -- - static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) - { - if (bfqd->in_service_bic != NULL) { -diff --git a/block/bfq.h b/block/bfq.h -index aeca08e..4834b70 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -215,18 +215,21 @@ struct bfq_group; - * idle @bfq_queue with no outstanding requests, then - * the task associated with the queue it is deemed as - * soft real-time (see the comments to the function -- * bfq_bfqq_softrt_next_start()). -+ * bfq_bfqq_softrt_next_start()) - * @last_idle_bklogged: time of the last transition of the @bfq_queue from - * idle to backlogged - * @service_from_backlogged: cumulative service received from the @bfq_queue - * since the last transition from idle to - * backlogged -+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the -+ * queue is shared - * -- * A bfq_queue is a leaf request queue; it can be associated with an io_context -- * or more, if it is async or shared between cooperating processes. @cgroup -- * holds a reference to the cgroup, to be sure that it does not disappear while -- * a bfqq still references it (mostly to avoid races between request issuing and -- * task migration followed by cgroup destruction). -+ * A bfq_queue is a leaf request queue; it can be associated with an -+ * io_context or more, if it is async or shared between cooperating -+ * processes. @cgroup holds a reference to the cgroup, to be sure that it -+ * does not disappear while a bfqq still references it (mostly to avoid -+ * races between request issuing and task migration followed by cgroup -+ * destruction). - * All the fields are protected by the queue lock of the containing bfqd. - */ - struct bfq_queue { -@@ -264,6 +267,7 @@ struct bfq_queue { - unsigned int requests_within_timer; - - pid_t pid; -+ struct bfq_io_cq *bic; - - /* weight-raising fields */ - unsigned long wr_cur_max_time; -@@ -293,12 +297,34 @@ struct bfq_ttime { - * @icq: associated io_cq structure - * @bfqq: array of two process queues, the sync and the async - * @ttime: associated @bfq_ttime struct -+ * @wr_time_left: snapshot of the time left before weight raising ends -+ * for the sync queue associated to this process; this -+ * snapshot is taken to remember this value while the weight -+ * raising is suspended because the queue is merged with a -+ * shared queue, and is used to set @raising_cur_max_time -+ * when the queue is split from the shared queue and its -+ * weight is raised again -+ * @saved_idle_window: same purpose as the previous field for the idle -+ * window -+ * @saved_IO_bound: same purpose as the previous two fields for the I/O -+ * bound classification of a queue -+ * @cooperations: counter of consecutive successful queue merges underwent -+ * by any of the process' @bfq_queues -+ * @failed_cooperations: counter of consecutive failed queue merges of any -+ * of the process' @bfq_queues - */ - struct bfq_io_cq { - struct io_cq icq; /* must be the first member */ - struct bfq_queue *bfqq[2]; - struct bfq_ttime ttime; - int ioprio; -+ -+ unsigned int wr_time_left; -+ unsigned int saved_idle_window; -+ unsigned int saved_IO_bound; -+ -+ unsigned int cooperations; -+ unsigned int failed_cooperations; - }; - - enum bfq_device_speed { -@@ -511,7 +537,7 @@ enum bfqq_state_flags { - BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ - BFQ_BFQQ_FLAG_sync, /* synchronous queue */ - BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ -- BFQ_BFQQ_FLAG_IO_bound, /* -+ BFQ_BFQQ_FLAG_IO_bound, /* - * bfqq has timed-out at least once - * having consumed at most 2/10 of - * its budget -@@ -520,12 +546,13 @@ enum bfqq_state_flags { - * bfqq has proved to be slow and - * seeky until budget timeout - */ -- BFQ_BFQQ_FLAG_softrt_update, /* -+ BFQ_BFQQ_FLAG_softrt_update, /* - * may need softrt-next-start - * update - */ - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ -- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ -+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ - }; - - #define BFQ_BFQQ_FNS(name) \ -@@ -554,6 +581,7 @@ BFQ_BFQQ_FNS(IO_bound); - BFQ_BFQQ_FNS(constantly_seeky); - BFQ_BFQQ_FNS(coop); - BFQ_BFQQ_FNS(split_coop); -+BFQ_BFQQ_FNS(just_split); - BFQ_BFQQ_FNS(softrt_update); - #undef BFQ_BFQQ_FNS - --- -1.9.3 - diff --git a/core/linux-kirkwood-dt/PKGBUILD b/core/linux-kirkwood-dt/PKGBUILD index 37f105d9c..1805f0f3b 100644 --- a/core/linux-kirkwood-dt/PKGBUILD +++ b/core/linux-kirkwood-dt/PKGBUILD @@ -7,7 +7,7 @@ buildarch=2 pkgbase=linux-kirkwood-dt _kernelname=${pkgbase#linux} _desc="Marvell Kirkwood DT" -pkgver=3.15.8 +pkgver=3.16 pkgrel=1 cryptodev_commit=8af0fe8a977f33236d3e00306ac3648063052704 bfqver=v7r5 @@ -23,8 +23,8 @@ source=("ftp://ftp.kernel.org/pub/linux/kernel/v3.x/linux-${pkgver}.tar.xz" 'archlinuxarm.patch' 'mach-types::http://www.arm.linux.org.uk/developer/machines/download.php' 'usb-add-reset-resume-quirk-for-several-webcams.patch' -# "git://git.code.sf.net/p/aufs/aufs3-standalone#branch=aufs3.x-rcN" - "git://git.code.sf.net/p/aufs/aufs3-standalone#branch=aufs${pkgver:0:4}" + "git://git.code.sf.net/p/aufs/aufs3-standalone#branch=aufs3.x-rcN" +# "git://git.code.sf.net/p/aufs/aufs3-standalone#branch=aufs${pkgver:0:4}" "https://github.com/cryptodev-linux/cryptodev-linux/archive/${cryptodev_commit}.tar.gz" "http://algo.ing.unimo.it/people/paolo/disk_sched/patches/${pkgver:0:4}.0-${bfqver}/0001-block-cgroups-kconfig-build-bits-for-BFQ-${bfqver}-${pkgver:0:4}.patch" "http://algo.ing.unimo.it/people/paolo/disk_sched/patches/${pkgver:0:4}.0-${bfqver}/0002-block-introduce-the-BFQ-${bfqver}-I-O-sched-for-${pkgver:0:4}.patch" @@ -32,19 +32,18 @@ source=("ftp://ftp.kernel.org/pub/linux/kernel/v3.x/linux-${pkgver}.tar.xz" 'goflexhome_dtb.patch' 'pogo_e02_dtb.patch') -md5sums=('5a3a136e93159ecfca33a2d6b5c8dae2' - 'd8801ebb3891f5922a3580bfe01d6ef4' +md5sums=('5c569ed649a0c9711879f333e90c5386' + '1d2d6c0d0a70f6229028115625862127' 'e319a8e1a596a39a8951fc6454664b15' 'SKIP' '9b5a265440abf57d2052838f31486a3a' 'SKIP' '32ba4029bb418a2f9a71738eb2d6d3a8' - '7779dfa3f5e0fa9ae8311ba7b25f66fd' - '5c8fb6476f768c9bd55d8198600d87d5' - 'ef4735aaf72fde3b9b6bc5fb7538b16b' - '4b68b14d17c78d7299b8b4d680cf6aab' - '15fd07ea17a926afceb2fdbba5318549') - + '27b45e15c3f9855792808498af21f664' + '812c18775a53e337f1b2c7cd9078bff1' + 'e903ed2c79546d2f0ec41e7e631c62f7' + 'e42c64b6f4b54f382cf3d7e8d82968f1' + '341f8164dad19b6baf05d764a800862a') prepare() { cd "${srcdir}/linux-${pkgver}" diff --git a/core/linux-kirkwood-dt/config b/core/linux-kirkwood-dt/config index 33f607d7e..9ee953445 100644 --- a/core/linux-kirkwood-dt/config +++ b/core/linux-kirkwood-dt/config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/arm 3.15.7-1 Kernel Configuration +# Linux/arm 3.16.0-1 Kernel Configuration # CONFIG_ARM=y CONFIG_ARM_HAS_SG_CHAIN=y @@ -11,8 +11,7 @@ CONFIG_STACKTRACE_SUPPORT=y CONFIG_HAVE_LATENCYTOP_SUPPORT=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_NEED_DMA_MAP_STATE=y @@ -49,6 +48,7 @@ CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y +CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_FHANDLE=y CONFIG_USELIB=y CONFIG_AUDIT=y @@ -149,6 +149,7 @@ CONFIG_ANON_INODES=y CONFIG_HAVE_UID16=y # CONFIG_EXPERT is not set CONFIG_UID16=y +# CONFIG_SGETMASK_SYSCALL is not set CONFIG_SYSFS_SYSCALL=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y @@ -322,7 +323,6 @@ CONFIG_ARCH_MULTIPLATFORM=y # CONFIG_ARCH_S5P64X0 is not set # CONFIG_ARCH_S5PC100 is not set # CONFIG_ARCH_S5PV210 is not set -# CONFIG_ARCH_EXYNOS is not set # CONFIG_ARCH_DAVINCI is not set # CONFIG_ARCH_OMAP1 is not set @@ -341,13 +341,7 @@ CONFIG_ARCH_MULTI_V4_V5=y # CONFIG_ARCH_MULTI_V7 is not set CONFIG_ARCH_MULTI_CPU_AUTO=y CONFIG_ARCH_MVEBU=y - -# -# Marvell EBU SoC variants -# CONFIG_MACH_KIRKWOOD=y -CONFIG_MACH_T5325=y -# CONFIG_ARCH_BCM is not set # CONFIG_ARCH_MXC is not set # CONFIG_ARCH_MXS is not set # CONFIG_ARCH_NOMADIK is not set @@ -388,7 +382,6 @@ CONFIG_CACHE_FEROCEON_L2=y # CONFIG_CACHE_L2X0 is not set CONFIG_ARM_L1_CACHE_SHIFT=5 CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y -CONFIG_ARM_NR_BANKS=8 CONFIG_MULTI_IRQ_HANDLER=y # @@ -408,6 +401,7 @@ CONFIG_PCI_SYSCALL=y # PCI host controller drivers # # CONFIG_PCI_MVEBU is not set +# CONFIG_PCI_HOST_GENERIC is not set # CONFIG_PCIEPORTBUS is not set # CONFIG_PCCARD is not set @@ -454,7 +448,6 @@ CONFIG_ZONE_DMA_FLAG=0 CONFIG_NEED_BOUNCE_POOL=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 -CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_NEED_PER_CPU_KM=y # CONFIG_CLEANCACHE is not set # CONFIG_FRONTSWAP is not set @@ -507,6 +500,7 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_ONDEMAND=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m +# CONFIG_GENERIC_CPUFREQ_CPU0 is not set # # ARM CPU frequency scaling drivers @@ -524,6 +518,7 @@ CONFIG_CPU_IDLE_GOV_MENU=y # # ARM CPU Idle Drivers # +# CONFIG_ARM_ARMADA_370_XP_CPUIDLE is not set CONFIG_ARM_KIRKWOOD_CPUIDLE=y # CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set @@ -552,6 +547,7 @@ CONFIG_COREDUMP=y # CONFIG_SUSPEND=y CONFIG_SUSPEND_FREEZER=y +# CONFIG_HIBERNATION is not set CONFIG_PM_SLEEP=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set @@ -567,6 +563,7 @@ CONFIG_PM_CLK=y CONFIG_CPU_PM=y CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARM_CPU_SUSPEND=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_NET=y # @@ -884,6 +881,7 @@ CONFIG_NF_NAT_IPV6=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m +# CONFIG_NFT_BRIDGE_META is not set CONFIG_BRIDGE_NF_EBTABLES=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_T_FILTER=m @@ -1185,6 +1183,7 @@ CONFIG_HAVE_BPF_JIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER=y CONFIG_UEVENT_HELPER_PATH="" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -1198,6 +1197,7 @@ CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set # CONFIG_GENERIC_CPU_DEVICES is not set +CONFIG_SOC_BUS=y CONFIG_REGMAP=y CONFIG_REGMAP_I2C=m CONFIG_REGMAP_SPI=m @@ -1206,8 +1206,9 @@ CONFIG_DMA_SHARED_BUFFER=y # # Bus devices # +# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_MVEBU_MBUS=y -# CONFIG_ARM_CCI is not set +# CONFIG_VEXPRESS_CONFIG is not set # CONFIG_CONNECTOR is not set CONFIG_MTD=y # CONFIG_MTD_TESTS is not set @@ -1278,7 +1279,6 @@ CONFIG_MTD_PHYSMAP=y # # CONFIG_MTD_PMC551 is not set # CONFIG_MTD_DATAFLASH is not set -CONFIG_MTD_M25P80=y # CONFIG_MTD_SST25L is not set # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_PHRAM is not set @@ -1289,7 +1289,6 @@ CONFIG_MTD_M25P80=y # Disk-On-Chip Device Drivers # # CONFIG_MTD_DOCG3 is not set -# CONFIG_MTD_ST_SPI_FSM is not set CONFIG_MTD_NAND_ECC=y # CONFIG_MTD_NAND_ECC_SMC is not set CONFIG_MTD_NAND=y @@ -1309,9 +1308,11 @@ CONFIG_MTD_NAND_ORION=y # CONFIG_MTD_ONENAND is not set # -# LPDDR flash memory drivers +# LPDDR & LPDDR2 PCM memory drivers # # CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_LPDDR2_NVM is not set +# CONFIG_MTD_SPI_NOR is not set CONFIG_MTD_UBI=y CONFIG_MTD_UBI_WL_THRESHOLD=4096 CONFIG_MTD_UBI_BEB_LIMIT=20 @@ -1336,8 +1337,8 @@ CONFIG_OF_PCI=y CONFIG_OF_PCI_IRQ=y CONFIG_OF_MTD=y CONFIG_OF_RESERVED_MEM=y -# CONFIG_PARPORT is not set CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y +# CONFIG_PARPORT is not set CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set # CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set @@ -1373,14 +1374,12 @@ CONFIG_ATA_OVER_ETH=m CONFIG_AD525X_DPOT=m # CONFIG_AD525X_DPOT_I2C is not set # CONFIG_AD525X_DPOT_SPI is not set -# CONFIG_ATMEL_PWM is not set # CONFIG_DUMMY_IRQ is not set CONFIG_PHANTOM=m CONFIG_SGI_IOC4=m CONFIG_TIFM_CORE=m CONFIG_TIFM_7XX1=m CONFIG_ICS932S401=m -# CONFIG_ATMEL_SSC is not set CONFIG_ENCLOSURE_SERVICES=m # CONFIG_HP_ILO is not set # CONFIG_APDS9802ALS is not set @@ -1395,7 +1394,6 @@ CONFIG_DS1682=m CONFIG_TI_DAC7512=m # CONFIG_BMP085_I2C is not set # CONFIG_BMP085_SPI is not set -# CONFIG_PCH_PHUB is not set # CONFIG_USB_SWITCH_FSA9480 is not set # CONFIG_LATTICE_ECP3_CONFIG is not set # CONFIG_SRAM is not set @@ -1535,6 +1533,7 @@ CONFIG_SATA_PMP=y # # CONFIG_SATA_AHCI is not set # CONFIG_SATA_AHCI_PLATFORM is not set +# CONFIG_AHCI_MVEBU is not set # CONFIG_SATA_INIC162X is not set # CONFIG_SATA_ACARD_AHCI is not set # CONFIG_SATA_SIL24 is not set @@ -1709,6 +1708,7 @@ CONFIG_NET_VENDOR_ALTEON=y CONFIG_NET_VENDOR_AMD=y # CONFIG_AMD8111_ETH is not set # CONFIG_PCNET32 is not set +# CONFIG_AMD_XGBE is not set CONFIG_NET_VENDOR_ARC=y # CONFIG_ARC_EMAC is not set CONFIG_NET_VENDOR_ATHEROS=y @@ -1725,6 +1725,7 @@ CONFIG_NET_VENDOR_BROADCOM=y # CONFIG_CNIC is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2X is not set +# CONFIG_SYSTEMPORT is not set CONFIG_NET_VENDOR_BROCADE=y # CONFIG_BNA is not set # CONFIG_NET_CALXEDA_XGMAC is not set @@ -1752,6 +1753,8 @@ CONFIG_NET_VENDOR_EXAR=y CONFIG_NET_VENDOR_FARADAY=y # CONFIG_FTMAC100 is not set # CONFIG_FTGMAC100 is not set +CONFIG_NET_VENDOR_HISILICON=y +# CONFIG_HIX5HD2_GMAC is not set CONFIG_NET_VENDOR_HP=y # CONFIG_HP100 is not set CONFIG_NET_VENDOR_INTEL=y @@ -1854,6 +1857,7 @@ CONFIG_PHYLIB=y # CONFIG_AT803X_PHY=m # CONFIG_AMD_PHY is not set +# CONFIG_AMD_XGBE_PHY is not set CONFIG_MARVELL_PHY=y # CONFIG_DAVICOM_PHY is not set # CONFIG_QSEMI_PHY is not set @@ -1984,13 +1988,19 @@ CONFIG_ATH10K_PCI=m CONFIG_WCN36XX=m # CONFIG_WCN36XX_DEBUGFS is not set CONFIG_B43=m +CONFIG_B43_BCMA=y CONFIG_B43_SSB=y +CONFIG_B43_BUSES_BCMA_AND_SSB=y +# CONFIG_B43_BUSES_BCMA is not set +# CONFIG_B43_BUSES_SSB is not set CONFIG_B43_PCI_AUTOSELECT=y CONFIG_B43_PCICORE_AUTOSELECT=y # CONFIG_B43_SDIO is not set +CONFIG_B43_BCMA_PIO=y CONFIG_B43_PIO=y # CONFIG_B43_PHY_N is not set CONFIG_B43_PHY_LP=y +CONFIG_B43_PHY_HT=y CONFIG_B43_LEDS=y # CONFIG_B43_DEBUG is not set CONFIG_B43LEGACY=m @@ -2025,6 +2035,7 @@ CONFIG_IPW2200=m CONFIG_LIBIPW=m # CONFIG_LIBIPW_DEBUG is not set CONFIG_IWLWIFI=m +CONFIG_IWLWIFI_LEDS=y CONFIG_IWLDVM=m # CONFIG_IWLMVM is not set CONFIG_IWLWIFI_OPMODE_MODULAR=y @@ -2165,6 +2176,7 @@ CONFIG_KEYBOARD_TCA6416=m # CONFIG_KEYBOARD_SAMSUNG is not set # CONFIG_KEYBOARD_STOWAWAY is not set # CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_OMAP4 is not set # CONFIG_KEYBOARD_XTKBD is not set CONFIG_INPUT_MOUSE=y CONFIG_MOUSE_PS2=m @@ -2193,6 +2205,7 @@ CONFIG_INPUT_TABLET=y # CONFIG_TABLET_USB_KBTAB is not set CONFIG_TABLET_USB_WACOM=m CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_OF_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ADS7846=m CONFIG_TOUCHSCREEN_AD7877=m CONFIG_TOUCHSCREEN_AD7879=m @@ -2249,7 +2262,6 @@ CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y # CONFIG_TOUCHSCREEN_TSC_SERIO is not set # CONFIG_TOUCHSCREEN_TSC2005 is not set CONFIG_TOUCHSCREEN_TSC2007=m -# CONFIG_TOUCHSCREEN_W90X900 is not set # CONFIG_TOUCHSCREEN_ST1232 is not set # CONFIG_TOUCHSCREEN_SUR40 is not set CONFIG_TOUCHSCREEN_TPS6507X=m @@ -2293,6 +2305,7 @@ CONFIG_LEGACY_PTY_COUNT=16 # # Serial drivers # +CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y CONFIG_SERIAL_8250_CONSOLE=y @@ -2307,6 +2320,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=2 # # Non-8250 serial port support # +# CONFIG_SERIAL_EARLYCON_ARM_SEMIHOST is not set # CONFIG_SERIAL_MAX3100 is not set # CONFIG_SERIAL_MAX310X is not set # CONFIG_SERIAL_MFD_HSU is not set @@ -2315,10 +2329,10 @@ CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_SERIAL_SCCNXP is not set +# CONFIG_SERIAL_SC16IS7XX is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set # CONFIG_SERIAL_IFX6X60 is not set -# CONFIG_SERIAL_PCH_UART is not set # CONFIG_SERIAL_XILINX_PS_UART is not set # CONFIG_SERIAL_ARC is not set # CONFIG_SERIAL_RP2 is not set @@ -2377,12 +2391,12 @@ CONFIG_I2C_ALGOBIT=m # CONFIG_I2C_CBUS_GPIO is not set # CONFIG_I2C_DESIGNWARE_PLATFORM is not set # CONFIG_I2C_DESIGNWARE_PCI is not set -# CONFIG_I2C_EG20T is not set # CONFIG_I2C_GPIO is not set CONFIG_I2C_MV64XXX=m # CONFIG_I2C_OCORES is not set # CONFIG_I2C_PCA_PLATFORM is not set # CONFIG_I2C_PXA_PCI is not set +# CONFIG_I2C_RK3X is not set # CONFIG_I2C_SIMTEC is not set # CONFIG_I2C_XILINX is not set @@ -2411,6 +2425,7 @@ CONFIG_SPI_MASTER=y # # CONFIG_SPI_ALTERA is not set # CONFIG_SPI_BITBANG is not set +# CONFIG_SPI_CADENCE is not set # CONFIG_SPI_GPIO is not set # CONFIG_SPI_FSL_SPI is not set # CONFIG_SPI_OC_TINY is not set @@ -2418,7 +2433,6 @@ CONFIG_SPI_ORION=y # CONFIG_SPI_PXA2XX is not set # CONFIG_SPI_PXA2XX_PCI is not set # CONFIG_SPI_SC18IS602 is not set -# CONFIG_SPI_TOPCLIFF_PCH is not set # CONFIG_SPI_XCOMM is not set # CONFIG_SPI_XILINX is not set # CONFIG_SPI_DESIGNWARE is not set @@ -2465,7 +2479,8 @@ CONFIG_PINMUX=y CONFIG_PINCONF=y # CONFIG_DEBUG_PINCTRL is not set # CONFIG_PINCTRL_BCM281XX is not set -# CONFIG_PINCTRL_MSM8X74 is not set +# CONFIG_PINCTRL_APQ8064 is not set +# CONFIG_PINCTRL_IPQ8064 is not set # CONFIG_PINCTRL_SINGLE is not set CONFIG_PINCTRL_MVEBU=y CONFIG_PINCTRL_KIRKWOOD=y @@ -2487,7 +2502,6 @@ CONFIG_GPIO_GENERIC=y # CONFIG_GPIO_EM is not set # CONFIG_GPIO_ZEVIO is not set CONFIG_GPIO_MVEBU=y -# CONFIG_GPIO_RCAR is not set # CONFIG_GPIO_SCH311X is not set # CONFIG_GPIO_VX855 is not set # CONFIG_GPIO_GRGPIO is not set @@ -2666,11 +2680,13 @@ CONFIG_SENSORS_LM90=m # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_PC87427 is not set # CONFIG_SENSORS_NTC_THERMISTOR is not set +# CONFIG_SENSORS_NCT6683 is not set # CONFIG_SENSORS_NCT6775 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_PMBUS is not set # CONFIG_SENSORS_SHT15 is not set # CONFIG_SENSORS_SHT21 is not set +# CONFIG_SENSORS_SHTC1 is not set # CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_DME1737 is not set # CONFIG_SENSORS_EMC1403 is not set @@ -2772,7 +2788,14 @@ CONFIG_BCMA_POSSIBLE=y # # Broadcom specific AMBA # -# CONFIG_BCMA is not set +CONFIG_BCMA=m +CONFIG_BCMA_BLOCKIO=y +CONFIG_BCMA_HOST_PCI_POSSIBLE=y +CONFIG_BCMA_HOST_PCI=y +# CONFIG_BCMA_HOST_SOC is not set +# CONFIG_BCMA_DRIVER_GMAC_CMN is not set +# CONFIG_BCMA_DRIVER_GPIO is not set +# CONFIG_BCMA_DEBUG is not set # # Multifunction device drivers @@ -2783,6 +2806,7 @@ CONFIG_MFD_CORE=m # CONFIG_PMIC_ADP5520 is not set # CONFIG_MFD_AAT2870_CORE is not set # CONFIG_MFD_BCM590XX is not set +# CONFIG_MFD_AXP20X is not set # CONFIG_MFD_CROS_EC is not set # CONFIG_MFD_ASIC3 is not set # CONFIG_PMIC_DA903X is not set @@ -2860,7 +2884,6 @@ CONFIG_MFD_WL1273_CORE=m # CONFIG_MFD_WM831X_SPI is not set # CONFIG_MFD_WM8350_I2C is not set # CONFIG_MFD_WM8994 is not set -# CONFIG_VEXPRESS_CONFIG is not set CONFIG_REGULATOR=y # CONFIG_REGULATOR_DEBUG is not set CONFIG_REGULATOR_FIXED_VOLTAGE=y @@ -2876,6 +2899,7 @@ CONFIG_REGULATOR_GPIO=y # CONFIG_REGULATOR_LP3972 is not set # CONFIG_REGULATOR_LP872X is not set # CONFIG_REGULATOR_LP8755 is not set +# CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_MAX1586 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -2958,6 +2982,7 @@ CONFIG_USB_GL860=m CONFIG_USB_GSPCA_BENQ=m CONFIG_USB_GSPCA_CONEX=m CONFIG_USB_GSPCA_CPIA1=m +# CONFIG_USB_GSPCA_DTCS033 is not set CONFIG_USB_GSPCA_ETOMS=m CONFIG_USB_GSPCA_FINEPIX=m CONFIG_USB_GSPCA_JEILINJ=m @@ -3247,6 +3272,7 @@ CONFIG_MEDIA_TUNER_E4000=m CONFIG_MEDIA_TUNER_FC2580=m CONFIG_MEDIA_TUNER_M88TS2022=m CONFIG_MEDIA_TUNER_TUA9001=m +CONFIG_MEDIA_TUNER_SI2157=m CONFIG_MEDIA_TUNER_IT913X=m CONFIG_MEDIA_TUNER_R820T=m @@ -3306,6 +3332,7 @@ CONFIG_DVB_EC100=m CONFIG_DVB_CXD2820R=m CONFIG_DVB_RTL2830=m CONFIG_DVB_RTL2832=m +CONFIG_DVB_SI2168=m # # DVB-C (cable) frontends @@ -3422,16 +3449,13 @@ CONFIG_FB_MODE_HELPERS=y # CONFIG_FB_ARK is not set # CONFIG_FB_PM3 is not set # CONFIG_FB_CARMINE is not set -# CONFIG_FB_TMIO is not set # CONFIG_FB_SMSCUFX is not set CONFIG_FB_UDL=m -# CONFIG_FB_GOLDFISH is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set # CONFIG_FB_MB862XX is not set # CONFIG_FB_BROADSHEET is not set # CONFIG_FB_AUO_K190X is not set -# CONFIG_EXYNOS_VIDEO is not set # CONFIG_FB_SSD1307 is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=m @@ -3452,9 +3476,7 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=m CONFIG_BACKLIGHT_GENERIC=m # CONFIG_BACKLIGHT_ADP8860 is not set # CONFIG_BACKLIGHT_ADP8870 is not set -# CONFIG_BACKLIGHT_LM3630A is not set # CONFIG_BACKLIGHT_LM3639 is not set -# CONFIG_BACKLIGHT_LP855X is not set # CONFIG_BACKLIGHT_GPIO is not set # CONFIG_BACKLIGHT_LV5207LP is not set # CONFIG_BACKLIGHT_BD6107 is not set @@ -3581,12 +3603,25 @@ CONFIG_SND_USB_CAIAQ=m # CONFIG_SND_USB_CAIAQ_INPUT is not set CONFIG_SND_USB_6FIRE=m CONFIG_SND_USB_HIFACE=m +# CONFIG_SND_BCD2000 is not set CONFIG_SND_SOC=m # CONFIG_SND_ATMEL_SOC is not set # CONFIG_SND_DESIGNWARE_I2S is not set + +# +# SoC Audio for Freescale CPUs +# + +# +# Common SoC Audio options for Freescale CPUs: +# +# CONFIG_SND_SOC_FSL_SAI is not set +# CONFIG_SND_SOC_FSL_SSI is not set +# CONFIG_SND_SOC_FSL_SPDIF is not set +# CONFIG_SND_SOC_FSL_ESAI is not set +# CONFIG_SND_SOC_IMX_AUDMUX is not set CONFIG_SND_KIRKWOOD_SOC=m CONFIG_SND_KIRKWOOD_SOC_ARMADA370_DB=m -CONFIG_SND_KIRKWOOD_SOC_T5325=m CONFIG_SND_SOC_I2C_AND_SPI=m # @@ -3600,6 +3635,7 @@ CONFIG_SND_SOC_I2C_AND_SPI=m CONFIG_SND_SOC_ALC5623=m CONFIG_SND_SOC_CS42L51=m # CONFIG_SND_SOC_CS42L52 is not set +# CONFIG_SND_SOC_CS42L56 is not set # CONFIG_SND_SOC_CS42L73 is not set # CONFIG_SND_SOC_CS4270 is not set # CONFIG_SND_SOC_CS4271 is not set @@ -3612,6 +3648,7 @@ CONFIG_SND_SOC_CS42L51=m # CONFIG_SND_SOC_SGTL5000 is not set # CONFIG_SND_SOC_SIRF_AUDIO_CODEC is not set CONFIG_SND_SOC_SPDIF=m +# CONFIG_SND_SOC_STA350 is not set # CONFIG_SND_SOC_TAS5086 is not set # CONFIG_SND_SOC_TLV320AIC3X is not set # CONFIG_SND_SOC_WM8510 is not set @@ -3705,6 +3742,7 @@ CONFIG_HID_SONY=m CONFIG_HID_SPEEDLINK=m # CONFIG_HID_STEELSERIES is not set CONFIG_HID_SUNPLUS=m +# CONFIG_HID_RMI is not set CONFIG_HID_GREENASIA=m CONFIG_GREENASIA_FF=y CONFIG_HID_SMARTJOYPLUS=m @@ -3738,7 +3776,6 @@ CONFIG_USB_SUPPORT=y CONFIG_USB_COMMON=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set # CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set # @@ -3747,6 +3784,7 @@ CONFIG_USB=y CONFIG_USB_DEFAULT_PERSIST=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_OTG is not set +# CONFIG_USB_OTG_FSM is not set # CONFIG_USB_MON is not set CONFIG_USB_WUSB_CBAF=m # CONFIG_USB_WUSB_CBAF_DEBUG is not set @@ -3756,6 +3794,7 @@ CONFIG_USB_WUSB_CBAF=m # # CONFIG_USB_C67X00_HCD is not set CONFIG_USB_XHCI_HCD=y +# CONFIG_USB_XHCI_MVEBU is not set CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y CONFIG_USB_EHCI_TT_NEWSCHED=y @@ -3768,10 +3807,12 @@ CONFIG_USB_EHCI_HCD_ORION=y # CONFIG_USB_ISP1362_HCD is not set # CONFIG_USB_FUSBH200_HCD is not set # CONFIG_USB_FOTG210_HCD is not set +# CONFIG_USB_MAX3421_HCD is not set # CONFIG_USB_OHCI_HCD is not set # CONFIG_USB_UHCI_HCD is not set # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_HCD_BCMA is not set # CONFIG_USB_HCD_SSB is not set # CONFIG_USB_HCD_TEST_MODE is not set @@ -3919,14 +3960,12 @@ CONFIG_USB_HSIC_USB3503=m # USB Physical Layer drivers # # CONFIG_USB_PHY is not set -# CONFIG_USB_OTG_FSM is not set # CONFIG_NOP_USB_XCEIV is not set # CONFIG_AM335X_PHY_USB is not set # CONFIG_SAMSUNG_USB2PHY is not set # CONFIG_SAMSUNG_USB3PHY is not set # CONFIG_USB_GPIO_VBUS is not set # CONFIG_USB_ISP1301 is not set -# CONFIG_USB_RCAR_PHY is not set # CONFIG_USB_ULPI is not set # CONFIG_USB_GADGET is not set # CONFIG_UWB is not set @@ -3960,6 +3999,7 @@ CONFIG_MMC_MVSDIO=y # CONFIG_MMC_DW is not set # CONFIG_MMC_VUB300 is not set # CONFIG_MMC_USHC is not set +# CONFIG_MMC_USDHI6ROL0 is not set # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y @@ -3978,7 +4018,6 @@ CONFIG_LEDS_GPIO=y # CONFIG_LEDS_LP8501 is not set # CONFIG_LEDS_PCA955X is not set # CONFIG_LEDS_PCA963X is not set -# CONFIG_LEDS_PCA9685 is not set # CONFIG_LEDS_DAC124S085 is not set # CONFIG_LEDS_REGULATOR is not set # CONFIG_LEDS_BD2802 is not set @@ -3987,6 +4026,10 @@ CONFIG_LEDS_NS2=y CONFIG_LEDS_NETXBIG=y # CONFIG_LEDS_TCA6507 is not set # CONFIG_LEDS_LM355x is not set + +# +# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) +# # CONFIG_LEDS_BLINKM is not set # @@ -4059,6 +4102,7 @@ CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_M41T93 is not set # CONFIG_RTC_DRV_M41T94 is not set # CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1343 is not set # CONFIG_RTC_DRV_DS1347 is not set # CONFIG_RTC_DRV_DS1390 is not set # CONFIG_RTC_DRV_MAX6902 is not set @@ -4067,6 +4111,7 @@ CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_DS3234 is not set # CONFIG_RTC_DRV_PCF2123 is not set # CONFIG_RTC_DRV_RX4581 is not set +# CONFIG_RTC_DRV_MCP795 is not set # # Platform RTC drivers @@ -4092,6 +4137,7 @@ CONFIG_RTC_DRV_S35390A=m CONFIG_RTC_DRV_MV=y # CONFIG_RTC_DRV_SNVS is not set # CONFIG_RTC_DRV_MOXART is not set +# CONFIG_RTC_DRV_XGENE is not set # # HID Sensor RTC drivers @@ -4146,12 +4192,11 @@ CONFIG_R8712U=y CONFIG_R8188EU=m CONFIG_88EU_AP_MODE=y CONFIG_88EU_P2P=y +# CONFIG_R8192EE is not set CONFIG_R8723AU=m CONFIG_8723AU_AP_MODE=y -CONFIG_8723AU_P2P=y CONFIG_8723AU_BT_COEXIST=y # CONFIG_R8821AE is not set -# CONFIG_RTS5139 is not set # CONFIG_RTS5208 is not set # CONFIG_TRANZPORT is not set # CONFIG_IDE_PHISON is not set @@ -4181,7 +4226,6 @@ CONFIG_USB_SERIAL_QUATECH2=m # # CONFIG_AD7291 is not set # CONFIG_AD7606 is not set -# CONFIG_AD799X is not set # CONFIG_AD7780 is not set # CONFIG_AD7816 is not set # CONFIG_AD7192 is not set @@ -4285,6 +4329,10 @@ CONFIG_NET_VENDOR_SILICOM=y # CONFIG_DGNC is not set # CONFIG_DGAP is not set # CONFIG_GS_FPGABOOT is not set + +# +# SOC (System On Chip) specific Drivers +# CONFIG_CLKDEV_LOOKUP=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y @@ -4308,6 +4356,7 @@ CONFIG_ORION_TIMER=y # CONFIG_SH_TIMER_MTU2 is not set # CONFIG_SH_TIMER_TMU is not set # CONFIG_EM_TIMER_STI is not set +# CONFIG_CLKSRC_VERSATILE is not set # CONFIG_MAILBOX is not set CONFIG_IOMMU_SUPPORT=y CONFIG_OF_IOMMU=y @@ -4334,6 +4383,7 @@ CONFIG_IIO=m # CONFIG_HID_SENSOR_ACCEL_3D is not set # CONFIG_IIO_ST_ACCEL_3AXIS is not set # CONFIG_KXSD9 is not set +# CONFIG_MMA8452 is not set # # Analog to digital converters @@ -4345,7 +4395,7 @@ CONFIG_IIO=m # CONFIG_AD7793 is not set # CONFIG_AD7887 is not set # CONFIG_AD7923 is not set -# CONFIG_EXYNOS_ADC is not set +# CONFIG_AD799X is not set # CONFIG_MAX1363 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set @@ -4450,17 +4500,25 @@ CONFIG_CM32181=m # Inclinometer sensors # # CONFIG_HID_SENSOR_INCLINOMETER_3D is not set +# CONFIG_HID_SENSOR_DEVICE_ROTATION is not set # # Pressure sensors # # CONFIG_HID_SENSOR_PRESS is not set +# CONFIG_MPL115 is not set # CONFIG_MPL3115 is not set # CONFIG_IIO_ST_PRESS is not set +# +# Lightning sensors +# +# CONFIG_AS3935 is not set + # # Temperature sensors # +# CONFIG_MLX90614 is not set # CONFIG_TMP006 is not set # CONFIG_VME_BUS is not set # CONFIG_PWM is not set @@ -4842,6 +4900,7 @@ CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_PI_LIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_DEBUG_CREDENTIALS is not set @@ -4891,6 +4950,7 @@ CONFIG_BRANCH_PROFILE_NONE=y CONFIG_KPROBE_EVENT=y # CONFIG_UPROBE_EVENT is not set CONFIG_PROBE_EVENTS=y +# CONFIG_TRACEPOINT_BENCHMARK is not set # CONFIG_RING_BUFFER_BENCHMARK is not set # CONFIG_RING_BUFFER_STARTUP_TEST is not set @@ -4911,6 +4971,7 @@ CONFIG_PROBE_EVENTS=y # CONFIG_DMA_API_DEBUG is not set # CONFIG_TEST_MODULE is not set # CONFIG_TEST_USER_COPY is not set +# CONFIG_TEST_BPF is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -5161,6 +5222,7 @@ CONFIG_LRU_CACHE=m CONFIG_AVERAGE=y # CONFIG_CORDIC is not set # CONFIG_DDR is not set +CONFIG_LIBFDT=y CONFIG_OID_REGISTRY=m CONFIG_FONT_SUPPORT=m # CONFIG_FONTS is not set diff --git a/core/linux-kirkwood-dt/goflexhome_dtb.patch b/core/linux-kirkwood-dt/goflexhome_dtb.patch index c81d4bb26..38a2e33f4 100644 --- a/core/linux-kirkwood-dt/goflexhome_dtb.patch +++ b/core/linux-kirkwood-dt/goflexhome_dtb.patch @@ -1,7 +1,7 @@ -diff -ruN a/arch/arm/boot/dts/kirkwood-goflexhome.dts b/arch/arm/boot/dts/kirkwood-goflexhome.dts +diff -uprN a/arch/arm/boot/dts/kirkwood-goflexhome.dts b/arch/arm/boot/dts/kirkwood-goflexhome.dts --- a/arch/arm/boot/dts/kirkwood-goflexhome.dts 1969-12-31 17:00:00.000000000 -0700 -+++ b/arch/arm/boot/dts/kirkwood-goflexhome.dts 2014-06-15 02:32:04.560094799 -0600 -@@ -0,0 +1,126 @@ ++++ b/arch/arm/boot/dts/kirkwood-goflexhome.dts 2014-08-10 02:34:52.310545726 -0600 +@@ -0,0 +1,127 @@ +/dts-v1/; + +#include "kirkwood.dtsi" @@ -18,10 +18,11 @@ diff -ruN a/arch/arm/boot/dts/kirkwood-goflexhome.dts b/arch/arm/boot/dts/kirkwo + + chosen { + bootargs = "console=ttyS0,115200n8 earlyprintk root=/dev/sda1 rootdelay=10"; ++ stdout-path = &uart0; + }; + + ocp@f1000000 { -+ pinctrl: pinctrl@10000 { ++ pinctrl: pin-controller@10000 { + pmx_usb_power_enable: pmx-usb-power-enable { + marvell,pins = "mpp29"; + marvell,function = "gpio"; @@ -128,10 +129,10 @@ diff -ruN a/arch/arm/boot/dts/kirkwood-goflexhome.dts b/arch/arm/boot/dts/kirkwo + phy-handle = <ðphy0>; + }; +}; -diff -ruN a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile ---- a/arch/arm/boot/dts/Makefile 2014-06-08 12:19:54.000000000 -0600 -+++ b/arch/arm/boot/dts/Makefile 2014-06-15 09:03:45.173909941 -0600 -@@ -105,6 +105,7 @@ +diff -uprN a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile +--- a/arch/arm/boot/dts/Makefile 2014-08-03 16:25:02.000000000 -0600 ++++ b/arch/arm/boot/dts/Makefile 2014-08-10 03:14:10.157372577 -0600 +@@ -111,6 +111,7 @@ kirkwood := \ kirkwood-ds411.dtb \ kirkwood-ds411j.dtb \ kirkwood-ds411slim.dtb \ diff --git a/core/linux-kirkwood-dt/linux-kirkwood-dt.install b/core/linux-kirkwood-dt/linux-kirkwood-dt.install index f38708e7f..24bb0e519 100644 --- a/core/linux-kirkwood-dt/linux-kirkwood-dt.install +++ b/core/linux-kirkwood-dt/linux-kirkwood-dt.install @@ -2,7 +2,7 @@ # arg 2: the old package version KERNEL_NAME=-kirkwood-dt -KERNEL_VERSION=3.15.0-1-ARCH +KERNEL_VERSION=3.16.0-1-ARCH post_install () { # updating module dependencies diff --git a/core/linux-kirkwood-dt/pogo_e02_dtb.patch b/core/linux-kirkwood-dt/pogo_e02_dtb.patch index c9678e0e4..ceaa99d67 100644 --- a/core/linux-kirkwood-dt/pogo_e02_dtb.patch +++ b/core/linux-kirkwood-dt/pogo_e02_dtb.patch @@ -1,7 +1,7 @@ -diff -ruN a/arch/arm/boot/dts/kirkwood-pogo_e02.dts b/arch/arm/boot/dts/kirkwood-pogo_e02.dts +diff -uprN a/arch/arm/boot/dts/kirkwood-pogo_e02.dts b/arch/arm/boot/dts/kirkwood-pogo_e02.dts --- a/arch/arm/boot/dts/kirkwood-pogo_e02.dts 1969-12-31 17:00:00.000000000 -0700 -+++ b/arch/arm/boot/dts/kirkwood-pogo_e02.dts 2014-06-15 02:40:53.995246289 -0600 -@@ -0,0 +1,116 @@ ++++ b/arch/arm/boot/dts/kirkwood-pogo_e02.dts 2014-08-10 03:59:10.616338535 -0600 +@@ -0,0 +1,117 @@ +/dts-v1/; + +#include "kirkwood.dtsi" @@ -18,10 +18,11 @@ diff -ruN a/arch/arm/boot/dts/kirkwood-pogo_e02.dts b/arch/arm/boot/dts/kirkwood + + chosen { + bootargs = "console=ttyS0,115200n8 earlyprintk root=/dev/sda1 rootdelay=10"; ++ stdout-path = &uart0; + }; + + ocp@f1000000 { -+ pinctrl: pinctrl@10000 { ++ pinctrl: pin-controller@10000 { + pmx_usb_power_enable: pmx-usb-power-enable { + marvell,pins = "mpp29"; + marvell,function = "gpio"; @@ -118,13 +119,13 @@ diff -ruN a/arch/arm/boot/dts/kirkwood-pogo_e02.dts b/arch/arm/boot/dts/kirkwood + phy-handle = <ðphy0>; + }; +}; -diff -ruN a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile ---- a/arch/arm/boot/dts/Makefile 2014-06-15 09:13:34.599049935 -0600 -+++ b/arch/arm/boot/dts/Makefile 2014-06-15 09:13:07.862424840 -0600 -@@ -128,6 +128,7 @@ - kirkwood-nsa310a.dtb \ - kirkwood-openblocks_a6.dtb \ - kirkwood-openblocks_a7.dtb \ +diff -uprN a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile +--- a/arch/arm/boot/dts/Makefile 2014-08-10 03:54:22.764885366 -0600 ++++ b/arch/arm/boot/dts/Makefile 2014-08-10 04:00:43.570037020 -0600 +@@ -137,6 +137,7 @@ kirkwood := \ + kirkwood-openrd-base.dtb \ + kirkwood-openrd-client.dtb \ + kirkwood-openrd-ultimate.dtb \ + kirkwood-pogo_e02.dtb \ kirkwood-rd88f6192.dtb \ kirkwood-rd88f6281-a0.dtb \