Mirror of https://github.com/archlinuxarm/PKGBUILDs.git, synced 2024-11-08 22:45:43 +00:00
From e07cf5701607dd66a8e360c7037ac29b0df4e279 Mon Sep 17 00:00:00 2001
From: Mauro Andreolini <mauro.andreolini@unimore.it>
Date: Wed, 18 Jun 2014 17:38:07 +0200
Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r5 for
 3.15.0

A set of processes may happen to perform interleaved reads, i.e., requests
whose union would give rise to a sequential read pattern. There are two
typical cases: in the first case, processes read fixed-size chunks of
data at a fixed distance from each other, while in the second case processes
may read variable-size chunks at variable distances. The latter case occurs
for example with QEMU, which splits the I/O generated by the guest into
multiple chunks, and lets these chunks be served by a pool of cooperating
processes, iteratively assigning the next chunk of I/O to the first
available process. CFQ uses actual queue merging for the first type of
processes, whereas it uses preemption to get a sequential read pattern out
of the read requests performed by the second type of processes. In the end
it uses two different mechanisms to achieve the same goal: boosting the
throughput with interleaved I/O.

This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
sequential read pattern with both types of processes. The main idea is
checking newly arrived requests against the next request of the active queue,
both in case of actual request insert and in case of request merge. By doing
so, both types of processes can be handled by just merging their queues.
EQM is then simpler and more compact than the pair of mechanisms used in
CFQ.

Finally, EQM also preserves the typical low-latency properties of BFQ, by
properly restoring the weight-raising state of a queue when it gets back to
a non-merged state.
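
The check at the core of EQM can be illustrated with the following toy model
(a self-contained user-space sketch, not code from this patch: the names, the
SEEK_THR value and the data structures are made up for illustration). In the
patch itself the corresponding check is performed by bfq_setup_cooperator(),
invoked both from bfq_allow_merge() and from bfq_insert_request().

/*
 * Illustrative, simplified model of the EQM check: a new request is
 * compared against the position of the in-service queue; if the two are
 * close, the issuing queue is redirected ("merged") into the in-service
 * queue, so the combined request stream looks sequential to the scheduler.
 */
#include <stdio.h>
#include <stdlib.h>

#define SEEK_THR 128ULL	/* hypothetical max sector distance to call two queues close */

struct toy_queue {
	int pid;			/* owner of the queue */
	unsigned long long next_pos;	/* sector of the next queued request */
	struct toy_queue *merged_into;	/* NULL if the queue is not merged */
};

static unsigned long long dist(unsigned long long a, unsigned long long b)
{
	return a >= b ? a - b : b - a;
}

/* Early Queue Merge: run on every request arrival, not only at queue expiry. */
static void eqm_on_request(struct toy_queue *issuer,
			   struct toy_queue *in_service,
			   unsigned long long req_pos)
{
	if (issuer == in_service || issuer->merged_into)
		return;
	if (dist(req_pos, in_service->next_pos) <= SEEK_THR) {
		/* Merge (redirect) instead of preempting the in-service queue. */
		issuer->merged_into = in_service;
		printf("merging queue of pid %d into pid %d\n",
		       issuer->pid, in_service->pid);
	}
}

int main(void)
{
	struct toy_queue qemu_worker = { .pid = 101, .next_pos = 2048 };
	struct toy_queue in_service  = { .pid = 100, .next_pos = 2000 };

	/* A cooperating worker issues a request adjacent to the in-service queue. */
	eqm_on_request(&qemu_worker, &in_service, 2040);
	return qemu_worker.merged_into ? EXIT_SUCCESS : EXIT_FAILURE;
}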

Signed-off-by: Mauro Andreolini <mauro.andreolini@unimore.it>
Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
---
 block/bfq-iosched.c | 736 ++++++++++++++++++++++++++++++++++++----------------
 block/bfq-sched.c | 28 --
 block/bfq.h | 46 +++-
 3 files changed, 556 insertions(+), 254 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6cf7bca..4579c6d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -571,6 +571,57 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd)
|
||
|
return dur;
|
||
|
}
|
||
|
|
||
|
+static inline unsigned
|
||
|
+bfq_bfqq_cooperations(struct bfq_queue *bfqq)
|
||
|
+{
|
||
|
+ return bfqq->bic ? bfqq->bic->cooperations : 0;
|
||
|
+}
|
||
|
+
|
||
|
+static inline void
|
||
|
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
|
||
|
+{
|
||
|
+ if (bic->saved_idle_window)
|
||
|
+ bfq_mark_bfqq_idle_window(bfqq);
|
||
|
+ else
|
||
|
+ bfq_clear_bfqq_idle_window(bfqq);
|
||
|
+ if (bic->saved_IO_bound)
|
||
|
+ bfq_mark_bfqq_IO_bound(bfqq);
|
||
|
+ else
|
||
|
+ bfq_clear_bfqq_IO_bound(bfqq);
|
||
|
+ if (bic->wr_time_left && bfqq->bfqd->low_latency &&
|
||
|
+ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
|
||
|
+ /*
|
||
|
+ * Start a weight raising period with the duration given by
|
||
|
+ * the raising_time_left snapshot.
|
||
|
+ */
|
||
|
+ if (bfq_bfqq_busy(bfqq))
|
||
|
+ bfqq->bfqd->wr_busy_queues++;
|
||
|
+ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
|
||
|
+ bfqq->wr_cur_max_time = bic->wr_time_left;
|
||
|
+ bfqq->last_wr_start_finish = jiffies;
|
||
|
+ bfqq->entity.ioprio_changed = 1;
|
||
|
+ }
|
||
|
+ /*
|
||
|
+ * Clear wr_time_left to prevent bfq_bfqq_save_state() from
|
||
|
+ * getting confused about the queue's need of a weight-raising
|
||
|
+ * period.
|
||
|
+ */
|
||
|
+ bic->wr_time_left = 0;
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * Must be called with the queue_lock held.
|
||
|
+ */
|
||
|
+static int bfqq_process_refs(struct bfq_queue *bfqq)
|
||
|
+{
|
||
|
+ int process_refs, io_refs;
|
||
|
+
|
||
|
+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
|
||
|
+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
|
||
|
+ BUG_ON(process_refs < 0);
|
||
|
+ return process_refs;
|
||
|
+}
|
||
|
+
|
||
|
static void bfq_add_request(struct request *rq)
|
||
|
{
|
||
|
struct bfq_queue *bfqq = RQ_BFQQ(rq);
|
||
|
@@ -602,8 +653,11 @@ static void bfq_add_request(struct request *rq)
|
||
|
|
||
|
if (!bfq_bfqq_busy(bfqq)) {
|
||
|
int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
|
||
|
+ bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh &&
|
||
|
time_is_before_jiffies(bfqq->soft_rt_next_start);
|
||
|
- idle_for_long_time = time_is_before_jiffies(
|
||
|
+ idle_for_long_time = bfq_bfqq_cooperations(bfqq) <
|
||
|
+ bfqd->bfq_coop_thresh &&
|
||
|
+ time_is_before_jiffies(
|
||
|
bfqq->budget_timeout +
|
||
|
bfqd->bfq_wr_min_idle_time);
|
||
|
entity->budget = max_t(unsigned long, bfqq->max_budget,
|
||
|
@@ -624,11 +678,20 @@ static void bfq_add_request(struct request *rq)
|
||
|
if (!bfqd->low_latency)
|
||
|
goto add_bfqq_busy;
|
||
|
|
||
|
+ if (bfq_bfqq_just_split(bfqq))
|
||
|
+ goto set_ioprio_changed;
|
||
|
+
|
||
|
/*
|
||
|
- * If the queue is not being boosted and has been idle
|
||
|
- * for enough time, start a weight-raising period
|
||
|
+ * If the queue:
|
||
|
+ * - is not being boosted,
|
||
|
+ * - has been idle for enough time,
|
||
|
+ * - is not a sync queue or is linked to a bfq_io_cq (it is
|
||
|
+ * shared "for its nature" or it is not shared and its
|
||
|
+ * requests have not been redirected to a shared queue)
|
||
|
+ * start a weight-raising period.
|
||
|
*/
|
||
|
- if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) {
|
||
|
+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt) &&
|
||
|
+ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
|
||
|
bfqq->wr_coeff = bfqd->bfq_wr_coeff;
|
||
|
if (idle_for_long_time)
|
||
|
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
|
||
|
@@ -642,9 +705,11 @@ static void bfq_add_request(struct request *rq)
|
||
|
} else if (old_wr_coeff > 1) {
|
||
|
if (idle_for_long_time)
|
||
|
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
|
||
|
- else if (bfqq->wr_cur_max_time ==
|
||
|
- bfqd->bfq_wr_rt_max_time &&
|
||
|
- !soft_rt) {
|
||
|
+ else if (bfq_bfqq_cooperations(bfqq) >=
|
||
|
+ bfqd->bfq_coop_thresh ||
|
||
|
+ (bfqq->wr_cur_max_time ==
|
||
|
+ bfqd->bfq_wr_rt_max_time &&
|
||
|
+ !soft_rt)) {
|
||
|
bfqq->wr_coeff = 1;
|
||
|
bfq_log_bfqq(bfqd, bfqq,
|
||
|
"wrais ending at %lu, rais_max_time %u",
|
||
|
@@ -660,18 +725,18 @@ static void bfq_add_request(struct request *rq)
|
||
|
/*
|
||
|
*
|
||
|
* The remaining weight-raising time is lower
|
||
|
- * than bfqd->bfq_wr_rt_max_time, which
|
||
|
- * means that the application is enjoying
|
||
|
- * weight raising either because deemed soft-
|
||
|
- * rt in the near past, or because deemed
|
||
|
- * interactive a long ago. In both cases,
|
||
|
- * resetting now the current remaining weight-
|
||
|
- * raising time for the application to the
|
||
|
- * weight-raising duration for soft rt
|
||
|
- * applications would not cause any latency
|
||
|
- * increase for the application (as the new
|
||
|
- * duration would be higher than the remaining
|
||
|
- * time).
|
||
|
+ * than bfqd->bfq_wr_rt_max_time, which means
|
||
|
+ * that the application is enjoying weight
|
||
|
+ * raising either because deemed soft-rt in
|
||
|
+ * the near past, or because deemed interactive
|
||
|
+ * a long ago.
|
||
|
+ * In both cases, resetting now the current
|
||
|
+ * remaining weight-raising time for the
|
||
|
+ * application to the weight-raising duration
|
||
|
+ * for soft rt applications would not cause any
|
||
|
+ * latency increase for the application (as the
|
||
|
+ * new duration would be higher than the
|
||
|
+ * remaining time).
|
||
|
*
|
||
|
* In addition, the application is now meeting
|
||
|
* the requirements for being deemed soft rt.
|
||
|
@@ -706,6 +771,7 @@ static void bfq_add_request(struct request *rq)
|
||
|
bfqd->bfq_wr_rt_max_time;
|
||
|
}
|
||
|
}
|
||
|
+set_ioprio_changed:
|
||
|
if (old_wr_coeff != bfqq->wr_coeff)
|
||
|
entity->ioprio_changed = 1;
|
||
|
add_bfqq_busy:
|
||
|
@@ -918,90 +984,35 @@ static void bfq_end_wr(struct bfq_data *bfqd)
|
||
|
spin_unlock_irq(bfqd->queue->queue_lock);
|
||
|
}
|
||
|
|
||
|
-static int bfq_allow_merge(struct request_queue *q, struct request *rq,
|
||
|
- struct bio *bio)
|
||
|
+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
|
||
|
{
|
||
|
- struct bfq_data *bfqd = q->elevator->elevator_data;
|
||
|
- struct bfq_io_cq *bic;
|
||
|
- struct bfq_queue *bfqq;
|
||
|
-
|
||
|
- /*
|
||
|
- * Disallow merge of a sync bio into an async request.
|
||
|
- */
|
||
|
- if (bfq_bio_sync(bio) && !rq_is_sync(rq))
|
||
|
- return 0;
|
||
|
-
|
||
|
- /*
|
||
|
- * Lookup the bfqq that this bio will be queued with. Allow
|
||
|
- * merge only if rq is queued there.
|
||
|
- * Queue lock is held here.
|
||
|
- */
|
||
|
- bic = bfq_bic_lookup(bfqd, current->io_context);
|
||
|
- if (bic == NULL)
|
||
|
- return 0;
|
||
|
-
|
||
|
- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
|
||
|
- return bfqq == RQ_BFQQ(rq);
|
||
|
-}
|
||
|
-
|
||
|
-static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
|
||
|
- struct bfq_queue *bfqq)
|
||
|
-{
|
||
|
- if (bfqq != NULL) {
|
||
|
- bfq_mark_bfqq_must_alloc(bfqq);
|
||
|
- bfq_mark_bfqq_budget_new(bfqq);
|
||
|
- bfq_clear_bfqq_fifo_expire(bfqq);
|
||
|
-
|
||
|
- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
|
||
|
-
|
||
|
- bfq_log_bfqq(bfqd, bfqq,
|
||
|
- "set_in_service_queue, cur-budget = %lu",
|
||
|
- bfqq->entity.budget);
|
||
|
- }
|
||
|
-
|
||
|
- bfqd->in_service_queue = bfqq;
|
||
|
-}
|
||
|
-
|
||
|
-/*
|
||
|
- * Get and set a new queue for service.
|
||
|
- */
|
||
|
-static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
|
||
|
- struct bfq_queue *bfqq)
|
||
|
-{
|
||
|
- if (!bfqq)
|
||
|
- bfqq = bfq_get_next_queue(bfqd);
|
||
|
+ if (request)
|
||
|
+ return blk_rq_pos(io_struct);
|
||
|
else
|
||
|
- bfq_get_next_queue_forced(bfqd, bfqq);
|
||
|
-
|
||
|
- __bfq_set_in_service_queue(bfqd, bfqq);
|
||
|
- return bfqq;
|
||
|
+ return ((struct bio *)io_struct)->bi_iter.bi_sector;
|
||
|
}
|
||
|
|
||
|
-static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
|
||
|
- struct request *rq)
|
||
|
+static inline sector_t bfq_dist_from(sector_t pos1,
|
||
|
+ sector_t pos2)
|
||
|
{
|
||
|
- if (blk_rq_pos(rq) >= bfqd->last_position)
|
||
|
- return blk_rq_pos(rq) - bfqd->last_position;
|
||
|
+ if (pos1 >= pos2)
|
||
|
+ return pos1 - pos2;
|
||
|
else
|
||
|
- return bfqd->last_position - blk_rq_pos(rq);
|
||
|
+ return pos2 - pos1;
|
||
|
}
|
||
|
|
||
|
-/*
|
||
|
- * Return true if bfqq has no request pending and rq is close enough to
|
||
|
- * bfqd->last_position, or if rq is closer to bfqd->last_position than
|
||
|
- * bfqq->next_rq
|
||
|
- */
|
||
|
-static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
|
||
|
+static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
|
||
|
+ sector_t sector)
|
||
|
{
|
||
|
- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
|
||
|
+ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
|
||
|
+ BFQQ_SEEK_THR;
|
||
|
}
|
||
|
|
||
|
-static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
|
||
|
+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
|
||
|
{
|
||
|
struct rb_root *root = &bfqd->rq_pos_tree;
|
||
|
struct rb_node *parent, *node;
|
||
|
struct bfq_queue *__bfqq;
|
||
|
- sector_t sector = bfqd->last_position;
|
||
|
|
||
|
if (RB_EMPTY_ROOT(root))
|
||
|
return NULL;
|
||
|
@@ -1020,7 +1031,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
|
||
|
* next_request position).
|
||
|
*/
|
||
|
__bfqq = rb_entry(parent, struct bfq_queue, pos_node);
|
||
|
- if (bfq_rq_close(bfqd, __bfqq->next_rq))
|
||
|
+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
|
||
|
return __bfqq;
|
||
|
|
||
|
if (blk_rq_pos(__bfqq->next_rq) < sector)
|
||
|
@@ -1031,7 +1042,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
|
||
|
return NULL;
|
||
|
|
||
|
__bfqq = rb_entry(node, struct bfq_queue, pos_node);
|
||
|
- if (bfq_rq_close(bfqd, __bfqq->next_rq))
|
||
|
+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
|
||
|
return __bfqq;
|
||
|
|
||
|
return NULL;
|
||
|
@@ -1040,14 +1051,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
|
||
|
/*
|
||
|
* bfqd - obvious
|
||
|
* cur_bfqq - passed in so that we don't decide that the current queue
|
||
|
- * is closely cooperating with itself.
|
||
|
- *
|
||
|
- * We are assuming that cur_bfqq has dispatched at least one request,
|
||
|
- * and that bfqd->last_position reflects a position on the disk associated
|
||
|
- * with the I/O issued by cur_bfqq.
|
||
|
+ * is closely cooperating with itself
|
||
|
+ * sector - used as a reference point to search for a close queue
|
||
|
*/
|
||
|
static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
|
||
|
- struct bfq_queue *cur_bfqq)
|
||
|
+ struct bfq_queue *cur_bfqq,
|
||
|
+ sector_t sector)
|
||
|
{
|
||
|
struct bfq_queue *bfqq;
|
||
|
|
||
|
@@ -1067,7 +1076,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
|
||
|
* working closely on the same area of the disk. In that case,
|
||
|
* we can group them together and don't waste time idling.
|
||
|
*/
|
||
|
- bfqq = bfqq_close(bfqd);
|
||
|
+ bfqq = bfqq_close(bfqd, sector);
|
||
|
if (bfqq == NULL || bfqq == cur_bfqq)
|
||
|
return NULL;
|
||
|
|
||
|
@@ -1094,6 +1103,305 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
|
||
|
return bfqq;
|
||
|
}
|
||
|
|
||
|
+static struct bfq_queue *
|
||
|
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
|
||
|
+{
|
||
|
+ int process_refs, new_process_refs;
|
||
|
+ struct bfq_queue *__bfqq;
|
||
|
+
|
||
|
+ /*
|
||
|
+ * If there are no process references on the new_bfqq, then it is
|
||
|
+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
|
||
|
+ * may have dropped their last reference (not just their last process
|
||
|
+ * reference).
|
||
|
+ */
|
||
|
+ if (!bfqq_process_refs(new_bfqq))
|
||
|
+ return NULL;
|
||
|
+
|
||
|
+ /* Avoid a circular list and skip interim queue merges. */
|
||
|
+ while ((__bfqq = new_bfqq->new_bfqq)) {
|
||
|
+ if (__bfqq == bfqq)
|
||
|
+ return NULL;
|
||
|
+ new_bfqq = __bfqq;
|
||
|
+ }
|
||
|
+
|
||
|
+ process_refs = bfqq_process_refs(bfqq);
|
||
|
+ new_process_refs = bfqq_process_refs(new_bfqq);
|
||
|
+ /*
|
||
|
+ * If the process for the bfqq has gone away, there is no
|
||
|
+ * sense in merging the queues.
|
||
|
+ */
|
||
|
+ if (process_refs == 0 || new_process_refs == 0)
|
||
|
+ return NULL;
|
||
|
+
|
||
|
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
|
||
|
+ new_bfqq->pid);
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Merging is just a redirection: the requests of the process
|
||
|
+ * owning one of the two queues are redirected to the other queue.
|
||
|
+ * The latter queue, in its turn, is set as shared if this is the
|
||
|
+ * first time that the requests of some process are redirected to
|
||
|
+ * it.
|
||
|
+ *
|
||
|
+ * We redirect bfqq to new_bfqq and not the opposite, because we
|
||
|
+ * are in the context of the process owning bfqq, hence we have
|
||
|
+ * the io_cq of this process. So we can immediately configure this
|
||
|
+ * io_cq to redirect the requests of the process to new_bfqq.
|
||
|
+ *
|
||
|
+ * NOTE, even if new_bfqq coincides with the in-service queue, the
|
||
|
+ * io_cq of new_bfqq is not available, because, if the in-service
|
||
|
+ * queue is shared, bfqd->in_service_bic may not point to the
|
||
|
+ * io_cq of the in-service queue.
|
||
|
+ * Redirecting the requests of the process owning bfqq to the
|
||
|
+ * currently in-service queue is in any case the best option, as
|
||
|
+ * we feed the in-service queue with new requests close to the
|
||
|
+ * last request served and, by doing so, hopefully increase the
|
||
|
+ * throughput.
|
||
|
+ */
|
||
|
+ bfqq->new_bfqq = new_bfqq;
|
||
|
+ atomic_add(process_refs, &new_bfqq->ref);
|
||
|
+ return new_bfqq;
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * Attempt to schedule a merge of bfqq with the currently in-service queue
|
||
|
+ * or with a close queue among the scheduled queues.
|
||
|
+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
|
||
|
+ * structure otherwise.
|
||
|
+ */
|
||
|
+static struct bfq_queue *
|
||
|
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||
|
+ void *io_struct, bool request)
|
||
|
+{
|
||
|
+ struct bfq_queue *in_service_bfqq, *new_bfqq;
|
||
|
+
|
||
|
+ if (bfqq->new_bfqq)
|
||
|
+ return bfqq->new_bfqq;
|
||
|
+
|
||
|
+ if (!io_struct)
|
||
|
+ return NULL;
|
||
|
+
|
||
|
+ in_service_bfqq = bfqd->in_service_queue;
|
||
|
+
|
||
|
+ if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
|
||
|
+ !bfqd->in_service_bic)
|
||
|
+ goto check_scheduled;
|
||
|
+
|
||
|
+ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
|
||
|
+ goto check_scheduled;
|
||
|
+
|
||
|
+ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
|
||
|
+ goto check_scheduled;
|
||
|
+
|
||
|
+ if (in_service_bfqq->entity.parent != bfqq->entity.parent)
|
||
|
+ goto check_scheduled;
|
||
|
+
|
||
|
+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
|
||
|
+ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
|
||
|
+ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
|
||
|
+ if (new_bfqq != NULL)
|
||
|
+ return new_bfqq; /* Merge with in-service queue */
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Check whether there is a cooperator among currently scheduled
|
||
|
+ * queues. The only thing we need is that the bio/request is not
|
||
|
+ * NULL, as we need it to establish whether a cooperator exists.
|
||
|
+ */
|
||
|
+check_scheduled:
|
||
|
+ new_bfqq = bfq_close_cooperator(bfqd, bfqq,
|
||
|
+ bfq_io_struct_pos(io_struct, request));
|
||
|
+ if (new_bfqq)
|
||
|
+ return bfq_setup_merge(bfqq, new_bfqq);
|
||
|
+
|
||
|
+ return NULL;
|
||
|
+}
|
||
|
+
|
||
|
+static inline void
|
||
|
+bfq_bfqq_save_state(struct bfq_queue *bfqq)
|
||
|
+{
|
||
|
+ /*
|
||
|
+ * If bfqq->bic == NULL, the queue is already shared or its requests
|
||
|
+ * have already been redirected to a shared queue; both idle window
|
||
|
+ * and weight raising state have already been saved. Do nothing.
|
||
|
+ */
|
||
|
+ if (bfqq->bic == NULL)
|
||
|
+ return;
|
||
|
+ if (bfqq->bic->wr_time_left)
|
||
|
+ /*
|
||
|
+ * This is the queue of a just-started process, and would
|
||
|
+ * deserve weight raising: we set wr_time_left to the full
|
||
|
+ * weight-raising duration to trigger weight-raising when
|
||
|
+ * and if the queue is split and the first request of the
|
||
|
+ * queue is enqueued.
|
||
|
+ */
|
||
|
+ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
|
||
|
+ else if (bfqq->wr_coeff > 1) {
|
||
|
+ unsigned long wr_duration =
|
||
|
+ jiffies - bfqq->last_wr_start_finish;
|
||
|
+ /*
|
||
|
+ * It may happen that a queue's weight raising period lasts
|
||
|
+ * longer than its wr_cur_max_time, as weight raising is
|
||
|
+ * handled only when a request is enqueued or dispatched (it
|
||
|
+ * does not use any timer). If the weight raising period is
|
||
|
+ * about to end, don't save it.
|
||
|
+ */
|
||
|
+ if (bfqq->wr_cur_max_time <= wr_duration)
|
||
|
+ bfqq->bic->wr_time_left = 0;
|
||
|
+ else
|
||
|
+ bfqq->bic->wr_time_left =
|
||
|
+ bfqq->wr_cur_max_time - wr_duration;
|
||
|
+ /*
|
||
|
+ * The bfq_queue is becoming shared or the requests of the
|
||
|
+ * process owning the queue are being redirected to a shared
|
||
|
+ * queue. Stop the weight raising period of the queue, as in
|
||
|
+ * both cases it should not be owned by an interactive or
|
||
|
+ * soft real-time application.
|
||
|
+ */
|
||
|
+ bfq_bfqq_end_wr(bfqq);
|
||
|
+ } else
|
||
|
+ bfqq->bic->wr_time_left = 0;
|
||
|
+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
|
||
|
+ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
|
||
|
+ bfqq->bic->cooperations++;
|
||
|
+ bfqq->bic->failed_cooperations = 0;
|
||
|
+}
|
||
|
+
|
||
|
+static inline void
|
||
|
+bfq_get_bic_reference(struct bfq_queue *bfqq)
|
||
|
+{
|
||
|
+ /*
|
||
|
+ * If bfqq->bic has a non-NULL value, the bic to which it belongs
|
||
|
+ * is about to begin using a shared bfq_queue.
|
||
|
+ */
|
||
|
+ if (bfqq->bic)
|
||
|
+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
|
||
|
+}
|
||
|
+
|
||
|
+static void
|
||
|
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
|
||
|
+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
|
||
|
+{
|
||
|
+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
|
||
|
+ (long unsigned)new_bfqq->pid);
|
||
|
+ /* Save weight raising and idle window of the merged queues */
|
||
|
+ bfq_bfqq_save_state(bfqq);
|
||
|
+ bfq_bfqq_save_state(new_bfqq);
|
||
|
+ if (bfq_bfqq_IO_bound(bfqq))
|
||
|
+ bfq_mark_bfqq_IO_bound(new_bfqq);
|
||
|
+ bfq_clear_bfqq_IO_bound(bfqq);
|
||
|
+ /*
|
||
|
+ * Grab a reference to the bic, to prevent it from being destroyed
|
||
|
+ * before being possibly touched by a bfq_split_bfqq().
|
||
|
+ */
|
||
|
+ bfq_get_bic_reference(bfqq);
|
||
|
+ bfq_get_bic_reference(new_bfqq);
|
||
|
+ /*
|
||
|
+ * Merge queues (that is, let bic redirect its requests to new_bfqq)
|
||
|
+ */
|
||
|
+ bic_set_bfqq(bic, new_bfqq, 1);
|
||
|
+ bfq_mark_bfqq_coop(new_bfqq);
|
||
|
+ /*
|
||
|
+ * new_bfqq now belongs to at least two bics (it is a shared queue):
|
||
|
+ * set new_bfqq->bic to NULL. bfqq either:
|
||
|
+ * - does not belong to any bic any more, and hence bfqq->bic must
|
||
|
+ * be set to NULL, or
|
||
|
+ * - is a queue whose owning bics have already been redirected to a
|
||
|
+ * different queue, hence the queue is destined to not belong to
|
||
|
+ * any bic soon and bfqq->bic is already NULL (therefore the next
|
||
|
+ * assignment causes no harm).
|
||
|
+ */
|
||
|
+ new_bfqq->bic = NULL;
|
||
|
+ bfqq->bic = NULL;
|
||
|
+ bfq_put_queue(bfqq);
|
||
|
+}
|
||
|
+
|
||
|
+static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
|
||
|
+{
|
||
|
+ struct bfq_io_cq *bic = bfqq->bic;
|
||
|
+ struct bfq_data *bfqd = bfqq->bfqd;
|
||
|
+
|
||
|
+ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
|
||
|
+ bic->failed_cooperations++;
|
||
|
+ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
|
||
|
+ bic->cooperations = 0;
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+static int bfq_allow_merge(struct request_queue *q, struct request *rq,
|
||
|
+ struct bio *bio)
|
||
|
+{
|
||
|
+ struct bfq_data *bfqd = q->elevator->elevator_data;
|
||
|
+ struct bfq_io_cq *bic;
|
||
|
+ struct bfq_queue *bfqq, *new_bfqq;
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Disallow merge of a sync bio into an async request.
|
||
|
+ */
|
||
|
+ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Lookup the bfqq that this bio will be queued with. Allow
|
||
|
+ * merge only if rq is queued there.
|
||
|
+ * Queue lock is held here.
|
||
|
+ */
|
||
|
+ bic = bfq_bic_lookup(bfqd, current->io_context);
|
||
|
+ if (bic == NULL)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
|
||
|
+ /*
|
||
|
+ * We take advantage of this function to perform an early merge
|
||
|
+ * of the queues of possible cooperating processes.
|
||
|
+ */
|
||
|
+ if (bfqq != NULL) {
|
||
|
+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
|
||
|
+ if (new_bfqq != NULL) {
|
||
|
+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
|
||
|
+ /*
|
||
|
+ * If we get here, the bio will be queued in the
|
||
|
+ * shared queue, i.e., new_bfqq, so use new_bfqq
|
||
|
+ * to decide whether bio and rq can be merged.
|
||
|
+ */
|
||
|
+ bfqq = new_bfqq;
|
||
|
+ } else
|
||
|
+ bfq_bfqq_increase_failed_cooperations(bfqq);
|
||
|
+ }
|
||
|
+
|
||
|
+ return bfqq == RQ_BFQQ(rq);
|
||
|
+}
|
||
|
+
|
||
|
+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
|
||
|
+ struct bfq_queue *bfqq)
|
||
|
+{
|
||
|
+ if (bfqq != NULL) {
|
||
|
+ bfq_mark_bfqq_must_alloc(bfqq);
|
||
|
+ bfq_mark_bfqq_budget_new(bfqq);
|
||
|
+ bfq_clear_bfqq_fifo_expire(bfqq);
|
||
|
+
|
||
|
+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
|
||
|
+
|
||
|
+ bfq_log_bfqq(bfqd, bfqq,
|
||
|
+ "set_in_service_queue, cur-budget = %lu",
|
||
|
+ bfqq->entity.budget);
|
||
|
+ }
|
||
|
+
|
||
|
+ bfqd->in_service_queue = bfqq;
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * Get and set a new queue for service.
|
||
|
+ */
|
||
|
+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
|
||
|
+{
|
||
|
+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
|
||
|
+
|
||
|
+ __bfq_set_in_service_queue(bfqd, bfqq);
|
||
|
+ return bfqq;
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
* If enough samples have been computed, return the current max budget
|
||
|
* stored in bfqd, which is dynamically updated according to the
|
||
|
@@ -1237,63 +1545,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
|
||
|
return rq;
|
||
|
}
|
||
|
|
||
|
-/*
|
||
|
- * Must be called with the queue_lock held.
|
||
|
- */
|
||
|
-static int bfqq_process_refs(struct bfq_queue *bfqq)
|
||
|
-{
|
||
|
- int process_refs, io_refs;
|
||
|
-
|
||
|
- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
|
||
|
- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
|
||
|
- BUG_ON(process_refs < 0);
|
||
|
- return process_refs;
|
||
|
-}
|
||
|
-
|
||
|
-static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
|
||
|
-{
|
||
|
- int process_refs, new_process_refs;
|
||
|
- struct bfq_queue *__bfqq;
|
||
|
-
|
||
|
- /*
|
||
|
- * If there are no process references on the new_bfqq, then it is
|
||
|
- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
|
||
|
- * may have dropped their last reference (not just their last process
|
||
|
- * reference).
|
||
|
- */
|
||
|
- if (!bfqq_process_refs(new_bfqq))
|
||
|
- return;
|
||
|
-
|
||
|
- /* Avoid a circular list and skip interim queue merges. */
|
||
|
- while ((__bfqq = new_bfqq->new_bfqq)) {
|
||
|
- if (__bfqq == bfqq)
|
||
|
- return;
|
||
|
- new_bfqq = __bfqq;
|
||
|
- }
|
||
|
-
|
||
|
- process_refs = bfqq_process_refs(bfqq);
|
||
|
- new_process_refs = bfqq_process_refs(new_bfqq);
|
||
|
- /*
|
||
|
- * If the process for the bfqq has gone away, there is no
|
||
|
- * sense in merging the queues.
|
||
|
- */
|
||
|
- if (process_refs == 0 || new_process_refs == 0)
|
||
|
- return;
|
||
|
-
|
||
|
- /*
|
||
|
- * Merge in the direction of the lesser amount of work.
|
||
|
- */
|
||
|
- if (new_process_refs >= process_refs) {
|
||
|
- bfqq->new_bfqq = new_bfqq;
|
||
|
- atomic_add(process_refs, &new_bfqq->ref);
|
||
|
- } else {
|
||
|
- new_bfqq->new_bfqq = bfqq;
|
||
|
- atomic_add(new_process_refs, &bfqq->ref);
|
||
|
- }
|
||
|
- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
|
||
|
- new_bfqq->pid);
|
||
|
-}
|
||
|
-
|
||
|
static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
|
||
|
{
|
||
|
struct bfq_entity *entity = &bfqq->entity;
|
||
|
@@ -2011,7 +2262,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
|
||
|
*/
|
||
|
static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
|
||
|
{
|
||
|
- struct bfq_queue *bfqq, *new_bfqq = NULL;
|
||
|
+ struct bfq_queue *bfqq;
|
||
|
struct request *next_rq;
|
||
|
enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
|
||
|
|
||
|
@@ -2021,17 +2272,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
|
||
|
|
||
|
bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
|
||
|
|
||
|
- /*
|
||
|
- * If another queue has a request waiting within our mean seek
|
||
|
- * distance, let it run. The expire code will check for close
|
||
|
- * cooperators and put the close queue at the front of the
|
||
|
- * service tree. If possible, merge the expiring queue with the
|
||
|
- * new bfqq.
|
||
|
- */
|
||
|
- new_bfqq = bfq_close_cooperator(bfqd, bfqq);
|
||
|
- if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
|
||
|
- bfq_setup_merge(bfqq, new_bfqq);
|
||
|
-
|
||
|
if (bfq_may_expire_for_budg_timeout(bfqq) &&
|
||
|
!timer_pending(&bfqd->idle_slice_timer) &&
|
||
|
!bfq_bfqq_must_idle(bfqq))
|
||
|
@@ -2070,10 +2310,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
|
||
|
bfq_clear_bfqq_wait_request(bfqq);
|
||
|
del_timer(&bfqd->idle_slice_timer);
|
||
|
}
|
||
|
- if (new_bfqq == NULL)
|
||
|
- goto keep_queue;
|
||
|
- else
|
||
|
- goto expire;
|
||
|
+ goto keep_queue;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -2082,40 +2319,30 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
|
||
|
* in flight (possibly waiting for a completion) or is idling for a
|
||
|
* new request, then keep it.
|
||
|
*/
|
||
|
- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
|
||
|
- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
|
||
|
+ if (timer_pending(&bfqd->idle_slice_timer) ||
|
||
|
+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
|
||
|
bfqq = NULL;
|
||
|
goto keep_queue;
|
||
|
- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
|
||
|
- /*
|
||
|
- * Expiring the queue because there is a close cooperator,
|
||
|
- * cancel timer.
|
||
|
- */
|
||
|
- bfq_clear_bfqq_wait_request(bfqq);
|
||
|
- del_timer(&bfqd->idle_slice_timer);
|
||
|
}
|
||
|
|
||
|
reason = BFQ_BFQQ_NO_MORE_REQUESTS;
|
||
|
expire:
|
||
|
bfq_bfqq_expire(bfqd, bfqq, 0, reason);
|
||
|
new_queue:
|
||
|
- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
|
||
|
+ bfqq = bfq_set_in_service_queue(bfqd);
|
||
|
bfq_log(bfqd, "select_queue: new queue %d returned",
|
||
|
bfqq != NULL ? bfqq->pid : 0);
|
||
|
keep_queue:
|
||
|
return bfqq;
|
||
|
}
|
||
|
|
||
|
-static void bfq_update_wr_data(struct bfq_data *bfqd,
|
||
|
- struct bfq_queue *bfqq)
|
||
|
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||
|
{
|
||
|
- if (bfqq->wr_coeff > 1) { /* queue is being boosted */
|
||
|
- struct bfq_entity *entity = &bfqq->entity;
|
||
|
-
|
||
|
+ struct bfq_entity *entity = &bfqq->entity;
|
||
|
+ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
|
||
|
bfq_log_bfqq(bfqd, bfqq,
|
||
|
"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
|
||
|
- jiffies_to_msecs(jiffies -
|
||
|
- bfqq->last_wr_start_finish),
|
||
|
+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
|
||
|
jiffies_to_msecs(bfqq->wr_cur_max_time),
|
||
|
bfqq->wr_coeff,
|
||
|
bfqq->entity.weight, bfqq->entity.orig_weight);
|
||
|
@@ -2124,11 +2351,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd,
|
||
|
entity->orig_weight * bfqq->wr_coeff);
|
||
|
if (entity->ioprio_changed)
|
||
|
bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
|
||
|
+
|
||
|
/*
|
||
|
* If too much time has elapsed from the beginning
|
||
|
- * of this weight-raising, stop it.
|
||
|
+ * of this weight-raising period, or the queue has
|
||
|
+ * exceeded the acceptable number of cooperations,
|
||
|
+ * stop it.
|
||
|
*/
|
||
|
- if (time_is_before_jiffies(bfqq->last_wr_start_finish +
|
||
|
+ if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
|
||
|
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
|
||
|
bfqq->wr_cur_max_time)) {
|
||
|
bfqq->last_wr_start_finish = jiffies;
|
||
|
bfq_log_bfqq(bfqd, bfqq,
|
||
|
@@ -2136,11 +2367,13 @@ static void bfq_update_wr_data(struct bfq_data *bfqd,
|
||
|
bfqq->last_wr_start_finish,
|
||
|
jiffies_to_msecs(bfqq->wr_cur_max_time));
|
||
|
bfq_bfqq_end_wr(bfqq);
|
||
|
- __bfq_entity_update_weight_prio(
|
||
|
- bfq_entity_service_tree(entity),
|
||
|
- entity);
|
||
|
}
|
||
|
}
|
||
|
+ /* Update weight both if it must be raised and if it must be lowered */
|
||
|
+ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
|
||
|
+ __bfq_entity_update_weight_prio(
|
||
|
+ bfq_entity_service_tree(entity),
|
||
|
+ entity);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
@@ -2377,6 +2610,25 @@ static inline void bfq_init_icq(struct io_cq *icq)
|
||
|
struct bfq_io_cq *bic = icq_to_bic(icq);
|
||
|
|
||
|
bic->ttime.last_end_request = jiffies;
|
||
|
+ /*
|
||
|
+ * A newly created bic indicates that the process has just
|
||
|
+ * started doing I/O, and is probably mapping into memory its
|
||
|
+ * executable and libraries: it definitely needs weight raising.
|
||
|
+ * There is however the possibility that the process performs,
|
||
|
+ * for a while, I/O close to some other process. EQM intercepts
|
||
|
+ * this behavior and may merge the queue corresponding to the
|
||
|
+ * process with some other queue, BEFORE the weight of the queue
|
||
|
+ * is raised. Merged queues are not weight-raised (they are assumed
|
||
|
+ * to belong to processes that benefit only from high throughput).
|
||
|
+ * If the merge is basically the consequence of an accident, then
|
||
|
+ * the queue will be split soon and will get back its old weight.
|
||
|
+ * It is then important to write down somewhere that this queue
|
||
|
+ * does need weight raising, even if it did not make it to get its
|
||
|
+ * weight raised before being merged. To this purpose, we overload
|
||
|
+ * the field raising_time_left and assign 1 to it, to mark the queue
|
||
|
+ * as needing weight raising.
|
||
|
+ */
|
||
|
+ bic->wr_time_left = 1;
|
||
|
}
|
||
|
|
||
|
static void bfq_exit_icq(struct io_cq *icq)
|
||
|
@@ -2390,6 +2642,13 @@ static void bfq_exit_icq(struct io_cq *icq)
|
||
|
}
|
||
|
|
||
|
if (bic->bfqq[BLK_RW_SYNC]) {
|
||
|
+ /*
|
||
|
+ * If the bic is using a shared queue, put the reference
|
||
|
+ * taken on the io_context when the bic started using a
|
||
|
+ * shared bfq_queue.
|
||
|
+ */
|
||
|
+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
|
||
|
+ put_io_context(icq->ioc);
|
||
|
bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
|
||
|
bic->bfqq[BLK_RW_SYNC] = NULL;
|
||
|
}
|
||
|
@@ -2678,6 +2937,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
|
||
|
if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
|
||
|
return;
|
||
|
|
||
|
+ /* Idle window just restored, statistics are meaningless. */
|
||
|
+ if (bfq_bfqq_just_split(bfqq))
|
||
|
+ return;
|
||
|
+
|
||
|
enable_idle = bfq_bfqq_idle_window(bfqq);
|
||
|
|
||
|
if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
|
||
|
@@ -2725,6 +2988,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||
|
if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
|
||
|
!BFQQ_SEEKY(bfqq))
|
||
|
bfq_update_idle_window(bfqd, bfqq, bic);
|
||
|
+ bfq_clear_bfqq_just_split(bfqq);
|
||
|
|
||
|
bfq_log_bfqq(bfqd, bfqq,
|
||
|
"rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
|
||
|
@@ -2785,13 +3049,49 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||
|
static void bfq_insert_request(struct request_queue *q, struct request *rq)
|
||
|
{
|
||
|
struct bfq_data *bfqd = q->elevator->elevator_data;
|
||
|
- struct bfq_queue *bfqq = RQ_BFQQ(rq);
|
||
|
+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
|
||
|
|
||
|
assert_spin_locked(bfqd->queue->queue_lock);
|
||
|
+
|
||
|
+ /*
|
||
|
+ * An unplug may trigger a requeue of a request from the device
|
||
|
+ * driver: make sure we are in process context while trying to
|
||
|
+ * merge two bfq_queues.
|
||
|
+ */
|
||
|
+ if (!in_interrupt()) {
|
||
|
+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
|
||
|
+ if (new_bfqq != NULL) {
|
||
|
+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
|
||
|
+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
|
||
|
+ /*
|
||
|
+ * Release the request's reference to the old bfqq
|
||
|
+ * and make sure one is taken to the shared queue.
|
||
|
+ */
|
||
|
+ new_bfqq->allocated[rq_data_dir(rq)]++;
|
||
|
+ bfqq->allocated[rq_data_dir(rq)]--;
|
||
|
+ atomic_inc(&new_bfqq->ref);
|
||
|
+ bfq_put_queue(bfqq);
|
||
|
+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
|
||
|
+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
|
||
|
+ bfqq, new_bfqq);
|
||
|
+ rq->elv.priv[1] = new_bfqq;
|
||
|
+ bfqq = new_bfqq;
|
||
|
+ } else
|
||
|
+ bfq_bfqq_increase_failed_cooperations(bfqq);
|
||
|
+ }
|
||
|
+
|
||
|
bfq_init_prio_data(bfqq, RQ_BIC(rq));
|
||
|
|
||
|
bfq_add_request(rq);
|
||
|
|
||
|
+ /*
|
||
|
+ * Here a newly-created bfq_queue has already started a weight-raising
|
||
|
+ * period: clear raising_time_left to prevent bfq_bfqq_save_state()
|
||
|
+ * from assigning it a full weight-raising period. See the detailed
|
||
|
+ * comments about this field in bfq_init_icq().
|
||
|
+ */
|
||
|
+ if (bfqq->bic != NULL)
|
||
|
+ bfqq->bic->wr_time_left = 0;
|
||
|
rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
|
||
|
list_add_tail(&rq->queuelist, &bfqq->fifo);
|
||
|
|
||
|
@@ -2956,18 +3256,6 @@ static void bfq_put_request(struct request *rq)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
-static struct bfq_queue *
|
||
|
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
|
||
|
- struct bfq_queue *bfqq)
|
||
|
-{
|
||
|
- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
|
||
|
- (long unsigned)bfqq->new_bfqq->pid);
|
||
|
- bic_set_bfqq(bic, bfqq->new_bfqq, 1);
|
||
|
- bfq_mark_bfqq_coop(bfqq->new_bfqq);
|
||
|
- bfq_put_queue(bfqq);
|
||
|
- return bic_to_bfqq(bic, 1);
|
||
|
-}
|
||
|
-
|
||
|
/*
|
||
|
* Returns NULL if a new bfqq should be allocated, or the old bfqq if this
|
||
|
* was the last process referring to said bfqq.
|
||
|
@@ -2976,6 +3264,9 @@ static struct bfq_queue *
|
||
|
bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
|
||
|
{
|
||
|
bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
|
||
|
+
|
||
|
+ put_io_context(bic->icq.ioc);
|
||
|
+
|
||
|
if (bfqq_process_refs(bfqq) == 1) {
|
||
|
bfqq->pid = current->pid;
|
||
|
bfq_clear_bfqq_coop(bfqq);
|
||
|
@@ -3004,6 +3295,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
|
||
|
struct bfq_queue *bfqq;
|
||
|
struct bfq_group *bfqg;
|
||
|
unsigned long flags;
|
||
|
+ bool split = false;
|
||
|
|
||
|
might_sleep_if(gfp_mask & __GFP_WAIT);
|
||
|
|
||
|
@@ -3022,24 +3314,14 @@ new_queue:
|
||
|
bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
|
||
|
bic_set_bfqq(bic, bfqq, is_sync);
|
||
|
} else {
|
||
|
- /*
|
||
|
- * If the queue was seeky for too long, break it apart.
|
||
|
- */
|
||
|
+ /* If the queue was seeky for too long, break it apart. */
|
||
|
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
|
||
|
bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
|
||
|
bfqq = bfq_split_bfqq(bic, bfqq);
|
||
|
+ split = true;
|
||
|
if (!bfqq)
|
||
|
goto new_queue;
|
||
|
}
|
||
|
-
|
||
|
- /*
|
||
|
- * Check to see if this queue is scheduled to merge with
|
||
|
- * another closely cooperating queue. The merging of queues
|
||
|
- * happens here as it must be done in process context.
|
||
|
- * The reference on new_bfqq was taken in merge_bfqqs.
|
||
|
- */
|
||
|
- if (bfqq->new_bfqq != NULL)
|
||
|
- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
|
||
|
}
|
||
|
|
||
|
bfqq->allocated[rw]++;
|
||
|
@@ -3050,6 +3332,26 @@ new_queue:
|
||
|
rq->elv.priv[0] = bic;
|
||
|
rq->elv.priv[1] = bfqq;
|
||
|
|
||
|
+ /*
|
||
|
+ * If a bfq_queue has only one process reference, it is owned
|
||
|
+ * by only one bfq_io_cq: we can set the bic field of the
|
||
|
+ * bfq_queue to the address of that structure. Also, if the
|
||
|
+ * queue has just been split, mark a flag so that the
|
||
|
+ * information is available to the other scheduler hooks.
|
||
|
+ */
|
||
|
+ if (bfqq_process_refs(bfqq) == 1) {
|
||
|
+ bfqq->bic = bic;
|
||
|
+ if (split) {
|
||
|
+ bfq_mark_bfqq_just_split(bfqq);
|
||
|
+ /*
|
||
|
+ * If the queue has just been split from a shared
|
||
|
+ * queue, restore the idle window and the possible
|
||
|
+ * weight raising period.
|
||
|
+ */
|
||
|
+ bfq_bfqq_resume_state(bfqq, bic);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
spin_unlock_irqrestore(q->queue_lock, flags);
|
||
|
|
||
|
return 0;
|
||
|
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index c4831b7..546a254 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -1084,34 +1084,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
|
||
|
return bfqq;
|
||
|
}
|
||
|
|
||
|
-/*
|
||
|
- * Forced extraction of the given queue.
|
||
|
- */
|
||
|
-static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
|
||
|
- struct bfq_queue *bfqq)
|
||
|
-{
|
||
|
- struct bfq_entity *entity;
|
||
|
- struct bfq_sched_data *sd;
|
||
|
-
|
||
|
- BUG_ON(bfqd->in_service_queue != NULL);
|
||
|
-
|
||
|
- entity = &bfqq->entity;
|
||
|
- /*
|
||
|
- * Bubble up extraction/update from the leaf to the root.
|
||
|
- */
|
||
|
- for_each_entity(entity) {
|
||
|
- sd = entity->sched_data;
|
||
|
- bfq_update_budget(entity);
|
||
|
- bfq_update_vtime(bfq_entity_service_tree(entity));
|
||
|
- bfq_active_extract(bfq_entity_service_tree(entity), entity);
|
||
|
- sd->in_service_entity = entity;
|
||
|
- sd->next_in_service = NULL;
|
||
|
- entity->service = 0;
|
||
|
- }
|
||
|
-
|
||
|
- return;
|
||
|
-}
|
||
|
-
|
||
|
static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
|
||
|
{
|
||
|
if (bfqd->in_service_bic != NULL) {
|
||
|
diff --git a/block/bfq.h b/block/bfq.h
index aeca08e..4834b70 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -215,18 +215,21 @@ struct bfq_group;
|
||
|
* idle @bfq_queue with no outstanding requests, then
|
||
|
* the task associated with the queue it is deemed as
|
||
|
* soft real-time (see the comments to the function
|
||
|
- * bfq_bfqq_softrt_next_start()).
|
||
|
+ * bfq_bfqq_softrt_next_start())
|
||
|
* @last_idle_bklogged: time of the last transition of the @bfq_queue from
|
||
|
* idle to backlogged
|
||
|
* @service_from_backlogged: cumulative service received from the @bfq_queue
|
||
|
* since the last transition from idle to
|
||
|
* backlogged
|
||
|
+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
|
||
|
+ * queue is shared
|
||
|
*
|
||
|
- * A bfq_queue is a leaf request queue; it can be associated with an io_context
|
||
|
- * or more, if it is async or shared between cooperating processes. @cgroup
|
||
|
- * holds a reference to the cgroup, to be sure that it does not disappear while
|
||
|
- * a bfqq still references it (mostly to avoid races between request issuing and
|
||
|
- * task migration followed by cgroup destruction).
|
||
|
+ * A bfq_queue is a leaf request queue; it can be associated with an
|
||
|
+ * io_context or more, if it is async or shared between cooperating
|
||
|
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
|
||
|
+ * does not disappear while a bfqq still references it (mostly to avoid
|
||
|
+ * races between request issuing and task migration followed by cgroup
|
||
|
+ * destruction).
|
||
|
* All the fields are protected by the queue lock of the containing bfqd.
|
||
|
*/
|
||
|
struct bfq_queue {
|
||
|
@@ -264,6 +267,7 @@ struct bfq_queue {
|
||
|
unsigned int requests_within_timer;
|
||
|
|
||
|
pid_t pid;
|
||
|
+ struct bfq_io_cq *bic;
|
||
|
|
||
|
/* weight-raising fields */
|
||
|
unsigned long wr_cur_max_time;
|
||
|
@@ -293,12 +297,34 @@ struct bfq_ttime {
|
||
|
* @icq: associated io_cq structure
|
||
|
* @bfqq: array of two process queues, the sync and the async
|
||
|
* @ttime: associated @bfq_ttime struct
|
||
|
+ * @wr_time_left: snapshot of the time left before weight raising ends
|
||
|
+ * for the sync queue associated to this process; this
|
||
|
+ * snapshot is taken to remember this value while the weight
|
||
|
+ * raising is suspended because the queue is merged with a
|
||
|
+ * shared queue, and is used to set @raising_cur_max_time
|
||
|
+ * when the queue is split from the shared queue and its
|
||
|
+ * weight is raised again
|
||
|
+ * @saved_idle_window: same purpose as the previous field for the idle
|
||
|
+ * window
|
||
|
+ * @saved_IO_bound: same purpose as the previous two fields for the I/O
|
||
|
+ * bound classification of a queue
|
||
|
+ * @cooperations: counter of consecutive successful queue merges underwent
|
||
|
+ * by any of the process' @bfq_queues
|
||
|
+ * @failed_cooperations: counter of consecutive failed queue merges of any
|
||
|
+ * of the process' @bfq_queues
|
||
|
*/
|
||
|
struct bfq_io_cq {
|
||
|
struct io_cq icq; /* must be the first member */
|
||
|
struct bfq_queue *bfqq[2];
|
||
|
struct bfq_ttime ttime;
|
||
|
int ioprio;
|
||
|
+
|
||
|
+ unsigned int wr_time_left;
|
||
|
+ unsigned int saved_idle_window;
|
||
|
+ unsigned int saved_IO_bound;
|
||
|
+
|
||
|
+ unsigned int cooperations;
|
||
|
+ unsigned int failed_cooperations;
|
||
|
};
|
||
|
|
||
|
enum bfq_device_speed {
|
||
|
@@ -511,7 +537,7 @@ enum bfqq_state_flags {
|
||
|
BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */
|
||
|
BFQ_BFQQ_FLAG_sync, /* synchronous queue */
|
||
|
BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
|
||
|
- BFQ_BFQQ_FLAG_IO_bound, /*
|
||
|
+ BFQ_BFQQ_FLAG_IO_bound, /*
|
||
|
* bfqq has timed-out at least once
|
||
|
* having consumed at most 2/10 of
|
||
|
* its budget
|
||
|
@@ -520,12 +546,13 @@ enum bfqq_state_flags {
|
||
|
* bfqq has proved to be slow and
|
||
|
* seeky until budget timeout
|
||
|
*/
|
||
|
- BFQ_BFQQ_FLAG_softrt_update, /*
|
||
|
+ BFQ_BFQQ_FLAG_softrt_update, /*
|
||
|
* may need softrt-next-start
|
||
|
* update
|
||
|
*/
|
||
|
BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
|
||
|
- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
|
||
|
+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
|
||
|
+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
|
||
|
};
|
||
|
|
||
|
#define BFQ_BFQQ_FNS(name) \
|
||
|
@@ -554,6 +581,7 @@ BFQ_BFQQ_FNS(IO_bound);
|
||
|
BFQ_BFQQ_FNS(constantly_seeky);
|
||
|
BFQ_BFQQ_FNS(coop);
|
||
|
BFQ_BFQQ_FNS(split_coop);
|
||
|
+BFQ_BFQQ_FNS(just_split);
|
||
|
BFQ_BFQQ_FNS(softrt_update);
|
||
|
#undef BFQ_BFQQ_FNS

--
1.9.3