Motr  M0
io_req.c
1 /* -*- C -*- */
2 /*
3  * Copyright (c) 2020 Seagate Technology LLC and/or its Affiliates
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  * For any questions about this software or licensing,
18  * please email opensource@seagate.com or cortx-questions@seagate.com.
19  *
20  */
21 
22 
23 #include "motr/client.h"
24 #include "motr/client_internal.h"
25 #include "motr/addb.h"
26 #include "motr/pg.h"
27 #include "motr/io.h"
28 
29 #include "lib/errno.h"
30 #include "lib/semaphore.h" /* m0_semaphore_{down|up}*/
31 #include "fid/fid.h" /* m0_fid */
32 #include "rpc/rpclib.h" /* m0_rpc_client_connect */
33 #include "lib/ext.h" /* struct m0_ext */
34 #include "lib/misc.h" /* M0_KEY_VAL_NULL */
35 #include "lib/cksum.h"
36 
37 #define M0_TRACE_SUBSYSTEM M0_TRACE_SUBSYS_CLIENT
38 #include "lib/trace.h" /* M0_LOG */
39 
40 /*
41  * CPU usage threshold for parity calculation which is introduced by
42  * commit d4fcee53611e to solve the LNet timeout problem caused by
43  * IO overusing CPUs.
44  */
45 struct m0_semaphore cpus_sem;
46 
48 static struct m0_sm_state_descr io_states[] = {
49  [IRS_INITIALIZED] = {
50  .sd_flags = M0_SDF_INITIAL,
51  .sd_name = "IO_initial",
52  .sd_allowed = M0_BITS(IRS_READING, IRS_WRITING,
53  IRS_FAILED, IRS_REQ_COMPLETE),
54  },
55  [IRS_READING] = {
56  .sd_name = "IO_reading",
57  .sd_allowed = M0_BITS(IRS_READ_COMPLETE, IRS_FAILED),
58  },
59  [IRS_READ_COMPLETE] = {
60  .sd_name = "IO_read_complete",
61  .sd_allowed = M0_BITS(IRS_WRITING, IRS_REQ_COMPLETE,
62  IRS_DEGRADED_READING, IRS_FAILED,
63  IRS_READING),
64  },
65  [IRS_DEGRADED_READING] = {
66  .sd_name = "IO_degraded_read",
67  .sd_allowed = M0_BITS(IRS_READ_COMPLETE, IRS_FAILED),
68  },
69  [IRS_DEGRADED_WRITING] = {
70  .sd_name = "IO_degraded_write",
71  .sd_allowed = M0_BITS(IRS_WRITE_COMPLETE, IRS_FAILED),
72  },
73  [IRS_TRUNCATE] = {
74  .sd_name = "IO_truncate",
75  .sd_allowed = M0_BITS(IRS_TRUNCATE_COMPLETE, IRS_FAILED),
76  },
77  [IRS_TRUNCATE_COMPLETE] = {
78  .sd_name = "IO_truncate_complete",
79  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE, IRS_FAILED),
80  },
81  [IRS_WRITING] = {
82  .sd_name = "IO_writing",
83  .sd_allowed = M0_BITS(IRS_WRITE_COMPLETE, IRS_FAILED),
84  },
85  [IRS_WRITE_COMPLETE] = {
86  .sd_name = "IO_write_complete",
87  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE, IRS_FAILED,
88  IRS_DEGRADED_WRITING,
89  IRS_TRUNCATE),
90  },
91  [IRS_FAILED] = {
92  /* XXX Add M0_SDF_TERMINAL | M0_SDF_FINAL ? */
93  .sd_flags = M0_SDF_FAILURE,
94  .sd_name = "IO_req_failed",
95  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE),
96  },
97  [IRS_REQ_COMPLETE] = {
98  /* XXX Add M0_SDF_FINAL ? */
99  .sd_flags = M0_SDF_TERMINAL,
100  .sd_name = "IO_req_complete",
101  },
102 };
103 
104 static struct m0_sm_trans_descr ioo_trans[] = {
105  { "init-reading", IRS_INITIALIZED, IRS_READING },
106  { "init-writing", IRS_INITIALIZED, IRS_WRITING },
107  { "init-complete", IRS_INITIALIZED, IRS_REQ_COMPLETE },
108  { "init-failed", IRS_INITIALIZED, IRS_FAILED },
109 
110  { "read-complete", IRS_READING, IRS_READ_COMPLETE },
111  { "read-failed", IRS_READING, IRS_FAILED },
112  { "write-complete", IRS_WRITING, IRS_WRITE_COMPLETE },
113  { "write-failed", IRS_WRITING, IRS_FAILED },
114 
115  { "rcompl-write", IRS_READ_COMPLETE, IRS_WRITING },
116  { "rcompl-complete", IRS_READ_COMPLETE, IRS_REQ_COMPLETE },
117  { "rcompl-dgread", IRS_READ_COMPLETE, IRS_DEGRADED_READING },
118  { "rcompl-failed", IRS_READ_COMPLETE, IRS_FAILED },
119  { "rcompl-reading", IRS_READ_COMPLETE, IRS_READING },
120 
121  { "wcompl-dgwrite", IRS_WRITE_COMPLETE, IRS_DEGRADED_WRITING },
122  { "wcompl-complete", IRS_WRITE_COMPLETE, IRS_REQ_COMPLETE },
123  { "wcompl-trunc", IRS_WRITE_COMPLETE, IRS_TRUNCATE },
124  { "wcompl-failed", IRS_WRITE_COMPLETE, IRS_FAILED },
125 
126  { "trunc-tcompl", IRS_TRUNCATE, IRS_TRUNCATE_COMPLETE },
127  { "trunc-failed", IRS_TRUNCATE, IRS_FAILED },
128 
129  { "tcompl-complete", IRS_TRUNCATE_COMPLETE, IRS_REQ_COMPLETE },
130  { "tcompl-failed", IRS_TRUNCATE_COMPLETE, IRS_FAILED },
131 
132  { "dgread-rcompl", IRS_DEGRADED_READING, IRS_READ_COMPLETE },
133  { "dgread-failed", IRS_DEGRADED_READING, IRS_FAILED },
134  { "dgwrite-wcompl", IRS_DEGRADED_WRITING, IRS_WRITE_COMPLETE },
135  { "dgwrite-failed", IRS_DEGRADED_WRITING, IRS_FAILED },
136 
137  { "failed-complete", IRS_FAILED, IRS_REQ_COMPLETE },
138 };
139 
141 struct m0_sm_conf io_sm_conf = {
142  .scf_name = "IO request state machine configuration",
143  .scf_nr_states = ARRAY_SIZE(io_states),
144  .scf_state = io_states,
145  .scf_trans = ioo_trans,
146  .scf_trans_nr = ARRAY_SIZE(ioo_trans),
147 };
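/*
 * Illustrative sketch (not part of the original source): io_sm_conf above is
 * consumed by Motr's generic state machine code; a per-request machine would
 * be initialised in its initial state under the operation's group, e.g.
 *
 *     m0_sm_init(&ioo->ioo_sm, &io_sm_conf, IRS_INITIALIZED, grp);
 *
 * after which the m0_sm_move()/m0_sm_state_set() calls in this file are
 * validated against io_states[] and ioo_trans[].
 */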
148 
150 const struct m0_bob_type ioo_bobtype;
151 M0_BOB_DEFINE(M0_INTERNAL, &ioo_bobtype, m0_op_io);
152 
153 const struct m0_bob_type ioo_bobtype = {
154  .bt_name = "m0_op_io_bobtype",
155  .bt_magix_offset = offsetof(struct m0_op_io, ioo_magic),
156  .bt_magix = M0_IOREQ_MAGIC,
157  .bt_check = NULL,
158 };
159 
168 static bool is_pver_dud(uint32_t fdev_nr, uint32_t dev_k, uint32_t fsvc_nr,
169  uint32_t svc_k, uint32_t fnode_nr, uint32_t node_k)
170 {
171  if (fdev_nr > 0 && dev_k == 0)
172  return M0_RC(true);
173  if (fsvc_nr > 0 && svc_k == 0)
174  return M0_RC(true);
175  if (fnode_nr > 0 && node_k == 0)
176  return M0_RC(true);
177 
178  /* Summation of F(l) / K(l) across node, service and device */
179  if (node_k + fnode_nr > 0)
180  return M0_RC((fnode_nr * dev_k * svc_k +
181  node_k * (fdev_nr * svc_k + fsvc_nr * dev_k)) >
182  node_k * dev_k * svc_k);
183  else if (svc_k + fsvc_nr > 0)
184  return M0_RC((fdev_nr * svc_k + fsvc_nr * dev_k) >
185  dev_k * svc_k);
186  else
187  return M0_RC(fdev_nr > dev_k);
188 }
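/*
 * Worked example (illustrative, not part of the original source): with
 * tolerances dev_k = 2, svc_k = 1, node_k = 1 and a single failed device
 * (fdev_nr = 1, fsvc_nr = fnode_nr = 0), the summation above gives
 *
 *     0*2*1 + 1*(1*1 + 0*2) = 1  <=  1*2*1 = 2,
 *
 * so the pool version is still usable; with fdev_nr = 3 the left side
 * becomes 3 > 2 and device_check() below fails the request with -EIO.
 */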
189 
193 M0_INTERNAL void ioreq_sm_state_set_locked(struct m0_op_io *ioo,
194  int state)
195 {
196  M0_ENTRY();
197 
198  M0_PRE(ioo != NULL);
199  M0_PRE(m0_sm_group_is_locked(ioo->ioo_sm.sm_grp));
200 
201  M0_LOG(M0_INFO, "[%p] IO request changes state %s -> %s",
202  ioo, io_states[ioreq_sm_state(ioo)].sd_name,
203  io_states[state].sd_name);
204  m0_sm_state_set(&ioo->ioo_sm, state);
205 
206  M0_LEAVE();
207 }
208 
212 M0_INTERNAL void ioreq_sm_failed_locked(struct m0_op_io *ioo, int rc)
213 {
214  M0_ENTRY();
215 
216  M0_PRE(ioo != NULL);
217  M0_PRE(m0_sm_group_is_locked(ioo->ioo_sm.sm_grp));
218 
219  /* Set the io operation state - FAILED isn't a terminal state */
220  m0_sm_move(&ioo->ioo_sm, rc, IRS_FAILED);
221 
222  M0_LEAVE();
223 }
224 
225 static void ioreq_sm_executed_post(struct m0_op_io *ioo)
226 {
227 
228  M0_ENTRY();
229 
230  M0_PRE(ioo != NULL);
231  M0_PRE(m0_sm_group_is_locked(ioo->ioo_sm.sm_grp));
232 
233  ioo->ioo_ast.sa_cb = ioo->ioo_ops->iro_iosm_handle_executed;
234  m0_sm_ast_post(ioo->ioo_oo.oo_sm_grp, &ioo->ioo_ast);
235 
236  M0_LEAVE();
237 }
238 
239 static int truncate_dispatch(struct m0_op_io *ioo)
240 {
241  int rc = 0;
242  struct m0_op *op;
243 
244  M0_ENTRY();
245 
246  M0_PRE(ioo != NULL);
247  op = &ioo->ioo_oo.oo_oc.oc_op;
248 
249  if (ioreq_sm_state(ioo) == IRS_WRITE_COMPLETE &&
250  op->op_code == M0_OC_FREE) {
251  ioreq_sm_state_set_locked(ioo, IRS_TRUNCATE);
252  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(&ioo->ioo_nwxfer);
253  }
254 
255  return M0_RC(rc);
256 }
257 
265 static void nw_xfer_device_state_reset(struct nw_xfer_request *xfer)
266 {
267  struct target_ioreq *ti;
268 
269  M0_ENTRY();
270 
271  M0_PRE(xfer != NULL);
272  M0_PRE(xfer->nxr_state == NXS_COMPLETE);
273 
274  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
275  ti->ti_state = M0_PNDS_ONLINE;
276  } m0_htable_endfor;
277 
278  M0_LEAVE();
279 }
280 
287 static void ioreq_ioo_reset(struct m0_op_io *ioo)
288 {
289  struct nw_xfer_request *xfer;
290  struct target_ioreq *ti;
291 
292  M0_ENTRY();
293 
294  M0_PRE(ioo != NULL);
295  xfer = &ioo->ioo_nwxfer;
296 
297  xfer->nxr_rc = 0;
298  xfer->nxr_bytes = 0;
299 
300  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
301  ti->ti_rc = 0;
302  } m0_htable_endfor;
303 
304  ioo->ioo_rc = 0;
305  M0_LEAVE();
306 }
307 
316 static void ioreq_iosm_handle_launch(struct m0_sm_group *grp,
317  struct m0_sm_ast *ast)
318 {
319  int rc;
320  struct m0_op *op;
321  struct m0_op_io *ioo;
322  struct m0_pdclust_layout *play;
323 
324  M0_ENTRY();
325 
326  M0_PRE(grp != NULL);
327  M0_PRE(m0_sm_group_is_locked(grp));
328  M0_PRE(ast != NULL);
329  ioo = bob_of(ast, struct m0_op_io, ioo_ast, &ioo_bobtype);
330  M0_PRE_EX(m0_op_io_invariant(ioo));
331  op = &ioo->ioo_oo.oo_oc.oc_op;
332  play = pdlayout_get(ioo);
333 
334  /* @todo Do error handling based on m0_sm::sm_rc. */
335  /*
336  * Since m0_sm is part of io_request, for any parity group
337  * which is partial, read-modify-write state transition is followed
338  * for all parity groups.
339  */
340  if (ioo->ioo_map_idx == ioo->ioo_iomap_nr) {
341  enum ioreq_state state;
342 
343  state = (op->op_code == M0_OC_READ) ?
344  IRS_READING : IRS_WRITING;
345 
346  if (state == IRS_WRITING) {
347  if (op->op_code != M0_OC_FREE) {
348  rc = ioo->ioo_ops->iro_application_data_copy(ioo,
349  CD_COPY_FROM_APP, 0);
350  if (rc != 0) {
351  M0_LOG(M0_ERROR, "iro_application_data_copy() "
352  "failed: rc=%d", rc);
353  goto fail_locked;
354  }
355  }
356  if (!m0_pdclust_is_replicated(play)) {
357  rc = ioo->ioo_ops->iro_parity_recalc(ioo);
358  if (rc != 0) {
359  M0_LOG(M0_ERROR, "iro_parity_recalc() "
360  "failed: rc=%d", rc);
361  goto fail_locked;
362  }
363  }
364  }
365 
366  ioreq_sm_state_set_locked(ioo, state);
367  M0_ASSERT(ergo(op->op_code == M0_OC_FREE,
368  ioreq_sm_state(ioo) == IRS_WRITING));
369  if (op->op_code == M0_OC_FREE) {
369  ioreq_sm_state_set_locked(ioo, IRS_WRITE_COMPLETE);
370  rc = truncate_dispatch(ioo);
371 
372  goto out;
373  }
374  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(&ioo->ioo_nwxfer);
375  if (rc != 0) {
376  M0_LOG(M0_ERROR, "nxo_dispatch() failed: rc=%d", rc);
377  goto fail_locked;
378  }
379  } else {
380  struct target_ioreq *ti;
381  uint32_t seg;
382  m0_bcount_t read_pages = 0;
383 
384  ioreq_sm_state_set_locked(ioo, IRS_READING);
385 
386  m0_htable_for(tioreqht, ti, &ioo->ioo_nwxfer.nxr_tioreqs_hash) {
387  for (seg = 0; seg < ti->ti_bufvec.ov_vec.v_nr; ++seg)
388  if (ti->ti_pageattrs[seg] & PA_READ)
389  ++read_pages;
390  } m0_htable_endfor;
391 
392  /* Read IO is issued only if byte count > 0. */
393  if (read_pages > 0) {
394  ioo->ioo_rmw_read_pages = read_pages;
395  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(
396  &ioo->ioo_nwxfer);
397  if (rc != 0) {
399  "nxo_dispatch() failed: rc=%d", rc);
400  goto fail_locked;
401  }
402  } else {
403  /* Don't want the sm to complain (state transition)*/
404  ioreq_sm_state_set_locked(ioo, IRS_READ_COMPLETE);
405 
406  /*
407  * If there is no READ IO issued, switch to
408  * ioreq iosm_handle_executed
409  */
410  ioreq_sm_executed_post(ioo);
411  }
412  }
413 out:
414  M0_LOG(M0_INFO, "nxr_bytes = %" PRIu64 ", copied_nr = %"PRIu64,
415  ioo->ioo_nwxfer.nxr_bytes, ioo->ioo_copied_nr);
416 
417  /* lock this as it isn't a locality group lock */
418  m0_sm_group_lock(&op->op_sm_group);
419  m0_sm_move(&op->op_sm, 0, M0_OS_LAUNCHED);
420  m0_sm_group_unlock(&op->op_sm_group);
421 
422  M0_LEAVE();
423  return;
424 
425 fail_locked:
426  ioo->ioo_rc = rc;
427  ioreq_sm_failed_locked(ioo, rc);
428  /* N.B. Failed is not a terminal state */
429  ioreq_sm_state_set_locked(ioo, IRS_REQ_COMPLETE);
430 
431  /* fixed by commit 5a189beac81297ec9ea1cecf7016697aa02b0182 */
432  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, false);
433 
434  /* Move the operation state machine along */
435  m0_sm_group_lock(&op->op_sm_group);
436  m0_sm_fail(&op->op_sm, M0_OS_FAILED, rc);
437  m0_op_failed(op);
438  m0_sm_group_unlock(&op->op_sm_group);
439 
440  M0_LOG(M0_ERROR, "ioreq_iosm_handle_launch failed");
441  M0_LEAVE();
442 }
443 
451 static void ioreq_iosm_handle_executed(struct m0_sm_group *grp,
452  struct m0_sm_ast *ast)
453 {
454  int rc;
455  bool rmw;
456  struct m0_client *instance;
457  struct m0_op *op;
458  struct m0_op_io *ioo;
459  struct m0_pdclust_layout *play;
460 
461  M0_ENTRY("op_io:ast %p", ast);
462 
463  M0_PRE(grp != NULL);
464  M0_PRE(ast != NULL);
466  ioo = bob_of(ast, struct m0_op_io, ioo_ast, &ioo_bobtype);
467  M0_PRE_EX(m0_op_io_invariant(ioo));
468  op = &ioo->ioo_oo.oo_oc.oc_op;
469  instance = m0__op_instance(op);
470  M0_PRE(instance != NULL);
471 
472  play = pdlayout_get(ioo);
473 
474  /* @todo Do error handling based on m0_sm::sm_rc. */
475  /*
476  * Since m0_sm is part of io_request, for any parity group
477  * which is partial, read-modify-write state transition is followed
478  * for all parity groups.
479  */
480  M0_LOG(M0_DEBUG, "map=%" PRIu64 " map_nr=%"PRIu64,
481  ioo->ioo_map_idx, ioo->ioo_iomap_nr);
482  rmw = ioo->ioo_map_idx != ioo->ioo_iomap_nr;
483  if (op->op_code == M0_OC_FREE)
484  goto done;
485  if (!rmw) {
486  enum ioreq_state state;
487 
488  state = op->op_code == M0_OC_READ ?
489  IRS_READ_COMPLETE : IRS_WRITE_COMPLETE;
490  M0_ASSERT(ioreq_sm_state(ioo) == state);
491  if (ioo->ioo_rc != 0) {
492  rc = ioo->ioo_rc;
493  M0_LOG(M0_DEBUG, "ioo->ioo_rc = %d", rc);
494  goto fail_locked;
495  }
496  if (state == IRS_READ_COMPLETE) {
497  /*
498  * Returns immediately if all devices are
499  * in healthy state.
500  */
501  rc = ioo->ioo_ops->iro_dgmode_read(ioo, rmw);
502  if (rc != 0) {
503  M0_LOG(M0_INFO,
504  "iro_dgmode_read() returns error: %d",
505  rc);
506  goto fail_locked;
507  }
508 
509  /*
510  * If ioo's state has been changed to IRS_READING
511  * or IRS_DEGRADED_READING, this means iro_dgmode_read
512  * has just issued DGMODE IO; simply exit and it
513  * will re-enter here later. Otherwise proceed to
514  * read_verify and to copy data to APP.
515  */
516  if (ioreq_sm_state(ioo) != IRS_READ_COMPLETE)
517  goto out;
518 
519  rc = ioo->ioo_ops->iro_parity_verify(ioo);
520  if (rc != 0) {
522  "parity verification failed: rc=%d", rc);
523  goto fail_locked;
524  }
525 
526  if ((op->op_code == M0_OC_READ &&
527  instance->m0c_config->mc_is_read_verify) &&
528  ioo->ioo_dgmap_nr > 0)
529  rc = ioo->ioo_ops->iro_dgmode_recover(ioo);
530 
531  /* Valid data are available now, copy to application */
532  rc = ioo->ioo_ops->iro_application_data_copy(ioo,
533  CD_COPY_TO_APP, 0);
534  if (rc != 0) {
535  M0_LOG(M0_ERROR, "iro_application_data_copy() "
536  "failed (to APP): rc=%d", rc);
537  goto fail_locked;
538  }
539  } else {
540  M0_ASSERT(state == IRS_WRITE_COMPLETE);
541 
542  /*
543  * Returns immediately if all devices are
544  * in healthy state.
545  */
546  rc = ioo->ioo_ops->iro_dgmode_write(ioo, rmw);
547  if (rc != 0) {
548  M0_LOG(M0_ERROR, "iro_dgmode_write() failed, "
549  "rc=%d", rc);
550  goto fail_locked;
551  }
552 
553  rc = truncate_dispatch(ioo);
554  if (rc != 0) {
555  M0_LOG(M0_ERROR, "nxo_dispatch() failed: "
556  "rc=%d", rc);
557  goto fail_locked;
558  }
559 
560  if (ioreq_sm_state(ioo) != IRS_WRITE_COMPLETE)
561  goto out;
562  }
563  } else {
564  /*
565  * First stage of RMW done: ioo's state should be
566  * IRS_READ_COMPLETE when it reaches here.
567  */
568  if (ioreq_sm_state(ioo) == IRS_READ_COMPLETE &&
569  op->op_code != M0_OC_FREE) {
570  /*
571  * If fops dispatch fails, we need to wait till all io
572  * fop callbacks are acked since IO fops have already
573  * been dispatched.
574  *
575  * Only fully modified pages from parity groups which
576  * have chosen read-rest approach or aligned parity
577  * groups are copied, since the read-old approach needs
578  * reading of all spanned pages (no matter fully
579  * modified or partially modified) in order to
580  * calculate parity correctly.
581  */
582  rc = ioo->ioo_ops->iro_application_data_copy(ioo,
583  CD_COPY_FROM_APP, PA_FULLPAGE_MODIFY);
584  if (rc != 0) {
585  M0_LOG(M0_ERROR, "iro_application_data_copy() "
586  "on FULLPAGE failed: rc=%d", rc);
587  goto fail_locked;
588  }
589 
590  /* Copies
591  * - fully modified pages from parity groups which have
592  * chosen read_old approach and
593  * - partially modified pages from all parity groups.
594  */
595  rc = ioo->ioo_ops->iro_application_data_copy(
596  ioo, CD_COPY_FROM_APP, 0);
597  if (rc != 0) {
598  M0_LOG(M0_ERROR, "iro_application_data_copy() "
599  "failed: rc=%d", rc);
600  goto fail_locked;
601  }
602  }
603  if (ioreq_sm_state(ioo) == IRS_READ_COMPLETE) {
604  /* Finalizes the old read fops. */
605  if (ioo->ioo_rmw_read_pages > 0) {
606  ioo->ioo_nwxfer.nxr_ops->nxo_complete(
607  &ioo->ioo_nwxfer, rmw);
608 
609  /*
610  * There is a subtle case for first write
611  * to an object when CROW optimisation is used:
612  * if it is a RMW write, it sends a read request
613  * first as Client doesn't have the concept of
614  * object size and an -ENOENT error will be
615  * returned as nothing exists in
616  * the ioservice yet.
617  *
618  * Client has to trust the application that it
619  * has checked the existence of an object, so
620  * we can safely ignore the -ENOENT error here.
621  */
622  if (ioo->ioo_rc == -ENOENT)
623  ioreq_ioo_reset(ioo);
624  else if (ioo->ioo_rc != 0) {
625  M0_LOG(M0_ERROR, "ioo->ioo_rc=%d",
626  ioo->ioo_rc);
627 
628  rc = ioo->ioo_rc;
629  goto fail_locked;
630  }
631  nw_xfer_device_state_reset(&ioo->ioo_nwxfer);
632  }
633 
634  /* Prepare for the Write fops */
635  ioreq_sm_state_set_locked(ioo, IRS_WRITING);
636  if (!m0_pdclust_is_replicated(play)) {
637  rc = ioo->ioo_ops->iro_parity_recalc(ioo);
638  if (rc != 0) {
639  M0_LOG(M0_ERROR, "iro_parity_recalc()"
640  "failed: rc=%d", rc);
641  goto fail_locked;
642  }
643  }
644 
645  rc = ioo->ioo_nwxfer.nxr_ops->nxo_dispatch(
646  &ioo->ioo_nwxfer);
647  if (rc != 0) {
648  M0_LOG(M0_ERROR, "nxo_dispatch() failed: "
649  "rc=%d", rc);
650  goto fail_locked;
651  }
652 
653  /*
654  * Simply return here as the WRITE op will re-enter
655  * ioreq_iosm_handle_executed with a different state.
656  */
657  goto out;
658 
659  } else {
660  /* 2nd stage of RMW done [WRITE] */
661  M0_ASSERT(ioreq_sm_state(ioo) == IRS_WRITE_COMPLETE);
662 
663  /*
664  * Returns immediately if all devices are in healthy
665  * state.
666  */
667  rc = ioo->ioo_ops->iro_dgmode_write(ioo, rmw);
668  if (rc != 0) {
669  M0_LOG(M0_ERROR, "iro_dgmode_write() failed: "
670  "rc=%d", rc);
671  goto fail_locked;
672  }
673 
674  rc = truncate_dispatch(ioo);
675  if (rc != 0) {
676  M0_LOG(M0_ERROR, "nxo_dispatch() failed: "
677  "rc=%d", rc);
678  goto fail_locked;
679  }
680 
681  if (ioreq_sm_state(ioo) != IRS_WRITE_COMPLETE)
682  goto out;
683  }
684  }
685 done:
686  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, rmw);
687  ioo->ioo_rc = 0;
688 
689 #ifdef CLIENT_FOR_M0T1FS
690  /* XXX: TODO: update the inode size on the mds */
691 #endif
692 
693  if (rmw)
694  ioreq_sm_state_set_locked(ioo, IRS_REQ_COMPLETE);
695 
696  /*
697  * Move the operation state machine along: due to the lack of
698  * mechanism in Motr to inform Client if data (or FOL) has been safely
699  * written to disk (this could be done by piggy-backing the max committed
700  * tx id or by explicitly syncing data), Client assumes data is safe when
701  * it receives all replies from ioservices at this moment (although this
702  * is not true) and moves the state of this 'op' to STABLE.
703  *
704  * Client introduced SYNC APIs to allow an application to explicitly
705  * flush data to disks.
706  */
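/*
 * Illustrative sketch (not part of the original source): an application that
 * needs the data to be persistent, rather than merely replied to, can follow
 * the write with an explicit SYNC op, roughly (obj being the application's
 * struct m0_obj):
 *
 *     struct m0_op *sop = NULL;
 *     rc = m0_sync_op_init(&sop) ?:
 *          m0_sync_entity_add(sop, &obj->ob_entity);
 *     if (rc == 0) {
 *             m0_op_launch(&sop, 1);
 *             rc = m0_op_wait(sop, M0_BITS(M0_OS_STABLE, M0_OS_FAILED),
 *                             M0_TIME_NEVER);
 *     }
 */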
707 
708  m0_sm_group_lock(&op->op_sm_group);
709  m0_sm_move(&op->op_sm, ioo->ioo_rc, M0_OS_EXECUTED);
710  m0_op_executed(op);
711  if (M0_IN(op->op_code, (M0_OC_READ, M0_OC_WRITE,
712  M0_OC_FREE))) {
713  m0_sm_move(&op->op_sm, ioo->ioo_rc, M0_OS_STABLE);
714  m0_op_stable(op);
715  }
716  m0_sm_group_unlock(&op->op_sm_group);
717 
718  /* Post-processing for object op. */
719  m0__obj_op_done(op);
720 
721 out:
722  M0_LEAVE();
723  return;
724 
725 fail_locked:
726  ioo->ioo_rc = rc;
727  ioreq_sm_failed_locked(ioo, rc);
728  /* N.B. Failed is not a terminal state */
729  ioreq_sm_state_set_locked(ioo, IRS_REQ_COMPLETE);
730  /* XXX: a workaround to prevent kernel panic. how to do it correctly? */
731 #if 1 || BACKPORT_UPSTREAM_FIX
732  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, false);
733 #else
734  ioo->ioo_nwxfer.nxr_ops->nxo_complete(&ioo->ioo_nwxfer, rmw);
735 #endif
736 
737  /* As per bug MOTR-2575, rc will be reported in op->op_rc and the
738  * op will be completed with status M0_OS_STABLE */
739  op->op_rc = ioo->ioo_rc;
740  /* Move the operation state machine along */
741  m0_sm_group_lock(&op->op_sm_group);
742  m0_sm_move(&op->op_sm, 0, M0_OS_EXECUTED);
743  m0_op_executed(op);
744  m0_sm_move(&op->op_sm, 0, M0_OS_STABLE);
745  m0_op_stable(op);
746  m0_sm_group_unlock(&op->op_sm_group);
747 
749 
750  M0_LOG(M0_DEBUG, "ioreq_iosm_handle_executed failed, rc=%d", rc);
751  M0_LEAVE();
752  return;
753 }
754 
761 static void ioreq_iomaps_destroy(struct m0_op_io *ioo)
762 {
763  uint64_t i;
764 
765  M0_ENTRY("op_io %p", ioo);
766 
767  M0_PRE(ioo != NULL);
768  M0_PRE(ioo->ioo_iomaps != NULL);
769 
770  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
771  if (ioo->ioo_iomaps[i] != NULL) {
772  pargrp_iomap_fini(ioo->ioo_iomaps[i], ioo->ioo_obj);
773  m0_free0(&ioo->ioo_iomaps[i]);
774  }
775  }
776  m0_free0(&ioo->ioo_iomaps);
777  ioo->ioo_iomap_nr = 0;
778 
779  M0_LEAVE();
780 }
781 
786 static int ioreq_iomaps_parity_groups_cal(struct m0_op_io *ioo)
787 {
788  uint64_t seg;
789  uint64_t grp;
790  uint64_t grpstart;
791  uint64_t grpend;
792  uint64_t *grparray;
793  uint64_t grparray_sz;
794  struct m0_pdclust_layout *play;
795 
796  M0_ENTRY();
797 
798  play = pdlayout_get(ioo);
799 
800  /* Array of maximum possible number of groups spanned by req. */
801  grparray_sz = m0_vec_count(&ioo->ioo_ext.iv_vec) / data_size(play) +
802  2 * SEG_NR(&ioo->ioo_ext);
803  M0_LOG(M0_DEBUG, "ioo=%p arr_sz=%"PRIu64, ioo, grparray_sz);
804  M0_ALLOC_ARR(grparray, grparray_sz);
805  if (grparray == NULL)
806  return M0_ERR_INFO(-ENOMEM, "Failed to allocate memory"
807  " for grparray");
808  /*
809  * Finds out the total number of parity groups spanned by
810  * m0_op_io::ioo_ext.
811  */
812  for (seg = 0; seg < SEG_NR(&ioo->ioo_ext); ++seg) {
813  grpstart = group_id(INDEX(&ioo->ioo_ext, seg), data_size(play));
814  grpend = group_id(seg_endpos(&ioo->ioo_ext, seg) - 1,
815  data_size(play));
816  for (grp = grpstart; grp <= grpend; ++grp) {
817  uint64_t i;
818  /*
819  * grparray is a temporary array to record found groups.
820  * Scan this array for [grpstart, grpend].
821  * If not found, we got a new group, record it and
822  * increase ioo_iomap_nr.
823  */
824  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
825  if (grparray[i] == grp)
826  break;
827  }
828  if (i == ioo->ioo_iomap_nr) { /* new grp */
829  M0_ASSERT_INFO(i < grparray_sz,
830  "nr=%" PRIu64 " size=%"PRIu64,
831  i , grparray_sz);
832  grparray[i] = grp;
833  ++ioo->ioo_iomap_nr;
834  }
835  }
836  }
837  m0_free(grparray);
838  return M0_RC(0);
839 }
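/*
 * Worked example (illustrative, not part of the original source): with a
 * parity group data size of 4MB and a single extent segment [6MB, 11MB),
 * grpstart = 1 and grpend = 2 (groups covering [4MB,8MB) and [8MB,12MB)),
 * so ioo_iomap_nr ends up as 2; grparray only de-duplicates group ids when
 * several extent segments fall into the same parity group.
 */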
840 
841 static void set_paritybuf_type(struct m0_op_io *ioo)
842 {
843 
844  struct m0_pdclust_layout *play = pdlayout_get(ioo);
845  struct m0_op *op = &ioo->ioo_oo.oo_oc.oc_op;
846  struct m0_client *cinst = m0__op_instance(op);
847 
848 
849  if (m0__is_read_op(op) && m0__obj_is_parity_verify_mode(cinst))
850  ioo->ioo_pbuf_type = M0_PBUF_DIR;
851  else if (m0__is_update_op(op) && m0_pdclust_is_replicated(play))
852  ioo->ioo_pbuf_type = M0_PBUF_IND;
853  else
854  ioo->ioo_pbuf_type = M0_PBUF_NONE;
855 }
856 
864 static int ioreq_iomaps_prepare(struct m0_op_io *ioo)
865 {
866  bool bufvec = true;
867  int rc;
868  uint64_t i;
869  struct pargrp_iomap *iomap;
870  struct m0_pdclust_layout *play;
871  struct m0_ivec_cursor cursor;
872  struct m0_bufvec_cursor buf_cursor;
873 
874  M0_ENTRY("op_io = %p", ioo);
875 
876  M0_PRE(ioo != NULL);
877 
878  set_paritybuf_type(ioo);
879 
880  rc = ioreq_iomaps_parity_groups_cal(ioo);
881  if (rc != 0)
882  return M0_RC(rc);
883 
884  if (ioo->ioo_oo.oo_oc.oc_op.op_code == M0_OC_FREE)
885  bufvec = false;
886 
887  play = pdlayout_get(ioo);
888 
889  M0_LOG(M0_DEBUG, "ioo=%p spanned_groups=%"PRIu64
890  " [N,K,us]=[%d,%d,%" PRIu64 "]",
891  ioo, ioo->ioo_iomap_nr, layout_n(play),
892  layout_k(play), layout_unit_size(play));
893 
894  /* ioo->ioo_iomaps is zeroed out on allocation. */
895  M0_ALLOC_ARR(ioo->ioo_iomaps, ioo->ioo_iomap_nr);
896  if (ioo->ioo_iomaps == NULL) {
897  rc = -ENOMEM;
898  goto failed;
899  }
900 
901  m0_ivec_cursor_init(&cursor, &ioo->ioo_ext);
902  if (bufvec)
903  m0_bufvec_cursor_init(&buf_cursor, &ioo->ioo_data);
904  /*
905  * cursor is advanced maximum by parity group size in one iteration
906  * of this loop.
907  * This is done by pargrp_iomap::pi_ops::pi_populate().
908  */
909  for (i = 0; !m0_ivec_cursor_move(&cursor, 0); ++i) {
910  M0_ASSERT(i < ioo->ioo_iomap_nr);
911  M0_ASSERT(ioo->ioo_iomaps[i] == NULL);
912  M0_ALLOC_PTR(ioo->ioo_iomaps[i]);
913  if (ioo->ioo_iomaps[i] == NULL) {
914  rc = -ENOMEM;
915  goto failed;
916  }
917  iomap = ioo->ioo_iomaps[i];
918 
919  rc = pargrp_iomap_init(iomap, ioo,
920  group_id(m0_ivec_cursor_index(&cursor),
921  data_size(play)));
922  if (rc != 0) {
923  m0_free0(&ioo->ioo_iomaps[i]);
924  goto failed;
925  }
926 
927  /* @cursor is advanced in the following function */
928  rc = iomap->pi_ops->pi_populate(iomap, &cursor,
929  bufvec ? &buf_cursor : NULL);
930  if (rc != 0)
931  goto failed;
932  M0_LOG(M0_INFO, "iomap_id=%" PRIu64 " is populated",
933  iomap->pi_grpid);
934  }
935 
936  return M0_RC(0);
937 failed:
938  if (ioo->ioo_iomaps != NULL)
939  ioreq_iomaps_destroy(ioo);
940 
941  return M0_ERR(rc);
942 }
943 
956 static uint64_t data_buf_copy(struct data_buf *data,
957  struct m0_bufvec_cursor *app_datacur,
958  enum copy_direction dir)
959 {
960  void *app_data;
961  uint32_t app_data_len;
962  uint64_t copied = 0;
963  uint64_t bytes;
964 
965  M0_ENTRY();
966 
967  M0_PRE(data != NULL);
968  M0_PRE(app_datacur != NULL);
971 
972  bytes = data->db_buf.b_nob;
973  while (bytes > 0) {
974  app_data = m0_bufvec_cursor_addr(app_datacur);
975  app_data_len = m0_bufvec_cursor_step(app_datacur);
976 
977  /* Don't copy more bytes than we were supposed to */
978  app_data_len = (app_data_len < bytes)?app_data_len:bytes;
979 
980  if (app_data == NULL)
981  break;
982 
983  /* app_data == data->db_buf.b_addr implies zero copy */
984  if (app_data != data->db_buf.b_addr) {
985  if (dir == CD_COPY_FROM_APP)
986  memcpy((char*)data->db_buf.b_addr +
987  copied, app_data, app_data_len);
988  else
989  memcpy(app_data,
990  (char*)data->db_buf.b_addr +
991  copied, app_data_len);
992  }
993 
994  bytes -= app_data_len;
995  copied += app_data_len;
996 
997  if (m0_bufvec_cursor_move(app_datacur, app_data_len))
998  break;
999  }
1000 
1001  M0_LEAVE();
1002  return copied;
1003 }
1004 
1018 static int application_data_copy(struct pargrp_iomap *map,
1019  struct m0_obj *obj,
1020  m0_bindex_t start,
1021  m0_bindex_t end,
1022  struct m0_bufvec_cursor *datacur,
1023  enum copy_direction dir,
1024  enum page_attr filter)
1025 {
1026  uint64_t bytes;
1027  uint32_t row = 0;
1028  uint32_t col = 0;
1029  uint32_t m_col;
1030  struct data_buf *data;
1031  struct m0_pdclust_layout *play;
1032  struct m0_key_val *key_val;
1033  m0_bindex_t mask;
1034  m0_bindex_t grp_size;
1035 
1036  M0_ENTRY("Copy %s application, start = %8" PRIu64 ", end = %8"PRIu64,
1037  dir == CD_COPY_FROM_APP ? (char *)"from" : (char *)" to ",
1038  start, end);
1039 
1041  M0_PRE(map != NULL);
1042  M0_PRE(obj != NULL);
1043  M0_PRE(datacur != NULL);
1044  /* XXX: get rid of obj from the parameters */
1045  M0_PRE(map->pi_ioo->ioo_obj == obj);
1047  M0_PRE(end > start);
1048  /* start/end are in the same object block */
1049  M0_PRE(start >> obj->ob_attr.oa_bshift ==
1050  (end - 1) >> obj->ob_attr.oa_bshift);
1051  M0_PRE(datacur != NULL);
1052 
1053  play = pdlayout_get(map->pi_ioo);
1054  grp_size = data_size(play) * map->pi_grpid;
1055  /* Finds out the page from pargrp_iomap::pi_databufs. */
1056  page_pos_get(map, start, grp_size, &row, &col);
1058  if (play->pl_attr.pa_K == 0 ||
1059  m0_key_val_is_null(&map->pi_databufs[row][col]->db_maj_ele))
1060  data = map->pi_databufs[row][col];
1061  else {
1062  key_val = &map->pi_databufs[row][col]->db_maj_ele;
1063  m_col = *(uint32_t *)(key_val->kv_key.b_addr);
1064  if (m0_pdclust_unit_classify(play, m_col) == M0_PUT_DATA) {
1065  M0_ASSERT(m_col == 0);
1066  data = map->pi_databufs[row][m_col];
1067  } else if (m0_pdclust_unit_classify(play, m_col) ==
1068  M0_PUT_PARITY)
1069  data = map->pi_paritybufs[row][m_col - 1];
1070  else
1071  /* No way of getting spares. */
1072  M0_IMPOSSIBLE();
1073  }
1074  M0_ASSERT(data != NULL);
1075  mask = ~SHIFT2MASK(obj->ob_attr.oa_bshift);
1076 
1077  /* Client only supports whole block operations */
1078  M0_ASSERT(end - start == data->db_buf.b_nob);
1079 
1080  if (dir == CD_COPY_FROM_APP) {
1081  if ((data->db_flags & filter) == filter) {
1082  if (data->db_flags & PA_COPY_FRMUSR_DONE) {
1083  m0_bufvec_cursor_move(datacur, end - start);
1084  return M0_RC(0);
1085  }
1086 
1087  /*
1088  * Note: data has been read into auxiliary buffer
1089  * directly for READOLD method.
1090  */
1091  if (data->db_auxbuf.b_addr != NULL &&
1092  map->pi_rtype == PIR_READOLD) {
1093  if (filter != 0) {
1094  m0_bufvec_cursor_move(
1095  datacur, end - start);
1096  return M0_RC(0);
1097  }
1098  }
1099 
1100  /* Copies to appropriate offset within page. */
1101  bytes = data_buf_copy(data, datacur, dir);
1102  M0_LOG(M0_DEBUG, "%"PRIu64
1103  " bytes copied from application "
1104  "from offset %"PRIu64, bytes, start);
1105  map->pi_ioo->ioo_copied_nr += bytes;
1106 
1107  /*
1108  * application_data_copy() may be called to handle
1109  * only part of PA_FULLPAGE_MODIFY page.
1110  * In this case we should mark the page as done only
1111  * when the last piece is processed.
1112  * Otherwise, the rest piece of the page
1113  * will be ignored.
1114  */
1115  if (ergo(data->db_flags & PA_FULLPAGE_MODIFY,
1116  (end & mask) == 0))
1117  data->db_flags |= PA_COPY_FRMUSR_DONE;
1118 
1119  if (bytes != end - start)
1120  return M0_ERR_INFO(
1121  -EFAULT, "[%p] Failed to"
1122  " copy_from_user: %" PRIu64 " !="
1123  " %" PRIu64 " - %" PRIu64,
1124  map->pi_ioo, bytes, end, start);
1125  }
1126  } else {
1127  bytes = data_buf_copy(data, datacur, dir);
1128 
1129  map->pi_ioo->ioo_copied_nr += end - start - bytes;
1130 
1131  M0_LOG(M0_DEBUG, "%"PRIu64
1132  " bytes copied to application from offset " "%"PRIu64,
1133  bytes, start);
1134 
1135  if (bytes != end - start)
1136  return M0_ERR(-EFAULT);
1137  }
1138 
1139  return M0_RC(0);
1140 }
1141 
1142 /* This function calculates and verifies the checksum for data read.
1143  * It divides the data into multiple units and calls the client API
1144  * to verify the checksum for each data unit.
1145  */
1146 static bool verify_checksum(struct m0_op_io *ioo)
1147 {
1148  struct m0_pi_seed seed;
1149  struct m0_bufvec user_data = {};
1150  int usz;
1151  int rc;
1152  int count;
1153  int i;
1154  struct m0_generic_pi *pi_ondisk;
1155  struct m0_bufvec_cursor datacur;
1156  struct m0_bufvec_cursor tmp_datacur;
1157  struct m0_ivec_cursor extcur;
1158  uint32_t nr_seg;
1159  int attr_idx = 0;
1160  m0_bcount_t bytes;
1161 
1162  M0_ENTRY();
1163  usz = m0_obj_layout_id_to_unit_size(
1164  m0__obj_lid(ioo->ioo_obj));
1165 
1166  m0_bufvec_cursor_init(&datacur, &ioo->ioo_data);
1167  m0_bufvec_cursor_init(&tmp_datacur, &ioo->ioo_data);
1168  m0_ivec_cursor_init(&extcur, &ioo->ioo_ext);
1169 
1170  while ( !m0_bufvec_cursor_move(&datacur, 0) &&
1171  !m0_ivec_cursor_move(&extcur, 0) &&
1172  attr_idx < ioo->ioo_attr.ov_vec.v_nr){
1173 
1174  /* calculate number of segments required for 1 data unit */
1175  nr_seg = 0;
1176  count = usz;
1177  while (count > 0) {
1178  nr_seg++;
1179  bytes = m0_bufvec_cursor_step(&tmp_datacur);
1180  if (bytes < count) {
1181  m0_bufvec_cursor_move(&tmp_datacur, bytes);
1182  count -= bytes;
1183  }
1184  else {
1185  m0_bufvec_cursor_move(&tmp_datacur, count);
1186  count = 0;
1187  }
1188  }
1189 
1190  /* allocate an empty buf vec */
1191  rc = m0_bufvec_empty_alloc(&user_data, nr_seg);
1192  if (rc != 0) {
1193  M0_LOG(M0_ERROR, "buffer allocation failed, rc %d", rc);
1194  return false;
1195  }
1196 
1197  /* populate the empty buf vec with data pointers
1198  * and create 1 data unit worth of buf vec
1199  */
1200  i = 0;
1201  count = usz;
1202  while (count > 0) {
1203  bytes = m0_bufvec_cursor_step(&datacur);
1204  if (bytes < count) {
1205  user_data.ov_vec.v_count[i] = bytes;
1206  user_data.ov_buf[i] = m0_bufvec_cursor_addr(&datacur);
1207  m0_bufvec_cursor_move(&datacur, bytes);
1208  count -= bytes;
1209  }
1210  else {
1211  user_data.ov_vec.v_count[i] = count;
1212  user_data.ov_buf[i] = m0_bufvec_cursor_addr(&datacur);
1213  m0_bufvec_cursor_move(&datacur, count);
1214  count = 0;
1215  }
1216  i++;
1217  }
1218 
1219  if (ioo->ioo_attr.ov_vec.v_nr && ioo->ioo_attr.ov_vec.v_count[attr_idx] != 0) {
1220 
1221  seed.pis_data_unit_offset = m0_ivec_cursor_index(&extcur);
1222  seed.pis_obj_id.f_container = ioo->ioo_obj->ob_entity.en_id.u_hi;
1223  seed.pis_obj_id.f_key = ioo->ioo_obj->ob_entity.en_id.u_lo;
1224 
1225  pi_ondisk = (struct m0_generic_pi *)ioo->ioo_attr.ov_buf[attr_idx];
1226 
1227  if (!m0_calc_verify_cksum_one_unit(pi_ondisk, &seed, &user_data)) {
1228  return false;
1229  }
1230  }
1231 
1232  attr_idx++;
1233  m0_ivec_cursor_move(&extcur, usz);
1234 
1235  m0_bufvec_free2(&user_data);
1236  }
1237 
1238  if (m0_bufvec_cursor_move(&datacur, 0) &&
1239  m0_ivec_cursor_move(&extcur, 0) &&
1240  attr_idx == ioo->ioo_attr.ov_vec.v_nr) {
1241  return true;
1242  }
1243  else {
1244  /* something wrong, we terminated early */
1245  M0_IMPOSSIBLE("something wrong while arranging data");
1246  }
1247 }
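/*
 * Illustrative example (not part of the original source): for a 1MB unit
 * size and an 8MB read, ioo_attr holds 8 checksum blocks, one per data unit;
 * each iteration above rebuilds one unit worth of user data from ioo_data
 * and checks it against the corresponding on-disk PI block, seeded with the
 * object id and the unit's offset within the object.
 */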
1248 
1259 static int ioreq_application_data_copy(struct m0_op_io *ioo,
1260  enum copy_direction dir,
1261  enum page_attr filter)
1262 {
1263  int rc;
1264  uint64_t i;
1265  m0_bindex_t grpstart;
1266  m0_bindex_t grpend;
1267  m0_bindex_t pgstart;
1268  m0_bindex_t pgend;
1269  m0_bcount_t count;
1270  struct m0_bufvec_cursor appdatacur;
1271  struct m0_ivec_cursor extcur;
1272  struct m0_pdclust_layout *play;
1273 
1274  M0_ENTRY("op_io : %p, %s application. filter = 0x%x", ioo,
1275  dir == CD_COPY_FROM_APP ? (char *)"from" : (char *)"to",
1276  filter);
1277 
1280 
1281  m0_bufvec_cursor_init(&appdatacur, &ioo->ioo_data);
1282  m0_ivec_cursor_init(&extcur, &ioo->ioo_ext);
1283 
1284  play = pdlayout_get(ioo);
1285 
1286  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1288 
1289  count = 0;
1290  grpstart = data_size(play) * ioo->ioo_iomaps[i]->pi_grpid;
1291  grpend = grpstart + data_size(play);
1292 
1293  while (!m0_ivec_cursor_move(&extcur, count) &&
1294  m0_ivec_cursor_index(&extcur) < grpend) {
1295 
1296  pgstart = m0_ivec_cursor_index(&extcur);
1297  pgend = min64u(m0_round_up(pgstart + 1,
1298  m0__page_size(ioo)),
1299  pgstart + m0_ivec_cursor_step(&extcur));
1300  count = pgend - pgstart;
1301 
1302  /*
1303  * This takes care of finding correct page from
1304  * current pargrp_iomap structure from pgstart
1305  * and pgend.
1306  */
1307  rc = application_data_copy(
1308  ioo->ioo_iomaps[i], ioo->ioo_obj,
1309  pgstart, pgend, &appdatacur, dir, filter);
1310  if (rc != 0)
1311  return M0_ERR_INFO(
1312  rc, "[%p] Copy failed (pgstart=%" PRIu64
1313  " pgend=%" PRIu64 ")",
1314  ioo, pgstart, pgend);
1315  }
1316 
1317  }
1318 
1319  if (dir == CD_COPY_TO_APP) {
1320  /* Verify the checksum during data read.
1321  * Skip checksum verification during degraded I/O.
1322  */
1323  if (ioreq_sm_state(ioo) != IRS_DEGRADED_READING &&
1324  m0__obj_is_cksum_validation_allowed(ioo) &&
1325  !verify_checksum(ioo)) {
1326  return M0_RC(-EIO);
1327  }
1328  }
1329 
1330  return M0_RC(0);
1331 }
1332 
1340 static int ioreq_parity_recalc(struct m0_op_io *ioo)
1341 {
1342  int rc = 0;
1343  uint64_t i;
1344  struct pargrp_iomap *iomap;
1345 
1346  M0_ENTRY("io_request : %p", ioo);
1348 
1349  m0_semaphore_down(&cpus_sem);
1350 
1351  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1352  iomap = ioo->ioo_iomaps[i];
1353  rc = iomap->pi_ops->pi_parity_recalc(iomap);
1354  if (rc != 0)
1355  break;
1356  }
1357 
1358  m0_semaphore_up(&cpus_sem);
1359 
1360  return rc == 0 ? M0_RC(rc) :
1361  M0_ERR_INFO(rc, "Parity recalc failed for grpid=%3"PRIu64,
1362  iomap->pi_grpid);
1363 }
1364 
1372 static int ioreq_dgmode_recover(struct m0_op_io *ioo)
1373 {
1374  struct m0_pdclust_layout *play;
1375  int rc = 0;
1376  uint64_t i;
1377  struct pargrp_iomap *iomap;
1378 
1379  M0_ENTRY();
1382 
1383  play = pdlayout_get(ioo);
1384  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1385  iomap = ioo->ioo_iomaps[i];
1386  if (iomap->pi_state == PI_DEGRADED) {
1387  if (m0_pdclust_is_replicated(play))
1388  rc = iomap->pi_ops->pi_replica_recover(iomap);
1389  else
1390  rc = iomap->pi_ops->pi_dgmode_recover(iomap);
1391  if (rc != 0)
1392  return M0_ERR(rc);
1393  }
1394  }
1395 
1396  return M0_RC(rc);
1397 }
1398 
1405 static bool is_session_marked(struct m0_op_io *ioo,
1406  struct m0_rpc_session *session)
1407 {
1408  uint64_t i;
1409  uint64_t max_failures;
1410  uint64_t session_id;
1411 
1412  session_id = session->s_session_id;
1413  max_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_CTRLS);
1414  for (i = 0; i < max_failures; ++i) {
1415  if (ioo->ioo_failed_session[i] == session_id)
1416  return M0_RC(true);
1417  else if (ioo->ioo_failed_session[i] == ~(uint64_t)0) {
1418  ioo->ioo_failed_session[i] = session_id;
1419  return M0_RC(false);
1420  }
1421  }
1422  return M0_RC(false);
1423 }
1424 
1430 static bool is_node_marked(struct m0_op_io *ioo,
1431  uint64_t node_id)
1432 {
1433  uint64_t i;
1434  uint64_t max_failures;
1435 
1436  max_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_ENCLS);
1437  for (i = 0; i < max_failures; ++i) {
1438  if (ioo->ioo_failed_nodes[i] == node_id)
1439  return M0_RC(true);
1440  else if (ioo->ioo_failed_nodes[i] == ~(uint64_t)0) {
1441  ioo->ioo_failed_nodes[i] = node_id;
1442  return M0_RC(false);
1443  }
1444  }
1445  return M0_RC(false);
1446 }
1447 
1459 static int device_check(struct m0_op_io *ioo)
1460 {
1461  int rc = 0;
1462  uint32_t fdev_nr = 0;
1463  uint32_t fsvc_nr = 0;
1464  uint32_t fnode_nr = 0;
1465  uint64_t max_svc_failures;
1466  uint64_t max_node_failures;
1467  uint64_t node_id;
1468  enum m0_pool_nd_state state;
1469  enum m0_pool_nd_state node_state;
1470  struct m0_poolnode *node_obj;
1471  struct target_ioreq *ti;
1472  struct m0_pdclust_layout *play;
1473  struct m0_client *instance;
1474  struct m0_poolmach *pm;
1475  struct m0_pool_version *pv;
1476 
1477  M0_ENTRY();
1478  M0_PRE(ioo != NULL);
1479  M0_PRE(M0_IN(ioreq_sm_state(ioo),
1480  (IRS_READ_COMPLETE, IRS_WRITE_COMPLETE)));
1481 
1482  instance = m0__op_instance(&ioo->ioo_oo.oo_oc.oc_op);
1483  play = pdlayout_get(ioo);
1484  max_svc_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_CTRLS);
1485  max_node_failures = tolerance_of_level(ioo, M0_CONF_PVER_LVL_ENCLS);
1486 
1487  pv = m0_pool_version_find(&instance->m0c_pools_common, &ioo->ioo_pver);
1488  M0_ASSERT(pv != NULL);
1489  pm = &pv->pv_mach;
1490 
1491  m0_htable_for (tioreqht, ti, &ioo->ioo_nwxfer.nxr_tioreqs_hash) {
1492  rc = m0_poolmach_device_state(pm, ti->ti_obj, &state);
1493  if (rc != 0)
1494  return M0_ERR(rc);
1495 
1496  rc = m0_poolmach_device_node_return(pm, ti->ti_obj, &node_obj);
1497  if (rc != 0)
1498  return M0_ERR(rc);
1499 
1500  m0_rwlock_read_lock(&pm->pm_lock);
1501  node_state = node_obj->pn_state;
1502  m0_rwlock_read_unlock(&pm->pm_lock);
1503 
1504  node_id = node_obj->pn_id.f_key;
1505 
1506  ti->ti_state = state;
1507 
1508  if (M0_IN(node_state, (M0_PNDS_FAILED, M0_PNDS_OFFLINE))) {
1509  if (!is_node_marked(ioo, node_id))
1510  M0_CNT_INC(fnode_nr);
1511  is_session_marked(ioo, ti->ti_session);
1512  } else if (M0_IN(ti->ti_rc, (-ECANCELED, -ENOTCONN)) &&
1513  !is_session_marked(ioo, ti->ti_session)) {
1514  M0_CNT_INC(fsvc_nr);
1515  } else if ((M0_IN(state, (M0_PNDS_FAILED, M0_PNDS_OFFLINE,
1516  M0_PNDS_SNS_REPAIRING))
1517  || ti->ti_rc != 0 /* any error */) &&
1518  !is_session_marked(ioo, ti->ti_session)) {
1519  /*
1520  * If services failure tolerance is not enabled,
1521  * is_session_marked() will return false always, and
1522  * we count failed devices under any services. But
1523  * if services failure tolerance is enabled, we count
1524  * failed devices under different services - only
1525  * these failures matter in this check (those that
1526  * belong to different upper failure domains).
1527  */
1528  M0_CNT_INC(fdev_nr);
1529  }
1530 
1531  } m0_htable_endfor;
1532 
1533  M0_LOG(M0_DEBUG, "failed devices = %d\ttolerance=%d", (int)fdev_nr,
1534  (int)layout_k(play));
1535  M0_LOG(M0_DEBUG, "failed services = %d\ttolerance=%d", (int)fsvc_nr,
1536  (int)max_svc_failures);
1537  M0_LOG(M0_DEBUG, "failed nodes = %d\ttolerance=%d", (int)fnode_nr,
1538  (int)max_node_failures);
1539 
1540  if (is_pver_dud(fdev_nr, layout_k(play), fsvc_nr, max_svc_failures,
1541  fnode_nr, max_node_failures))
1542  return M0_ERR_INFO(-EIO, "[%p] too many failures: "
1543  "nodes=%lu + svcs=%lu + devs=%lu, allowed: "
1544  "nodes=%lu or svcs=%lu or devs=%lu", ioo,
1545  (unsigned long)fnode_nr,
1546  (unsigned long)fsvc_nr,
1547  (unsigned long)fdev_nr,
1548  (unsigned long)max_node_failures,
1549  (unsigned long)max_svc_failures,
1550  (unsigned long)layout_k(play));
1551 
1552  return M0_RC(fdev_nr | fsvc_nr | fnode_nr);
1553 }
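/*
 * Note (illustrative, not part of the original source): a negative return
 * aborts the request, while a positive value only means "failures were seen
 * but are within the tolerance"; callers such as ioreq_dgmode_read() and
 * ioreq_dgmode_write() below treat rc < 0 as fatal and a positive value as
 * permission to continue in degraded mode.
 */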
1554 
1563 static int ioreq_dgmode_read(struct m0_op_io *ioo, bool rmw)
1564 {
1565  int rc = 0;
1566  uint64_t i;
1567  struct nw_xfer_request *xfer;
1568  struct pargrp_iomap *iomap;
1569  struct ioreq_fop *irfop;
1570  struct target_ioreq *ti;
1571  struct m0_poolmach *pm;
1572 
1573  M0_ENTRY();
1575 
1576  /*
1577  * If all devices are ONLINE, all requests return success.
1578  * In case of read before write, due to CROW, COB will not be present,
1579  * resulting into ENOENT error.
1580  */
1581  xfer = &ioo->ioo_nwxfer;
1582  if ((xfer->nxr_rc == 0 || xfer->nxr_rc == -ENOENT) &&
1583  !ioo->ioo_dgmode_io_sent)
1584  return M0_RC(xfer->nxr_rc);
1585 
1586  /*
1587  * The number of failed devices is not by itself a good enough
1588  * criterion. Even if one or more devices failed, the IO request
1589  * could still complete if it did not send any pages to the
1590  * failed device(s) at all.
1591  */
1592  rc = device_check(ioo);
1593  if (rc < 0)
1594  return M0_RC(rc);
1595 
1596  pm = ioo_to_poolmach(ioo);
1597  M0_ASSERT(pm != NULL);
1598 
1599  rc = 0;
1600  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1601  /*
1602  * Data was retrieved successfully from this target.
1603  */
1604  if (ti->ti_rc == 0)
1605  continue;
1606  /*
1607  * Finds out parity groups for which read IO failed and marks
1608  * them as DEGRADED. This is necessary since read IO request
1609  * could be reading only a part of a parity group but if it
1610  * failed, rest of the parity group also needs to be read
1611  * (subject to file size) in order to re-generate lost data.
1612  */
1613  m0_tl_for (iofops, &ti->ti_iofops, irfop) {
1614  rc = ioreq_fop_dgmode_read(irfop);
1615  if (rc != 0)
1616  break;
1617  } m0_tl_endfor;
1618  } m0_htable_endfor;
1619 
1620  if (rc != 0)
1621  return M0_ERR_INFO(rc, "[%p] dgmode failed", ioo);
1622 
1623  /*
1624  * Recovers lost data using parity recovery algorithms.
1625  *
1626  * Note: iro_dgmode_recover() should be called after
1627  * ioreq_fop_dgmode_read(), which marks units PA_READ_FAILED
1628  * (including parity ones).
1629  */
1630  if (ioo->ioo_dgmode_io_sent)
1631  return M0_RC(ioo->ioo_ops->iro_dgmode_recover(ioo));
1632 
1633  M0_LOG(M0_DEBUG, "[%p] dgmap_nr=%u is in dgmode",
1634  ioo, ioo->ioo_dgmap_nr);
1635  /*
1636  * Starts processing the pages again if any of the parity groups
1637  * spanned by input IO-request is in degraded mode.
1638  */
1639  if (ioo->ioo_dgmap_nr > 0) {
1640  M0_LOG(M0_WARN, "Process failed parity groups in dgmode/read "
1641  "ioo=%p dgmap_nr=%u",
1642  ioo, ioo->ioo_dgmap_nr);
1643  if (ioreq_sm_state(ioo) == IRS_READ_COMPLETE)
1644  ioreq_sm_state_set_locked(ioo, IRS_DEGRADED_READING);
1645  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1646  iomap = ioo->ioo_iomaps[i];
1647  rc = iomap->pi_ops->pi_dgmode_postprocess(iomap);
1648  if (rc != 0)
1649  break;
1650  }
1651  ioo->ioo_dgmode_io_sent = true;
1652  } else {
1655  /*
1656  * By this time, the page count in target_ioreq::ti_ivec and
1657  * target_ioreq::ti_bufvec is greater than 1, but it is
1658  * invalid since the distribution (layout) is about to
1659  * change.
1660  * Ergo, page counts in index and buffer vectors are reset.
1661  */
1662 
1663  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1664  ti->ti_ivec.iv_vec.v_nr = 0;
1665  } m0_htable_endfor;
1666  }
1667 
1668  xfer->nxr_ops->nxo_complete(xfer, rmw);
1669 
1670  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1671  ti->ti_databytes = 0;
1672  ti->ti_parbytes = 0;
1673  ti->ti_rc = 0;
1674  } m0_htable_endfor;
1675 
1676  /* Resets the status code before starting degraded mode read IO. */
1677  ioo->ioo_rc = xfer->nxr_rc = 0;
1678 
1679  rc = xfer->nxr_ops->nxo_distribute(xfer);
1680  if (rc != 0)
1681  return M0_ERR(rc);
1682 
1683  rc = xfer->nxr_ops->nxo_dispatch(xfer);
1684  if (rc != 0)
1685  return M0_ERR(rc);
1686 
1687  return M0_RC(rc);
1688 }
1689 
1698 static int ioreq_dgmode_write(struct m0_op_io *ioo, bool rmw)
1699 {
1700  int rc;
1701  struct target_ioreq *ti;
1702  struct nw_xfer_request *xfer;
1703  struct m0_pdclust_layout *play;
1704 
1705  M0_ENTRY();
1707 
1708  xfer = &ioo->ioo_nwxfer;
1709 
1710  /* See the comments in ioreq_dgmode_read */
1711  if (ioo->ioo_dgmode_io_sent)
1712  return M0_RC(xfer->nxr_rc);
1713 
1714  /* -E2BIG: see commit 52c1072141d */
1715  if (M0_IN(xfer->nxr_rc, (0, -E2BIG)))
1716  return M0_RC(xfer->nxr_rc);
1717 
1718  rc = device_check(ioo);
1719  if (rc < 0)
1720  return M0_RC(rc);
1721 
1722  play = pdlayout_get(ioo);
1723  if (rc > 0 && play->pl_attr.pa_S == 0) {
1724  /*
1725  * Some units write failed, but no more than K (otherwise,
1726  * rc would be < 0), and there are no spare units configured
1727  * in the parity groups. In this case, there is no point in
1728  * degraded write, and it's OK to return success for now (XXX).
1729  * The redundancy of the user data will be restored during
1730  * SNS repair later.
1731  */
1732  m0_htable_for (tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1733  ti->ti_rc = 0;
1734  } m0_htable_endfor;
1735 
1736  xfer->nxr_rc = 0;
1737  ioo->ioo_rc = 0;
1738 
1739  M0_LOG(M0_NOTICE, "user data written with degraded redundancy: "
1740  "off=%" PRIu64 " len=%" PRIu64 " failed_devs=%d",
1741  INDEX(&ioo->ioo_ext, 0),
1742  m0_vec_count(&ioo->ioo_ext.iv_vec), rc);
1743 
1744  return M0_RC(0);
1745  }
1746 
1747  /*
1748  * This IO request has already acquired distributed lock on the
1749  * file by this time.
1750  * Degraded mode write needs to handle 2 prime use-cases.
1751  * 1. SNS repair still to start on associated global fid.
1752  * 2. SNS repair has completed for associated global fid.
1753  * Both use-cases imply unavailability of one or more devices.
1754  *
1755  * In first use-case, repair is yet to start on file. Hence,
1756  * rest of the file data which goes on healthy devices can be
1757  * written safely.
1758  * In this case, the fops meant for failed device(s) will be simply
1759  * dropped and rest of the fops will be sent to respective ioservice
1760  * instances for writing data to servers.
1761  * Later when this IO request relinquishes the distributed lock on
1762  * associated global fid and SNS repair starts on the file, the lost
1763  * data will be regenerated using parity recovery algorithms.
1764  *
1765  * The second use-case implies completion of SNS repair for associated
1766  * global fid and the lost data is regenerated on distributed spare
1767  * units.
1768  * Ergo, all the file data meant for lost device(s) will be redirected
1769  * towards corresponding spare unit(s). Later when SNS rebalance phase
1770  * commences, it will migrate the data from spare to a new device, thus
1771  * making spare available for recovery again.
1772  * In this case, old fops will be discarded and all pages spanned by
1773  * IO request will be reshuffled by redirecting pages meant for
1774  * failed device(s) to its corresponding spare unit(s).
1775  */
1776  ioreq_sm_state_set_locked(ioo, IRS_DEGRADED_WRITING);
1777 
1778  /*
1779  * Finalizes current fops which are not valid anymore.
1780  * Fops need to be finalized in either case since old network buffers
1781  * from IO fops are still enqueued in transfer machine and removal
1782  * of these buffers would lead to finalization of rpc bulk object.
1783  */
1784  xfer->nxr_ops->nxo_complete(xfer, rmw);
1785 
1786  /*
1787  * Resets count of data bytes and parity bytes along with
1788  * return status.
1789  * Fops meant for failed devices are dropped in
1790  * nw_xfer_req_dispatch().
1791  */
1792  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
1793  ti->ti_databytes = 0;
1794  ti->ti_parbytes = 0;
1795  ti->ti_rc = 0;
1796  ti->ti_req_type = TI_NONE;
1797  } m0_htable_endfor;
1798 
1799  /*
1800  * Redistributes all pages by routing pages for failed devices
1801  * to spare units for each parity group.
1802  */
1803  rc = xfer->nxr_ops->nxo_distribute(xfer);
1804  if (rc != 0)
1805  return M0_ERR_INFO(rc, "Failed to prepare dgmode write fops");
1806 
1807  xfer->nxr_rc = 0;
1808  ioo->ioo_rc = 0;
1809 
1810  rc = xfer->nxr_ops->nxo_dispatch(xfer);
1811  if (rc != 0)
1812  return M0_ERR_INFO(rc, "Failed to dispatch degraded mode "
1813  "write IO fops");
1814 
1815  ioo->ioo_dgmode_io_sent = true;
1816 
1817  return M0_RC(xfer->nxr_rc);
1818 }
1819 
1820 static int ioreq_parity_verify(struct m0_op_io *ioo)
1821 {
1822  struct pargrp_iomap *iomap = NULL;
1823  struct m0_pdclust_layout *play;
1824  struct m0_client *instance;
1825  struct m0_op *op;
1826  int rc = 0;
1827  uint64_t i;
1828 
1829  M0_ENTRY("m0_op_io : %p", ioo);
1831 
1832  op = &ioo->ioo_oo.oo_oc.oc_op;
1833  instance = m0__op_instance(op);
1834  play = pdlayout_get(ioo);
1835 
1836  if (op->op_code != M0_OC_READ ||
1837  !instance->m0c_config->mc_is_read_verify)
1838  return M0_RC(0);
1839 
1840  m0_semaphore_down(&cpus_sem);
1841 
1842  for (i = 0; i < ioo->ioo_iomap_nr; ++i) {
1843  iomap = ioo->ioo_iomaps[i];
1844  if (iomap->pi_state == PI_DEGRADED) {
1845  /* data is recovered from existing data and parity.
1846  * It's meaningless to do parity verification */
1847  continue;
1848  }
1849  if (m0_pdclust_is_replicated(play))
1850  rc = iomap->pi_ops->pi_parity_replica_verify(iomap);
1851  else
1852  rc = iomap->pi_ops->pi_parity_verify(iomap);
1853  if (rc != 0)
1854  break;
1855  }
1856 
1857  m0_semaphore_up(&cpus_sem);
1858  return rc != 0 ? M0_ERR_INFO(rc, "Parity verification failed for "
1859  "grpid=%"PRIu64,
1860  iomap->pi_grpid) : M0_RC(rc);
1861 }
1862 /* XXX (Sining): should we rename ioreq_xxx to ioo_xxx?*/
1863 const struct m0_op_io_ops ioo_ops = {
1864  .iro_iomaps_prepare = ioreq_iomaps_prepare,
1865  .iro_iomaps_destroy = ioreq_iomaps_destroy,
1866  .iro_application_data_copy = ioreq_application_data_copy,
1867  .iro_parity_recalc = ioreq_parity_recalc,
1868  .iro_parity_verify = ioreq_parity_verify,
1869  .iro_iosm_handle_launch = ioreq_iosm_handle_launch,
1870  .iro_iosm_handle_executed = ioreq_iosm_handle_executed,
1871  .iro_dgmode_read = ioreq_dgmode_read,
1872  .iro_dgmode_write = ioreq_dgmode_write,
1873  .iro_dgmode_recover = ioreq_dgmode_recover,
1874 };
1875 
1876 #undef M0_TRACE_SUBSYSTEM
1877 
1878 /*
1879  * Local variables:
1880  * c-indentation-style: "K&R"
1881 
1882  * c-basic-offset: 8
1883  * tab-width: 8
1884  * fill-column: 80
1885  * scroll-step: 1
1886  * End:
1887  */
1888 /*
1889  * vim: tabstop=8 shiftwidth=8 noexpandtab textwidth=80 nowrap
1890  */