file.c
1 /* -*- C -*- */
2 /*
3  * Copyright (c) 2012-2021 Seagate Technology LLC and/or its Affiliates
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  * For any questions about this software or licensing,
18  * please email opensource@seagate.com or cortx-questions@seagate.com.
19  *
20  */
21 
22 
23 #include <linux/version.h> /* LINUX_VERSION_CODE */
24 #if LINUX_VERSION_CODE <= KERNEL_VERSION(4,11,0)
25 #include <asm/uaccess.h> /* VERIFY_READ, VERIFY_WRITE */
26 #endif
27 #include <asm/atomic.h> /* atomic_get */
28 #include <linux/mm.h> /* get_user_pages, get_page, put_page */
29 #include <linux/fs.h> /* struct file_operations */
30 #include <linux/mount.h> /* struct vfsmount (f_path.mnt) */
31 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
32 #include <linux/uio.h> /* struct iovec */
33 #include <linux/aio.h> /* struct kiocb */
34 #endif
35 
36 #define M0_TRACE_SUBSYSTEM M0_TRACE_SUBSYS_M0T1FS
37 #include "lib/trace.h"
38 
39 #include "fop/fom_generic.h"/* m0_rpc_item_is_generic_reply_fop */
40 #include "lib/memory.h" /* m0_alloc, m0_free */
41 #include "lib/misc.h" /* m0_round_{up/down} */
42 #include "lib/bob.h" /* m0_bob_type */
43 #include "lib/ext.h" /* m0_ext */
44 #include "lib/arith.h" /* min_type */
45 #include "lib/finject.h" /* M0_FI_ENABLED */
46 #include "layout/pdclust.h" /* M0_PUT_*, m0_layout_to_pdl, */
47 #include "lib/bob.h" /* m0_bob_type */
48 #include "lib/tlist.h"
49 #include "rpc/rpc_machine.h" /* m0_rpc_machine, m0_rpc_machine_lock */
50 #include "ioservice/io_fops.h" /* m0_io_fop */
51 #include "motr/magic.h" /* M0_T1FS_IOREQ_MAGIC */
52 #include "m0t1fs/linux_kernel/m0t1fs.h" /* m0t1fs_sb */
53 #include "file/file.h"
54 #include "fd/fd.h" /* m0_fd_fwd_map m0_fd_bwd_map */
55 #include "lib/hash.h" /* m0_htable */
56 #include "sns/parity_repair.h" /*m0_sns_repair_spare_map() */
57 #include "addb2/addb2.h"
61 #include "ioservice/fid_convert.h" /* m0_fid_cob_device_id */
62 
322 struct io_mem_stats iommstats;
323 
324 M0_INTERNAL void iov_iter_advance(struct iov_iter *i, size_t bytes);
325 
326 /* Imports */
327 struct m0_net_domain;
328 M0_INTERNAL bool m0t1fs_inode_bob_check(struct m0t1fs_inode *bob);
329 M0_TL_DECLARE(rpcbulk, M0_INTERNAL, struct m0_rpc_bulk_buf);
330 M0_TL_DESCR_DECLARE(rpcbulk, M0_EXTERN);
331 
332 M0_TL_DESCR_DEFINE(iofops, "List of IO fops", static,
333  struct io_req_fop, irf_link, irf_magic,
335 
336 M0_TL_DEFINE(iofops, static, struct io_req_fop);
337 
338 static const struct m0_bob_type tioreq_bobtype;
340 static const struct m0_bob_type ioreq_bobtype;
341 static const struct m0_bob_type pgiomap_bobtype;
342 static const struct m0_bob_type nwxfer_bobtype;
343 static const struct m0_bob_type dtbuf_bobtype;
344 
351 
352 static const struct m0_bob_type ioreq_bobtype = {
353  .bt_name = "io_request_bobtype",
354  .bt_magix_offset = offsetof(struct io_request, ir_magic),
355  .bt_magix = M0_T1FS_IOREQ_MAGIC,
356  .bt_check = NULL,
357 };
358 
359 static const struct m0_bob_type pgiomap_bobtype = {
360  .bt_name = "pargrp_iomap_bobtype",
361  .bt_magix_offset = offsetof(struct pargrp_iomap, pi_magic),
362  .bt_magix = M0_T1FS_PGROUP_MAGIC,
363  .bt_check = NULL,
364 };
365 
366 static const struct m0_bob_type nwxfer_bobtype = {
367  .bt_name = "nw_xfer_request_bobtype",
368  .bt_magix_offset = offsetof(struct nw_xfer_request, nxr_magic),
369  .bt_magix = M0_T1FS_NWREQ_MAGIC,
370  .bt_check = NULL,
371 };
372 
373 static const struct m0_bob_type dtbuf_bobtype = {
374  .bt_name = "data_buf_bobtype",
375  .bt_magix_offset = offsetof(struct data_buf, db_magic),
376  .bt_magix = M0_T1FS_DTBUF_MAGIC,
377  .bt_check = NULL,
378 };
379 
380 static const struct m0_bob_type tioreq_bobtype = {
381  .bt_name = "target_ioreq",
382  .bt_magix_offset = offsetof(struct target_ioreq, ti_magic),
383  .bt_magix = M0_T1FS_TIOREQ_MAGIC,
384  .bt_check = NULL,
385 };
386 
387 /*
388  * These accessors are implemented as macros, rather than static inline
389  * functions, because they are used as lvalues.
390  */
391 #define INDEX(ivec, i) ((ivec)->iv_index[(i)])
392 #define COUNT(ivec, i) ((ivec)->iv_vec.v_count[(i)])
393 #define SEG_NR(ivec) ((ivec)->iv_vec.v_nr)
394 
395 #define V_INDEX(ivec, i) (*(m0_bindex_t*)(m0_varr_ele_get(&(ivec)->iv_index, (i))))
396 #define V_ADDR(bv, i) (*(void**) (m0_varr_ele_get(&(bv )->iv_index, (i))))
397 #define V_COUNT(ivec, i) (*(m0_bcount_t*)(m0_varr_ele_get(&(ivec)->iv_count, (i))))
398 #define V_SEG_NR(ivec) ((ivec)->iv_nr)
399 
400 #define PA(pa, i) (*(enum page_attr*)(m0_varr_ele_get((pa), (i))))
401 
402 #define indexvec_dump(ivec) \
403 do { \
404  int seg; \
405  for (seg = 0; seg < SEG_NR((ivec)); ++seg) { \
406  M0_LOG(M0_DEBUG, "seg# %d: [pos, +len) = [%llu, +%llu)", \
407  seg, INDEX((ivec), seg), COUNT((ivec), seg)); \
408  } \
409 } while (0)
410 
411 #define indexvec_varr_dump(ivec) \
412 do { \
413  int seg; \
414  for (seg = 0; seg < V_SEG_NR((ivec)); ++seg) { \
415  M0_LOG(M0_DEBUG, "seg# %d: [pos, +len) = [%llu, +%llu)", \
416  seg, V_INDEX((ivec), seg), V_COUNT((ivec), seg)); \
417  } \
418 } while (0)
419 
420 static inline m0_bcount_t seg_endpos(const struct m0_indexvec *ivec, uint32_t i)
421 {
422  M0_PRE(ivec != NULL);
423 
424  return INDEX(ivec, i) + COUNT(ivec, i);
425 }
426 
427 static inline m0_bcount_t
428 v_seg_endpos(struct m0_indexvec_varr *ivec, uint32_t i)
429 {
430  M0_PRE(ivec != NULL);
431 
432  return V_INDEX(ivec, i) + V_COUNT(ivec, i);
433 }
434 
435 M0_INTERNAL struct inode *m0t1fs_file_to_inode(const struct file *file)
436 {
437 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
438  return file->f_path.dentry->d_inode;
439 #else
440  return file->f_dentry->d_inode;
441 #endif
442 }
443 
444 M0_INTERNAL struct m0t1fs_inode *m0t1fs_file_to_m0inode(const struct file *file)
445 {
446  return M0T1FS_I(m0t1fs_file_to_inode(file));
447 }
448 
449 M0_INTERNAL struct m0_pool_version *m0t1fs_file_to_pver(const struct file *file)
450 {
451  struct m0t1fs_inode *inode = M0T1FS_I(m0t1fs_file_to_inode(file));
452  struct m0t1fs_sb *csb = M0T1FS_SB(m0t1fs_file_to_inode(file)->i_sb);
453 
454  return m0_pool_version_find(&csb->csb_pools_common, &inode->ci_pver);
455 }
456 
457 M0_INTERNAL struct m0_poolmach *m0t1fs_file_to_poolmach(const struct file *file)
458 {
459  return &m0t1fs_file_to_pver(file)->pv_mach;
460 }
461 
462 M0_INTERNAL struct m0t1fs_inode *m0t1fs_inode_to_m0inode(const struct inode *inode)
463 {
464  return M0T1FS_I(inode);
465 }
466 
467 static inline struct inode *iomap_to_inode(const struct pargrp_iomap *map)
468 {
469  return m0t1fs_file_to_inode(map->pi_ioreq->ir_file);
470 }
471 
472 M0_INTERNAL struct m0t1fs_sb *m0inode_to_sb(const struct m0t1fs_inode *m0inode)
473 {
474  return M0T1FS_SB(m0inode->ci_inode.i_sb);
475 }
476 
477 static inline const struct m0_fid *file_to_fid(const struct file *file)
478 {
479  return &m0t1fs_file_to_m0inode(file)->ci_fid;
480 }
481 
482 static inline struct m0t1fs_sb *file_to_sb(const struct file *file)
483 {
484  return M0T1FS_SB(m0t1fs_file_to_inode(file)->i_sb);
485 }
486 
487 static inline struct m0_sm_group *file_to_smgroup(const struct file *file)
488 {
489  return &file_to_sb(file)->csb_iogroup;
490 }
491 
492 static inline uint64_t page_nr(m0_bcount_t size)
493 {
494  return size >> PAGE_SHIFT;
495 }
496 
497 static struct m0_layout_instance *
498 layout_instance(const struct io_request *req)
499 {
500  return m0t1fs_file_to_m0inode(req->ir_file)->ci_layout_instance;
501 }
502 
503 static inline struct m0_pdclust_instance *
504 pdlayout_instance(struct m0_layout_instance *li)
505 {
506  return m0_layout_instance_to_pdi(li);
507 }
508 
509 static inline struct m0_pdclust_layout *
510 pdlayout_get(const struct io_request *req)
511 {
512  return m0_layout_to_pdl(layout_instance(req)->li_l);
513 }
514 
515 static inline uint32_t layout_n(const struct m0_pdclust_layout *play)
516 {
517  return play->pl_attr.pa_N;
518 }
519 
520 static inline uint32_t layout_k(const struct m0_pdclust_layout *play)
521 {
522  return play->pl_attr.pa_K;
523 }
524 
525 static inline uint64_t layout_unit_size(const struct m0_pdclust_layout *play)
526 {
527  return play->pl_attr.pa_unit_size;
528 }
529 
530 static inline uint64_t parity_units_page_nr(const struct m0_pdclust_layout *play)
531 {
532  return page_nr(layout_unit_size(play)) * layout_k(play);
533 }
534 
535 static inline uint64_t indexvec_varr_count(struct m0_indexvec_varr *varr)
536 {
537  uint64_t sum = 0;
538 
539  m0_varr_for(&varr->iv_count, uint64_t *, i, countp) {
540  sum += *(uint64_t*)countp;
541  } m0_varr_endfor;
542  return sum;
543 }
544 
545 static inline uint64_t iomap_page_nr(struct pargrp_iomap *map)
546 {
547  return page_nr(indexvec_varr_count(&map->pi_ivv));
548 }
549 
550 static inline uint64_t data_size(const struct m0_pdclust_layout *play)
551 {
552  return layout_n(play) * layout_unit_size(play);
553 }
554 
555 static inline struct m0_parity_math *parity_math(struct io_request *req)
556 {
557  return &pdlayout_instance(layout_instance(req))->pi_math;
558 }
559 
560 static inline uint64_t group_id(m0_bindex_t index, m0_bcount_t dtsize)
561 {
562  return index / dtsize;
563 }
564 
565 static inline bool is_page_read(struct data_buf *dbuf)
566 {
567  return dbuf->db_flags & PA_READ &&
568  dbuf->db_tioreq != NULL && dbuf->db_tioreq->ti_rc == 0;
569 }
570 
571 static inline uint64_t target_offset(uint64_t frame,
572  struct m0_pdclust_layout *play,
573  m0_bindex_t gob_offset)
574 {
575  return frame * layout_unit_size(play) +
576  (gob_offset % layout_unit_size(play));
577 }
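/*
 * Illustration of the formula above (hypothetical numbers, not taken from
 * this code): with a 4096-byte unit size, gob_offset = 9216 lies
 * 9216 % 4096 == 1024 bytes into its unit, so for frame 7 the target
 * offset is 7 * 4096 + 1024 == 29696.
 */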
578 
579 static inline uint32_t target_ioreq_type_get(struct target_ioreq *ti)
580 {
581  return ti->ti_req_type;
582 }
583 
584 static inline void target_ioreq_type_set(struct target_ioreq *ti,
585  enum target_ioreq_type type)
586 {
587  ti->ti_req_type = type;
588 }
589 
590 static bool is_pver_dud(uint32_t fdev_nr, uint32_t dev_k, uint32_t fsvc_nr,
591  uint32_t svc_k);
592 
593 static uint64_t tioreqs_hash_func(const struct m0_htable *htable, const void *k)
594 {
595  const uint64_t *key = (uint64_t *)k;
596 
597  return *key % htable->h_bucket_nr;
598 }
599 
600 static bool tioreq_key_eq(const void *key1, const void *key2)
601 {
602  const uint64_t *k1 = (uint64_t *)key1;
603  const uint64_t *k2 = (uint64_t *)key2;
604 
605  return *k1 == *k2;
606 }
607 
608 M0_HT_DESCR_DEFINE(tioreqht, "Hash of target_ioreq objects", static,
609  struct target_ioreq, ti_link, ti_magic,
611  ti_fid.f_container, tioreqs_hash_func, tioreq_key_eq);
612 
613 M0_HT_DEFINE(tioreqht, static, struct target_ioreq, uint64_t);
614 
615 /* Finds the parity group associated with a given target offset.
616  * index - target offset for intended IO.
617  * req - IO-request holding information about IO.
618  * tio_req - io-request for given target.
619  * src - output parity group.
620  */
621 static void pargrp_src_addr(m0_bindex_t index,
622  const struct io_request *req,
623  const struct target_ioreq *tio_req,
624  struct m0_pdclust_src_addr *src)
625 {
626  struct m0_pdclust_tgt_addr tgt;
627  struct m0_pdclust_layout *play;
628 
629  M0_PRE(req != NULL);
630  M0_PRE(src != NULL);
631 
632  play = pdlayout_get(req);
633  tgt.ta_obj = tio_req->ti_obj;
634  tgt.ta_frame = index / layout_unit_size(play);
635  m0_fd_bwd_map(pdlayout_instance(layout_instance(req)), &tgt, src);
636 }
637 
638 static inline uint64_t pargrp_id_find(m0_bindex_t index,
639  const struct io_request *req,
640  const struct io_req_fop *ir_fop)
641 {
642  struct m0_pdclust_src_addr src;
643 
644  pargrp_src_addr(index, req, ir_fop->irf_tioreq, &src);
645  return src.sa_group;
646 }
647 
648 static m0_bindex_t gfile_offset(m0_bindex_t toff,
649  const struct pargrp_iomap *map,
650  const struct m0_pdclust_layout *play,
651  const struct m0_pdclust_src_addr *src)
652 {
653  m0_bindex_t goff;
654 
655  M0_PRE(map != NULL);
656  M0_PRE(play != NULL);
657 
658  M0_ENTRY("grpid = %llu, target_off = %llu", map->pi_grpid, toff);
659 
660  goff = map->pi_grpid * data_size(play) +
661  src->sa_unit * layout_unit_size(play) +
662  toff % layout_unit_size(play);
663  M0_LEAVE("global file offset = %llu", goff);
664 
665  return goff;
666 }
667 
668 static inline struct m0_fid target_fid(const struct io_request *req,
669  struct m0_pdclust_tgt_addr *tgt)
670 {
671  struct m0_fid fid;
672 
675  &fid);
676  return fid;
677 }
678 
679 static inline struct m0_rpc_session *target_session(struct io_request *req,
680  struct m0_fid tfid)
681 {
683  m0_fid_cob_device_id(&tfid));
684 }
685 
686 static inline uint64_t page_id(m0_bindex_t offset)
687 {
688  return offset >> PAGE_SHIFT;
689 }
690 
691 static inline uint32_t rows_nr(struct m0_pdclust_layout *play)
692 {
693  return page_nr(layout_unit_size(play));
694 }
695 
696 #if !defined(round_down)
697 static inline uint64_t round_down(uint64_t val, uint64_t size)
698 {
700 
701  /*
702  * Returns current value if it is already a multiple of size,
703  * else m0_round_down() is invoked.
704  */
705  return (val & (size - 1)) == 0 ?
706  val : m0_round_down(val, size);
707 }
708 #endif
709 
710 #if !defined(round_up)
711 static inline uint64_t round_up(uint64_t val, uint64_t size)
712 {
714 
715  /*
716  * Returns current value if it is already a multiple of size,
717  * else m0_round_up() is invoked.
718  */
719  return (val & (size - 1)) == 0 ?
720  val : m0_round_up(val, size);
721 }
722 #endif
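/*
 * Illustration (hypothetical values; size is a power of two, as assumed
 * elsewhere in this file): round_down(5000, 4096) == 4096 and
 * round_up(5000, 4096) == 8192, while an already aligned value such as
 * 8192 is returned unchanged by both helpers.
 */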
723 
724 /* Returns the position of a page in the matrix of data buffers. */
725 static void page_pos_get(struct pargrp_iomap *map,
727  uint32_t *row,
728  uint32_t *col)
729 {
730  uint64_t pg_id;
731  struct m0_pdclust_layout *play;
732 
733  M0_PRE(map != NULL);
734  M0_PRE(row != NULL);
735  M0_PRE(col != NULL);
736 
737  play = pdlayout_get(map->pi_ioreq);
738 
739  pg_id = page_id(index - data_size(play) * map->pi_grpid);
740  *row = pg_id % rows_nr(play);
741  *col = pg_id / rows_nr(play);
742 }
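/*
 * Illustration (hypothetical layout, not taken from this code): with a
 * 16KiB unit size and 4KiB pages, rows_nr() == 4 pages per unit. For
 * grpid == 0, the page holding file offset 40960 is page 10 of the
 * group, which maps to row 10 % 4 == 2 and column 10 / 4 == 2.
 */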
743 
746  uint32_t *row,
747  uint32_t *col)
748 {
749  uint64_t pg_id;
750  struct m0_pdclust_layout *play;
751 
752  M0_PRE(map != NULL);
753  M0_PRE(row != NULL);
754  M0_PRE(col != NULL);
755 
756  play = pdlayout_get(map->pi_ioreq);
757 
758  pg_id = page_id(index);
759  *row = pg_id % rows_nr(play);
760  *col = pg_id / rows_nr(play);
761 }
762 
763 /*
764  * Returns the starting offset of a page, given its position in the data matrix.
765  * Acts as the inverse of page_pos_get().
766  */
768  uint32_t row,
769  uint32_t col)
770 {
771  struct m0_pdclust_layout *play;
772  m0_bindex_t out;
773 
774  M0_PRE(map != NULL);
775  M0_ENTRY("gid = %llu, row = %u, col = %u", map->pi_grpid, row, col);
776 
777  play = pdlayout_get(map->pi_ioreq);
778 
779  M0_ASSERT(row < rows_nr(play));
780  M0_ASSERT(col < layout_n(play));
781 
782  out = data_size(play) * map->pi_grpid +
783  col * layout_unit_size(play) + row * PAGE_SIZE;
784 
785  M0_LEAVE("offset = %llu", out);
786  return out;
787 }
788 
789 /* Invoked during m0t1fs mount. */
790 M0_INTERNAL void io_bob_tlists_init(void)
791 {
795 }
796 
797 static void device_state_reset(struct nw_xfer_request *xfer, bool rmw);
798 
799 static void io_rpc_item_cb (struct m0_rpc_item *item);
800 static void io_req_fop_release(struct m0_ref *ref);
801 static void cc_rpc_item_cb(struct m0_rpc_item *item);
802 static void cc_fop_release(struct m0_ref *ref);
803 
804 /*
805  * io_rpc_item_cb cannot be invoked directly from the io fops code, since that
806  * would create a build dependency of ioservice code on kernel-only code (m0t1fs).
807  * Hence, a new m0_rpc_item_ops structure is used for fops dispatched
808  * by m0t1fs io requests.
809  */
810 static const struct m0_rpc_item_ops io_item_ops = {
811  .rio_replied = io_rpc_item_cb,
812 };
813 
814 static const struct m0_rpc_item_ops cc_item_ops = {
815  .rio_replied = cc_rpc_item_cb,
816 };
817 
818 static bool nw_xfer_request_invariant(const struct nw_xfer_request *xfer);
819 
820 static int nw_xfer_io_distribute(struct nw_xfer_request *xfer);
821 static void nw_xfer_req_complete (struct nw_xfer_request *xfer,
822  bool rmw);
823 static int nw_xfer_req_dispatch (struct nw_xfer_request *xfer);
824 
825 static int nw_xfer_tioreq_map (struct nw_xfer_request *xfer,
826  const struct m0_pdclust_src_addr *src,
827  struct m0_pdclust_tgt_addr *tgt,
828  struct target_ioreq **tio);
829 
830 static int nw_xfer_tioreq_get (struct nw_xfer_request *xfer,
831  const struct m0_fid *fid,
832  uint64_t ta_obj,
833  struct m0_rpc_session *session,
834  uint64_t size,
835  struct target_ioreq **out);
836 
837 static const struct nw_xfer_ops xfer_ops = {
838  .nxo_distribute = nw_xfer_io_distribute,
839  .nxo_complete = nw_xfer_req_complete,
840  .nxo_dispatch = nw_xfer_req_dispatch,
841  .nxo_tioreq_map = nw_xfer_tioreq_map,
842 };
843 
844 static int pargrp_iomap_populate (struct pargrp_iomap *map,
845  struct m0_ivec_varr_cursor *cursor);
846 
847 static bool pargrp_iomap_spans_seg (struct pargrp_iomap *map,
848  m0_bindex_t index,
849  m0_bcount_t count);
850 
851 static int pargrp_iomap_readrest (struct pargrp_iomap *map);
852 
853 
854 static int pargrp_iomap_seg_process (struct pargrp_iomap *map,
855  uint64_t seg,
856  bool rmw);
857 
858 static int pargrp_iomap_parity_recalc(struct pargrp_iomap *map);
859 static int pargrp_iomap_parity_verify(struct pargrp_iomap *map);
860 
861 static uint64_t pargrp_iomap_fullpages_count(struct pargrp_iomap *map);
862 
864 
866 
867 static int pargrp_iomap_dgmode_process (struct pargrp_iomap *map,
868  struct target_ioreq *tio,
869  m0_bindex_t *index,
870  uint32_t count);
871 
872 static int pargrp_iomap_dgmode_postprocess(struct pargrp_iomap *map);
873 
874 static int pargrp_iomap_dgmode_recover (struct pargrp_iomap *map);
875 
876 static const struct pargrp_iomap_ops iomap_ops = {
877  .pi_populate = pargrp_iomap_populate,
878  .pi_spans_seg = pargrp_iomap_spans_seg,
879  .pi_readrest = pargrp_iomap_readrest,
880  .pi_fullpages_find = pargrp_iomap_fullpages_count,
881  .pi_seg_process = pargrp_iomap_seg_process,
882  .pi_readold_auxbuf_alloc = pargrp_iomap_readold_auxbuf_alloc,
883  .pi_parity_recalc = pargrp_iomap_parity_recalc,
884  .pi_parity_verify = pargrp_iomap_parity_verify,
885  .pi_paritybufs_alloc = pargrp_iomap_paritybufs_alloc,
886  .pi_dgmode_process = pargrp_iomap_dgmode_process,
887  .pi_dgmode_postprocess = pargrp_iomap_dgmode_postprocess,
888  .pi_dgmode_recover = pargrp_iomap_dgmode_recover,
889 };
890 
891 static bool pargrp_iomap_invariant_nr (struct io_request *req);
892 static bool target_ioreq_invariant (struct target_ioreq *ti);
893 
894 static void target_ioreq_fini (struct target_ioreq *ti);
895 
896 static int target_ioreq_iofops_prepare(struct target_ioreq *ti,
897  enum page_attr filter);
898 
899 static void target_ioreq_seg_add(struct target_ioreq *ti,
900  const struct m0_pdclust_src_addr *src,
901  const struct m0_pdclust_tgt_addr *tgt,
902  m0_bindex_t gob_offset,
903  m0_bcount_t count,
904  struct pargrp_iomap *map);
905 
906 static int target_cob_create_fop_prepare(struct target_ioreq *ti);
907 static const struct target_ioreq_ops tioreq_ops = {
908  .tio_seg_add = target_ioreq_seg_add,
909  .tio_iofops_prepare = target_ioreq_iofops_prepare,
910  .tio_cc_fops_prepare = target_cob_create_fop_prepare,
911 };
912 
913 static int io_req_fop_dgmode_read(struct io_req_fop *irfop);
914 
915 static struct data_buf *data_buf_alloc_init(enum page_attr pattr);
916 
917 static void data_buf_dealloc_fini(struct data_buf *buf);
918 
919 static void io_bottom_half(struct m0_sm_group *grp, struct m0_sm_ast *ast);
920 
921 static void cc_bottom_half(struct m0_sm_group *grp, struct m0_sm_ast *ast);
922 
923 static int ioreq_iomaps_prepare(struct io_request *req);
924 
925 static void ioreq_iomaps_destroy(struct io_request *req);
926 
927 static int ioreq_user_data_copy (struct io_request *req,
928  enum copy_direction dir,
929  enum page_attr filter);
930 
931 static int ioreq_parity_recalc (struct io_request *req);
932 static int ioreq_parity_verify (struct io_request *req);
933 
934 static int ioreq_iosm_handle (struct io_request *req);
935 
936 static int ioreq_file_lock (struct io_request *req);
937 static void ioreq_file_unlock (struct io_request *req);
938 static int ioreq_no_lock (struct io_request *req);
939 static void ioreq_no_unlock (struct io_request *req);
940 
941 static int ioreq_dgmode_read (struct io_request *req, bool rmw);
942 static int ioreq_dgmode_write (struct io_request *req, bool rmw);
943 static int ioreq_dgmode_recover (struct io_request *req);
944 
945 static bool should_req_sm_complete(struct io_request *req);
946 
947 static const struct io_request_ops ioreq_ops = {
948  .iro_iomaps_prepare = ioreq_iomaps_prepare,
949  .iro_iomaps_destroy = ioreq_iomaps_destroy,
950  .iro_user_data_copy = ioreq_user_data_copy,
951  .iro_parity_recalc = ioreq_parity_recalc,
952  .iro_parity_verify = ioreq_parity_verify,
953  .iro_iosm_handle = ioreq_iosm_handle,
954  .iro_file_lock = ioreq_file_lock,
955  .iro_file_unlock = ioreq_file_unlock,
956  .iro_dgmode_read = ioreq_dgmode_read,
957  .iro_dgmode_write = ioreq_dgmode_write,
958  .iro_dgmode_recover = ioreq_dgmode_recover,
959 };
960 
961 static const struct io_request_ops ioreq_oostore_ops = {
962  .iro_iomaps_prepare = ioreq_iomaps_prepare,
963  .iro_iomaps_destroy = ioreq_iomaps_destroy,
964  .iro_user_data_copy = ioreq_user_data_copy,
965  .iro_parity_recalc = ioreq_parity_recalc,
966  .iro_parity_verify = ioreq_parity_verify,
967  .iro_iosm_handle = ioreq_iosm_handle,
968  .iro_file_lock = ioreq_no_lock,
969  .iro_file_unlock = ioreq_no_unlock,
970  .iro_dgmode_read = ioreq_dgmode_read,
971  .iro_dgmode_write = ioreq_dgmode_write,
972  .iro_dgmode_recover = ioreq_dgmode_recover,
973 };
974 
975 static inline uint32_t ioreq_sm_state(const struct io_request *req)
976 {
977  return req->ir_sm.sm_state;
978 }
979 
980 static struct m0_sm_state_descr io_states[] = {
981  [IRS_INITIALIZED] = {
982  .sd_flags = M0_SDF_INITIAL,
983  .sd_name = "IO_initial",
984  .sd_allowed = M0_BITS(IRS_READING, IRS_WRITING,
986  },
987  [IRS_READING] = {
988  .sd_name = "IO_reading",
989  .sd_allowed = M0_BITS(IRS_READ_COMPLETE, IRS_FAILED)
990  },
991  [IRS_READ_COMPLETE] = {
992  .sd_name = "IO_read_complete",
993  .sd_allowed = M0_BITS(IRS_WRITING, IRS_REQ_COMPLETE,
995  IRS_READING)
996  },
997  [IRS_DEGRADED_READING] = {
998  .sd_name = "IO_degraded_read",
999  .sd_allowed = M0_BITS(IRS_READ_COMPLETE, IRS_FAILED)
1000  },
1001  [IRS_DEGRADED_WRITING] = {
1002  .sd_name = "IO_degraded_write",
1003  .sd_allowed = M0_BITS(IRS_WRITE_COMPLETE, IRS_FAILED)
1004  },
1005  [IRS_WRITING] = {
1006  .sd_name = "IO_writing",
1007  .sd_allowed = M0_BITS(IRS_WRITE_COMPLETE, IRS_FAILED)
1008  },
1009  [IRS_WRITE_COMPLETE] = {
1010  .sd_name = "IO_write_complete",
1011  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE, IRS_FAILED,
1013  },
1014  [IRS_FAILED] = {
1015  .sd_flags = M0_SDF_FAILURE,
1016  .sd_name = "IO_req_failed",
1017  .sd_allowed = M0_BITS(IRS_REQ_COMPLETE)
1018  },
1019  [IRS_REQ_COMPLETE] = {
1020  .sd_flags = M0_SDF_TERMINAL,
1021  .sd_name = "IO_req_complete",
1022  },
1023 };
1024 
1025 static const struct m0_sm_conf io_sm_conf = {
1026  .scf_name = "IO request state machine configuration",
1027  .scf_nr_states = ARRAY_SIZE(io_states),
1028  .scf_state = io_states,
1029 };
1030 
1031 static void ioreq_sm_failed(struct io_request *req, int rc)
1032 {
1033  M0_LOG(M0_DEBUG, "[%p] rc %d", req, rc);
1037 }
1038 
1039 static void ioreq_sm_state_set(struct io_request *req, int state)
1040 {
1041  M0_LOG(M0_INFO, "[%p] change state %s -> %s",
1042  req, io_states[ioreq_sm_state(req)].sd_name,
1043  io_states[state].sd_name);
1044  m0_sm_group_lock(req->ir_sm.sm_grp);
1045  m0_sm_state_set(&req->ir_sm, state);
1046  m0_sm_group_unlock(req->ir_sm.sm_grp);
1047 }
1048 
1049 static void ioreq_sm_state_set_nolock(struct io_request *req, int state)
1050 {
1051  M0_LOG(M0_INFO, "[%p] change state %s -> %s",
1052  req, io_states[ioreq_sm_state(req)].sd_name,
1053  io_states[state].sd_name);
1054  m0_sm_state_set(&req->ir_sm, state);
1055 }
1056 
1057 static bool io_request_invariant(struct io_request *req)
1058 {
1059  return
1060  _0C(io_request_bob_check(req)) &&
1061  _0C(req->ir_type <= IRT_TYPE_NR) &&
1062  _0C(req->ir_iovec != NULL) &&
1063  _0C(req->ir_ops != NULL) &&
1065 
1067  !tioreqht_htable_is_empty(&req->ir_nwxfer.
1068  nxr_tioreqs_hash))) &&
1069 
1071  !tioreqht_htable_is_empty(&req->ir_nwxfer.
1072  nxr_tioreqs_hash))) &&
1073 
1078 
1079  _0C(indexvec_varr_count(&req->ir_ivv) > 0) &&
1080 
1081  m0_forall(i, V_SEG_NR(&req->ir_ivv) - 1,
1082  _0C(v_seg_endpos(&req->ir_ivv, i) <=
1083  V_INDEX(&req->ir_ivv, i+1))) &&
1084 
1086 
1088 }
1089 
1090 static bool nw_xfer_request_invariant(const struct nw_xfer_request *xfer)
1091 {
1092  return _0C(nw_xfer_request_bob_check(xfer)) &&
1093  _0C(xfer->nxr_state <= NXS_STATE_NR) &&
1094 
1095  _0C(ergo(xfer->nxr_state == NXS_INITIALIZED,
1096  xfer->nxr_rc == 0 && xfer->nxr_bytes == 0 &&
1097  m0_atomic64_get(&xfer->nxr_iofop_nr) == 0)) &&
1098 
1099  _0C(ergo(xfer->nxr_state == NXS_INFLIGHT,
1100  !tioreqht_htable_is_empty(&xfer->nxr_tioreqs_hash))) &&
1101 
1102  _0C(ergo(xfer->nxr_state == NXS_COMPLETE,
1103  m0_atomic64_get(&xfer->nxr_iofop_nr) == 0 &&
1104  m0_atomic64_get(&xfer->nxr_rdbulk_nr) == 0)) &&
1105 
1106  m0_htable_forall(tioreqht, tioreq, &xfer->nxr_tioreqs_hash,
1107  target_ioreq_invariant(tioreq));
1108 }
1109 
1110 static bool data_buf_invariant(const struct data_buf *db)
1111 {
1112  return
1113  db != NULL &&
1114  data_buf_bob_check(db) &&
1115  ergo(db->db_buf.b_addr != NULL, db->db_buf.b_nob > 0);
1116 }
1117 
1118 static bool data_buf_invariant_nr(const struct pargrp_iomap *map)
1119 {
1120  uint32_t row;
1121  uint32_t col;
1122  struct m0_pdclust_layout *play;
1123 
1124  play = pdlayout_get(map->pi_ioreq);
1125  for (row = 0; row < rows_nr(play); ++row) {
1126  for (col = 0; col < layout_n(play); ++col) {
1127  if (map->pi_databufs[row][col] != NULL &&
1128  !data_buf_invariant(map->pi_databufs[row][col]))
1129  return false;
1130  }
1131  }
1132 
1133  if (map->pi_paritybufs != NULL) {
1134  for (row = 0; row < rows_nr(play); ++row) {
1135  for (col = 0; col < layout_k(play); ++col) {
1136  if (map->pi_paritybufs[row][col] != NULL &&
1137  !data_buf_invariant(map->pi_paritybufs
1138  [row][col]))
1139  return false;
1140  }
1141  }
1142  }
1143  return true;
1144 }
1145 
1146 static void data_buf_init(struct data_buf *buf, void *addr, uint64_t flags)
1147 {
1148  M0_PRE(buf != NULL);
1149  M0_PRE(addr != NULL);
1150 
1151  data_buf_bob_init(buf);
1152  buf->db_flags = flags;
1153  m0_buf_init(&buf->db_buf, addr, PAGE_SIZE);
1154  buf->db_tioreq = NULL;
1155 }
1156 
1157 static void data_buf_fini(struct data_buf *buf)
1158 {
1159  M0_PRE(buf != NULL);
1160 
1161  data_buf_bob_fini(buf);
1162  buf->db_flags = PA_NONE;
1163 }
1164 
1165 static bool io_req_fop_invariant(const struct io_req_fop *fop)
1166 {
1167  return
1168  _0C(io_req_fop_bob_check(fop)) &&
1169  _0C(fop->irf_tioreq != NULL) &&
1170  _0C(fop->irf_ast.sa_cb != NULL) &&
1171  _0C(fop->irf_ast.sa_mach != NULL);
1172 }
1173 
1174 static bool target_ioreq_invariant(struct target_ioreq *ti)
1175 {
1176  return
1177  _0C(target_ioreq_bob_check(ti)) &&
1178  _0C(ti->ti_session != NULL) &&
1179  _0C(ti->ti_nwxfer != NULL) &&
1180  _0C(m0_fid_is_valid(&ti->ti_fid)) &&
1181  m0_tl_forall(iofops, iofop, &ti->ti_iofops,
1182  io_req_fop_invariant(iofop));
1183 }
1184 
1185 static bool pargrp_iomap_invariant(const struct pargrp_iomap *map)
1186 {
1187  return
1188  pargrp_iomap_bob_check(map) &&
1189  map->pi_ops != NULL &&
1190  map->pi_rtype < PIR_NR &&
1191  map->pi_databufs != NULL &&
1192  map->pi_ioreq != NULL &&
1193  ergo(indexvec_varr_count(&map->pi_ivv) > 0 &&
1194  V_SEG_NR(&map->pi_ivv) >= 2,
1195  m0_forall(i, V_SEG_NR(&map->pi_ivv) - 1,
1196  v_seg_endpos(&map->pi_ivv, i) <=
1197  V_INDEX(&map->pi_ivv, i+1))) &&
1198  data_buf_invariant_nr(map);
1199 }
1200 
1201 static bool pargrp_iomap_invariant_nr(struct io_request *req)
1202 {
1203  return m0_forall(i, req->ir_iomap_nr,
1204  pargrp_iomap_invariant(req->ir_iomaps[i]));
1205 }
1206 
1207 static void nw_xfer_request_init(struct nw_xfer_request *xfer)
1208 {
1209  struct io_request *req;
1210  struct m0_pdclust_layout *play;
1211 
1212  M0_ENTRY("nw_xfer_request : %p", xfer);
1213  M0_PRE(xfer != NULL);
1214 
1215  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
1216  nw_xfer_request_bob_init(xfer);
1217  xfer->nxr_rc = 0;
1218  xfer->nxr_bytes = 0;
1219  m0_atomic64_set(&xfer->nxr_ccfop_nr, 0);
1220  m0_atomic64_set(&xfer->nxr_iofop_nr, 0);
1221  m0_atomic64_set(&xfer->nxr_rdbulk_nr, 0);
1222  xfer->nxr_state = NXS_INITIALIZED;
1223  xfer->nxr_ops = &xfer_ops;
1224  m0_mutex_init(&xfer->nxr_lock);
1225 
1226  play = pdlayout_get(req);
1227  xfer->nxr_rc = tioreqht_htable_init(&xfer->nxr_tioreqs_hash,
1228  layout_n(play) + 2 * layout_k(play));
1229 
1231  M0_LEAVE();
1232 }
1233 
1234 static void nw_xfer_request_fini(struct nw_xfer_request *xfer)
1235 {
1236  M0_PRE(xfer != NULL && xfer->nxr_state == NXS_COMPLETE);
1238  M0_ENTRY("nw_xfer_request : %p, nxr_rc %d", xfer, xfer->nxr_rc);
1239 
1240  xfer->nxr_ops = NULL;
1241  m0_mutex_fini(&xfer->nxr_lock);
1242  nw_xfer_request_bob_fini(xfer);
1243  tioreqht_htable_fini(&xfer->nxr_tioreqs_hash);
1244  M0_LEAVE();
1245 }
1246 
1247 M0_INTERNAL int user_page_map(struct data_buf *dbuf, unsigned long user_addr)
1248 {
1249  void *kmapped;
1250  int rc;
1251 
1252  M0_ASSERT_INFO((user_addr & ~PAGE_MASK) == 0,
1253  "user_addr = %lx", user_addr);
1254  M0_ASSERT_INFO(dbuf->db_page == NULL,
1255  "dbuf->db_page = %p", dbuf->db_page);
1256 
1257  /* XXX these calls can block */
1258  /* XXX
1259  * semaphore locking copy-pasted
1260  * from m0_net implementation
1261  */
1262  /*
1263  * XXX use PAGE_SIZE and
1264  * pin more than one page if needed
1265  */
1266  down_read(&current->mm->mmap_sem);
1267 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0)
1268  rc = get_user_pages(user_addr, 1, FOLL_WRITE,
1269  &dbuf->db_page, NULL);
1270 #else
1271  rc = get_user_pages(current, current->mm, user_addr, 1, 1, 0,
1272  &dbuf->db_page, NULL);
1273 #endif
1274  up_read(&current->mm->mmap_sem);
1275  if (rc == 1) {
1276  kmapped = kmap(dbuf->db_page);
1277  rc = kmapped == NULL ? -EFAULT : 0;
1278  if (kmapped != NULL)
1279  data_buf_init(dbuf, kmapped, 0);
1280  }
1281  return M0_RC(rc);
1282 }
1283 
1284 static void user_page_unmap(struct data_buf *dbuf, bool set_dirty)
1285 {
1286  M0_ASSERT(dbuf->db_page != NULL);
1287  kunmap(dbuf->db_page);
1288  if (set_dirty)
1289  set_page_dirty(dbuf->db_page);
1290  put_page(dbuf->db_page);
1291  dbuf->db_page = NULL;
1292 }
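/*
 * A minimal usage sketch for the two helpers above (hypothetical helper,
 * not part of this file): pin and kmap() one page-aligned user page,
 * access it through dbuf->db_buf.b_addr, then unmap and unpin it,
 * marking the page dirty only if it was written to.
 */
static int user_page_map_example(struct data_buf *dbuf,
				 unsigned long user_addr, bool wrote)
{
	int rc;

	rc = user_page_map(dbuf, user_addr);  /* get_user_pages() + kmap() */
	if (rc != 0)
		return rc;
	/* ... read or write dbuf->db_buf.b_addr here ... */
	user_page_unmap(dbuf, wrote);         /* kunmap() + put_page() */
	return 0;
}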
1293 
1294 static int user_data_copy(struct pargrp_iomap *map,
1295  m0_bindex_t start,
1296  m0_bindex_t end,
1297  struct iov_iter *it,
1298  enum copy_direction dir,
1299  enum page_attr filter)
1300 {
1301  /*
1302  * An iov_iter could, in principle, be used with copy_to_user() as well,
1303  * since it is essentially a vector cursor.
1304  * The present kernel (2.6.32) has no support for this.
1305  */
1306  uint64_t bytes;
1307  uint32_t row;
1308  uint32_t col;
1309  struct page *page;
1310  struct data_buf *dbuf;
1311 
1312  M0_ENTRY("Copy %s user-space, start = %8llu, end = %8llu",
1313  dir == CD_COPY_FROM_USER ? (char *)"from" : (char *)" to ",
1314  start, end);
1316  M0_PRE(it != NULL);
1318  M0_PRE(start >> PAGE_SHIFT == (end - 1) >> PAGE_SHIFT);
1319 
1320  /* Finds out the page from pargrp_iomap::pi_databufs. */
1321  page_pos_get(map, start, &row, &col);
1322  dbuf = map->pi_databufs[row][col];
1323  M0_ASSERT(dbuf != NULL);
1324  M0_ASSERT(ergo(dbuf->db_page != NULL, map->pi_ioreq->ir_direct_io));
1325 
1326  if (dir == CD_COPY_FROM_USER) {
1327  if ((dbuf->db_flags & filter) == filter) {
1328  if (dbuf->db_flags & PA_COPY_FRMUSR_DONE)
1329  return M0_RC(0);
1330 
1331  /*
1332  * Copies page to auxiliary buffer before it gets
1333  * overwritten by user data. This is needed in order
1334  * to calculate delta parity in case of read-old
1335  * approach.
1336  */
1337  if (dbuf->db_auxbuf.b_addr != NULL &&
1338  map->pi_rtype == PIR_READOLD) {
1339  if (filter == 0) {
1340  M0_ASSERT(dbuf->db_page == NULL);
1341  memcpy(dbuf->db_auxbuf.b_addr,
1342  dbuf->db_buf.b_addr, PAGE_SIZE);
1343  } else
1344  return M0_RC(0);
1345  }
1346 
1347  if (dbuf->db_page == NULL) {
1348  page = virt_to_page(dbuf->db_buf.b_addr);
1349  /* Copies to appropriate offset within page. */
1350 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
1351  bytes = iov_iter_copy_from_user_atomic(page, it,
1352  start & ~PAGE_MASK,
1353  end - start);
1354 #else
1355  bytes = iov_iter_copy_from_user(page, it,
1356  start & ~PAGE_MASK,
1357  end - start);
1358 #endif
1359 
1360  } else
1361  bytes = end - start;
1362 
1363  M0_LOG(M0_DEBUG, "[%p] %llu bytes copied from "
1364  "user-space from offset %llu", map->pi_ioreq,
1365  bytes, start);
1366 
1367  map->pi_ioreq->ir_copied_nr += bytes;
1368  /*
1369  * user_data_copy() may be called to handle only part of a
1370  * PA_FULLPAGE_MODIFY page. In that case the page should be
1371  * marked as done only when its last piece is processed;
1372  * otherwise the remaining part of the page would be
1373  * ignored.
1374  */
1375  if (ergo(dbuf->db_flags & PA_FULLPAGE_MODIFY,
1376  (end & ~PAGE_MASK) == 0))
1377  dbuf->db_flags |= PA_COPY_FRMUSR_DONE;
1378 
1379  if (bytes != end - start)
1380  return M0_ERR_INFO(
1381  -EFAULT, "[%p] Failed to"
1382  " copy_from_user: %" PRIu64 " !="
1383  " %" PRIu64 " - %" PRIu64,
1384  map->pi_ioreq, bytes, end, start);
1385  }
1386  } else {
1387  if (dbuf->db_page == NULL)
1388  bytes = copy_to_user(it->iov->iov_base + it->iov_offset,
1389  (char *)dbuf->db_buf.b_addr +
1390  (start & ~PAGE_MASK),
1391  end - start);
1392  else
1393  bytes = 0;
1394 
1395  map->pi_ioreq->ir_copied_nr += end - start - bytes;
1396 
1397  M0_LOG(M0_DEBUG, "[%p] %llu bytes copied to user-space from "
1398  "offset %llu", map->pi_ioreq, end - start - bytes,
1399  start);
1400 
1401  if (bytes != 0)
1402  return M0_ERR_INFO(-EFAULT, "[%p] Failed to "
1403  "copy_to_user", map->pi_ioreq);
1404  }
1405 
1406  return M0_RC(0);
1407 }
1408 
1409 static int pargrp_iomap_parity_verify(struct pargrp_iomap *map)
1410 {
1411  int rc;
1412  uint32_t row;
1413  uint32_t col;
1414  struct m0_buf *dbufs;
1415  struct m0_buf *pbufs;
1416  struct m0_buf *old_pbuf;
1417  struct m0_pdclust_layout *play;
1418  struct inode *inode;
1419  struct m0t1fs_sb *csb;
1420  struct page *page;
1421  unsigned long zpage;
1422 
1423  M0_ENTRY("[%p] map = %p", map->pi_ioreq, map);
1425 
1426  inode = iomap_to_inode(map);
1427  csb = M0T1FS_SB(inode->i_sb);
1428  if (!(map->pi_ioreq->ir_type == IRT_READ && csb->csb_verify))
1429  return M0_RC(0);
1430 
1431  play = pdlayout_get(map->pi_ioreq);
1432  M0_ALLOC_ARR(dbufs, layout_n(play));
1433  M0_ALLOC_ARR(pbufs, layout_k(play));
1434  zpage = get_zeroed_page(GFP_KERNEL);
1435 
1436  if (dbufs == NULL || pbufs == NULL || zpage == 0) {
1437  rc = M0_ERR(-ENOMEM);
1438  goto last;
1439  }
1440 
1441  /* temporary buf to hold parity */
1442  for (col = 0; col < layout_k(play); ++col) {
1443  page = alloc_pages(GFP_KERNEL, 0);
1444  if (page == NULL) {
1445  rc = M0_ERR(-ENOMEM);
1446  goto last;
1447  }
1448 
1449  pbufs[col].b_addr = (void *)page_address(page);
1450  pbufs[col].b_nob = PAGE_SIZE;
1451  }
1452 
1453  for (row = 0; row < rows_nr(play); ++row) {
1454  /* data */
1455  for (col = 0; col < layout_n(play); ++col) {
1456  if (map->pi_databufs[row][col] != NULL) {
1457  dbufs[col] =
1458  map->pi_databufs[row][col]->db_buf;
1459  } else {
1460  dbufs[col].b_addr = (void *)zpage;
1461  dbufs[col].b_nob = PAGE_SIZE;
1462  }
1463  }
1464  /* generate parity into new buf */
1465  m0_parity_math_calculate(parity_math(map->pi_ioreq),
1466  dbufs, pbufs);
1467 
1468  /* verify the parity */
1469  for (col = 0; col < layout_k(play); ++col) {
1470  old_pbuf = &map->pi_paritybufs[row][col]->db_buf;
1471  if (memcmp(pbufs[col].b_addr, old_pbuf->b_addr,
1472  PAGE_SIZE)) {
1473  M0_LOG(M0_ERROR, "[%p] parity verification "
1474  "failed for %llu [%u:%u], rc %d",
1475  map->pi_ioreq, map->pi_grpid, row, col,
1476  -EIO);
1477  rc = M0_ERR(-EIO);
1478  goto last;
1479  }
1480  M0_LOG(M0_DEBUG, "[%p] parity verified for %llu "
1481  "[%u:%u]", map->pi_ioreq, map->pi_grpid,
1482  row, col);
1483  }
1484  }
1485 
1486  rc = 0;
1487 last:
1488  if (pbufs != NULL) {
1489  for (col = 0; col < layout_k(play); ++col) {
1490  /* free_page(NULL) is OK */
1491  free_page((unsigned long)pbufs[col].b_addr);
1492  }
1493  }
1494  m0_free(dbufs);
1495  m0_free(pbufs);
1496  free_page(zpage);
1497  M0_LOG(M0_DEBUG, "[%p] parity verified for %llu, rc=%d", map->pi_ioreq,
1498  map->pi_grpid, rc);
1499  return M0_RC(rc);
1500 }
1501 
1502 static int pargrp_iomap_parity_recalc(struct pargrp_iomap *map)
1503 {
1504  int rc = 0;
1505  uint32_t row;
1506  uint32_t col;
1507  struct m0_buf *dbufs;
1508  struct m0_buf *pbufs;
1509  struct m0_pdclust_layout *play;
1510 
1512 
1513  M0_ENTRY("[%p] map = %p", map->pi_ioreq, map);
1514 
1515  play = pdlayout_get(map->pi_ioreq);
1516  M0_ALLOC_ARR(dbufs, layout_n(play));
1517  M0_ALLOC_ARR(pbufs, layout_k(play));
1518 
1519  if (dbufs == NULL || pbufs == NULL) {
1520  rc = M0_ERR(-ENOMEM);
1521  goto last;
1522  }
1523 
1524  if ((map->pi_ioreq->ir_type == IRT_WRITE && map->pi_rtype == PIR_NONE)
1525  || map->pi_rtype == PIR_READREST) {
1526 
1527  unsigned long zpage;
1528 
1529  zpage = get_zeroed_page(GFP_KERNEL);
1530  if (zpage == 0) {
1531  rc = M0_ERR(-ENOMEM);
1532  goto last;
1533  }
1534 
1535  for (row = 0; row < rows_nr(play); ++row) {
1536  for (col = 0; col < layout_n(play); ++col)
1537  if (map->pi_databufs[row][col] != NULL) {
1538  dbufs[col] = map->pi_databufs
1539  [row][col]->db_buf;
1540  } else {
1541  dbufs[col].b_addr = (void *)zpage;
1542  dbufs[col].b_nob = PAGE_SIZE;
1543  }
1544 
1545  for (col = 0; col < layout_k(play); ++col)
1546  pbufs[col] = map->pi_paritybufs[row][col]->
1547  db_buf;
1548 
1549  m0_parity_math_calculate(parity_math(map->pi_ioreq),
1550  dbufs, pbufs);
1551  }
1552  free_page(zpage);
1553  M0_LOG(M0_DEBUG, "[%p] Parity recalculated for %s",
1554  map->pi_ioreq,
1555  map->pi_rtype == PIR_READREST ? "read-rest" :
1556  "aligned write");
1557 
1558  } else {
1559  struct m0_buf *old;
1560 
1561  M0_ALLOC_ARR(old, layout_n(play));
1562  if (old == NULL) {
1563  rc = M0_ERR(-ENOMEM);
1564  goto last;
1565  }
1566 
1567  for (row = 0; row < rows_nr(play); ++row) {
1568  for (col = 0; col < layout_k(play); ++col)
1569  pbufs[col] = map->pi_paritybufs[row][col]->
1570  db_buf;
1571 
1572  for (col = 0; col < layout_n(play); ++col) {
1573  /*
1574  * During rmw-IO request with read-old approach
1575  * we allocate primary and auxiliary buffers
1576  * for those units from a parity group, that
1577  * are spanned by input rmw-IO request. If
1578  * these units belong to failed devices then
1579  * during the degraded reading, primary buffers
1580  * are allocated for rest of the units from the
1581  * parity group in order to recover the failed
1582  * units. Thus if a parity group is in dgmode,
1583  * then every unit will have a primary buffer,
1584  * but may not have an auxiliary buffer.
1585  */
1586  if (map->pi_databufs[row][col] == NULL ||
1587  map->pi_databufs[row][col]->
1588  db_auxbuf.b_addr == NULL)
1589  continue;
1590 
1591  dbufs[col] = map->pi_databufs[row][col]->db_buf;
1592  old[col] = map->pi_databufs[row][col]->
1593  db_auxbuf;
1594 
1595  rc = m0_parity_math_diff(parity_math(map->pi_ioreq),
1596  old, dbufs, pbufs, col);
1597  if (rc != 0) {
1598  m0_free(old);
1599  goto last;
1600  }
1601  }
1602  }
1603  m0_free(old);
1604  }
1605 last:
1606  m0_free(dbufs);
1607  m0_free(pbufs);
1608  return M0_RC(rc);
1609 }
1610 
1611 static int ioreq_parity_verify(struct io_request *req)
1612 {
1613  int rc = 0;
1614  uint64_t i;
1615  struct pargrp_iomap *iomap;
1616  struct inode *inode;
1617  struct m0t1fs_sb *csb;
1618 
1619  M0_ENTRY("[%p]", req);
1621 
1622  inode = m0t1fs_file_to_inode(req->ir_file);
1623  csb = M0T1FS_SB(inode->i_sb);
1624 
1625  if (!(req->ir_type == IRT_READ && csb->csb_verify))
1626  return M0_RC(0);
1627 
1629 
1630  for (i = 0; i < req->ir_iomap_nr; ++i) {
1631  iomap = req->ir_iomaps[i];
1632  if (iomap->pi_state == PI_DEGRADED) {
1633  /* data is recovered from existing data and parity.
1634  * It's meaningless to do parity verification */
1635  continue;
1636  }
1637  rc = iomap->pi_ops->pi_parity_verify(iomap);
1638  if (rc != 0)
1639  break;
1640  }
1641 
1643 
1644  return rc != 0 ? M0_ERR_INFO(rc, "[%p] Parity verification failed for "
1645  "grpid=%llu", req,
1646  iomap->pi_grpid) : M0_RC(rc);
1647 }
1648 
1649 static int ioreq_parity_recalc(struct io_request *req)
1650 {
1651  int rc = 0;
1652  uint64_t i;
1653  struct pargrp_iomap *iomap;
1654 
1655  M0_ENTRY("[%p]", req);
1657 
1659 
1660  for (i = 0; i < req->ir_iomap_nr; ++i) {
1661  iomap = req->ir_iomaps[i];
1662  rc = iomap->pi_ops->pi_parity_recalc(iomap);
1663  if (rc != 0)
1664  break;
1665  }
1666 
1668 
1669  return rc == 0 ? M0_RC(rc) :
1670  M0_ERR_INFO(rc, "Parity recalc failed for grpid=%3"PRIu64,
1671  iomap->pi_grpid);
1672 }
1673 
1674 /* Finds the pargrp_iomap with the given group id in io_request's iomap array. */
1675 static void ioreq_pgiomap_find(struct io_request *req,
1676  uint64_t grpid,
1677  uint64_t *cursor,
1678  struct pargrp_iomap **out)
1679 {
1680  uint64_t i;
1681 
1682  M0_PRE(req != NULL);
1683  M0_PRE(out != NULL);
1684  M0_PRE(cursor != NULL);
1685  M0_PRE(*cursor < req->ir_iomap_nr);
1686  M0_ENTRY("[%p] group_id=%llu cursor=%llu", req, grpid, *cursor);
1687 
1688  for (i = *cursor; i < req->ir_iomap_nr; ++i) {
1689  if (req->ir_iomaps[i]->pi_grpid == grpid) {
1690  *out = req->ir_iomaps[i];
1691  *cursor = i;
1692  break;
1693  }
1694  }
1695 
1696  M0_POST(i < req->ir_iomap_nr);
1697  M0_LEAVE("[%p] result iomap=%llu", req, i);
1698 }
1699 
1700 static int ioreq_user_data_copy(struct io_request *req,
1701  enum copy_direction dir,
1702  enum page_attr filter)
1703 {
1704  int rc;
1705  uint64_t i;
1706  m0_bindex_t grpstart;
1707  m0_bindex_t grpend;
1708  m0_bindex_t pgstart;
1709  m0_bindex_t pgend;
1710  m0_bcount_t count;
1711  struct iov_iter it;
1712  struct m0_ivec_varr_cursor srccur;
1713  struct m0_pdclust_layout *play;
1714  struct pargrp_iomap *iomap;
1715 
1716  M0_ENTRY("[%p] %s user-space. filter = 0x%x",
1717  req, dir == CD_COPY_FROM_USER ? (char *)"from" : (char *)"to",
1718  filter);
1720  M0_PRE(dir < CD_NR);
1721 
1722 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
1723  iov_iter_init(&it, WRITE, req->ir_iovec, V_SEG_NR(&req->ir_ivv),
1724  indexvec_varr_count(&req->ir_ivv));
1725 #else
1726  iov_iter_init(&it, req->ir_iovec, V_SEG_NR(&req->ir_ivv),
1728 #endif
1729  m0_ivec_varr_cursor_init(&srccur, &req->ir_ivv);
1730  play = pdlayout_get(req);
1731 
1732  for (i = 0; i < req->ir_iomap_nr; ++i) {
1733  iomap = req->ir_iomaps[i];
1735 
1736  count = 0;
1737  grpstart = data_size(play) * iomap->pi_grpid;
1738  grpend = grpstart + data_size(play);
1739 
1740  while (!m0_ivec_varr_cursor_move(&srccur, count) &&
1741  m0_ivec_varr_cursor_index(&srccur) < grpend) {
1742 
1743  pgstart = m0_ivec_varr_cursor_index(&srccur);
1744  pgend = min64u(m0_round_up(pgstart + 1, PAGE_SIZE),
1745  pgstart + m0_ivec_varr_cursor_step(&srccur));
1746  count = pgend - pgstart;
1747 
1748  /*
1749  * This finds the correct page within the current
1750  * pargrp_iomap structure, based on pgstart and
1751  * pgend.
1752  */
1753  rc = user_data_copy(iomap, pgstart, pgend,
1754  &it, dir, filter);
1755  if (rc != 0)
1756  return M0_ERR_INFO(
1757  rc, "[%p] Copy failed (pgstart=%" PRIu64
1758  " pgend=%" PRIu64 ")",
1759  req, pgstart, pgend);
1760 
1762  }
1763  }
1764 
1765  return M0_RC(0);
1766 }
1767 
1768 static void indexvec_sort(struct m0_indexvec_varr *ivec)
1769 {
1770  uint32_t i;
1771  uint32_t j;
1772 
1773  M0_ENTRY("indexvec = %p", ivec);
1774  M0_PRE(ivec != NULL && indexvec_varr_count(ivec) != 0);
1775 
1776  /*
1777  * TODO: replace this with an efficient sorting algorithm,
1778  * something like heapsort, which is fairly inexpensive in kernel
1779  * mode and has a good worst case.
1780  * The existing heap sort from the kernel cannot be used, because
1781  * the index vector and its associated count vector have to be
1782  * swapped together for the same index.
1783  */
1784  for (i = 0; i < V_SEG_NR(ivec); ++i) {
1785  for (j = i+1; j < V_SEG_NR(ivec); ++j) {
1786  if (V_INDEX(ivec, i) > V_INDEX(ivec, j)) {
1787  M0_SWAP(V_INDEX(ivec, i), V_INDEX(ivec, j));
1788  M0_SWAP(V_COUNT(ivec, i), V_COUNT(ivec, j));
1789  }
1790  }
1791  }
1792  M0_LEAVE();
1793 }
1794 
1795 static int pargrp_iomap_init(struct pargrp_iomap *map,
1796  struct io_request *req,
1797  uint64_t grpid)
1798 {
1799  int rc;
1800  int row;
1801  struct m0_pdclust_layout *play;
1802  struct inode *inode;
1803  struct m0t1fs_sb *csb;
1804 
1805  M0_ENTRY("[%p] map = %p, grpid = %llu", req, map, grpid);
1806  M0_PRE(map != NULL);
1807  M0_PRE(req != NULL);
1808 
1809  pargrp_iomap_bob_init(map);
1810  map->pi_ops = &iomap_ops;
1811  map->pi_rtype = PIR_NONE;
1812  map->pi_grpid = grpid;
1813  map->pi_ioreq = req;
1814  map->pi_state = PI_HEALTHY;
1815  map->pi_paritybufs = NULL;
1816 
1817  inode = m0t1fs_file_to_inode(req->ir_file);
1818  csb = M0T1FS_SB(inode->i_sb);
1819 
1820  play = pdlayout_get(req);
1821  rc = m0_indexvec_varr_alloc(&map->pi_ivv, page_nr(data_size(play)));
1822  if (rc != 0)
1823  goto fail_iv;
1824 
1825  /*
1826  * This number is incremented only when a valid segment
1827  * is added to the index vector.
1828  */
1829  V_SEG_NR(&map->pi_ivv) = 0;
1830 
1831  M0_ALLOC_ARR(map->pi_databufs, rows_nr(play));
1832  if (map->pi_databufs == NULL)
1833  goto fail;
1834 
1835  for (row = 0; row < rows_nr(play); ++row) {
1836  M0_ALLOC_ARR(map->pi_databufs[row], layout_n(play));
1837  if (map->pi_databufs[row] == NULL)
1838  goto fail;
1839  }
1840 
1841  if (req->ir_type == IRT_WRITE ||
1842  (req->ir_type == IRT_READ && csb->csb_verify)) {
1843  M0_ALLOC_ARR(map->pi_paritybufs, rows_nr(play));
1844  if (map->pi_paritybufs == NULL)
1845  goto fail;
1846 
1847  for (row = 0; row < rows_nr(play); ++row) {
1848  M0_ALLOC_ARR(map->pi_paritybufs[row],
1849  layout_k(play));
1850  if (map->pi_paritybufs[row] == NULL)
1851  goto fail;
1852  }
1853  }
1854 
1855  M0_LOG(M0_DEBUG, "[%p] grpid=%llu, ivec has %llu segs, "
1856  "databufs=[%u x %u] paritybufs=[%u x %u]",
1857  req, grpid, page_nr(data_size(play)),
1858  rows_nr(play), layout_n(play),
1859  rows_nr(play), layout_k(play));
1860 
1862  return M0_RC(0);
1863 
1864 fail:
1865  m0_indexvec_varr_free(&map->pi_ivv);
1866 
1867  if (map->pi_databufs != NULL) {
1868  for (row = 0; row < rows_nr(play); ++row)
1869  m0_free(map->pi_databufs[row]);
1870  m0_free(map->pi_databufs);
1871  }
1872  if (map->pi_paritybufs != NULL) {
1873  for (row = 0; row < rows_nr(play); ++row)
1874  m0_free(map->pi_paritybufs[row]);
1875  m0_free(map->pi_paritybufs);
1876  }
1877 fail_iv:
1878  return M0_ERR_INFO(-ENOMEM, "[%p] Memory allocation failed", req);
1879 }
1880 
1881 static void pargrp_iomap_fini(struct pargrp_iomap *map)
1882 {
1883  uint32_t row;
1884  uint32_t col;
1885  struct m0_pdclust_layout *play;
1886 
1887  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
1889 
1890  play = pdlayout_get(map->pi_ioreq);
1891  map->pi_ops = NULL;
1892  map->pi_rtype = PIR_NONE;
1893  map->pi_state = PI_NONE;
1894 
1895  pargrp_iomap_bob_fini(map);
1896  m0_indexvec_varr_free(&map->pi_ivv);
1897 
1898  for (row = 0; row < rows_nr(play); ++row) {
1899  for (col = 0; col < layout_n(play); ++col) {
1900  if (map->pi_databufs[row][col] != NULL) {
1901  data_buf_dealloc_fini(map->
1902  pi_databufs[row][col]);
1903  map->pi_databufs[row][col] = NULL;
1904  }
1905  }
1906  m0_free0(&map->pi_databufs[row]);
1907  }
1908 
1909  if (map->pi_paritybufs != NULL) {
1910  for (row = 0; row < rows_nr(play); ++row) {
1911  for (col = 0; col < layout_k(play); ++col) {
1912  if (map->pi_paritybufs[row][col] != NULL) {
1913  data_buf_dealloc_fini(map->
1914  pi_paritybufs[row][col]);
1915  map->pi_paritybufs[row][col] = NULL;
1916  }
1917  }
1918  m0_free0(&map->pi_paritybufs[row]);
1919  }
1920  }
1921 
1922  m0_free0(&map->pi_databufs);
1923  m0_free0(&map->pi_paritybufs);
1924  map->pi_ioreq = NULL;
1925  M0_LEAVE();
1926 }
1927 
1930 static bool pargrp_iomap_spans_seg(struct pargrp_iomap *map,
1931  m0_bindex_t index,
1932  m0_bcount_t count)
1933 {
1934  uint32_t seg;
1935  bool spanned = false;
1936 
1937  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
1938 
1940 
1941  for (seg = 0; seg < V_SEG_NR(&map->pi_ivv); ++seg) {
1942  if (V_INDEX(&map->pi_ivv, seg) <= index &&
1943  index + count <= v_seg_endpos(&map->pi_ivv, seg)) {
1944  spanned = true;
1945  break;
1946  }
1947  }
1948  return M0_RC(!!spanned);
1949 }
1950 
1951 static int pargrp_iomap_databuf_alloc(struct pargrp_iomap *map,
1952  uint32_t row,
1953  uint32_t col)
1954 {
1955  M0_PRE(map != NULL);
1956  M0_PRE(map->pi_databufs[row][col] == NULL);
1957 
1958  M0_ENTRY("[%p] map %p, row %u col %u", map->pi_ioreq, map, row, col);
1959  map->pi_databufs[row][col] = data_buf_alloc_init(0);
1960 
1961  return map->pi_databufs[row][col] == NULL ? M0_ERR(-ENOMEM) : 0;
1962 }
1963 
1964 /* Allocates data_buf structures as needed and populates the buffer flags. */
1965 static int pargrp_iomap_seg_process(struct pargrp_iomap *map,
1966  uint64_t seg,
1967  bool rmw)
1968 {
1969  int rc;
1970  int flags;
1971  bool ret;
1972  uint32_t row;
1973  uint32_t col;
1974  uint64_t count = 0;
1975  m0_bindex_t start;
1976  m0_bindex_t end;
1977  struct inode *inode;
1978  struct m0_ivec_varr_cursor cur;
1979  struct m0_pdclust_layout *play;
1980  struct io_request *req = map->pi_ioreq;
1981 
1982  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
1983  M0_LOG(M0_DEBUG, "[%p] pgid %llu seg %llu = [%llu, +%llu), %s",
1984  map->pi_ioreq, map->pi_grpid, seg,
1985  V_INDEX(&map->pi_ivv, seg),
1986  V_COUNT(&map->pi_ivv, seg),
1987  rmw ? "rmw" : "aligned");
1988  play = pdlayout_get(req);
1989  inode = m0t1fs_file_to_inode(req->ir_file);
1990  m0_ivec_varr_cursor_init(&cur, &map->pi_ivv);
1991  ret = m0_ivec_varr_cursor_move_to(&cur, V_INDEX(&map->pi_ivv, seg));
1992  M0_ASSERT(!ret);
1993 
1994  /* process a page at each iteration */
1995  while (!m0_ivec_varr_cursor_move(&cur, count)) {
1996  start = m0_ivec_varr_cursor_index(&cur);
1997  end = min64u(m0_round_up(start + 1, PAGE_SIZE),
1998  start + m0_ivec_varr_cursor_step(&cur));
1999  count = end - start;
2000 
2001  flags = 0;
2002  if (req->ir_type == IRT_WRITE) {
2003  flags |= PA_WRITE;
2004  flags |= count == PAGE_SIZE ?
2005  PA_FULLPAGE_MODIFY : PA_PARTPAGE_MODIFY;
2006 
2007  /*
2008  * Even if PA_PARTPAGE_MODIFY flag is set in
2009  * this buffer, the auxiliary buffer can not be
2010  * allocated until ::pi_rtype is selected.
2011  */
2012  if (rmw && (flags & PA_PARTPAGE_MODIFY) &&
2013  (end < inode->i_size ||
2014  (inode->i_size > 0 &&
2015  page_id(end - 1) == page_id(inode->i_size - 1))))
2016  flags |= PA_READ;
2017  } else {
2018  /*
2019  * For read IO requests, file_aio_read() has already
2020  * delimited the index vector to EOF boundary.
2021  */
2022  flags |= PA_READ;
2023  }
2024 
2025  page_pos_get(map, start, &row, &col);
2026  rc = pargrp_iomap_databuf_alloc(map, row, col);
2027  M0_LOG(M0_DEBUG, "[%p] alloc start %8llu count %4llu pgid "
2028  "%3llu row %u col %u f 0x%x addr %p",
2029  req, start, count, map->pi_grpid, row, col, flags,
2030  map->pi_databufs[row][col] != NULL ?
2031  map->pi_databufs[row][col]->db_buf.b_addr : NULL);
2032  if (rc != 0)
2033  goto err;
2034  map->pi_databufs[row][col]->db_flags = flags;
2035  }
2036 
2037  return M0_RC(0);
2038 err:
2039  for (row = 0; row < rows_nr(play); ++row) {
2040  for (col = 0; col < layout_n(play); ++col) {
2041  if (map->pi_databufs[row][col] != NULL) {
2042  data_buf_dealloc_fini(map->pi_databufs
2043  [row][col]);
2044  map->pi_databufs[row][col] = NULL;
2045  }
2046  }
2047  }
2048  return M0_ERR_INFO(rc, "[%p] databuf_alloc failed", req);
2049 }
2050 
2051 static uint64_t pargrp_iomap_fullpages_count(struct pargrp_iomap *map)
2052 {
2053  uint32_t row;
2054  uint32_t col;
2055  uint64_t nr = 0;
2056  struct m0_pdclust_layout *play;
2057 
2059 
2060  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
2061  play = pdlayout_get(map->pi_ioreq);
2062 
2063  for (row = 0; row < rows_nr(play); ++row) {
2064  for (col = 0; col < layout_n(play); ++col) {
2065 
2066  if (map->pi_databufs[row][col] &&
2067  map->pi_databufs[row][col]->db_flags &
2068  PA_FULLPAGE_MODIFY)
2069  ++nr;
2070  }
2071  }
2072  M0_LEAVE();
2073  return nr;
2074 }
2075 
2076 static int pargrp_iomap_auxbuf_alloc(struct pargrp_iomap *map,
2077  uint32_t row,
2078  uint32_t col)
2079 {
2081  M0_PRE(map->pi_rtype == PIR_READOLD);
2082 
2083  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
2084  map->pi_databufs[row][col]->db_auxbuf.b_addr = (void *)
2085  get_zeroed_page(GFP_KERNEL);
2086 
2087  if (map->pi_databufs[row][col]->db_auxbuf.b_addr == NULL)
2088  return M0_ERR(-ENOMEM);
2089  ++iommstats.a_page_nr;
2090  map->pi_databufs[row][col]->db_auxbuf.b_nob = PAGE_SIZE;
2091 
2092  return M0_RC(0);
2093 }
2094 
2095 /*
2096  * Allocates auxiliary buffer for data_buf structures in
2097  * pargrp_iomap structure.
2098  */
2099 static int pargrp_iomap_readold_auxbuf_alloc(struct pargrp_iomap *map)
2100 {
2101  int rc = 0;
2102  uint64_t start;
2103  uint64_t end;
2104  uint64_t count = 0;
2105  uint32_t row;
2106  uint32_t col;
2107  struct inode *inode;
2108  struct m0_ivec_varr_cursor cur;
2109 
2111  M0_PRE(map->pi_rtype == PIR_READOLD);
2112 
2113  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
2114  inode = m0t1fs_file_to_inode(map->pi_ioreq->ir_file);
2115  m0_ivec_varr_cursor_init(&cur, &map->pi_ivv);
2116 
2117  while (!m0_ivec_varr_cursor_move(&cur, count)) {
2118  start = m0_ivec_varr_cursor_index(&cur);
2119  end = min64u(m0_round_up(start + 1, PAGE_SIZE),
2120  start + m0_ivec_varr_cursor_step(&cur));
2121  count = end - start;
2122  page_pos_get(map, start, &row, &col);
2123 
2124  if (map->pi_databufs[row][col] != NULL) {
2125  /*
2126  * In the read-old approach, all valid pages have to
2127  * be read regardless of whether they are fully
2128  * occupied or partially occupied.
2129  * This is needed in order to calculate correct
2130  * parity in differential manner.
2131  * Also, read flag should be set only for pages
2132  * which lie within end-of-file boundary.
2133  */
2134  if (end < inode->i_size ||
2135  (inode->i_size > 0 &&
2136  page_id(end - 1) == page_id(inode->i_size - 1)))
2137  map->pi_databufs[row][col]->db_flags |=
2138  PA_READ;
2139 
2140  rc = pargrp_iomap_auxbuf_alloc(map, row, col);
2141  if (rc != 0)
2142  return M0_ERR_INFO(rc, "[%p] auxbuf_alloc "
2143  "failed", map->pi_ioreq);
2144  }
2145  }
2146  return M0_RC(rc);
2147 }
2148 
2149 /*
2150  * A read request from rmw IO request can lead to either
2151  *
2152  * read_old - Read the old data for the extent spanned by current
2153  * IO request, along with the old parity unit. This approach needs
2154  * to calculate the new parity in an _iterative_ manner. This approach is
2155  * selected only if current IO extent lies within file size.
2156  *
2157  * read_rest - Read rest of the parity group, which is _not_ spanned
2158  * by the current IO request, so that data for the whole parity group is
2159  * available for parity calculation.
2160  * This approach reads the extent from start of parity group to the
2161  * point where a page is completely spanned by incoming IO request.
2162  *
2163  * Typically, the approach that results in the least amount of data being
2164  * read from and written to the server is selected.
2165  *
2166  * N = 5, P = 1, K = 1, unit_size = 4k
2167  * F => Fully occupied
2168  * P' => Partially occupied
2169  * # => Parity unit
2170  * * => Spare unit
2171  * x => Start of actual file extent.
2172  * y => End of actual file extent.
2173  * a => Rounded down value of x.
2174  * b => Rounded up value of y.
2175  *
2176  * Read-rest approach
2177  *
2178  * a x
2179  * +---+---+---+---+---+---+---+
2180  * | | P'| F | F | F | # | * | PG#0
2181  * +---+---+---+---+---+---+---+
2182  * | F | F | F | F | F | # | * | PG#1
2183  * +---+---+---+---+---+---+---+
2184  * | F | F | F | P'| | # | * | PG#2
2185  * +---+---+---+---+---+---+---+
2186  * N N N N N K P
2187  * y b
2188  *
2189  * Read-old approach
2190  *
2191  * a x
2192  * +---+---+---+---+---+---+---+
2193  * | | | | P'| F | # | * | PG#0
2194  * +---+---+---+---+---+---+---+
2195  * | F | F | F | F | F | # | * | PG#1
2196  * +---+---+---+---+---+---+---+
2197  * | F | P'| | | | # | * | PG#2
2198  * +---+---+---+---+---+---+---+
2199  * N N N N N K P
2200  * y b
2201  *
2202  */
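/*
 * Worked illustration of the trade-off pictured above (hypothetical
 * numbers): in the read-rest layout the unspanned and partially spanned
 * data units of PG#0 and PG#2 are read in, so the whole group is in
 * memory and parity can be computed directly from the data units. In the
 * read-old layout only the spanned units and the old parity unit are read
 * back, and the parity is updated incrementally (see m0_parity_math_diff()
 * in pargrp_iomap_parity_recalc()). As noted above, the variant that
 * transfers less data is typically chosen; see pargrp_iomap_readrest()
 * below and pargrp_iomap_readold_auxbuf_alloc() above.
 */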
2203 static int pargrp_iomap_readrest(struct pargrp_iomap *map)
2204 {
2205  int rc;
2206  uint32_t row;
2207  uint32_t col;
2208  uint32_t seg;
2209  uint32_t seg_nr;
2210  m0_bindex_t grpstart;
2211  m0_bindex_t grpend;
2212  m0_bindex_t start;
2213  m0_bindex_t end;
2214  m0_bcount_t count = 0;
2215  struct inode *inode;
2216  struct m0_indexvec_varr *ivec;
2217  struct m0_ivec_varr_cursor cur;
2218  struct m0_pdclust_layout *play;
2219 
2221  M0_PRE(map->pi_rtype == PIR_READREST);
2222 
2223  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
2224  play = pdlayout_get(map->pi_ioreq);
2225  ivec = &map->pi_ivv;
2226  seg_nr = V_SEG_NR(&map->pi_ivv);
2227  grpstart = data_size(play) * map->pi_grpid;
2228  grpend = grpstart + data_size(play);
2229 
2230  /* Extends first segment to align with start of parity group. */
2231  V_COUNT(ivec, 0) += (V_INDEX(ivec, 0) - grpstart);
2232  V_INDEX(ivec, 0) = grpstart;
2233 
2234  /* Extends last segment to align with end of parity group. */
2235  V_COUNT(ivec, seg_nr - 1) = grpend - V_INDEX(ivec, seg_nr - 1);
2236 
2237  /*
2238  * All io extents _not_ spanned by pargrp_iomap::pi_ivv
2239  * need to be included so that _all_ pages from parity group
2240  * are available to do IO.
2241  */
2242  for (seg = 1; seg_nr > 2 && seg <= seg_nr - 2; ++seg) {
2243  if (v_seg_endpos(ivec, seg) < V_INDEX(ivec, seg + 1))
2244  V_COUNT(ivec, seg) += V_INDEX(ivec, seg + 1) -
2245  v_seg_endpos(ivec, seg);
2246  }
2247 
2248  inode = m0t1fs_file_to_inode(map->pi_ioreq->ir_file);
2249  m0_ivec_varr_cursor_init(&cur, &map->pi_ivv);
2250 
2251  while (!m0_ivec_varr_cursor_move(&cur, count)) {
2252 
2253  start = m0_ivec_varr_cursor_index(&cur);
2254  end = min64u(m0_round_up(start + 1, PAGE_SIZE),
2255  start + m0_ivec_varr_cursor_step(&cur));
2256  count = end - start;
2257  page_pos_get(map, start, &row, &col);
2258 
2259  if (map->pi_databufs[row][col] == NULL) {
2260  rc = pargrp_iomap_databuf_alloc(map, row, col);
2261  if (rc != 0)
2262  return M0_ERR_INFO(rc, "[%p] databuf_alloc "
2263  "failed", map->pi_ioreq);
2264 
2265  if (end <= inode->i_size || (inode->i_size > 0 &&
2266  page_id(end - 1) == page_id(inode->i_size - 1)))
2267  map->pi_databufs[row][col]->db_flags |=
2268  PA_READ;
2269  }
2270  }
2271 
2272  return M0_RC(0);
2273 }
2274 
2275 static int pargrp_iomap_paritybufs_alloc(struct pargrp_iomap *map)
2276 {
2277  uint32_t row;
2278  uint32_t col;
2279  struct m0_pdclust_layout *play;
2280  struct inode *inode;
2281  struct m0t1fs_sb *csb;
2282  struct data_buf *dbuf;
2283 
2285 
2286  M0_ENTRY("[%p] map %p grpid=%llu", map->pi_ioreq, map, map->pi_grpid);
2288  csb = M0T1FS_SB(inode->i_sb);
2289 
2290  play = pdlayout_get(map->pi_ioreq);
2291  for (row = 0; row < rows_nr(play); ++row) {
2292  for (col = 0; col < layout_k(play); ++col) {
2293  struct file *irf;
2294 
2295  map->pi_paritybufs[row][col] = data_buf_alloc_init(0);
2296  if (map->pi_paritybufs[row][col] == NULL)
2297  goto err;
2298  dbuf = map->pi_paritybufs[row][col];
2299  if (map->pi_ioreq->ir_type == IRT_WRITE)
2300  dbuf->db_flags |= PA_WRITE;
2301 
2302  irf = map->pi_ioreq->ir_file;
2303  if ((map->pi_rtype == PIR_READOLD ||
2304  (map->pi_ioreq->ir_type == IRT_READ &&
2305  csb->csb_verify)) &&
2306  m0t1fs_file_to_inode(irf)->i_size >
2307  data_size(play) * map->pi_grpid)
2308  dbuf->db_flags |= PA_READ;
2309  }
2310  }
2311  return M0_RC(0);
2312 err:
2313  for (row = 0; row < rows_nr(play); ++row) {
2314  for (col = 0; col < layout_k(play); ++col)
2315  m0_free0(&map->pi_paritybufs[row][col]);
2316  }
2317  return M0_ERR_INFO(-ENOMEM, "[%p] Memory allocation failed for "
2318  "data_buf.", map->pi_ioreq);
2319 }
2320 
2329 static m0_bindex_t seg_set(struct pargrp_iomap *map, uint32_t seg,
2330  struct m0_ivec_varr_cursor *cur, m0_bindex_t grpend)
2331 {
2332  m0_bindex_t end = m0_ivec_varr_cursor_conti(cur, grpend);
2333 
2335  V_COUNT(&map->pi_ivv, seg) = end - V_INDEX(&map->pi_ivv, seg);
2336 
2337  return end;
2338 }
2339 
2341 static void seg_idx_inc_round(struct pargrp_iomap *map, uint32_t seg,
2342  uint64_t sz)
2343 {
2344  m0_bindex_t idx = m0_round_up(V_INDEX(&map->pi_ivv, seg) + 1, sz);
2345 
2346  V_COUNT(&map->pi_ivv, seg) -= idx - V_INDEX(&map->pi_ivv, seg);
2347  V_INDEX(&map->pi_ivv, seg) = idx;
2348 }
2349 
2351 static void seg_align(struct pargrp_iomap *map, uint32_t seg,
2352  m0_bindex_t end, uint64_t sz)
2353 {
2354  m0_bindex_t idx = round_down(V_INDEX(&map->pi_ivv, seg), sz);
2355 
2356  V_INDEX(&map->pi_ivv, seg) = idx;
2357  V_COUNT(&map->pi_ivv, seg) = round_up(end, sz) - idx;
2358 }
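/*
 * Illustrative example (not part of the driver): with PAGE_SIZE = 4096,
 * a segment [5000, +5000), i.e. ending at 10000, is aligned by seg_align()
 * to index = round_down(5000, 4096) = 4096 and
 * count = round_up(10000, 4096) - 4096 = 12288 - 4096 = 8192,
 * i.e. exactly the two pages [4096, 12288) that the original segment
 * touches.
 */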
2359 
2365  struct m0_ivec_varr_cursor *cursor,
2366  bool rmw)
2367 {
2368  int rc;
2369  uint32_t seg;
2370  m0_bindex_t seg_end = 0;
2371  m0_bcount_t grpsize;
2372  m0_bcount_t count = 0;
2373  m0_bindex_t grpstart;
2374  m0_bindex_t grpend;
2375  struct m0_pdclust_layout *play;
2376  struct inode *inode;
2377 
2378  M0_PRE(map != NULL);
2379 
2380  play = pdlayout_get(map->pi_ioreq);
2381  grpsize = data_size(play);
2382  grpstart = grpsize * map->pi_grpid;
2383  grpend = grpstart + grpsize;
2385 
2386  for (seg = 0; !m0_ivec_varr_cursor_move(cursor, count) &&
2387  m0_ivec_varr_cursor_index(cursor) < grpend;) {
2388  /*
2389  * Skips the current segment if it is completely spanned by
2390  * rounding up/down of an earlier segment.
2391  */
2392  if (map->pi_ops->pi_spans_seg(map,
2393  m0_ivec_varr_cursor_index(cursor),
2394  m0_ivec_varr_cursor_step(cursor))) {
2395  count = m0_ivec_varr_cursor_step(cursor);
2396  continue;
2397  }
2398 
2399  /* Make sure read IO does not go beyond EOF. */
2400  if (map->pi_ioreq->ir_type == IRT_READ &&
2401  grpend > inode->i_size) {
2402  if (V_INDEX(&map->pi_ivv, seg) >= inode->i_size) {
2403  count = m0_ivec_varr_cursor_step(cursor);
2404  continue;
2405  }
2406  seg_end = seg_set(map, seg, cursor, inode->i_size);
2407  } else
2408  seg_end = seg_set(map, seg, cursor, grpend);
2409 
2410  /*
2411  * If current segment is _partially_ spanned by previous
2412  * segment in pargrp_iomap::pi_ivv, start of segment is
2413  * rounded up to move to next page.
2414  */
2415  if (seg > 0 && V_INDEX(&map->pi_ivv, seg) <
2416  v_seg_endpos(&map->pi_ivv, seg - 1))
2418 
2419  ++V_SEG_NR(&map->pi_ivv);
2420 
2421  M0_LOG(M0_DEBUG, "[%p] pre grp_id=%" PRIu64 " seg=%"PRIu32
2422  " =[%" PRIu64 ",+%" PRIu64 ")", map->pi_ioreq,
2423  map->pi_grpid,seg, V_INDEX(&map->pi_ivv, seg),
2424  V_COUNT(&map->pi_ivv, seg));
2425 
2426  rc = map->pi_ops->pi_seg_process(map, seg, rmw);
2427  if (rc != 0)
2428  return M0_ERR(rc);
2429 
2430  seg_align(map, seg, seg_end, PAGE_SIZE);
2431 
2432  M0_LOG(M0_DEBUG, "[%p] post grp_id=%" PRIu64 " seg=%"PRIu32
2433  " =[%" PRIu64 ",+%" PRIu64 ")", map->pi_ioreq,
2434  map->pi_grpid, seg, V_INDEX(&map->pi_ivv, seg),
2435  V_COUNT(&map->pi_ivv, seg));
2436 
2437  count = seg_end - m0_ivec_varr_cursor_index(cursor);
2438  M0_LOG(M0_DEBUG, "[%p] cursor advance +%" PRIu64 " from %"PRIu64,
2439  map->pi_ioreq, count, m0_ivec_varr_cursor_index(cursor));
2440  ++seg;
2441  }
2442 
2443  return M0_RC(0);
2444 }
2445 
2446 /*
2447  * Decides whether to undertake a read-old or read-rest approach for
2448  * the parity group RMW IO request based on the total number of pages
2449  * to be read and written.
2450  *
2451  * In the read-old approach, the old data and parity units are read and
2452  * the new parity is calculated incrementally based on the difference
2453  * between the old and new data and parity units.
2454  *
2455  * In the read-rest approach, the remaining data units of the group are
2456  * read and the new parity is calculated from them and the new data
2457  * units to be written.
2458  *
2459  * In both approaches, the number of units to be written is the same
2460  * (new data units and updated parity units), so we compare only the
2461  * number of units (pages) to be read.
2462  *
2463  * By default, the segments in index vector pargrp_iomap::pi_ivec
2464  * are suitable for read-old approach. Hence the index vector is
2465  * changed only if read-rest approach is selected.
2466  *
2467  * @param map is the parity group iomap
2468  * @param data_pages_nr is the number of data pages in group
2469  * @param parity_pages_nr is the number of parity pages in group
2470  */
2472  m0_bcount_t data_pages_nr,
2473  m0_bcount_t parity_pages_nr)
2474 {
2475  int rc;
2476  /*
2477  * In read-old the number of pages to be read is the same as
2478  * the number of pages to be written.
2479  *
2480  * TODO: Can use number of data_buf structures instead of using
2481  * indexvec_page_nr().
2482  */
2483  uint64_t ro_pages_nr = iomap_page_nr(map) + parity_pages_nr;
2484  /*
2485  * In read-rest, the pages to be read are all the data
2486  * pages that are not fully spanned by the IO vector.
2487  */
2488  uint64_t rr_pages_nr = data_pages_nr -
2489  map->pi_ops->pi_fullpages_find(map);
2490 
2491  if (rr_pages_nr < ro_pages_nr) {
2492  M0_LOG(M0_DEBUG, "[%p] Read-rest selected", map->pi_ioreq);
2493  map->pi_rtype = PIR_READREST;
2494  rc = map->pi_ops->pi_readrest(map);
2495  if (rc != 0)
2496  return M0_ERR(rc);
2497  } else {
2498  M0_LOG(M0_DEBUG, "[%p] Read-old selected", map->pi_ioreq);
2499  map->pi_rtype = PIR_READOLD;
2500  rc = map->pi_ops->pi_readold_auxbuf_alloc(map);
2501  }
2502 
2503  return M0_RC(rc);
2504 }
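/*
 * Illustrative example (not part of the driver): with N = 5, K = 1 and
 * 4k units, suppose a write fully spans 3 of the 5 data pages of a group.
 * Read-old would read the 3 spanned data pages plus 1 parity page
 * (ro_pages_nr = 4), while read-rest would read only the 2 data pages not
 * fully spanned by the IO vector (rr_pages_nr = 2), so read-rest is chosen.
 */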
2505 
2507  struct m0_ivec_varr_cursor *cursor)
2508 {
2509  int rc;
2510  bool rmw = false;
2511  uint64_t grpsize;
2512  m0_bcount_t count = 0;
2513  m0_bindex_t grpstart;
2514  m0_bindex_t grpend;
2515  struct m0_pdclust_layout *play;
2516  struct inode *inode;
2517  struct m0t1fs_sb *csb;
2518  struct io_request *req;
2519 
2520  M0_PRE(map != NULL);
2521  M0_PRE(cursor->vc_ivv != NULL);
2522 
2523  req = map->pi_ioreq;
2524  play = pdlayout_get(map->pi_ioreq);
2525  grpsize = data_size(play);
2526  grpstart = grpsize * map->pi_grpid;
2527  grpend = grpstart + grpsize;
2529  csb = M0T1FS_SB(inode->i_sb);
2530 
2531  M0_ENTRY("[%p] map=%p ivec=%p", req, map, cursor->vc_ivv);
2532 
2533  /*
2534  * For a write, if this map does not span the whole parity group,
2535  * it is a read-modify-write.
2536  */
2537  if (map->pi_ioreq->ir_type == IRT_WRITE && grpstart < inode->i_size &&
2538  (m0_ivec_varr_cursor_index(cursor) > grpstart ||
2539  m0_ivec_varr_cursor_conti(cursor, grpend) < grpend))
2540  rmw = true;
2541 
2542  M0_LOG(M0_INFO, "[%p] grp_id=%llu: %s", req, map->pi_grpid,
2543  rmw ? "rmw" : "aligned");
2544 
2545  /* In 'verify mode', read all data units in this parity group */
2546  if (map->pi_ioreq->ir_type == IRT_READ && csb->csb_verify) {
2547  indexvec_varr_dump(&map->pi_ivv);
2548  M0_LOG(M0_DEBUG, "[%p] ivec=[%llu, +%llu)", req,
2549  grpstart, grpsize);
2550  V_SEG_NR(&map->pi_ivv) = 1;
2551  V_INDEX(&map->pi_ivv, 0) = grpstart;
2552  /* limit to file size. */
2553  count = min64u(grpend, inode->i_size) - grpstart;
2554  V_COUNT(&map->pi_ivv, 0) = round_up(count, PAGE_SIZE);
2555  rc = map->pi_ops->pi_seg_process(map, 0, rmw);
2556  m0_ivec_varr_cursor_move_to(cursor, grpend);
2557  } else
2558  rc = pargrp_iomap_populate_pi_ivec(map, cursor, rmw);
2559 
2560  if (rc != 0)
2561  return M0_ERR_INFO(rc, "[%p] failed", req);
2562 
2563  if (rmw) {
2565  parity_units_page_nr(play));
2566  if (rc != 0)
2567  return M0_ERR_INFO(rc, "[%p] failed", req);
2568  }
2569 
2570  /* For READ in verify mode or WRITE */
2571  if (map->pi_ioreq->ir_type == IRT_WRITE ||
2572  (map->pi_ioreq->ir_type == IRT_READ && csb->csb_verify))
2573  rc = map->pi_ops->pi_paritybufs_alloc(map);
2574 
2576 
2577  return M0_RC(rc);
2578 }
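/*
 * Illustrative example (not part of the driver, numbers are hypothetical):
 * for a parity group covering [40960, 61440) in a file of 100000 bytes,
 * a write of [45056, 53248) starts past grpstart and ends before grpend,
 * so the map is populated as a read-modify-write; a write covering the
 * full [40960, 61440) range would be treated as aligned instead.
 */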
2579 
2585 {
2586  int rc = 0;
2587  uint32_t row;
2588  uint32_t row_nr;
2589  uint32_t col;
2590  uint32_t col_nr;
2591  struct data_buf ***bufs;
2592  struct m0_pdclust_layout *play;
2593  M0_PRE(map != NULL);
2594  M0_PRE(M0_IN(type, (M0_PUT_DATA, M0_PUT_PARITY)));
2595  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
2596 
2597  play = pdlayout_get(map->pi_ioreq);
2598 
2599  if (type == M0_PUT_DATA) {
2600  M0_ASSERT(map->pi_databufs != NULL);
2601  row_nr = rows_nr(play);
2602  col_nr = layout_n(play);
2603  bufs = map->pi_databufs;
2604  } else {
2605  row_nr = rows_nr(play);
2606  col_nr = layout_k(play);
2607  bufs = map->pi_paritybufs;
2608  }
2609 
2610  /*
2611  * Allocates data_buf structures from either ::pi_databufs
2612  * or ::pi_paritybufs array.
2613  * The loop traverses the matrix, column (unit) by column (unit).
2614  */
2615  for (col = 0; col < col_nr; ++col) {
2616  for (row = 0; row < row_nr; ++row) {
2617  /*
2618  * If a page is marked as PA_READ_FAILED, all
2619  * other pages belonging to the same unit as
2620  * the failed one are also marked as PA_READ_FAILED,
2621  * hence the loop breaks here.
2622  */
2623  if (bufs[row][col] != NULL &&
2624  bufs[row][col]->db_flags & PA_READ_FAILED)
2625  break;
2626  }
2627 
2628  if (row == row_nr)
2629  continue;
2630 
2631  for (row = 0; row < row_nr; ++row) {
2632  if (bufs[row][col] == NULL) {
2633  bufs[row][col] = data_buf_alloc_init(0);
2634  if (bufs[row][col] == NULL) {
2635  rc = M0_ERR(-ENOMEM);
2636  break;
2637  }
2638  }
2639  bufs[row][col]->db_flags |= PA_READ_FAILED;
2640  }
2641  }
2642  return M0_RC(rc);
2643 }
2644 
2645 static int unit_state(const struct m0_pdclust_src_addr *src,
2646  const struct io_request *req,
2647  enum m0_pool_nd_state *state)
2648 {
2649  struct m0_pdclust_instance *play_instance;
2650  struct m0_pdclust_tgt_addr tgt;
2651  int rc;
2652  struct m0_poolmach *pm;
2653 
2654  M0_ENTRY("[%p]", req);
2655 
2656  play_instance = pdlayout_instance(layout_instance(req));
2657  m0_fd_fwd_map(play_instance, src, &tgt);
2658 
2660  M0_ASSERT(pm != NULL);
2661  rc = m0_poolmach_device_state(pm, tgt.ta_obj, state);
2662  return M0_RC(rc);
2665 }
2666 
2667 static int io_spare_map(const struct pargrp_iomap *map,
2668  const struct m0_pdclust_src_addr *src,
2669  uint32_t *spare_slot, uint32_t *spare_slot_prev,
2670  enum m0_pool_nd_state *eff_state)
2671 {
2672 
2673  struct m0_pdclust_layout *play;
2674  struct m0_pdclust_instance *play_instance;
2675  const struct m0_fid *gfid;
2676  struct m0_pdclust_src_addr spare;
2677  int rc;
2678  struct m0_poolmach *pm;
2679 
2680  M0_ENTRY("[%p]", map->pi_ioreq);
2681  play = pdlayout_get(map->pi_ioreq);
2682  play_instance = pdlayout_instance(layout_instance(map->pi_ioreq));
2683  gfid = file_to_fid(map->pi_ioreq->ir_file);
2684 
2685  pm = m0t1fs_file_to_poolmach(map->pi_ioreq->ir_file);
2686  M0_ASSERT(pm != NULL);
2687  rc = m0_sns_repair_spare_map(pm, gfid, play, play_instance,
2688  src->sa_group, src->sa_unit,
2689  spare_slot, spare_slot_prev);
2690  if (rc != 0) {
2691  return M0_RC(rc);
2692  }
2693  /* Check if there is an effective failure of unit. */
2694  spare.sa_group = src->sa_group;
2695  spare.sa_unit = *spare_slot_prev;
2696  rc = unit_state(&spare, map->pi_ioreq, eff_state);
2697  return M0_RC(rc);
2698 }
2699 
2700 
2701 static void mark_page_as_read_failed(struct pargrp_iomap *map, uint32_t row,
2702  uint32_t col, enum page_attr page_type)
2703 {
2704  struct m0_pdclust_layout *play;
2705  struct m0_pdclust_src_addr src;
2706  enum m0_pool_nd_state state;
2707  uint32_t spare_slot;
2708  uint32_t spare_prev;
2709  int rc;
2710 
2711  M0_ENTRY("[%p] pid=%llu, row = %u, col=%u, type=0x%x",
2712  map->pi_ioreq, map->pi_grpid, row, col, page_type);
2713  M0_PRE(M0_IN(page_type,(PA_DATA, PA_PARITY)));
2714  M0_PRE(ergo(page_type == PA_DATA, map->pi_databufs[row][col] != NULL));
2715  M0_PRE(ergo(page_type == PA_PARITY,
2716  map->pi_paritybufs[row][col] != NULL));
2717 
2718  play = pdlayout_get(map->pi_ioreq);
2719  src.sa_group = map->pi_grpid;
2720  if (page_type == PA_DATA)
2721  src.sa_unit = col;
2722  else
2723  src.sa_unit = col + layout_n(play);
2724 
2725  rc = unit_state(&src, map->pi_ioreq, &state);
2726  M0_ASSERT(rc == 0);
2727  if (state == M0_PNDS_SNS_REPAIRED) {
2728  /* gets the state of corresponding spare unit */
2729  rc = io_spare_map(map, &src, &spare_slot, &spare_prev,
2730  &state);
2731  M0_ASSERT(rc == 0);
2732  }
2733  /*
2734  * Checking state M0_PNDS_SNS_REBALANCING allows concurrent read during
2735  * sns rebalancing in oostore mode. This works similarly to
2736  * M0_PNDS_FAILED.
2737  * To handle concurrent i/o in non-oostore mode, some more changes are
2738  * required to write data to live unit (on earlier failed device) if the
2739  * device state is M0_PNDS_SNS_REBALANCING.
2740  */
2741  if (M0_IN(state, (M0_PNDS_FAILED, M0_PNDS_OFFLINE,
2743  if (page_type == PA_DATA)
2744  map->pi_databufs[row][col]->db_flags |=
2746  else
2747  map->pi_paritybufs[row][col]->db_flags |=
2749  }
2750  M0_LEAVE();
2751 }
2752 
2760  struct target_ioreq *tio,
2761  m0_bindex_t *index,
2762  uint32_t count)
2763 {
2764  int rc = 0;
2765  uint32_t row;
2766  uint32_t col;
2767  m0_bindex_t goff;
2768  struct m0_pdclust_layout *play;
2769  struct m0_pdclust_src_addr src;
2770  enum m0_pool_nd_state dev_state;
2771  uint32_t spare_slot;
2772  uint32_t spare_slot_prev;
2773  struct m0_poolmach *pm;
2774  struct io_request *req;
2775 
2777  M0_ENTRY("[%p] grpid = %llu, count = %u\n",
2778  map->pi_ioreq, map->pi_grpid, count);
2779  M0_PRE(tio != NULL);
2780  M0_PRE(index != NULL);
2781  M0_PRE(count > 0);
2782 
2783  req = map->pi_ioreq;
2784  pm = m0t1fs_file_to_poolmach(map->pi_ioreq->ir_file);
2785  M0_ASSERT(pm != NULL);
2786  rc = m0_poolmach_device_state(pm, tio->ti_obj, &dev_state);
2787  play = pdlayout_get(req);
2788  pargrp_src_addr(index[0], req, tio, &src);
2789  M0_ASSERT(src.sa_group == map->pi_grpid);
2790  M0_ASSERT(src.sa_unit < layout_n(play) + layout_k(play));
2791  M0_LOG(M0_DEBUG, "[%p] src=[%llu:%llu] device state=%d",
2792  map->pi_ioreq, src.sa_group, src.sa_unit, dev_state);
2793  if (dev_state == M0_PNDS_SNS_REPAIRED) {
2794  rc = io_spare_map(map, &src, &spare_slot, &spare_slot_prev,
2795  &dev_state);
2796  M0_ASSERT(rc == 0);
2797  M0_LOG(M0_DEBUG, "[%p] spare=[%u] spare_prev=[%u] state=%d",
2798  map->pi_ioreq, spare_slot,
2799  spare_slot_prev, dev_state);
2800  if (dev_state == M0_PNDS_SNS_REPAIRED) {
2801  M0_LOG(M0_DEBUG, "reading from spare");
2802  return M0_RC(0);
2803  }
2804  }
2805  map->pi_state = PI_DEGRADED;
2806  ++req->ir_dgmap_nr;
2807  /* Failed segment belongs to a data unit. */
2808  if (src.sa_unit < layout_n(play)) {
2809  goff = gfile_offset(index[0], map, play, &src);
2810  page_pos_get(map, goff, &row, &col);
2811  M0_ASSERT(map->pi_databufs[row][col] != NULL);
2812  map->pi_databufs[row][col]->db_flags |= PA_READ_FAILED;
2813  } else {
2814  /* Failed segment belongs to a parity unit. */
2815  row = page_nr(index[0]) % page_nr(layout_unit_size(play));
2816  col = src.sa_unit - layout_n(play);
2817  M0_ASSERT(map->pi_paritybufs[row][col] != NULL);
2818  map->pi_paritybufs[row][col]->db_flags |= PA_READ_FAILED;
2819  }
2820  /*
2821  * Since m0_parity_math_recover() API will recover one or more
2822  * _whole_ units, all pages from a failed unit can be marked as
2823  * PA_READ_FAILED. These pages need not be read again.
2824  */
2826  if (rc != 0)
2827  return M0_ERR_INFO(rc, "[%p] Failed to mark pages from parity "
2828  "group", req);
2829 
2830  /*
2831  * If parity buffers are not allocated, they should be allocated
2832  * since they are needed for recovering lost data.
2833  */
2834  if (map->pi_paritybufs == NULL) {
2835  M0_ALLOC_ARR(map->pi_paritybufs, rows_nr(play));
2836  if (map->pi_paritybufs == NULL)
2837  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate "
2838  "parity buffers", req);
2839 
2840  for (row = 0; row < rows_nr(play); ++row) {
2841  M0_ALLOC_ARR(map->pi_paritybufs[row],
2842  layout_k(play));
2843  if (map->pi_paritybufs[row] == NULL) {
2844  rc = M0_ERR(-ENOMEM);
2845  goto par_fail;
2846  }
2847  }
2848  }
2850  return M0_RC(rc);
2851 
2852 par_fail:
2853  M0_ASSERT(rc != 0);
2854  for (row = 0; row < rows_nr(play); ++row)
2855  m0_free0(&map->pi_paritybufs[row]);
2856  m0_free0(&map->pi_paritybufs);
2857 
2858  return M0_ERR_INFO(rc, "[%p] dgmode_process failed", req);
2859 }
2860 
2862 {
2863  int rc = 0;
2864  bool within_eof;
2865  uint32_t row;
2866  uint32_t col;
2868  struct inode *inode;
2869  struct data_buf *dbuf;
2870  struct m0_pdclust_layout *play;
2871  struct m0t1fs_sb *csb;
2872  struct io_request *req;
2873 
2875 
2876  /*
2877  * read_old: Reads unavailable data subject to condition that
2878  * data lies within file size. Parity is already read.
2879  * read_rest: Reads parity units. Data for parity group is already
2880  * read.
2881  * simple_read: Reads unavailable data subject to condition that
2882  * data lies within file size. Parity also has to be read.
2883  */
2884 
2885  req = map->pi_ioreq;
2886  M0_ENTRY("[%p] parity group id %llu, map state = %d",
2887  req, map->pi_grpid, map->pi_state);
2888 
2890  play = pdlayout_get(req);
2891 
2892  /*
2893  * Data matrix from parity group.
2894  * The loop traverses column by column to be in sync with
2895  * increasing file offset.
2896  * This is necessary in order to generate correct index vector.
2897  */
2898  for (col = 0; col < layout_n(play); ++col) {
2899  for (row = 0; row < rows_nr(play); ++row) {
2900 
2901  start = data_page_offset_get(map, row, col);
2902  within_eof = start + PAGE_SIZE < inode->i_size ||
2903  (inode->i_size > 0 &&
2904  page_id(start + PAGE_SIZE - 1) ==
2905  page_id(inode->i_size - 1));
2906  if (map->pi_databufs[row][col] != NULL) {
2907  if (map->pi_databufs[row][col]->db_flags &
2909  continue;
2910  } else {
2911  /*
2912  * If current parity group map is degraded,
2913  * then recovery is needed and a new
2914  * data buffer needs to be allocated subject to
2915  * limitation of file size.
2916  */
2917  if (map->pi_state == PI_DEGRADED &&
2918  within_eof) {
2919  map->pi_databufs[row][col] =
2921  if (map->pi_databufs[row][col] ==
2922  NULL) {
2923  rc = M0_ERR(-ENOMEM);
2924  break;
2925  }
2926  mark_page_as_read_failed(map, row, col,
2927  PA_DATA);
2928  }
2929  if (map->pi_state == PI_HEALTHY)
2930  continue;
2931  }
2932  dbuf = map->pi_databufs[row][col];
2933  /*
2934  * Marks only those data buffers which lie within EOF.
2935  * Since all IO fops receive error
2936  * once sns repair starts (M0_PNDS_SNS_REPAIRING state)
2937  * read is not done for any of these fops.
2938  * Hence all pages other than the one which encountered
2939  * failure (PA_READ_FAILED flag set) are read in
2940  * degraded mode.
2941  */
2942  if (within_eof) {
2943  if (dbuf->db_flags & PA_READ_FAILED ||
2944  is_page_read(dbuf)) {
2945  continue;
2946  }
2947  dbuf->db_flags |= PA_DGMODE_READ;
2948  }
2949  }
2950  }
2951 
2952  if (rc != 0)
2953  goto err;
2954 
2955  csb = M0T1FS_SB(inode->i_sb);
2956  /* If parity group is healthy, there is no need to read parity. */
2957  if (map->pi_state != PI_DEGRADED && !csb->csb_verify)
2958  return M0_RC(0);
2959 
2960  /*
2961  * Populates the index vector if the original read IO request did not
2962  * span it. Since recovery is done using parity algorithms, the
2963  * whole parity group needs to be read, subject to the file size limit.
2964  * Hence the parity group index vector contains only one segment,
2965  * spanning the whole parity group.
2966  */
2967  V_INDEX(&map->pi_ivv, 0) = map->pi_grpid * data_size(play);
2968  V_COUNT(&map->pi_ivv, 0) = min64u(V_INDEX(&map->pi_ivv, 0) +
2969  data_size(play),
2970  inode->i_size) -
2971  V_INDEX(&map->pi_ivv, 0);
2972  /*
2973  * m0_0vec requires all members except the last one to have data count
2974  * multiple of 4K.
2975  */
2976  V_COUNT(&map->pi_ivv, 0) = round_up(
2977  V_COUNT(&map->pi_ivv, 0),
2978  PAGE_SIZE);
2979  V_SEG_NR(&map->pi_ivv) = 1;
2980  indexvec_varr_dump(&map->pi_ivv);
2981  /* parity matrix from parity group. */
2982  for (row = 0; row < rows_nr(play); ++row) {
2983  for (col = 0; col < layout_k(play); ++col) {
2984 
2985  if (map->pi_paritybufs[row][col] == NULL) {
2986  map->pi_paritybufs[row][col] =
2988  if (map->pi_paritybufs[row][col] == NULL) {
2989  rc = M0_ERR(-ENOMEM);
2990  break;
2991  }
2992  }
2993  dbuf = map->pi_paritybufs[row][col];
2995  /* Skips the page if it is marked as PA_READ_FAILED. */
2996  if (dbuf->db_flags & PA_READ_FAILED ||
2997  is_page_read(dbuf)) {
2998  continue;
2999  }
3000  dbuf->db_flags |= PA_DGMODE_READ;
3001  }
3002  }
3003  if (rc != 0)
3004  goto err;
3005  return M0_RC(rc);
3006 err:
3007  return M0_ERR_INFO(rc,"[%p] %s", req,
3008  rc == -ENOMEM ? "Failed to allocate "
3009  "data buffer": "Illegal device queried for status");
3010 }
3011 
3013  uint8_t *failed)
3014 {
3015  struct m0_pdclust_layout *play;
3016  uint32_t col;
3017  uint32_t K = 0;
3018 
3019  play = pdlayout_get(map->pi_ioreq);
3020  for (col = 0; col < layout_n(play); ++col) {
3021  if (map->pi_databufs[0][col] != NULL &&
3022  map->pi_databufs[0][col]->db_flags &
3023  PA_READ_FAILED) {
3024  failed[col] = 1;
3025  ++K;
3026  }
3027 
3028  }
3029  for (col = 0; col < layout_k(play); ++col) {
3030  M0_ASSERT(map->pi_paritybufs[0][col] != NULL);
3031  if (map->pi_paritybufs[0][col]->db_flags &
3032  PA_READ_FAILED) {
3033  failed[col + layout_n(play)] = 1;
3034  ++K;
3035  }
3036  }
3037  return K;
3038 }
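/*
 * Illustrative example (not part of the driver): with N = 5, K = 1, if
 * data unit 2 and parity unit 0 are both flagged PA_READ_FAILED, this
 * routine sets failed[2] and failed[5] and returns K = 2; since that
 * exceeds layout_k(play) = 1, the dgmode recovery path below bails out
 * with -EIO instead of attempting recovery.
 */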
3039 
3041 {
3042  int rc = 0;
3043  uint32_t row;
3044  uint32_t col;
3045  uint32_t K;
3046  unsigned long zpage;
3047  struct m0_buf *data;
3048  struct m0_buf *parity;
3049  struct m0_buf failed;
3050  struct m0_pdclust_layout *play;
3051 
3053  M0_PRE(map->pi_state == PI_DEGRADED);
3054 
3055  M0_ENTRY("[%p] map %p", map->pi_ioreq, map);
3056 
3057  play = pdlayout_get(map->pi_ioreq);
3058  M0_ALLOC_ARR(data, layout_n(play));
3059  if (data == NULL)
3060  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate memory"
3061  " for data buf", map->pi_ioreq);
3062 
3063  M0_ALLOC_ARR(parity, layout_k(play));
3064  if (parity == NULL) {
3065  m0_free(data);
3066  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate memory"
3067  " for parity buf", map->pi_ioreq);
3068  }
3069 
3070  zpage = get_zeroed_page(GFP_KERNEL);
3071  if (zpage == 0) {
3072  m0_free(data);
3073  m0_free(parity);
3074  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate page.",
3075  map->pi_ioreq);
3076  }
3077 
3078  failed.b_nob = layout_n(play) + layout_k(play);
3079  failed.b_addr = m0_alloc(failed.b_nob);
3080  if (failed.b_addr == NULL) {
3081  m0_free(data);
3082  m0_free(parity);
3083  free_page(zpage);
3084  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate memory "
3085  "for m0_buf", map->pi_ioreq);
3086  }
3087  K = iomap_dgmode_recov_prepare(map, (uint8_t *)failed.b_addr);
3088  if (K > layout_k(play)) {
3089  M0_LOG(M0_ERROR, "Failures in group %d exceed the parity units",
3090  (int)map->pi_grpid);
3091  rc = -EIO;
3092  goto end;
3093  }
3094 
3095  /* Populates data and failed buffers. */
3096  for (row = 0; row < rows_nr(play); ++row) {
3097  for (col = 0; col < layout_n(play); ++col) {
3098  data[col].b_nob = PAGE_SIZE;
3099  if (map->pi_databufs[row][col] == NULL) {
3100  data[col].b_addr = (void *)zpage;
3101  continue;
3102  }
3103  data[col].b_addr = map->pi_databufs[row][col]->
3104  db_buf.b_addr;
3105  }
3106  for (col = 0; col < layout_k(play); ++col) {
3107  M0_ASSERT(map->pi_paritybufs[row][col] != NULL);
3108  parity[col].b_addr = map->pi_paritybufs[row][col]->
3109  db_buf.b_addr;
3110  parity[col].b_nob = PAGE_SIZE;
3111  }
3113  parity, &failed, M0_LA_INVERSE);
3114  if (rc != 0)
3115  goto end;
3116  }
3117 
3118 end:
3119  m0_free(data);
3120  m0_free(parity);
3121  m0_free(failed.b_addr);
3122  free_page(zpage);
3123  return rc == 0 ? M0_RC(0) : M0_ERR_INFO(rc, "Number of failed units "
3124  "in parity group %d exceeds the "
3125  "total number of parity units "
3126  "in the group.",
3127  (int)map->pi_grpid);
3128 }
3129 
3131 {
3132  uint64_t seg;
3133  uint64_t grp;
3134  uint64_t grpstart;
3135  uint64_t grpend;
3136  uint64_t *grparray;
3137  uint64_t grparray_sz;
3138  struct m0_pdclust_layout *play;
3139 
3140  M0_ENTRY("[%p]", req);
3141 
3142  play = pdlayout_get(req);
3143 
3144  /* Array of maximum possible number of groups spanned by req. */
3145  grparray_sz = indexvec_varr_count(&req->ir_ivv) / data_size(play) +
3146  2 * V_SEG_NR(&req->ir_ivv);
3147  M0_LOG(M0_DEBUG, "[%p] arr_sz=%llu", req, grparray_sz);
3148  M0_ALLOC_ARR(grparray, grparray_sz);
3149  if (grparray == NULL)
3150  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate memory"
3151  " for int array", req);
3152  /*
3153  * Finds out total number of parity groups spanned by
3154  * io_request::ir_ivec.
3155  */
3156  for (seg = 0; seg < V_SEG_NR(&req->ir_ivv); ++seg) {
3157  grpstart = group_id(V_INDEX(&req->ir_ivv, seg),
3158  data_size(play));
3159  grpend = group_id(v_seg_endpos(&req->ir_ivv, seg) - 1,
3160  data_size(play));
3161  for (grp = grpstart; grp <= grpend; ++grp) {
3162  uint64_t i;
3163  /*
3164  * grparray is a temporary array to record found groups.
3165  * Scan this array for [grpstart, grpend].
3166  * If not found, record it in this array and
3167  * increase ir_iomap_nr.
3168  */
3169  for (i = 0; i < req->ir_iomap_nr; ++i) {
3170  if (grparray[i] == grp)
3171  break;
3172  }
3173  /* 'grp' is not found. Adding it to @grparray */
3174  if (i == req->ir_iomap_nr) {
3175  M0_ASSERT_INFO(i < grparray_sz,
3176  "[%p] nr=%llu size=%llu",
3177  req, i , grparray_sz);
3178  grparray[i] = grp;
3179  ++req->ir_iomap_nr;
3180  }
3181  }
3182  }
3183  m0_free(grparray);
3184  return M0_RC(0);
3185 }
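/*
 * Illustrative example (not part of the driver, numbers are hypothetical):
 * with data_size(play) = 20480, a two-segment vector
 * {[10000, +30000), [81920, +4096)} spans groups 0 and 1 (first segment)
 * and group 4 (second segment), so ir_iomap_nr ends up as 3; grparray
 * merely de-duplicates group ids when segments overlap the same group.
 */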
3186 
3188 {
3189  int rc;
3190  uint64_t map;
3191  struct m0_ivec_varr_cursor cursor;
3192  struct m0_pdclust_layout *play;
3193 
3194  M0_PRE(req != NULL);
3195 
3196  M0_ENTRY("[%p]", req);
3197  play = pdlayout_get(req);
3198 
3200  if (rc != 0)
3201  return M0_RC(rc);
3202 
3203  M0_LOG(M0_DEBUG, "[%p] spanned_groups=%llu [N,K,us]=[%d,%d,%llu]",
3204  req, req->ir_iomap_nr, layout_n(play),
3205  layout_k(play), layout_unit_size(play));
3206 
3207  /* req->ir_iomaps is zeroed out on allocation. */
3209  if (req->ir_iomaps == NULL) {
3210  rc = M0_ERR(-ENOMEM);
3211  goto failed;
3212  }
3213 
3214  m0_ivec_varr_cursor_init(&cursor, &req->ir_ivv);
3215 
3216  /*
3217  * The cursor is advanced by at most the parity group size in one
3218  * iteration of this loop.
3219  * This is done by pargrp_iomap::pi_ops::pi_populate().
3220  */
3221  for (map = 0; !m0_ivec_varr_cursor_move(&cursor, 0); ++map) {
3222  M0_ASSERT(map < req->ir_iomap_nr);
3223  M0_ASSERT(req->ir_iomaps[map] == NULL);
3225  if (req->ir_iomaps[map] == NULL) {
3226  rc = M0_ERR(-ENOMEM);
3227  goto failed;
3228  }
3229 
3230  ++iommstats.a_pargrp_iomap_nr;
3233  data_size(play)));
3234  if (rc != 0) {
3235  m0_free0(&req->ir_iomaps[map]);
3236  goto failed;
3237  }
3238 
3239  /* @cursor is advanced in the following function */
3241  ir_iomaps[map], &cursor);
3242  if (rc != 0)
3243  goto failed;
3244  M0_LOG(M0_INFO, "[%p] pargrp_iomap id : %llu populated",
3245  req, req->ir_iomaps[map]->pi_grpid);
3246  }
3247  return M0_RC(0);
3248 failed:
3249  if (req->ir_iomaps != NULL)
3251 
3252  return M0_ERR_INFO(rc, "[%p] iomaps_prepare failed", req);
3253 }
3254 
3256 {
3257  uint64_t i;
3258 
3259  M0_ENTRY("[%p]", req);
3260 
3261  M0_PRE(req != NULL);
3262  M0_PRE(req->ir_iomaps != NULL);
3263 
3264  for (i = 0; i < req->ir_iomap_nr; ++i) {
3265  if (req->ir_iomaps[i] != NULL) {
3267  m0_free(req->ir_iomaps[i]);
3268  ++iommstats.d_pargrp_iomap_nr;
3269  }
3270  }
3271  m0_free0(&req->ir_iomaps);
3272  req->ir_iomap_nr = 0;
3273 }
3274 
3276 {
3277  int rc;
3278  uint64_t cnt;
3279  struct io_request *req;
3280  struct dgmode_rwvec *dg;
3281  struct m0_pdclust_layout *play;
3282 
3283  M0_ENTRY();
3284  M0_PRE(ti != NULL);
3285  M0_PRE(ti->ti_dgvec == NULL);
3286 
3287  req = bob_of(ti->ti_nwxfer, struct io_request, ir_nwxfer,
3288  &ioreq_bobtype);
3289  play = pdlayout_get(req);
3291  (layout_n(play) + layout_k(play)));
3292  M0_LOG(M0_DEBUG, "[%p]", req);
3293 
3294  M0_ALLOC_PTR(dg);
3295  if (dg == NULL) {
3296  rc = M0_ERR(-ENOMEM);
3297  goto failed;
3298  }
3299 
3300  dg->dr_tioreq = ti;
3301 
3303  if (rc != 0)
3304  goto failed_free_dg;
3305 
3307  if (rc != 0)
3308  goto failed_free_iv;
3309 
3310  rc = m0_varr_init(&dg->dr_pageattrs, cnt, sizeof(enum page_attr),
3311  (size_t)m0_pagesize_get());
3312  if (rc != 0)
3313  goto failed_free_bv;
3314 
3315  /*
3316  * This value is incremented every time a new segment is added
3317  * to this index vector.
3318  */
3319  V_SEG_NR(&dg->dr_ivec_varr) = 0;
3320 
3321  ti->ti_dgvec = dg;
3322  return M0_RC(0);
3323 
3324 failed_free_bv:
3326 failed_free_iv:
3328 failed_free_dg:
3329  m0_free(dg);
3330 failed:
3331  return M0_ERR_INFO(rc, "[%p] Dgmode read vector allocation failed",
3332  req);
3333 }
3334 
3336 {
3337  M0_ENTRY();
3338 
3339  M0_PRE(dg != NULL);
3340 
3341  dg->dr_tioreq = NULL;
3344  m0_varr_fini(&dg->dr_pageattrs);
3345 }
3346 
3351 static void databufs_set_dgw_mode(struct pargrp_iomap *iomap,
3352  struct m0_ext *ext)
3353 {
3354  uint32_t row_start;
3355  uint32_t row_end;
3356  uint32_t row;
3357  uint32_t col;
3358  struct data_buf *dbuf;
3359 
3360  page_pos_get(iomap, ext->e_start, &row_start, &col);
3361  page_pos_get(iomap, ext->e_end - 1, &row_end, &col);
3362 
3363  for (row = row_start; row <= row_end; ++row) {
3364  dbuf = iomap->pi_databufs[row][col];
3365  if (dbuf->db_flags & PA_WRITE)
3366  dbuf->db_flags |= PA_DGMODE_WRITE;
3367  }
3368 }
3369 
3373 static void paritybufs_set_dgw_mode(struct pargrp_iomap *iomap,
3374  struct m0_pdclust_layout *play,
3375  uint64_t unit)
3376 {
3377  uint32_t row;
3378  uint32_t col;
3379  uint64_t unit_size = layout_unit_size(play);
3380  struct data_buf *dbuf;
3381 
3382  parity_page_pos_get(iomap, unit * unit_size, &row, &col);
3383  for (; row < rows_nr(play); ++row) {
3384  dbuf = iomap->pi_paritybufs[row][col];
3385  if (dbuf->db_flags & PA_WRITE)
3386  dbuf->db_flags |= PA_DGMODE_WRITE;
3387  }
3388 }
3389 
3390 /*
3391  * Distributes file data into target_ioreq objects as required and populates
3392  * target_ioreq::ti_ivv and target_ioreq::ti_bufvec.
3393  */
3394 static int nw_xfer_io_distribute(struct nw_xfer_request *xfer)
3395 {
3396  int rc;
3397  uint64_t i;
3398  uint64_t unit;
3399  uint64_t unit_size;
3400  uint64_t count;
3401  uint64_t pgstart;
3402  uint64_t pgend;
3403  /* Extent representing a data unit. */
3404  struct m0_ext u_ext;
3405  /* Extent representing resultant extent. */
3406  struct m0_ext r_ext;
3407  /* Extent representing a segment from index vector. */
3408  struct m0_ext v_ext;
3409  struct io_request *req;
3410  struct target_ioreq *ti;
3411  struct m0_ivec_varr_cursor cur;
3412  struct m0_pdclust_layout *play;
3413  enum m0_pdclust_unit_type unit_type;
3414  struct m0_pdclust_src_addr src;
3415  struct m0_pdclust_tgt_addr tgt;
3416  struct pargrp_iomap *iomap;
3417  struct inode *inode;
3418  struct m0t1fs_sb *csb;
3419 
3420  M0_ENTRY("nw_xfer_request %p", xfer);
3422 
3423  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
3424  play = pdlayout_get(req);
3425  unit_size = layout_unit_size(play);
3426 
3427  for (i = 0; i < req->ir_iomap_nr; ++i) {
3428  count = 0;
3429  iomap = req->ir_iomaps[i];
3430  pgstart = data_size(play) * iomap->pi_grpid;
3431  pgend = pgstart + data_size(play);
3432  src.sa_group = iomap->pi_grpid;
3433 
3434  M0_LOG(M0_DEBUG, "[%p] iomap=%p [grpid=%llu state=%u]",
3435  req, iomap, iomap->pi_grpid, iomap->pi_state);
3436 
3437  /* traverse parity group ivec by units */
3438  m0_ivec_varr_cursor_init(&cur, &iomap->pi_ivv);
3439  while (!m0_ivec_varr_cursor_move(&cur, count)) {
3440 
3441  unit = (m0_ivec_varr_cursor_index(&cur) - pgstart) /
3442  unit_size;
3443 
3444  u_ext.e_start = pgstart + unit * unit_size;
3445  u_ext.e_end = u_ext.e_start + unit_size;
3446 
3448  v_ext.e_end = v_ext.e_start +
3450 
3451  m0_ext_intersection(&u_ext, &v_ext, &r_ext);
3452  M0_ASSERT(m0_ext_is_valid(&r_ext));
3453  count = m0_ext_length(&r_ext);
3454 
3455  unit_type = m0_pdclust_unit_classify(play, unit);
3456  M0_ASSERT(unit_type == M0_PUT_DATA);
3457 
3459  databufs_set_dgw_mode(iomap, &r_ext);
3460 
3461  src.sa_unit = unit;
3462  rc = xfer->nxr_ops->nxo_tioreq_map(xfer, &src, &tgt,
3463  &ti);
3464  if (rc != 0) {
3465  M0_LOG(M0_DEBUG, "[%p] iomap=%p "
3466  "nxo_tioreq_map() failed, rc=%d",
3467  req, iomap, rc);
3468  goto err;
3469  }
3470 
3471  M0_LOG(M0_DEBUG, "[%p] adding data. ti state=%d\n",
3472  req, ti->ti_state);
3473  ti->ti_ops->tio_seg_add(ti, &src, &tgt, r_ext.e_start,
3474  m0_ext_length(&r_ext), iomap);
3475  }
3476 
3477  inode = iomap_to_inode(iomap);
3478  csb = M0T1FS_SB(inode->i_sb);
3479 
3480  /* process parity units */
3481  if (req->ir_type == IRT_WRITE ||
3482  (req->ir_type == IRT_READ && csb->csb_verify) ||
3484  iomap->pi_state == PI_DEGRADED)) {
3485 
3486  for (unit = 0; unit < layout_k(play); ++unit) {
3487 
3488  src.sa_unit = layout_n(play) + unit;
3489 
3491  src.sa_unit) == M0_PUT_PARITY);
3492 
3493  rc = xfer->nxr_ops->nxo_tioreq_map(xfer, &src,
3494  &tgt, &ti);
3495  if (rc != 0) {
3496  M0_LOG(M0_DEBUG, "[%p] iomap=%p "
3497  "nxo_tioreq_map() failed, rc=%d",
3498  req, iomap, rc);
3499  goto err;
3500  }
3501 
3503  paritybufs_set_dgw_mode(iomap, play,
3504  unit);
3505 
3506  ti->ti_ops->tio_seg_add(ti, &src, &tgt, pgstart,
3507  layout_unit_size(play),
3508  iomap);
3509  }
3510 
3511  if (!csb->csb_oostore || req->ir_type != IRT_WRITE)
3512  continue;
3513 
3514  /* Cob create for spares. */
3515  for (unit = layout_k(play); unit < 2 * layout_k(play);
3516  ++unit) {
3517  src.sa_unit = layout_n(play) + unit;
3518  rc = xfer->nxr_ops->nxo_tioreq_map(xfer, &src,
3519  &tgt, &ti);
3520  if (rc != 0) {
3521  M0_LOG(M0_ERROR, "[%p] iomap=%p "
3522  "nxo_tioreq_map() failed, rc=%d",
3523  req, iomap, rc);
3524  }
3525  if (target_ioreq_type_get(ti) != TI_NONE)
3526  continue;
3528  }
3529  }
3530  }
3531 
3532  return M0_RC(0);
3533 err:
3534  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
3535  tioreqht_htable_del(&xfer->nxr_tioreqs_hash, ti);
3536  M0_LOG(M0_INFO, "[%p] target_ioreq deleted for "FID_F,
3537  req, FID_P(&ti->ti_fid));
3538  target_ioreq_fini(ti);
3539  m0_free0(&ti);
3540  ++iommstats.d_target_ioreq_nr;
3541  } m0_htable_endfor;
3542 
3543  return M0_ERR_INFO(rc, "[%p] io_prepare failed", req);
3544 }
3545 
3546 static inline int ioreq_sm_timedwait(struct io_request *req,
3547  uint64_t state)
3548 {
3549  int rc;
3550  M0_PRE(req != NULL);
3551 
3552  M0_ENTRY("[%p] Waiting for %s -> %s, Pending fops %llu, "
3553  "Pending rdbulk %llu", req,
3554  io_states[ioreq_sm_state(req)].sd_name,
3555  io_states[state].sd_name,
3558 
3561  M0_TIME_NEVER);
3563 
3564  if (rc != 0)
3565  M0_LOG(M0_DEBUG, "[%p] rc %d", req, rc);
3566  M0_LEAVE("[%p] rc %d", req, rc);
3567  return rc;
3568 }
3569 
3571 {
3572  int rc = 0;
3573  uint64_t i;
3574  struct pargrp_iomap *iomap;
3575 
3576  M0_ENTRY("[%p]", req);
3579 
3580  for (i = 0; i < req->ir_iomap_nr; ++i) {
3581  iomap = req->ir_iomaps[i];
3582  if (iomap->pi_state == PI_DEGRADED) {
3583  rc = iomap->pi_ops->pi_dgmode_recover(iomap);
3584  if (rc != 0)
3585  return M0_ERR_INFO(rc, "[%p] Failed to recover"
3586  " data", req);
3587  }
3588  }
3589 
3590  return M0_RC(rc);
3591 }
3592 
3597 static uint64_t tolerance_of_level(struct io_request *req, uint64_t lv)
3598 {
3599  struct m0_pdclust_instance *play_instance;
3600  struct m0_pool_version *pver;
3601 
3603 
3604  play_instance = pdlayout_instance(layout_instance(req));
3605  pver = play_instance->pi_base.li_l->l_pver;
3606  return pver->pv_fd_tol_vec[lv];
3607 }
3608 
3615 static bool is_session_marked(struct io_request *req,
3616  struct m0_rpc_session *session)
3617 {
3618  uint64_t i;
3619  uint64_t max_failures;
3620  uint64_t session_id;
3621 
3622  session_id = session->s_session_id;
3624  for (i = 0; i < max_failures; ++i) {
3625  if (req->ir_failed_session[i] == session_id)
3626  return true;
3627  else if (req->ir_failed_session[i] == ~(uint64_t)0) {
3628  req->ir_failed_session[i] = session_id;
3629  return false;
3630  }
3631  }
3632  return false;
3633 }
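/*
 * Illustrative sketch (not part of the driver) of the technique used by
 * is_session_marked() above: a fixed-size array pre-filled with the
 * sentinel ~0 records the ids seen so far, and lookup and insertion share
 * a single pass.  The names below are hypothetical and the helper is not
 * referenced by the driver.
 */
static bool example_seen_before(uint64_t *slots, uint64_t nr, uint64_t id)
{
	uint64_t i;

	for (i = 0; i < nr; ++i) {
		if (slots[i] == id)
			return true;            /* already recorded */
		if (slots[i] == ~(uint64_t)0) {
			slots[i] = id;          /* first free slot */
			return false;
		}
	}
	return false;                           /* array full */
}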
3634 
3641 static int device_check(struct io_request *req)
3642 {
3643  int rc = 0;
3644  uint32_t fdev_nr = 0;
3645  uint32_t fsvc_nr = 0;
3646  struct target_ioreq *ti;
3647  struct m0_pdclust_layout *play;
3648  enum m0_pool_nd_state state;
3649  uint64_t max_failures;
3651 
3653 
3654  M0_ENTRY("[%p]", req);
3655  M0_PRE(req != NULL);
3657  IRS_WRITE_COMPLETE)));
3658  play = pdlayout_get(req);
3659  m0_htable_for (tioreqht, ti, &req->ir_nwxfer.nxr_tioreqs_hash) {
3660  rc = m0_poolmach_device_state(pm, ti->ti_obj, &state);
3661  if (rc != 0)
3662  return M0_ERR_INFO(rc, "[%p] Failed to retrieve target "
3663  "device state", req);
3664  /* The case when a particular service is down. */
3665  if (ti->ti_rc == -ECANCELED) {
3666  if (!is_session_marked(req, ti->ti_session)) {
3667  M0_CNT_INC(fsvc_nr);
3668  }
3669  /* The case when multiple devices under the same service are
3670  * unavailable. */
3671  } else if (M0_IN(state, (M0_PNDS_FAILED, M0_PNDS_OFFLINE,
3673  !is_session_marked(req, ti->ti_session)) {
3674  M0_CNT_INC(fdev_nr);
3675  }
3676  } m0_htable_endfor;
3677  M0_LOG(M0_DEBUG, "failed devices = %d\ttolerance=%d", (int)fdev_nr,
3678  (int)layout_k(play));
3679  if (is_pver_dud(fdev_nr, layout_k(play), fsvc_nr, max_failures))
3680  return M0_ERR_INFO(-EIO, "[%p] Failed to recover data "
3681  "since number of failed data units "
3682  "(%lu) exceeds number of parity "
3683  "units in parity group (%lu) OR "
3684  "number of failed services (%lu) "
3685  "exceeds number of max failures "
3686  "supported (%lu)",
3687  req, (unsigned long)fdev_nr,
3688  (unsigned long)layout_k(play),
3689  (unsigned long)fsvc_nr,
3690  (unsigned long)max_failures);
3691  return M0_RC(fdev_nr);
3692 }
3693 
3694 /* If there are F(l) failures at level l, and K(l) failures are tolerable for
3695  * the level l, then the condition for pool-version to be non-dud is:
3696  * \sum_over_l {F(l) / K(l)} <= 1
3697  * Once MOTR-899 lands into dev, this function will go away.
3698  */
3699 static bool is_pver_dud(uint32_t fdev_nr, uint32_t dev_k, uint32_t fsvc_nr,
3700  uint32_t svc_k)
3701 {
3702  if (fdev_nr > 0 && dev_k == 0)
3703  return true;
3704  if (fsvc_nr > 0 && svc_k == 0)
3705  return true;
3706  return (svc_k + fsvc_nr > 0) ?
3707  (fdev_nr * svc_k + fsvc_nr * dev_k) > dev_k * svc_k :
3708  fdev_nr > dev_k;
3709 }
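/*
 * Illustrative example (not part of the driver): with dev_k = 2 and
 * svc_k = 1, one failed device and no failed service gives
 * 1/2 + 0/1 = 0.5 <= 1 (cross-multiplied: 1*1 + 0*2 = 1 <= 2*1 = 2), so
 * the pool version is still usable; one failed device plus one failed
 * service gives 1/2 + 1/1 = 1.5 > 1 (1*1 + 1*2 = 3 > 2), so it is dud.
 */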
3710 
3711 static int ioreq_dgmode_write(struct io_request *req, bool rmw)
3712 {
3713  int rc;
3714  struct target_ioreq *ti;
3715  struct m0t1fs_sb *csb;
3716  struct nw_xfer_request *xfer;
3717 
3719 
3720  xfer = &req->ir_nwxfer;
3721  M0_ENTRY("[%p]", req);
3722  csb = file_to_sb(req->ir_file);
3723  /* In oostore mode we do not enter the degraded mode write. */
3724  if (csb->csb_oostore || M0_IN(xfer->nxr_rc, (0, -E2BIG, -ESTALE)))
3725  return M0_RC(xfer->nxr_rc);
3726 
3727  rc = device_check(req);
3728  if (rc < 0 ) {
3729  return M0_RC(rc);
3730  }
3732  /*
3733  * This IO request has already acquired distributed lock on the
3734  * file by this time.
3735  * Degraded mode write needs to handle 2 prime use-cases.
3736  * 1. SNS repair still to start on associated global fid.
3737  * 2. SNS repair has completed for associated global fid.
3738  * Both use-cases imply unavailability of one or more devices.
3739  *
3740  * In first use-case, repair is yet to start on file. Hence,
3741  * rest of the file data which goes on healthy devices can be
3742  * written safely.
3743  * In this case, the fops meant for failed device(s) will be simply
3744  * dropped and rest of the fops will be sent to respective ioservice
3745  * instances for writing data to servers.
3746  * Later when this IO request relinquishes the distributed lock on
3747  * associated global fid and SNS repair starts on the file, the lost
3748  * data will be regenerated using parity recovery algorithms.
3749  *
3750  * The second use-case implies completion of SNS repair for associated
3751  * global fid and the lost data is regenerated on distributed spare
3752  * units.
3753  * Ergo, all the file data meant for lost device(s) will be redirected
3754  * towards corresponding spare unit(s). Later when SNS rebalance phase
3755  * commences, it will migrate the data from spare to a new device, thus
3756  * making spare available for recovery again.
3757  * In this case, old fops will be discarded and all pages spanned by
3758  * IO request will be reshuffled by redirecting pages meant for
3759  * failed device(s) to its corresponding spare unit(s).
3760  */
3761 
3762  /*
3763  * Finalizes current fops which are not valid anymore.
3764  * Fops need to be finalized in either case since old network buffers
3765  * from IO fops are still enqueued in transfer machine and removal
3766  * of these buffers would lead to finalization of rpc bulk object.
3767  */
3768  M0_LOG(M0_ERROR, "[%p] Degraded write: about to nxo_complete()", req);
3769  xfer->nxr_ops->nxo_complete(xfer, rmw);
3770  /*
3771  * Resets count of data bytes and parity bytes along with
3772  * return status.
3773  * Fops meant for failed devices are dropped in
3774  * nw_xfer_req_dispatch().
3775  */
3776  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
3777  ti->ti_databytes = 0;
3778  ti->ti_parbytes = 0;
3779  ti->ti_rc = 0;
3780  ti->ti_req_type = TI_NONE;
3781  } m0_htable_endfor;
3782 
3783  /*
3784  * Redistributes all pages by routing pages for repaired devices
3785  * to spare units for each parity group.
3786  */
3787  rc = xfer->nxr_ops->nxo_distribute(xfer);
3788  if (rc != 0)
3789  return M0_ERR_INFO(rc, "[%p] Failed to redistribute file data "
3790  "between target_ioreq objects", req);
3791 
3792  xfer->nxr_rc = 0;
3793  req->ir_rc = xfer->nxr_rc;
3794 
3795  rc = xfer->nxr_ops->nxo_dispatch(xfer);
3796  if (rc != 0)
3797  return M0_ERR_INFO(rc, "[%p] Failed to dispatch degraded mode "
3798  "write IO fops", req);
3799 
3801  if (rc != 0)
3802  return M0_ERR_INFO(rc, "[%p] Degraded mode write IO failed",
3803  req);
3804  return M0_RC(xfer->nxr_rc);
3805 }
3806 
3807 static int ioreq_dgmode_read(struct io_request *req, bool rmw)
3808 {
3809  int rc = 0;
3810  uint64_t i;
3811  struct io_req_fop *irfop;
3812  struct target_ioreq *ti;
3813  struct m0_poolmach *pm;
3814  struct nw_xfer_request *xfer;
3815  struct pargrp_iomap *iomap;
3816  struct m0t1fs_sb *csb;
3817 
3818 
3820 
3821  csb = M0T1FS_SB(m0t1fs_file_to_inode(req->ir_file)->i_sb);
3822  xfer = &req->ir_nwxfer;
3823  M0_ENTRY("[%p] xfer->nxr_rc=%d", req, xfer->nxr_rc);
3824 
3825  /*
3826  * If all devices are ONLINE, all requests return success.
3827  * In case of read before write, due to CROW, the COB will not be present,
3828  * resulting in an ENOENT error. When the conf cache is drained, IO should
3829  * not proceed.
3830  */
3831  if (M0_IN(xfer->nxr_rc, (0, -ENOENT, -ESTALE)) ||
3832  /*
3833  * For rmw in oostore case return immediately without
3834  * bothering to check if degraded read can be done.
3835  * Write IO should be aborted in this case.
3836  */
3837  (csb->csb_oostore && req->ir_type == IRT_WRITE))
3838  return M0_RC(xfer->nxr_rc);
3839 
3840  rc = device_check(req);
3841  /*
3842  * The number of failed devices is not a good enough criterion
3843  * by itself. Even if one or more devices failed, the IO request
3844  * could still complete if it did not send any pages to the
3845  * failed device(s) at all.
3846  */
3847  if (rc < 0)
3848  return M0_RC(rc);
3849 
3850  M0_LOG(M0_DEBUG, "[%p] Proceeding with the degraded read", req);
3852  M0_ASSERT(pm != NULL);
3853  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
3854  /*
3855  * Data was retrieved successfully from this target.
3856  */
3857  if (ti->ti_rc == 0)
3858  continue;
3859  /*
3860  * Finds out parity groups for which read IO failed and marks
3861  * them as DEGRADED. This is necessary since the read IO request
3862  * could be reading only a part of a parity group, but if it
3863  * failed, the rest of the parity group also needs to be read
3864  * (subject to file size) in order to re-generate the lost data.
3865  */
3866  m0_tl_for (iofops, &ti->ti_iofops, irfop) {
3867  rc = io_req_fop_dgmode_read(irfop);
3868  if (rc != 0)
3869  break;
3870  } m0_tl_endfor;
3871  } m0_htable_endfor;
3872 
3873  if (rc != 0)
3874  return M0_ERR_INFO(rc, "[%p] dgmode failed", req);
3875 
3876  M0_LOG(M0_DEBUG, "[%p] dgmap_nr=%u is in dgmode",
3877  req, req->ir_dgmap_nr);
3878  /*
3879  * Starts processing the pages again if any of the parity groups
3880  * spanned by input IO-request is in degraded mode.
3881  */
3882  if (req->ir_dgmap_nr > 0) {
3883  M0_LOG(M0_DEBUG, "[%p] processing the failed parity groups",
3884  req);
3887 
3888  for (i = 0; i < req->ir_iomap_nr; ++i) {
3889  iomap = req->ir_iomaps[i];
3890  rc = iomap->pi_ops->pi_dgmode_postprocess(iomap);
3891  if (rc != 0)
3892  break;
3893  }
3894  } else {
3897  /*
3898  * By this time, the page count in target_ioreq::ti_ivec and
3899  * target_ioreq::ti_bufvec is greater than 1, but it is
3900  * invalid since the distribution is about to change.
3901  * Ergo, page counts in index and buffer vectors are reset.
3902  */
3903 
3904  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
3905  V_SEG_NR(&ti->ti_ivv) = 0;
3906  } m0_htable_endfor;
3907  }
3908 
3909  M0_LOG(M0_DEBUG, "[%p] About to nxo_complete()", req);
3910  xfer->nxr_ops->nxo_complete(xfer, rmw);
3911 
3912  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
3913  ti->ti_databytes = 0;
3914  ti->ti_parbytes = 0;
3915  ti->ti_rc = 0;
3916  } m0_htable_endfor;
3917 
3918  /* Resets the status code before starting degraded mode read IO. */
3919  req->ir_rc = xfer->nxr_rc = 0;
3920 
3921  rc = xfer->nxr_ops->nxo_distribute(xfer);
3922  if (rc != 0)
3923  return M0_ERR_INFO(rc, "[%p] Failed to prepare dgmode IO "
3924  "fops.", req);
3925 
3926  rc = xfer->nxr_ops->nxo_dispatch(xfer);
3927  if (rc != 0)
3928  return M0_ERR_INFO(rc, "[%p] Failed to dispatch degraded mode "
3929  "IO.", req);
3930 
3932  if (rc != 0)
3933  return M0_ERR_INFO(rc, "[%p] Degraded mode read IO failed.",
3934  req);
3935 
3936  if (xfer->nxr_rc != 0)
3937  return M0_ERR_INFO(xfer->nxr_rc,
3938  "[%p] Degraded mode read IO failed.", req);
3939  /*
3940  * Recovers lost data using parity recovery algorithms.
3941  */
3942  if (req->ir_dgmap_nr > 0) {
3944  if (rc != 0)
3945  return M0_ERR_INFO(rc, "[%p] Failed to recover lost "
3946  "data.", req);
3947  }
3948 
3949  return M0_RC(rc);
3950 }
3951 
3952 extern const struct m0_uint128 m0_rm_m0t1fs_group;
3953 
3954 static int ioreq_file_lock(struct io_request *req)
3955 {
3956  int rc;
3957  struct m0t1fs_inode *mi;
3958 
3959  M0_PRE(req != NULL);
3960  M0_ENTRY("[%p]", req);
3961 
3964  m0_file_lock(&mi->ci_fowner, &req->ir_in);
3965  m0_rm_owner_lock(&mi->ci_fowner);
3968  M0_TIME_NEVER);
3969  m0_rm_owner_unlock(&mi->ci_fowner);
3970  rc = rc ?: req->ir_in.rin_rc;
3971 
3972  return M0_RC(rc);
3973 }
3974 
3975 static void ioreq_file_unlock(struct io_request *req)
3976 {
3977  M0_PRE(req != NULL);
3978  M0_ENTRY("[%p]", req);
3980 }
3981 
3982 static int ioreq_no_lock(struct io_request *req)
3983 {
3984  return 0;
3985 }
3986 
3987 static void ioreq_no_unlock(struct io_request *req)
3988 {;}
3989 
3990 static void device_state_reset(struct nw_xfer_request *xfer, bool rmw)
3991 {
3992  struct target_ioreq *ti;
3993 
3994  M0_PRE(xfer != NULL);
3995  M0_PRE(xfer->nxr_state == NXS_COMPLETE);
3996 
3997  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
3998  ti->ti_state = M0_PNDS_ONLINE;
3999  } m0_htable_endfor;
4000 }
4001 
4002 static int ioreq_iosm_handle(struct io_request *req)
4003 {
4004  int rc;
4005  bool rmw;
4006  uint64_t i;
4007  struct inode *inode;
4008  struct target_ioreq *ti;
4009  struct nw_xfer_request *xfer;
4010  struct m0t1fs_sb *csb;
4011 
4013  xfer = &req->ir_nwxfer;
4014  M0_ENTRY("[%p] sb %p", req, file_to_sb(req->ir_file));
4015  csb = M0T1FS_SB(m0t1fs_file_to_inode(req->ir_file)->i_sb);
4016 
4017  for (i = 0; i < req->ir_iomap_nr; ++i) {
4018  if (M0_IN(req->ir_iomaps[i]->pi_rtype,
4020  break;
4021  }
4022 
4023  /*
4024  * Acquires lock before proceeding to do actual IO.
4025  */
4026  rc = req->ir_ops->iro_file_lock(req);
4027  if (rc != 0) {
4028  M0_LOG(M0_ERROR, "[%p] iro_file_lock() failed: rc=%d", req, rc);
4029  goto fail;
4030  }
4031 
4032  /* @todo Do error handling based on m0_sm::sm_rc. */
4033  /*
4034  * Since m0_sm is part of io_request, for any parity group
4035  * which is partial, read-modify-write state transition is followed
4036  * for all parity groups.
4037  */
4038  M0_LOG(M0_DEBUG, "[%p] map=%llu map_nr=%llu",
4039  req, i, req->ir_iomap_nr);
4040  if (i == req->ir_iomap_nr) {
4041  enum io_req_state state;
4042 
4043  rmw = false;
4044  state = req->ir_type == IRT_READ ? IRS_READING :
4045  IRS_WRITING;
4046  if (state == IRS_WRITING) {
4048  CD_COPY_FROM_USER, 0);
4049  if (rc != 0) {
4050  M0_LOG(M0_ERROR, "[%p] iro_user_data_copy() "
4051  "failed: rc=%d", req, rc);
4052  goto fail_locked;
4053  }
4055  if (rc != 0) {
4056  M0_LOG(M0_ERROR, "[%p] iro_parity_recalc() "
4057  "failed: rc=%d", req, rc);
4058  goto fail_locked;
4059  }
4060  }
4061  ioreq_sm_state_set(req, state);
4062  rc = xfer->nxr_ops->nxo_dispatch(xfer);
4063  if (rc != 0) {
4064  M0_LOG(M0_ERROR, "[%p] nxo_dispatch() failed: rc=%d",
4065  req, rc);
4066  goto fail_locked;
4067  }
4068  state = req->ir_type == IRT_READ ? IRS_READ_COMPLETE:
4070  rc = ioreq_sm_timedwait(req, state);
4071  if (rc != 0) {
4072  M0_LOG(M0_ERROR, "[%p] ioreq_sm_timedwait() failed: "
4073  "rc=%d", req, rc);
4074  goto fail_locked;
4075  }
4076  if (req->ir_rc != 0) {
4077  rc = req->ir_rc;
4078  M0_LOG(M0_ERROR, "[%p] ir_rc=%d", req, rc);
4079  goto fail_locked;
4080  }
4081  if (state == IRS_READ_COMPLETE) {
4082 
4083  /*
4084  * Returns immediately if all devices are
4085  * in healthy state.
4086  */
4087  rc = req->ir_ops->iro_dgmode_read(req, rmw);
4088  if (rc != 0) {
4089  M0_LOG(M0_ERROR, "[%p] iro_dgmode_read() "
4090  "failed: rc=%d", req, rc);
4091  goto fail_locked;
4092  }
4094  if (rc != 0) {
4095  M0_LOG(M0_ERROR, "[%p] parity verification "
4096  "failed: rc=%d", req, rc);
4097  goto fail_locked;
4098  }
4100  CD_COPY_TO_USER, 0);
4101  if (rc != 0) {
4102  M0_LOG(M0_ERROR, "[%p] iro_user_data_copy() "
4103  "failed: rc=%d", req, rc);
4104  goto fail_locked;
4105  }
4106  } else {
4107  M0_ASSERT(state == IRS_WRITE_COMPLETE);
4108  /*
4109  * Returns immediately if all devices are
4110  * in healthy state.
4111  */
4112  rc = req->ir_ops->iro_dgmode_write(req, rmw);
4113  if (rc != 0) {
4114  M0_LOG(M0_ERROR, "[%p] iro_dgmode_write() "
4115  "failed: rc=%d", req, rc);
4116  goto fail_locked;
4117  }
4118  }
4119  } else {
4120  uint32_t seg;
4121  m0_bcount_t read_pages = 0;
4122 
4123  rmw = true;
4124  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
4125  for (seg = 0; seg < V_SEG_NR(&ti->ti_bufvec); ++seg)
4126  if (PA(&ti->ti_pageattrs, seg) & PA_READ)
4127  ++read_pages;
4128  } m0_htable_endfor;
4129 
4130  /* Read IO is issued only if byte count > 0. */
4131  if (read_pages > 0) {
4133  rc = xfer->nxr_ops->nxo_dispatch(xfer);
4134  if (rc != 0) {
4135  M0_LOG(M0_ERROR, "[%p] nxo_dispatch() failed: "
4136  "rc=%d", req, rc);
4137  goto fail_locked;
4138  }
4139  }
4140 
4141  /* Waits for read completion if read IO was issued. */
4142  if (read_pages > 0) {
4144  if (rc != 0) {
4145  M0_LOG(M0_ERROR, "[%p] ioreq_sm_timedwait() "
4146  "failed: rc=%d", req, rc);
4147  goto fail_locked;
4148  }
4149 
4150  /*
4151  * Returns immediately if all devices are
4152  * in healthy state.
4153  */
4154  rc = req->ir_ops->iro_dgmode_read(req, rmw);
4155  if (rc != 0) {
4156  M0_LOG(M0_ERROR, "[%p] iro_dgmode_read() "
4157  "failed: rc=%d", req, rc);
4158  goto fail_locked;
4159  }
4160  }
4161 
4162  /*
4163  * If fops dispatch fails, we need to wait till all io fop
4164  * callbacks are acked since some IO fops might have been
4165  * dispatched.
4166  *
4167  * Only fully modified pages from parity groups that have
4168  * chosen the read-rest approach, or from aligned parity groups,
4169  * are copied here, since the read-old approach needs to read
4170  * all spanned pages (whether fully or partially modified)
4171  * in order to calculate parity correctly.
4173  */
4176  if (rc != 0) {
4177  M0_LOG(M0_ERROR, "[%p] iro_user_data_copy() failed: "
4178  "rc=%d", req, rc);
4179  goto fail_locked;
4180  }
4181 
4182  /* Copies
4183  * - fully modified pages from parity groups which have
4184  * chosen read_old approach and
4185  * - partially modified pages from all parity groups.
4186  */
4188  if (rc != 0) {
4189  M0_LOG(M0_ERROR, "[%p] iro_user_data_copy() failed: "
4190  "rc=%d", req, rc);
4191  goto fail_locked;
4192  }
4193 
4194  /* Finalizes the old read fops. */
4195  if (read_pages > 0) {
4196  M0_LOG(M0_DEBUG, "[%p] About to nxo_complete()", req);
4197  xfer->nxr_ops->nxo_complete(xfer, rmw);
4198  if (req->ir_rc != 0) {
4199  M0_LOG(M0_ERROR, "[%p] nxo_complete() failed: "
4200  "rc=%d", req, rc);
4201  rc = req->ir_rc;
4202  goto fail_locked;
4203  }
4204  device_state_reset(xfer, rmw);
4205  }
4208  if (rc != 0) {
4209  M0_LOG(M0_ERROR, "[%p] iro_parity_recalc() failed: "
4210  "rc=%d", req, rc);
4211  goto fail_locked;
4212  }
4213  rc = xfer->nxr_ops->nxo_dispatch(xfer);
4214  if (rc != 0) {
4215  M0_LOG(M0_ERROR, "[%p] nxo_dispatch() failed: rc=%d",
4216  req, rc);
4217  goto fail_locked;
4218  }
4219 
4221  if (rc != 0) {
4222  M0_LOG(M0_ERROR, "[%p] ioreq_sm_timedwait() failed: "
4223  "rc=%d", req,
4224  rc);
4225  goto fail_locked;
4226  }
4227 
4228  /* Returns immediately if all devices are in healthy state. */
4229  rc = req->ir_ops->iro_dgmode_write(req, rmw);
4230  if (rc != 0) {
4231  M0_LOG(M0_ERROR, "[%p] iro_dgmode_write() failed: "
4232  "rc=%d", req, rc);
4233  goto fail_locked;
4234  }
4235  }
4236 
4237  /*
4238  * Updates file size on successful write IO.
4239  * The new file size is the maximum of the old file size and
4240  * the last valid file position written by the current write IO call.
4241  */
4244  uint64_t newsize = max64u(inode->i_size,
4246  V_SEG_NR(&req->ir_ivv) - 1));
4247 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
4248  rc = m0t1fs_size_update(req->ir_file->f_path.dentry, newsize);
4249 #else
4250  rc = m0t1fs_size_update(req->ir_file->f_dentry, newsize);
4251 #endif
4252  m0_mutex_lock(&csb->csb_confc_state.cus_lock);
4253  if (rc != 0 && csb->csb_confc_state.cus_state != M0_CC_READY) {
4254  m0_mutex_unlock(&csb->csb_confc_state.cus_lock);
4255  rc = M0_ERR(-ESTALE);
4256  goto fail_locked;
4257  }
4258  m0_mutex_unlock(&csb->csb_confc_state.cus_lock);
4259  M0_LOG(M0_INFO, "[%p] File size set to %llu", req,
4260  inode->i_size);
4261  }
4262 
4264 
4265  M0_LOG(M0_DEBUG, "[%p] About to nxo_complete()", req);
4266  xfer->nxr_ops->nxo_complete(xfer, rmw);
4267 
4268  if (rmw)
4270 
4271  return M0_RC(0);
4272 
4273 fail_locked:
4275 fail:
4277  M0_LOG(M0_DEBUG, "[%p] About to nxo_complete()", req);
4278  xfer->nxr_ops->nxo_complete(xfer, false);
4280  return M0_ERR_INFO(rc, "[%p] ioreq_iosm_handle failed", req);
4281 }
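/*
 * A minimal, self-contained sketch of the read-old vs read-rest trade-off
 * mentioned in the comments above. This is a simplified cost model, not the
 * actual pargrp_iomap logic: it assumes read-old costs the old copies of the
 * modified data pages plus the old parity pages, while read-rest costs the
 * unmodified remainder of the group, and that a fully overwritten (aligned)
 * group needs no read at all.
 */
#include <stdbool.h>
#include <stdint.h>

enum rmw_approach { RMW_NONE, RMW_READ_OLD, RMW_READ_REST };

static enum rmw_approach rmw_choose(uint32_t data_pages_in_group,
                                    uint32_t parity_pages_in_group,
                                    uint32_t modified_pages,
                                    bool     group_fully_overwritten)
{
        uint32_t read_old_cost;
        uint32_t read_rest_cost;

        if (group_fully_overwritten)
                return RMW_NONE;               /* aligned group: nothing to read */
        read_old_cost  = modified_pages + parity_pages_in_group;
        read_rest_cost = data_pages_in_group - modified_pages;
        return read_old_cost < read_rest_cost ? RMW_READ_OLD : RMW_READ_REST;
}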
4282 
4283 static int io_request_init(struct io_request *req,
4284  struct file *file,
4285  const struct iovec *iov,
4286  struct m0_indexvec_varr *ivv,
4287  enum io_req_type rw)
4288 {
4289  struct m0t1fs_inode *ci;
4290  struct m0t1fs_sb *csb;
4291  struct m0_pool_version *pver;
4292  struct m0_layout_instance *li;
4293  int rc;
4294  uint32_t seg;
4295  uint32_t i;
4296  uint32_t max_failures;
4297 
4298  M0_ENTRY("[%p] rw %d", req, rw);
4299 
4300  M0_PRE(req != NULL);
4301  M0_PRE(file != NULL);
4302  M0_PRE(iov != NULL);
4303  M0_PRE(ivv != NULL);
4304  M0_PRE(M0_IN(rw, (IRT_READ, IRT_WRITE)));
4305  M0_PRE(M0_IS0(req));
4306 
4307  csb = file_to_sb(file);
4309  if (rc != 0)
4310  return M0_ERR(rc);
4311  req->ir_rc = 0;
4312  req->ir_file = file;
4313  req->ir_type = rw;
4314  req->ir_iovec = iov;
4315  req->ir_iomap_nr = 0;
4316  req->ir_copied_nr = 0;
4317  req->ir_direct_io = !!(file->f_flags & O_DIRECT);
4319  req->ir_ops = csb->csb_oostore ? &ioreq_oostore_ops : &ioreq_ops;
4320 
4321  /*
4322  * rconfc might have refreshed pool versions, and pool version for
4323  * this file might have got evicted forever. Check if we still have
4324  * the ground underneath.
4325  */
4327  pver = m0_pool_version_find(&csb->csb_pools_common, &ci->ci_pver);
4328  if (pver == NULL) {
4329  rc = M0_ERR_INFO(-ENOENT, "Cannot find pool version "FID_F,
4330  FID_P(&ci->ci_pver));
4331  goto err;
4332  }
4333  li = ci->ci_layout_instance;
4334  /*
4335  * The file resides on a virtual pool version that was refreshed
4336  * during an rconfc update, which evicted the layout.
4337  */
4338  if (li == NULL) {
4340  if (rc != 0)
4341  goto err;
4342  }
4343  io_request_bob_init(req);
4345  if (req->ir_nwxfer.nxr_rc != 0) {
4347  "[%p] nw_xfer_req_init() failed", req);
4348  goto err;
4349  }
4351  M0_ALLOC_ARR(req->ir_failed_session, max_failures + 1);
4352  if (req->ir_failed_session == NULL) {
4353  rc = M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate the "
4354  "array of failed sessions", req);
4355  goto err;
4356  }
4357  for (i = 0; i < max_failures; ++i) {
4358  req->ir_failed_session[i] = ~(uint64_t)0;
4359  }
4360 
4363 
4365 
4366  if (rc != 0) {
4368  M0_LOG(M0_ERROR, "[%p] Allocation of m0_indexvec_varr failed", req);
4369  goto err;
4370  }
4371 
4372  for (seg = 0; seg < V_SEG_NR(ivv); ++seg) {
4373  V_INDEX(&req->ir_ivv, seg) = V_INDEX(ivv, seg);
4374  V_COUNT(&req->ir_ivv, seg) = V_COUNT(ivv, seg);
4375  }
4376 
4377  /* Sorts the index vector in increasing order of file offset. */
4381 
4382  return M0_RC(0);
4383 err:
4385  return M0_ERR(rc);
4386 }
4387 
4388 static void io_request_fini(struct io_request *req)
4389 {
4390  struct target_ioreq *ti;
4391  struct m0_sm_group *grp;
4392  struct m0t1fs_sb *csb;
4393 
4395 
4396  M0_ENTRY("[%p]", req);
4397 
4398  csb = file_to_sb(req->ir_file);
4399  grp = req->ir_sm.sm_grp;
4400 
4402 
4403  m0_sm_fini(&req->ir_sm);
4404  io_request_bob_fini(req);
4405  req->ir_file = NULL;
4406  req->ir_iovec = NULL;
4407  req->ir_iomaps = NULL;
4408  req->ir_ops = NULL;
4410 
4411  m0_htable_for(tioreqht, ti, &req->ir_nwxfer.nxr_tioreqs_hash) {
4412  tioreqht_htable_del(&req->ir_nwxfer.nxr_tioreqs_hash, ti);
4413  M0_LOG(M0_DEBUG, "[%p] target_ioreq %p deleted for "FID_F,
4414  req, ti, FID_P(&ti->ti_fid));
4415  /*
4416  * All io_req_fop structures in list target_ioreq::ti_iofops
4417  * are already finalized in nw_xfer_req_complete().
4418  */
4419  target_ioreq_fini(ti);
4420  m0_free(ti);
4421  ++iommstats.d_target_ioreq_nr;
4422  } m0_htable_endfor;
4423 
4425 
4429  M0_LEAVE();
4430 }
4431 
4492  enum m0_pool_nd_state dev_state)
4493 {
4494  return (M0_IN(ioreq_sm_state(req),
4496  dev_state == M0_PNDS_SNS_REPAIRED)
4497  ||
4499  (dev_state == M0_PNDS_SNS_REPAIRED ||
4500  (dev_state == M0_PNDS_SNS_REPAIRING &&
4502 }
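/*
 * A simplified model of the spare-mapping decision above, not the original
 * predicate: a unit is redirected to its distributed spare when the device
 * has already been repaired and, for degraded writes, also while repair is
 * still running provided this file's repair has completed. The enum and
 * function names below are illustrative, not Motr's.
 */
#include <stdbool.h>

enum sk_dev_state { SK_DEV_ONLINE, SK_DEV_REPAIRING, SK_DEV_REPAIRED };
enum sk_req_state { SK_REQ_READING, SK_REQ_DEGRADED_READING,
                    SK_REQ_DEGRADED_WRITING };

static bool sk_use_spare(enum sk_req_state rs, enum sk_dev_state ds,
                         bool file_repair_done)
{
        if (rs == SK_REQ_READING || rs == SK_REQ_DEGRADED_READING)
                return ds == SK_DEV_REPAIRED;
        if (rs == SK_REQ_DEGRADED_WRITING)
                return ds == SK_DEV_REPAIRED ||
                       (ds == SK_DEV_REPAIRING && file_repair_done);
        return false;
}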
4503 
4504 static int nw_xfer_tioreq_map(struct nw_xfer_request *xfer,
4505  const struct m0_pdclust_src_addr *src,
4506  struct m0_pdclust_tgt_addr *tgt,
4507  struct target_ioreq **tio)
4508 {
4509  struct m0_fid tfid;
4510  const struct m0_fid *gfid;
4511  struct io_request *req;
4512  struct m0_rpc_session *session;
4513  struct m0_pdclust_layout *play;
4514  struct m0_pdclust_instance *play_instance;
4515  enum m0_pool_nd_state dev_state;
4516  int rc;
4517  struct m0_poolmach *pm;
4518 
4519  M0_ENTRY("nw_xfer_request %p", xfer);
4521  M0_PRE(src != NULL);
4522  M0_PRE(tgt != NULL);
4523 
4524  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
4525  play = pdlayout_get(req);
4526  play_instance = pdlayout_instance(layout_instance(req));
4527 
4528  m0_fd_fwd_map(play_instance, src, tgt);
4529  tfid = target_fid(req, tgt);
4530 
4531  M0_LOG(M0_DEBUG, "[%p] src_id[%llu:%llu] -> dest_id[%llu:%llu] "
4532  "@ tfid "FID_F, req, src->sa_group, src->sa_unit,
4533  tgt->ta_frame, tgt->ta_obj, FID_P(&tfid));
4534 
4536  M0_ASSERT(pm != NULL);
4537 
4538  rc = m0_poolmach_device_state(pm, tgt->ta_obj, &dev_state);
4539  if (rc != 0)
4540  return M0_RC(rc);
4541 
4543  tfid.f_container, tfid.f_key,
4545  dev_state, tgt->ta_frame, tgt->ta_obj,
4546  src->sa_group, src->sa_unit);
4547 
4548  if (M0_FI_ENABLED("poolmach_client_repaired_device1") &&
4549  tfid.f_container == 1)
4550  dev_state = M0_PNDS_SNS_REPAIRED;
4551 
4552  M0_LOG(M0_INFO, "[%p] tfid="FID_F" dev_state=%d\n",
4553  req, FID_P(&tfid), dev_state);
4554 
4555  if (should_spare_be_mapped(req, dev_state)) {
4556  struct m0_pdclust_src_addr spare = *src;
4557  uint32_t spare_slot;
4558  uint32_t spare_slot_prev;
4559  enum m0_pool_nd_state dev_state_prev;
4560 
4562  rc = m0_sns_repair_spare_map(pm, gfid, play, play_instance,
4563  src->sa_group, src->sa_unit,
4564  &spare_slot, &spare_slot_prev);
4565  if (M0_FI_ENABLED("poolmach_client_repaired_device1") &&
4566  tfid.f_container == 1) {
4567  rc = 0;
4568  spare_slot = layout_n(play) + layout_k(play);
4569  }
4570  if (rc != 0)
4571  return M0_RC(rc);
4572 
4573  /* Check if there is an effective-failure. */
4574  if (spare_slot_prev != src->sa_unit) {
4575  spare.sa_unit = spare_slot_prev;
4576  m0_fd_fwd_map(play_instance, &spare, tgt);
4577  tfid = target_fid(req, tgt);
4579  &dev_state_prev);
4580  if (rc != 0)
4581  return M0_RC(rc);
4582  } else
4583  dev_state_prev = M0_PNDS_SNS_REPAIRED;
4584 
4585  if (dev_state_prev == M0_PNDS_SNS_REPAIRED) {
4586  spare.sa_unit = spare_slot;
4587  m0_fd_fwd_map(play_instance, &spare, tgt);
4588  tfid = target_fid(req, tgt);
4589  }
4590  dev_state = dev_state_prev;
4591  M0_LOG(M0_DEBUG, "[%p] REPAIRED: [%llu:%llu] -> [%llu:%llu] "
4592  "@ tfid " FID_F, req, spare.sa_group, spare.sa_unit,
4593  tgt->ta_frame, tgt->ta_obj, FID_P(&tfid));
4595  tfid.f_container, tfid.f_key,
4596  m0_pdclust_unit_classify(play, spare.sa_unit),
4597  dev_state,
4598  tgt->ta_frame, tgt->ta_obj,
4599  spare.sa_group, spare.sa_unit);
4600  }
4601 
4602  session = target_session(req, tfid);
4603 
4604  rc = nw_xfer_tioreq_get(xfer, &tfid, tgt->ta_obj, session,
4605  layout_unit_size(play) * req->ir_iomap_nr, tio);
4606 
4609  dev_state != M0_PNDS_SNS_REPAIRED)
4610  (*tio)->ti_state = dev_state;
4611 
4612  return M0_RC(rc);
4613 }
4614 
4615 static int target_ioreq_init(struct target_ioreq *ti,
4616  struct nw_xfer_request *xfer,
4617  const struct m0_fid *cobfid,
4618  uint64_t ta_obj,
4619  struct m0_rpc_session *session,
4620  uint64_t size)
4621 {
4622  int rc;
4623  struct io_request *req;
4624  uint64_t cnt;
4625 
4626  M0_PRE(ti != NULL);
4627  M0_PRE(xfer != NULL);
4628  M0_PRE(cobfid != NULL);
4629  M0_PRE(session != NULL);
4630  M0_PRE(size > 0);
4631 
4632  M0_ENTRY("target_ioreq %p, nw_xfer_request %p, "FID_F,
4633  ti, xfer, FID_P(cobfid));
4634 
4635  ti->ti_rc = 0;
4636  ti->ti_ops = &tioreq_ops;
4637  ti->ti_fid = *cobfid;
4638  ti->ti_nwxfer = xfer;
4639  ti->ti_dgvec = NULL;
4640  ti->ti_req_type = TI_NONE;
4641  M0_SET0(&ti->ti_cc_fop);
4642  ti->ti_cc_fop_inited = false;
4643  /*
4644  * Target object is usually in ONLINE state unless explicitly
4645  * told otherwise.
4646  */
4647  ti->ti_state = M0_PNDS_ONLINE;
4648  ti->ti_session = session;
4649  ti->ti_parbytes = 0;
4650  ti->ti_databytes = 0;
4651 
4652  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
4653  ti->ti_obj = ta_obj;
4654 
4655  M0_LOG(M0_DEBUG, "[%p] ti %p", req, ti);
4656  iofops_tlist_init(&ti->ti_iofops);
4657  tioreqht_tlink_init(ti);
4658  target_ioreq_bob_init(ti);
4659  cnt = page_nr(size);
4660 
4662  if (rc != 0)
4663  goto fail;
4664 
4666  if (rc != 0)
4667  goto fail_free_iv;
4668 
4669  rc = m0_varr_init(&ti->ti_pageattrs, cnt, sizeof(enum page_attr),
4670  (size_t)m0_pagesize_get());
4671  if (rc != 0)
4672  goto fail_free_bv;
4673 
4674  /*
4675  * This value is incremented when new segments are added to the
4676  * index vector in target_ioreq_seg_add().
4677  */
4678  V_SEG_NR(&ti->ti_ivv) = 0;
4679 
4681  return M0_RC(0);
4682 
4683 fail_free_bv:
4685 fail_free_iv:
4687 fail:
4688  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate memory in "
4689  "target_ioreq_init", req);
4690 }
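/*
 * The error path above uses the staged-cleanup idiom: each successfully
 * initialised resource gets its own label, and a failure jumps to the label
 * that unwinds only what has been set up so far. A minimal stand-alone
 * illustration (generic names, not Motr's):
 */
#include <stdlib.h>

struct two_bufs {
        void *a;
        void *b;
};

static int two_bufs_init(struct two_bufs *tb, size_t n)
{
        tb->a = malloc(n);
        if (tb->a == NULL)
                goto fail;
        tb->b = malloc(n);
        if (tb->b == NULL)
                goto fail_free_a;
        return 0;

fail_free_a:
        free(tb->a);
        tb->a = NULL;
fail:
        return -1;              /* -ENOMEM in the kernel-style variant */
}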
4691 
4692 static void target_ioreq_fini(struct target_ioreq *ti)
4693 {
4694  M0_ENTRY("target_ioreq %p, ti->ti_nwxfer %p", ti, ti->ti_nwxfer);
4696 
4697  target_ioreq_bob_fini(ti);
4698  tioreqht_tlink_fini(ti);
4699  iofops_tlist_fini(&ti->ti_iofops);
4700  ti->ti_ops = NULL;
4701  ti->ti_session = NULL;
4702  ti->ti_nwxfer = NULL;
4703 
4706  m0_varr_fini(&ti->ti_pageattrs);
4707  if (ti->ti_dgvec != NULL)
4709 
4710  if (ti->ti_cc_fop_inited) {
4711  struct m0_rpc_item *item = &ti->ti_cc_fop.crf_fop.f_item;
4712  M0_LOG(M0_DEBUG, "item=%p %s osr_xid=%"PRIu64,
4715  ti->ti_cc_fop_inited = false;
4717  }
4718 
4719  M0_LEAVE();
4720 }
4721 
4723  const struct m0_fid *fid)
4724 {
4725  struct target_ioreq *ti;
4726 
4727  M0_ENTRY("nw_xfer_request %p, fid %p", xfer, fid);
4729  M0_PRE(fid != NULL);
4730 
4731  ti = tioreqht_htable_lookup(&xfer->nxr_tioreqs_hash, &fid->f_container);
4732  M0_ASSERT(ergo(ti != NULL, m0_fid_cmp(fid, &ti->ti_fid) == 0));
4733 
4734  M0_LEAVE();
4735  return ti;
4736 }
4737 
4738 static int nw_xfer_tioreq_get(struct nw_xfer_request *xfer,
4739  const struct m0_fid *fid,
4740  uint64_t ta_obj,
4741  struct m0_rpc_session *session,
4742  uint64_t size,
4743  struct target_ioreq **out)
4744 {
4745  int rc = 0;
4746  struct target_ioreq *ti;
4747  struct io_request *req;
4748 
4750  M0_PRE(fid != NULL);
4751  M0_PRE(session != NULL);
4752  M0_PRE(out != NULL);
4753 
4754  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
4755  M0_ENTRY("[%p] "FID_F" ta_obj=%llu size=%llu",
4756  req, FID_P(fid), ta_obj, size);
4757 
4758  ti = target_ioreq_locate(xfer, fid);
4759  if (ti == NULL) {
4760  M0_ALLOC_PTR(ti);
4761  if (ti == NULL)
4762  return M0_ERR_INFO(-ENOMEM, "[%p] Failed to allocate "
4763  "memory for target_ioreq", req);
4764 
4765  rc = target_ioreq_init(ti, xfer, fid, ta_obj, session, size);
4766  if (rc == 0) {
4767  tioreqht_htable_add(&xfer->nxr_tioreqs_hash, ti);
4768  M0_LOG(M0_INFO, "[%p] New target_ioreq %p added for "
4769  FID_F, req, ti, FID_P(fid));
4770  } else {
4771  m0_free(ti);
4772  return M0_ERR_INFO(rc, "[%p] target_ioreq_init() "
4773  "failed", req);
4774  }
4775  ++iommstats.a_target_ioreq_nr;
4776  }
4777  if (ti->ti_dgvec == NULL && M0_IN(ioreq_sm_state(req),
4780 
4781  *out = ti;
4782  return M0_RC(rc);
4783 }
4784 
4785 static struct data_buf *data_buf_alloc_init(enum page_attr pattr)
4786 {
4787  struct data_buf *buf;
4788  unsigned long addr;
4789 
4790  M0_ENTRY();
4791  addr = get_zeroed_page(GFP_KERNEL);
4792  if (addr == 0) {
4793  M0_LOG(M0_ERROR, "Failed to get free page");
4794  return NULL;
4795  }
4796 
4797  ++iommstats.a_page_nr;
4798  M0_ALLOC_PTR(buf);
4799  if (buf == NULL) {
4800  free_page(addr);
4801  M0_LOG(M0_ERROR, "Failed to allocate data_buf");
4802  return NULL;
4803  }
4804 
4805  ++iommstats.a_data_buf_nr;
4806  data_buf_init(buf, (void *)addr, pattr);
4808  M0_LEAVE();
4809  return buf;
4810 }
4811 
4812 static void buf_page_free(struct m0_buf *buf)
4813 {
4814  M0_PRE(buf != NULL);
4815 
4816  free_page((unsigned long)buf->b_addr);
4817  ++iommstats.d_page_nr;
4818  buf->b_addr = NULL;
4819  buf->b_nob = 0;
4820 }
4821 
4822 static void data_buf_dealloc_fini(struct data_buf *buf)
4823 {
4824  M0_ENTRY("data_buf %p", buf);
4826 
4827  if (buf->db_page != NULL)
4828  user_page_unmap(buf, (buf->db_flags & PA_WRITE) ? false : true);
4829  else if (buf->db_buf.b_addr != NULL)
4830  buf_page_free(&buf->db_buf);
4831 
4832  if (buf->db_auxbuf.b_addr != NULL)
4833  buf_page_free(&buf->db_auxbuf);
4834 
4835  data_buf_fini(buf);
4836  m0_free(buf);
4837  ++iommstats.d_data_buf_nr;
4838  M0_LEAVE();
4839 }
4840 
4841 static void target_ioreq_seg_add(struct target_ioreq *ti,
4842  const struct m0_pdclust_src_addr *src,
4843  const struct m0_pdclust_tgt_addr *tgt,
4844  m0_bindex_t gob_offset,
4846  struct pargrp_iomap *map)
4847 {
4848  uint32_t seg;
4849  m0_bindex_t toff;
4850  m0_bindex_t goff;
4851  m0_bindex_t pgstart;
4852  m0_bindex_t pgend;
4853  struct data_buf *buf;
4854  struct io_request *req;
4855  struct m0_pdclust_layout *play;
4856  uint64_t frame = tgt->ta_frame;
4857  uint64_t unit = src->sa_unit;
4858  struct m0_indexvec_varr *ivv;
4859  struct m0_indexvec_varr *bvec;
4860  enum m0_pdclust_unit_type unit_type;
4861  struct m0_varr *pattr;
4862  uint64_t cnt;
4863 
4864  M0_ENTRY("tio req %p, gob_offset %llu, count %llu frame %llu unit %llu",
4865  ti, gob_offset, count, frame, unit);
4867  M0_PRE(map != NULL);
4868 
4869  req = bob_of(ti->ti_nwxfer, struct io_request, ir_nwxfer,
4870  &ioreq_bobtype);
4871  play = pdlayout_get(req);
4872 
4873  unit_type = m0_pdclust_unit_classify(play, unit);
4874  M0_ASSERT(M0_IN(unit_type, (M0_PUT_DATA, M0_PUT_PARITY)));
4875 
4876  toff = target_offset(frame, play, gob_offset);
4877  pgstart = toff;
4878  goff = unit_type == M0_PUT_DATA ? gob_offset : 0;
4879 
4880  M0_LOG(M0_DEBUG, "[%p] %llu: "
4881  "[gpos %6llu, +%llu][%llu,%llu]->[%llu,%llu] %c",
4882  req, map->pi_grpid,
4883  gob_offset, count, src->sa_group, src->sa_unit,
4884  tgt->ta_frame, tgt->ta_obj,
4885  unit_type == M0_PUT_DATA ? 'D' : 'P');
4886 
4887  /* Use ti_dgvec when the request is in dgmode read or write. */
4890  M0_ASSERT(ti->ti_dgvec != NULL);
4891  ivv = &ti->ti_dgvec->dr_ivec_varr;
4892  bvec = &ti->ti_dgvec->dr_bufvec;
4893  pattr = &ti->ti_dgvec->dr_pageattrs;
4895  (layout_n(play) + layout_k(play)));
4896  M0_LOG(M0_DEBUG, "[%p] map_nr=%llu req state=%u cnt=%llu",
4898  } else {
4899  ivv = &ti->ti_ivv;
4900  bvec = &ti->ti_bufvec;
4901  pattr = &ti->ti_pageattrs;
4903  layout_n(play));
4904  M0_LOG(M0_DEBUG, "[%p] map_nr=%llu req state=%u cnt=%llu",
4906  }
4907 
4908  while (pgstart < toff + count) {
4909  pgend = min64u(pgstart + PAGE_SIZE, toff + count);
4910  seg = V_SEG_NR(ivv);
4911 
4912  V_INDEX(ivv, seg) = pgstart;
4913  V_COUNT(ivv, seg) = pgend - pgstart;
4914 
4915  if (unit_type == M0_PUT_DATA) {
4916  uint32_t row;
4917  uint32_t col;
4918 
4919  page_pos_get(map, goff, &row, &col);
4920  buf = map->pi_databufs[row][col];
4921 
4922  PA(pattr,seg) |= PA_DATA;
4923  M0_LOG(M0_DEBUG, "[%p] ti %p, Data seg %u added",
4924  req, ti, seg);
4925  } else {
4926  buf = map->pi_paritybufs[page_id(goff)]
4927  [unit % layout_n(play)];
4928  PA(pattr,seg) |= PA_PARITY;
4929  M0_LOG(M0_DEBUG, "[%p] ti %p, Parity seg %u added",
4930  req, ti, seg);
4931  }
4932  buf->db_tioreq = ti;
4933  V_ADDR (bvec, seg) = buf->db_buf.b_addr;
4934  V_COUNT(bvec, seg) = V_COUNT(ivv, seg);
4935  PA(pattr, seg) |= buf->db_flags;
4936  M0_LOG(M0_DEBUG, "[%p] ti %p, Seg id %d pageaddr=%p "
4937  "[%llu, %llu] added to target_ioreq with "FID_F
4938  " with flags 0x%x", req, ti, seg, V_ADDR(bvec, seg),
4939  V_INDEX(ivv, seg),
4940  V_COUNT(ivv, seg),
4941  FID_P(&ti->ti_fid),
4942  PA(pattr, seg));
4943 
4944  goff += V_COUNT(ivv, seg);
4945  pgstart = pgend;
4946  ++ V_SEG_NR(ivv);
4947  M0_ASSERT_INFO(V_SEG_NR(ivv) <= cnt,
4948  "[%p] ti %p, v_nr=%u, page_nr=%llu",
4949  req, ti, V_SEG_NR(ivv), cnt);
4950  }
4952  M0_LEAVE();
4953 }
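/*
 * Sketch of the segment-chopping loop above: a target extent
 * [off, off + count) is split into at most page-sized pieces, each of which
 * becomes one index-vector segment. Standard C, illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096ULL

static void chop_into_segments(uint64_t off, uint64_t count)
{
        uint64_t pos = off;
        uint64_t end = off + count;

        while (pos < end) {
                uint64_t seg_end = pos + SKETCH_PAGE_SIZE < end ?
                                   pos + SKETCH_PAGE_SIZE : end;

                printf("segment [%llu, +%llu)\n",
                       (unsigned long long)pos,
                       (unsigned long long)(seg_end - pos));
                pos = seg_end;
        }
}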
4954 
4955 static int io_req_fop_init(struct io_req_fop *fop,
4956  struct target_ioreq *ti,
4957  enum page_attr pattr)
4958 {
4959  int rc;
4960  struct io_request *req;
4961 
4962  M0_ENTRY("io_req_fop %p, target_ioreq %p", fop, ti);
4963  M0_PRE(fop != NULL);
4964  M0_PRE(ti != NULL);
4965  M0_PRE(M0_IN(pattr, (PA_DATA, PA_PARITY)));
4966 
4967  io_req_fop_bob_init(fop);
4968  iofops_tlink_init(fop);
4969  fop->irf_pattr = pattr;
4970  fop->irf_tioreq = ti;
4971  fop->irf_reply_rc = 0;
4972  fop->irf_ast.sa_cb = io_bottom_half;
4973 
4974  req = bob_of(ti->ti_nwxfer, struct io_request, ir_nwxfer,
4975  &ioreq_bobtype);
4976  M0_ASSERT(M0_IN(ioreq_sm_state(req),
4979 
4980  fop->irf_ast.sa_mach = &req->ir_sm;
4981 
4982  rc = m0_io_fop_init(&fop->irf_iofop, file_to_fid(req->ir_file),
4983  M0_IN(ioreq_sm_state(req),
4987  /*
4988  * Changes ri_ops of rpc item so as to execute m0t1fs's own
4989  * callback on receiving a reply.
4990  */
4991  fop->irf_iofop.if_fop.f_item.ri_ops = &io_item_ops;
4992 
4993  M0_LOG(M0_DEBUG, "[%p] fop %p, m0_ref %p, "FID_F", %p[%u], "
4994  "rbulk %p", req, &fop->irf_iofop.if_fop,
4995  &fop->irf_iofop.if_fop.f_ref,
4996  FID_P(&fop->irf_tioreq->ti_fid), &fop->irf_iofop.if_fop.f_item,
4997  fop->irf_iofop.if_fop.f_item.ri_type->rit_opcode,
4998  &fop->irf_iofop.if_rbulk);
5000  return M0_RC(rc);
5001 }
5002 
5003 static void io_req_fop_fini(struct io_req_fop *fop)
5004 {
5005  M0_ENTRY("io_req_fop %p", fop);
5007 
5008  /*
5009  * IO fop is finalized (m0_io_fop_fini()) through rpc sessions code
5010  * using m0_rpc_item::m0_rpc_item_ops::rio_free().
5011  * see m0_io_item_free().
5012  */
5013 
5014  iofops_tlink_fini(fop);
5015 
5016  /*
5017  * io_req_bob_fini() is not done here so that struct io_req_fop
5018  * can be retrieved from struct m0_rpc_item using bob_of() and
5019  * magic numbers can be checked.
5020  */
5021 
5022  fop->irf_tioreq = NULL;
5023  fop->irf_ast.sa_cb = NULL;
5024  fop->irf_ast.sa_mach = NULL;
5025  M0_LEAVE();
5026 }
5027 
5028 static void irfop_fini(struct io_req_fop *irfop)
5029 {
5030  M0_PRE(irfop != NULL);
5031 
5032  M0_ENTRY("io_req_fop %p, rbulk %p, fop %p, %p[%u]", irfop,
5033  &irfop->irf_iofop.if_rbulk, &irfop->irf_iofop.if_fop,
5034  &irfop->irf_iofop.if_fop.f_item,
5037  io_req_fop_fini(irfop);
5038  m0_free(irfop);
5039  M0_LEAVE();
5040 }
5041 
5042 static void ioreq_failed_fini(struct io_request *req, int rc)
5043 {
5048 }
5049 
5050 /*
5051  * This function can be used by the ioctl which supports fully vectored
5052  * scatter-gather IO. The caller is supposed to provide an index vector
5053  * aligned with user buffers in struct iovec array.
5054  * This function is also used by file->f_op->aio_{read/write} path.
5055  */
5056 M0_INTERNAL ssize_t m0t1fs_aio(struct kiocb *kcb,
5057  const struct iovec *iov,
5058  struct m0_indexvec_varr *ivv,
5059  enum io_req_type rw)
5060 {
5061  int rc;
5062  ssize_t count;
5063  struct io_request *req;
5064  struct m0t1fs_sb *csb;
5065 
5067  M0_ENTRY("indexvec %p, rw %d", ivv, rw);
5068  M0_PRE(kcb != NULL);
5069  M0_PRE(iov != NULL);
5070  M0_PRE(ivv != NULL);
5071  M0_PRE(M0_IN(rw, (IRT_READ, IRT_WRITE)));
5072 
5073  csb = file_to_sb(kcb->ki_filp);
5074 again:
5075  M0_ALLOC_PTR(req);
5076  if (req == NULL)
5077  return M0_ERR_INFO(-ENOMEM, "Failed to allocate memory"
5078  " for io_request");
5079  ++iommstats.a_ioreq_nr;
5080 
5081  rc = io_request_init(req, kcb->ki_filp, iov, ivv, rw);
5082  if (rc != 0) {
5083  count = 0;
5084  goto last;
5085  }
5087  if (rc != 0) {
5088  M0_LOG(M0_ERROR, "[%p] Failed to prepare IO fops, rc %d",
5089  req, rc);
5091  count = 0;
5092  goto last;
5093  }
5094 
5096  if (rc != 0) {
5097  M0_LOG(M0_ERROR, "[%p] Failed to distribute file data "
5098  "between target_ioreq objects, rc %d", req, rc);
5101  count = 0;
5102  goto last;
5103  }
5104 
5106  if (rc == 0)
5107  rc = req->ir_rc;
5109  M0_LOG(M0_INFO, "[%p] nxr_bytes = %llu, copied_nr = %llu, count %lu, "
5110  "rc %d", req, req->ir_nwxfer.nxr_bytes, req->ir_copied_nr,
5111  count, rc);
5112 
5114 
5116 last:
5117  M0_LOG(M0_DEBUG, "[%p] rc = %d, io request returned %lu bytes",
5118  req, rc, count);
5119  m0_free(req);
5120  ++iommstats.d_ioreq_nr;
5121 
5122  if (rc == -EAGAIN)
5123  goto again;
5124 
5125  M0_LEAVE();
5126  return rc != 0 ? rc : count;
5127 }
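/*
 * The "again:" label above retries the whole request when it finishes with
 * -EAGAIN (for instance after a configuration refresh). A stripped-down
 * model of that control flow; submit_once is a caller-supplied callback,
 * not a Motr function.
 */
#include <errno.h>

static int submit_with_retry(int (*submit_once)(void *arg), void *arg)
{
        int rc;

        do {
                rc = submit_once(arg);  /* allocate, initialise, dispatch, wait */
        } while (rc == -EAGAIN);
        return rc;
}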
5128 
5129 static struct m0_indexvec_varr *indexvec_create(unsigned long seg_nr,
5130  const struct iovec *iov,
5131  loff_t pos)
5132 {
5133  int rc;
5134  uint32_t i;
5135  struct m0_indexvec_varr *ivv;
5136 
5137  /*
5138  * A new API is used to process the IO request, one that accepts an
5139  * m0_indexvec_varr, so that it can be reused by the ioctl which
5140  * provides fully vectored scatter-gather IO to cluster library
5141  * users.
5142  * This function prepares such an m0_indexvec_varr from the iovec
5143  * array and the starting position.
5144  */
5145  M0_ENTRY("seg_nr %lu position %llu", seg_nr, pos);
5146  M0_ALLOC_PTR(ivv);
5147  if (ivv == NULL) {
5148  M0_LEAVE();
5149  return NULL;
5150  }
5151 
5153  if (rc != 0) {
5154  m0_free(ivv);
5155  M0_LEAVE();
5156  return NULL;
5157  }
5158 
5159  for (i = 0; i < seg_nr; ++i) {
5160  V_INDEX(ivv, i) = pos;
5161  V_COUNT(ivv, i) = iov[i].iov_len;
5162  pos += iov[i].iov_len;
5163  }
5164  M0_POST(indexvec_varr_count(ivv) > 0);
5165 
5166  M0_LEAVE();
5167  return ivv;
5168 }
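/*
 * Worked example of the mapping performed above: three user buffers of
 * 4096, 512 and 8192 bytes starting at file offset 10000 map to the
 * segments (10000, 4096), (14096, 512) and (14608, 8192). Illustrative,
 * user-space C, not the kernel-side helper.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/uio.h>

static void print_segments(const struct iovec *iov, unsigned nr, uint64_t pos)
{
        unsigned i;

        for (i = 0; i < nr; ++i) {
                printf("seg %u: index=%llu count=%zu\n", i,
                       (unsigned long long)pos, iov[i].iov_len);
                pos += iov[i].iov_len;
        }
}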
5169 
5170 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5171 static ssize_t file_dio_write(struct kiocb *kcb, struct iov_iter *from)
5172 {
5173  struct file *file = kcb->ki_filp;
5174  struct inode *inode = m0t1fs_file_to_inode(file);
5175  ssize_t written;
5176 
5178  M0_ENTRY();
5179 
5180  inode_lock(inode);
5181  written = __generic_file_write_iter(kcb, from);
5182  inode_unlock(inode);
5183 
5184  if (written > 0)
5185  written = generic_write_sync(kcb, written);
5186 
5187  M0_LEAVE();
5188  return written;
5189 }
5190 #else
5191 static ssize_t file_dio_write(struct kiocb *kcb,
5192  const struct iovec *iov,
5193  unsigned long seg_nr,
5194  loff_t pos)
5195 {
5196  struct file *file = kcb->ki_filp;
5197  struct inode *inode = m0t1fs_file_to_inode(file);
5198  ssize_t written;
5199 
5201  M0_ENTRY();
5202  BUG_ON(kcb->ki_pos != pos);
5203 
5204  mutex_lock(&inode->i_mutex);
5205  written = __generic_file_aio_write(kcb, iov, seg_nr, &kcb->ki_pos);
5206  mutex_unlock(&inode->i_mutex);
5207 
5208  if (written > 0) {
5209  written = generic_write_sync(file, pos, written) ?: written;
5210  }
5211 
5212  M0_LEAVE();
5213  return written;
5214 }
5215 #endif
5216 
5217 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5218 static ssize_t aio_write(struct kiocb *kcb, struct iov_iter *from)
5219 {
5220  ssize_t count = 0;
5221  ssize_t written;
5222  struct m0_indexvec_varr *ivv;
5223 
5225  M0_PRE(kcb != NULL);
5226  M0_PRE(from != NULL);
5227  M0_ENTRY("struct iovec %p position %llu seg_nr %lu", from->iov, kcb->ki_pos, from->nr_segs);
5228 
5229  if (!file_to_sb(kcb->ki_filp)->csb_active) {
5230  M0_LEAVE();
5231  return M0_ERR(-EINVAL);
5232  }
5233 
5234  count = generic_write_checks(kcb, from);
5235  if (count <= 0) {
5236  M0_LEAVE();
5237  return count;
5238  }
5239 
5240  if (kcb->ki_filp->f_flags & O_DIRECT) {
5241  written = file_dio_write(kcb, from);
5242  M0_LEAVE();
5243  return written;
5244  }
5245 
5246  ivv = indexvec_create(from->nr_segs, from->iov, kcb->ki_pos);
5247  if (ivv == NULL)
5248  return M0_ERR(-ENOMEM);
5249 
5250  indexvec_varr_dump(ivv);
5251 
5252  M0_LOG(M0_INFO, "Write vec-count = %llu seg_nr %lu",
5253  indexvec_varr_count(ivv), from->nr_segs);
5254  written = m0t1fs_aio(kcb, from->iov, ivv, IRT_WRITE);
5255 
5256  /* Updates file position. */
5257  if (written > 0)
5258  kcb->ki_pos = kcb->ki_pos + written;
5259 
5260  m0_indexvec_varr_free(ivv);
5261  m0_free(ivv);
5262  M0_LOG(M0_DEBUG, "written %llu", (unsigned long long)written);
5263  M0_LEAVE();
5264  return written;
5265 }
5266 #else
5267 static ssize_t aio_write(struct kiocb *kcb, const struct iovec *iov,
5268  unsigned long seg_nr, loff_t pos)
5269 {
5270  int rc;
5271  size_t count = 0;
5272  size_t saved_count;
5273  ssize_t written;
5274  struct m0_indexvec_varr *ivv;
5275 
5277  M0_ENTRY("struct iovec %p position %llu seg_nr %lu", iov, pos, seg_nr);
5278  M0_PRE(kcb != NULL);
5279  M0_PRE(iov != NULL);
5280  M0_PRE(seg_nr > 0);
5281 
5282  if (!file_to_sb(kcb->ki_filp)->csb_active) {
5283  M0_LEAVE();
5284  return M0_ERR(-EINVAL);
5285  }
5286 
5287  rc = generic_segment_checks(iov, &seg_nr, &count, VERIFY_READ);
5288  if (rc != 0) {
5289  M0_LEAVE();
5290  return 0;
5291  }
5292 
5293  saved_count = count;
5294  rc = generic_write_checks(kcb->ki_filp, &pos, &count, 0);
5295  if (rc != 0 || count == 0) {
5296  M0_LEAVE();
5297  return 0;
5298  }
5299 
5300  if (count != saved_count)
5301  seg_nr = iov_shorten((struct iovec *)iov, seg_nr, count);
5302 
5303  if (kcb->ki_filp->f_flags & O_DIRECT) {
5304  written = file_dio_write(kcb, iov, seg_nr, pos);
5305  M0_LEAVE();
5306  return written;
5307  }
5308 
5309  ivv = indexvec_create(seg_nr, iov, pos);
5310  if (ivv == NULL)
5311  return M0_ERR(-ENOMEM);
5312 
5313  indexvec_varr_dump(ivv);
5314 
5315  M0_LOG(M0_INFO, "Write vec-count = %llu seg_nr %lu",
5316  indexvec_varr_count(ivv), seg_nr);
5317  written = m0t1fs_aio(kcb, iov, ivv, IRT_WRITE);
5318 
5319  /* Updates file position. */
5320  if (written > 0)
5321  kcb->ki_pos = pos + written;
5322 
5323  m0_indexvec_varr_free(ivv);
5324  m0_free(ivv);
5325  M0_LOG(M0_DEBUG, "written %llu", (unsigned long long)written);
5326  M0_LEAVE();
5327  return written;
5328 }
5329 #endif
5330 
5331 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5332 static ssize_t file_aio_write(struct kiocb *kcb, struct iov_iter *from)
5333 #else
5334 static ssize_t file_aio_write(struct kiocb *kcb,
5335  const struct iovec *iov,
5336  unsigned long seg_nr,
5337  loff_t pos)
5338 #endif
5339 {
5340  ssize_t res;
5341  struct m0t1fs_inode *ci = m0t1fs_file_to_m0inode(kcb->ki_filp);
5342 
5344 
5346 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5347  res = aio_write(kcb, from);
5348  M0_ADDB2_ADD(M0_AVI_FS_IO_DESCR, kcb->ki_pos, res);
5349 #else
5350  res = aio_write(kcb, iov, seg_nr, pos);
5352 #endif
5354  return res;
5355 }
5356 
5357 
5358 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5359 static ssize_t aio_read(struct kiocb *kcb, struct iov_iter *from)
5360 {
5361  int seg;
5362  size_t count = 0;
5363  loff_t size;
5364  ssize_t res;
5365  struct file *filp;
5366  struct m0_indexvec_varr *ivv;
5367 
5369  M0_PRE(kcb != NULL);
5370  M0_PRE(from != NULL);
5371  M0_ENTRY("struct iovec %p position %llu", from->iov, kcb->ki_pos);
5372 
5373  filp = kcb->ki_filp;
5374  size = i_size_read(m0t1fs_file_to_inode(filp));
5375 
5376  /* Returns if super block is inactive. */
5377  if (!file_to_sb(filp)->csb_active)
5378  return M0_ERR(-EINVAL);
5379  if (kcb->ki_pos >= size)
5380  return M0_RC(0);
5381 
5382  if (filp->f_flags & O_DIRECT) {
5383  res = generic_file_read_iter(kcb, from);
5384  M0_LEAVE();
5385  return res;
5386  }
5387 
5388  count = iov_iter_count(from);
5389  if (count == 0)
5390  /*
5391  * And thus spake POSIX: "Before any action described below is
5392  * taken, and if nbyte is zero, the read() function may detect
5393  * and return errors as described below. In the absence of
5394  * errors, or if error detection is not performed, the read()
5395  * function shall return zero and have no other results."
5396  */
5397  return M0_RC(0);
5398 
5399  /* Index vector has to be created before io_request is created. */
5400  ivv = indexvec_create(from->nr_segs, from->iov, kcb->ki_pos);
5401  if (ivv == NULL)
5402  return M0_ERR(-ENOMEM);
5403 
5404  /*
5405  * For read IO, any segment of the index vector extending beyond EOF
5406  * is dropped and the vector is truncated at the EOF boundary.
5407  */
5408  for (seg = 0; seg < V_SEG_NR(ivv); ++seg) {
5409  if (v_seg_endpos(ivv, seg) > size) {
5410  V_COUNT(ivv, seg) = size - V_INDEX(ivv, seg);
5411  V_SEG_NR(ivv) = seg + 1;
5412  break;
5413  }
5414  }
5415  indexvec_varr_dump(ivv);
5416  if (indexvec_varr_count(ivv) == 0) {
5417  m0_indexvec_varr_free(ivv);
5418  m0_free(ivv);
5419  return M0_RC(0);
5420  }
5421 
5422  M0_LOG(M0_INFO, "Read vec-count = %llu", indexvec_varr_count(ivv));
5423  res = m0t1fs_aio(kcb, from->iov, ivv, IRT_READ);
5424  M0_LOG(M0_DEBUG, "Read @%llu vec-count = %8llu return = %8llu(%d)",
5425  kcb->ki_pos, indexvec_varr_count(ivv),
5426  (unsigned long long)res, (int)res);
5427  /* Updates file position. */
5428  if (res > 0)
5429  kcb->ki_pos = kcb->ki_pos + res;
5430 
5431  m0_indexvec_varr_free(ivv);
5432  m0_free(ivv);
5433  M0_LEAVE();
5434  return res;
5435 }
5436 #else
5437 static ssize_t aio_read(struct kiocb *kcb, const struct iovec *iov,
5438  unsigned long seg_nr, loff_t pos)
5439 {
5440  int seg;
5441  size_t count = 0;
5442  loff_t size;
5443  ssize_t res;
5444  struct file *filp;
5445  struct m0_indexvec_varr *ivv;
5446 
5448  M0_ENTRY("struct iovec %p position %llu", iov, pos);
5449  M0_PRE(kcb != NULL);
5450  M0_PRE(iov != NULL);
5451  M0_PRE(seg_nr > 0);
5452 
5453  filp = kcb->ki_filp;
5454  size = i_size_read(m0t1fs_file_to_inode(filp));
5455 
5456  /* Returns if super block is inactive. */
5457  if (!file_to_sb(filp)->csb_active)
5458  return M0_ERR(-EINVAL);
5459  if (pos >= size)
5460  return M0_RC(0);
5461 
5462  if (filp->f_flags & O_DIRECT) {
5463  res = generic_file_aio_read(kcb, iov, seg_nr, pos);
5464  M0_LEAVE();
5465  return res;
5466  }
5467 
5468  /*
5469  * Checks for access privileges and adjusts all segments
5470  * for proper count and total number of segments.
5471  */
5472  res = generic_segment_checks(iov, &seg_nr, &count, VERIFY_WRITE);
5473  if (res != 0) {
5474  M0_LEAVE();
5475  return res;
5476  }
5477 
5478  if (count == 0)
5479  /*
5480  * And thus spake POSIX: "Before any action described below is
5481  * taken, and if nbyte is zero, the read() function may detect
5482  * and return errors as described below. In the absence of
5483  * errors, or if error detection is not performed, the read()
5484  * function shall return zero and have no other results."
5485  */
5486  return M0_RC(0);
5487 
5488  /* Index vector has to be created before io_request is created. */
5489  ivv = indexvec_create(seg_nr, iov, pos);
5490  if (ivv == NULL)
5491  return M0_ERR(-ENOMEM);
5492 
5493  /*
5494  * For read IO, any segment of the index vector extending beyond EOF
5495  * is dropped and the vector is truncated at the EOF boundary.
5496  */
5497  for (seg = 0; seg < V_SEG_NR(ivv); ++seg) {
5498  if (v_seg_endpos(ivv, seg) > size) {
5499  V_COUNT(ivv, seg) = size - V_INDEX(ivv, seg);
5500  V_SEG_NR(ivv) = seg + 1;
5501  break;
5502  }
5503  }
5504  indexvec_varr_dump(ivv);
5505  if (indexvec_varr_count(ivv) == 0) {
5506  m0_indexvec_varr_free(ivv);
5507  m0_free(ivv);
5508  return M0_RC(0);
5509  }
5510 
5511  M0_LOG(M0_INFO, "Read vec-count = %llu", indexvec_varr_count(ivv));
5512  res = m0t1fs_aio(kcb, iov, ivv, IRT_READ);
5513  M0_LOG(M0_DEBUG, "Read @%llu vec-count = %8llu return = %8llu(%d)",
5514  pos, indexvec_varr_count(ivv),
5515  (unsigned long long)res, (int)res);
5516  /* Updates file position. */
5517  if (res > 0)
5518  kcb->ki_pos = pos + res;
5519 
5520  m0_indexvec_varr_free(ivv);
5521  m0_free(ivv);
5522  M0_LEAVE();
5523  return res;
5524 }
5525 #endif
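/*
 * Sketch of the EOF truncation performed in both aio_read() variants above:
 * the segment that crosses EOF is clipped to the file size and any
 * following segments are dropped. Standard C, illustrative only.
 */
#include <stdint.h>

struct sk_seg {
        uint64_t index;
        uint64_t count;
};

/* Returns the number of segments that remain after clipping. */
static unsigned sk_clip_to_eof(struct sk_seg *segs, unsigned nr, uint64_t size)
{
        unsigned i;

        for (i = 0; i < nr; ++i) {
                if (segs[i].index >= size)
                        return i;
                if (segs[i].index + segs[i].count > size) {
                        segs[i].count = size - segs[i].index;
                        return i + 1;
                }
        }
        return nr;
}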
5526 
5527 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5528 static ssize_t file_aio_read(struct kiocb *kcb, struct iov_iter *from)
5529 #else
5530 static ssize_t file_aio_read(struct kiocb *kcb,
5531  const struct iovec *iov,
5532  unsigned long seg_nr,
5533  loff_t pos)
5534 #endif
5535 {
5536  ssize_t res;
5537  struct m0t1fs_inode *ci = m0t1fs_file_to_m0inode(kcb->ki_filp);
5538 
5540 
5542 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5543  res = aio_read(kcb, from);
5544  M0_ADDB2_ADD(M0_AVI_FS_IO_DESCR, kcb->ki_pos, res);
5545 #else
5546  res = aio_read(kcb, iov, seg_nr, pos);
5548 #endif
5549 
5551  return res;
5552 }
5553 
5554 int m0t1fs_flush(struct file *file, fl_owner_t id)
5555 {
5556  struct inode *inode = m0t1fs_file_to_inode(file);
5557  struct m0t1fs_inode *ci = M0T1FS_I(inode);
5558  struct m0t1fs_mdop mo;
5559  struct m0t1fs_sb *csb = m0inode_to_sb(ci);
5560  int rc;
5561 
5563  M0_ENTRY("inode links:%d inode writecount = %d close size %d",
5564  (unsigned int)inode->i_nlink,
5565  atomic_read(&inode->i_writecount),
5566  (unsigned int)inode->i_size);
5567 
5568  if (!csb->csb_oostore || inode->i_nlink == 0 ||
5569  atomic_read(&inode->i_writecount) == 0)
5570  return M0_RC(0);
5571 
5572  M0_SET0(&mo);
5573  mo.mo_attr.ca_tfid = *m0t1fs_inode_fid(ci);
5574  mo.mo_attr.ca_size = inode->i_size;
5575  mo.mo_attr.ca_nlink = inode->i_nlink;
5576  mo.mo_attr.ca_pver = m0t1fs_file_to_pver(file)->pv_id;
5577  mo.mo_attr.ca_lid = ci->ci_layout_id;
5578  mo.mo_attr.ca_valid |= (M0_COB_SIZE | M0_COB_NLINK |
5580 
5582  return rc != 0 ? M0_ERR_INFO(rc, FID_F, FID_P(&mo.mo_attr.ca_tfid)) :
5583  M0_RC(rc);
5584 }
5585 
5586 const struct file_operations m0t1fs_reg_file_operations = {
5587  .llseek = generic_file_llseek,
5588 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
5589  .read_iter = file_aio_read,
5590  .write_iter = file_aio_write,
5591 #else
5592  .aio_read = file_aio_read,
5593  .aio_write = file_aio_write,
5594  .read = do_sync_read,
5595  .write = do_sync_write,
5596 #endif
5597 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
5598  .unlocked_ioctl = m0t1fs_ioctl,
5599 #else
5600  .ioctl = m0t1fs_ioctl,
5601 #endif
5602  .fsync = m0t1fs_fsync,
5603  .flush = m0t1fs_flush,
5604 };
5605 
5606 static void client_passive_recv(const struct m0_net_buffer_event *evt)
5607 {
5608  struct m0_rpc_bulk *rbulk;
5609  struct m0_rpc_bulk_buf *buf;
5610  struct m0_net_buffer *nb;
5611  struct m0_io_fop *iofop;
5612  struct io_req_fop *reqfop;
5613  struct io_request *ioreq;
5614  uint32_t req_sm_state;
5615 
5616  M0_ENTRY();
5617 
5618  M0_PRE(evt != NULL);
5619  M0_PRE(evt->nbe_buffer != NULL);
5620 
5621  nb = evt->nbe_buffer;
5622  buf = (struct m0_rpc_bulk_buf *)nb->nb_app_private;
5623  rbulk = buf->bb_rbulk;
5624  iofop = container_of(rbulk, struct m0_io_fop, if_rbulk);
5625  reqfop = bob_of(iofop, struct io_req_fop, irf_iofop, &iofop_bobtype);
5626  ioreq = bob_of(reqfop->irf_tioreq->ti_nwxfer, struct io_request,
5627  ir_nwxfer, &ioreq_bobtype);
5628  M0_ASSERT(rbulk == &reqfop->irf_iofop.if_rbulk);
5629  M0_LOG(M0_DEBUG, "[%p] PASSIVE recv, e %p, status %d, len %llu, "
5630  "nbuf %p", ioreq, evt, evt->nbe_status, evt->nbe_length, nb);
5631 
5632  M0_ASSERT(m0_is_read_fop(&iofop->if_fop));
5633  M0_LOG(M0_DEBUG, "[%p] Pending fops %llu, Pending rdbulk %llu, "
5634  "fop %p, item %p, "FID_F", rbulk %p",
5635  ioreq, m0_atomic64_get(&ioreq->ir_nwxfer.nxr_iofop_nr),
5637  &iofop->if_fop, &iofop->if_fop.f_item,
5638  FID_P(&reqfop->irf_tioreq->ti_fid), rbulk);
5639 
5640  /*
5641  * buf will be released in this callback. But rbulk is still valid
5642  * after that.
5643  */
5645  if (evt->nbe_status != 0)
5646  return;
5647  m0_mutex_lock(&ioreq->ir_nwxfer.nxr_lock);
5648  req_sm_state = ioreq_sm_state(ioreq);
5649  if (req_sm_state != IRS_READ_COMPLETE &&
5650  req_sm_state != IRS_WRITE_COMPLETE) {
5651  /*
5652  * It is possible that io_bottom_half() has already
5653  * reduced nxr_rdbulk_nr to 0 by this time because the
5654  * FOP received an error.
5655  */
5656  if (m0_atomic64_get(&ioreq->ir_nwxfer.nxr_rdbulk_nr) > 0)
5658  if (should_req_sm_complete(ioreq)) {
5659  ioreq_sm_state_set(ioreq,
5660  (M0_IN(req_sm_state,
5661  (IRS_READING,
5665  }
5666  }
5667 
5669  M0_LEAVE();
5670 }
5671 
5673  .nbc_cb = {
5678  }
5679 };
5680 
5681 static int iofop_async_submit(struct m0_io_fop *iofop,
5682  struct m0_rpc_session *session)
5683 {
5684  int rc;
5685  struct m0_fop_cob_rw *rwfop;
5686  struct io_req_fop *reqfop;
5687  struct io_request *req;
5688  struct m0_rpc_item *item;
5689 
5690  M0_ENTRY("m0_io_fop %p m0_rpc_session %p", iofop, session);
5691  M0_PRE(iofop != NULL);
5692  M0_PRE(session != NULL);
5693 
5694  rwfop = io_rw_get(&iofop->if_fop);
5695  M0_ASSERT(rwfop != NULL);
5696 
5697  reqfop = bob_of(iofop, struct io_req_fop, irf_iofop, &iofop_bobtype);
5698  req = bob_of(reqfop->irf_tioreq->ti_nwxfer, struct io_request,
5699  ir_nwxfer, &ioreq_bobtype);
5700 
5702  rwfop->crw_desc.id_descs,
5704  if (rc != 0)
5705  goto out;
5706 
5707  iofop->if_fop.f_item.ri_session = session;
5708  item = &iofop->if_fop.f_item;
5709  item->ri_nr_sent_max = M0T1FS_RPC_MAX_RETRIES;
5710  item->ri_resend_interval = M0T1FS_RPC_RESEND_INTERVAL;
5711  rc = m0_rpc_post(item);
5712  M0_LOG(M0_DEBUG, "[%p] IO fop %p, %p[%u], rbulk %p, submitted to rpc, "
5713  "rc %d, ri_error %d", req, &iofop->if_fop, item,
5714  item->ri_type->rit_opcode, &iofop->if_rbulk, rc, item->ri_error);
5715  /*
5716  * Ignoring error from m0_rpc_post() so that the subsequent fop
5717  * submission goes on. This is to ensure that the ioreq gets into dgmode
5718  * subsequently without exiting from the healthy mode IO itself.
5719  */
5720 
5721  return M0_RC(0);
5722  /*
5723  * If an error is encountered by either m0_rpc_bulk_store() or
5724  * m0_rpc_post(), queued net buffers, if any, are deleted in
5725  * io_req_fop_release().
5726  */
5727 out:
5728  return M0_RC(rc);
5729 }
5730 
5731 static void io_req_fop_release(struct m0_ref *ref)
5732 {
5733  struct m0_fop *fop;
5734  struct m0_io_fop *iofop;
5735  struct io_req_fop *reqfop;
5736  struct m0_rpc_bulk *rbulk;
5737  struct nw_xfer_request *xfer;
5738  struct m0_fop_cob_rw *rwfop;
5739  struct m0_rpc_machine *rmach;
5740  struct m0_rpc_item *item;
5741  struct io_request *req;
5742 
5743  M0_ENTRY("ref %p", ref);
5744  M0_PRE(ref != NULL);
5745 
5746  fop = container_of(ref, struct m0_fop, f_ref);
5747  rmach = m0_fop_rpc_machine(fop);
5748  iofop = container_of(fop, struct m0_io_fop, if_fop);
5749  reqfop = bob_of(iofop, struct io_req_fop, irf_iofop, &iofop_bobtype);
5750  rbulk = &iofop->if_rbulk;
5751  xfer = reqfop->irf_tioreq->ti_nwxfer;
5752  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
5753  item = &fop->f_item;
5754 
5755  M0_LOG(M0_DEBUG, "[%p] fop %p, Pending fops %llu, Pending rdbulk %llu",
5756  req, fop,
5757  (unsigned long long)m0_atomic64_get(&xfer->nxr_iofop_nr),
5758  (unsigned long long)m0_atomic64_get(&xfer->nxr_rdbulk_nr));
5759  M0_LOG(M0_DEBUG, "[%p] fop %p, "FID_F", %p[%u], ri_error %d, "
5760  "rbulk %p", req, &iofop->if_fop,
5761  FID_P(&reqfop->irf_tioreq->ti_fid), item,
5762  item->ri_type->rit_opcode, item->ri_error, rbulk);
5763 
5764  /*
5765  * Release the net buffers if rpc bulk object is still dirty.
5766  * And wait on channel till all net buffers are deleted from
5767  * transfer machine.
5768  */
5769  m0_mutex_lock(&xfer->nxr_lock);
5770  m0_mutex_lock(&rbulk->rb_mutex);
5771  if (!m0_tlist_is_empty(&rpcbulk_tl, &rbulk->rb_buflist)) {
5772  struct m0_clink clink;
5773  size_t buf_nr;
5774  size_t non_queued_buf_nr;
5775 
5777  m0_clink_add(&rbulk->rb_chan, &clink);
5778  buf_nr = rpcbulk_tlist_length(&rbulk->rb_buflist);
5779  non_queued_buf_nr = m0_rpc_bulk_store_del_unqueued(rbulk);
5780  m0_mutex_unlock(&rbulk->rb_mutex);
5781 
5782  m0_rpc_bulk_store_del(rbulk);
5783  M0_LOG(M0_DEBUG, "[%p] fop %p, %p[%u], bulk %p, buf_nr %llu, "
5784  "non_queued_buf_nr %llu", req, &iofop->if_fop, item,
5785  item->ri_type->rit_opcode, rbulk,
5786  (unsigned long long)buf_nr,
5787  (unsigned long long)non_queued_buf_nr);
5788  if (m0_is_read_fop(&iofop->if_fop))
5790  non_queued_buf_nr);
5792  /* rio_replied() is not invoked for this item. */
5793  m0_atomic64_dec(&xfer->nxr_iofop_nr);
5794  m0_mutex_unlock(&xfer->nxr_lock);
5795  /*
5796  * If some queued net buffers had to be deleted, we must
5797  * wait for their callbacks.
5798  */
5799  if (buf_nr > non_queued_buf_nr) {
5800  /*
5801  * rpc_machine_lock may be needed from nlx_tm_ev_worker
5802  * thread, which is going to wake us up. So we should
5803  * release it to avoid deadlock.
5804  */
5805  m0_rpc_machine_unlock(rmach);
5806  m0_chan_wait(&clink);
5807  m0_rpc_machine_lock(rmach);
5808  }
5810  m0_clink_fini(&clink);
5811  } else {
5812  m0_mutex_unlock(&rbulk->rb_mutex);
5813  m0_mutex_unlock(&xfer->nxr_lock);
5814  }
5816  M0_LOG(M0_DEBUG, "[%p] fop %p, Pending fops %llu, Pending rdbulk %llu",
5817  req, fop,
5818  (unsigned long long)m0_atomic64_get(&xfer->nxr_iofop_nr),
5819  (unsigned long long)m0_atomic64_get(&xfer->nxr_rdbulk_nr));
5820  M0_LOG(M0_DEBUG, "[%p] fop %p, "FID_F", %p[%u], ri_error %d, "
5821  "rbulk %p", req, &iofop->if_fop,
5822  FID_P(&reqfop->irf_tioreq->ti_fid), item,
5823  item->ri_type->rit_opcode, item->ri_error, rbulk);
5824 
5825  rwfop = io_rw_get(&iofop->if_fop);
5826  M0_ASSERT(rwfop != NULL);
5827  io_req_fop_fini(reqfop);
5828  /* see io_req_fop_fini(). */
5829  io_req_fop_bob_fini(reqfop);
5830  m0_io_fop_fini(iofop);
5831  m0_free(reqfop);
5832  ++iommstats.d_io_req_fop_nr;
5833 }
5834 
5835 static void cc_rpc_item_cb(struct m0_rpc_item *item)
5836 {
5837  struct io_request *req;
5838  struct cc_req_fop *cc_fop;
5839  struct target_ioreq *ti;
5840  struct m0_fop *fop;
5841  struct m0_fop *rep_fop;
5842 
5844  cc_fop = container_of(fop, struct cc_req_fop, crf_fop);
5845  ti = container_of(cc_fop, struct target_ioreq, ti_cc_fop);
5846  req = bob_of(ti->ti_nwxfer, struct io_request,
5847  ir_nwxfer, &ioreq_bobtype);
5848  cc_fop->crf_ast.sa_cb = cc_bottom_half;
5849  cc_fop->crf_ast.sa_datum = (void *)ti;
5850  /* References on the fop and its reply are released in cc_bottom_half(). */
5851  m0_fop_get(fop);
5852  if (item->ri_reply != NULL) {
5855  }
5856 
5857  m0_sm_ast_post(req->ir_sm.sm_grp, &cc_fop->crf_ast);
5858 }
5859 
5860 static void cc_bottom_half(struct m0_sm_group *grp, struct m0_sm_ast *ast)
5861 {
5862  struct nw_xfer_request *xfer;
5863  struct target_ioreq *ti;
5864  struct cc_req_fop *cc_fop;
5865  struct io_request *req;
5866  struct m0_fop_cob_op_reply *reply;
5867  struct m0_fop *reply_fop = NULL;
5868  struct m0t1fs_inode *inode;
5869  struct m0t1fs_sb *csb;
5870  struct m0_rpc_item *req_item;
5871  struct m0_rpc_item *reply_item;
5872  int rc;
5873 
5874  ti = (struct target_ioreq *)ast->sa_datum;
5875  req = bob_of(ti->ti_nwxfer, struct io_request, ir_nwxfer,
5876  &ioreq_bobtype);
5877  xfer = ti->ti_nwxfer;
5878  cc_fop = &ti->ti_cc_fop;
5879  req_item = &cc_fop->crf_fop.f_item;
5880  reply_item = req_item->ri_reply;
5881  rc = req_item->ri_error;
5882  if (reply_item != NULL) {
5883  reply_fop = m0_rpc_item_to_fop(reply_item);
5884  rc = rc ?: m0_rpc_item_generic_reply_rc(reply_item);
5885  }
5886  if (rc < 0 || reply_item == NULL) {
5887  M0_ASSERT(ergo(reply_item == NULL, rc != 0));
5888  goto ref_dec;
5889  }
5890 
5891  reply = m0_fop_data(m0_rpc_item_to_fop(reply_item));
5892  /*
5893  * Ignore the case where an attempt is made to create a cob on a
5894  * target where a previous IO has already created it.
5895  */
5896  rc = rc ? M0_IN(reply->cor_rc, (0, -EEXIST)) ? 0 : reply->cor_rc : 0;
5897 
5898  /*
5899  * If the configuration has been updated or revoked,
5900  * abort the ongoing request.
5901  */
5903  csb = M0T1FS_SB(inode->ci_inode.i_sb);
5904  m0_mutex_lock(&csb->csb_confc_state.cus_lock);
5905  if (csb->csb_confc_state.cus_state != M0_CC_READY)
5906  rc = M0_ERR(-ESTALE);
5907  m0_mutex_unlock(&csb->csb_confc_state.cus_lock);
5908 ref_dec:
5909  if (ti->ti_rc == 0 && rc != 0)
5910  ti->ti_rc = rc;
5911  if (xfer->nxr_rc == 0 && rc != 0)
5912  xfer->nxr_rc = rc;
5913  m0_fop_put0_lock(&cc_fop->crf_fop);
5914  if (reply_fop != NULL)
5916  m0_mutex_lock(&xfer->nxr_lock);
5917  m0_atomic64_dec(&xfer->nxr_ccfop_nr);
5920  m0_mutex_unlock(&xfer->nxr_lock);
5921 }
5922 
5924 {
5925  struct m0t1fs_sb *csb;
5926  struct m0t1fs_inode *inode;
5927 
5928 
5930  csb = M0T1FS_SB(inode->ci_inode.i_sb);
5931 
5932  return m0_atomic64_get(&req->ir_nwxfer.nxr_iofop_nr) == 0 &&
5934  ((csb->csb_oostore && ioreq_sm_state(req) == IRS_WRITING) ?
5935  m0_atomic64_get(&req->ir_nwxfer.nxr_ccfop_nr) == 0 : true);
5936 }
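/*
 * Sketch of the completion predicate above (plain C, illustrative): the
 * request is complete when no IO fops and no read bulk buffers are
 * outstanding and, for object-store writes, no cob-create fops either.
 */
#include <stdbool.h>
#include <stdint.h>

static bool sk_req_is_complete(uint64_t pending_iofops,
                               uint64_t pending_rdbulk,
                               uint64_t pending_ccfops,
                               bool     oostore_write)
{
        return pending_iofops == 0 &&
               pending_rdbulk == 0 &&
               (oostore_write ? pending_ccfops == 0 : true);
}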
5937 
5938 static void io_rpc_item_cb(struct m0_rpc_item *item)
5939 {
5940  struct m0_fop *fop;
5941  struct m0_fop *rep_fop;
5942  struct m0_io_fop *iofop;
5943  struct io_req_fop *reqfop;
5944  struct io_request *ioreq;
5945 
5946  M0_PRE(item != NULL);
5947  M0_ENTRY("rpc_item %p[%u]", item, item->ri_type->rit_opcode);
5948 
5950  iofop = container_of(fop, struct m0_io_fop, if_fop);
5951  reqfop = bob_of(iofop, struct io_req_fop, irf_iofop, &iofop_bobtype);
5952  ioreq = bob_of(reqfop->irf_tioreq->ti_nwxfer, struct io_request,
5954  /*
5955  * NOTE: RPC errors are handled in io_bottom_half(), which is called
5956  * by reqfop->irf_ast.
5957  */
5958 
5959  /*
5960  * Acquire a reference on IO reply fop since its contents
5961  * are needed for policy decisions in io_bottom_half().
5962  * io_bottom_half() takes care of releasing the reference.
5963  */
5964  if (item->ri_reply != NULL) {
5967  }
5968 
5969  M0_LOG(M0_INFO, "[%p] io_req_fop %p, target fid "FID_F" item %p[%u], "
5970  "ri_error %d", ioreq, reqfop, FID_P(&reqfop->irf_tioreq->ti_fid),
5972  /*
5973  * Acquire a reference on IO fop so that it does not get
5974  * released until io_bottom_half() is executed for it.
5975  * io_bottom_half() takes care of releasing the reference.
5976  */
5977  m0_fop_get(&reqfop->irf_iofop.if_fop);
5978  m0_sm_ast_post(ioreq->ir_sm.sm_grp, &reqfop->irf_ast);
5979  M0_LEAVE();
5980 }
5981 
5982 M0_INTERNAL struct m0_file *m0_fop_to_file(struct m0_fop *fop)
5983 {
5984  struct m0_io_fop *iofop;
5985  struct io_req_fop *irfop;
5986  struct io_request *ioreq;
5987 
5988  iofop = container_of(fop, struct m0_io_fop, if_fop);
5989  irfop = bob_of(iofop, struct io_req_fop, irf_iofop, &iofop_bobtype);
5990  ioreq = bob_of(irfop->irf_tioreq->ti_nwxfer, struct io_request,
5992 
5993  return &m0t1fs_file_to_m0inode(ioreq->ir_file)->ci_flock;
5994 }
5995 
5996 M0_INTERNAL struct m0t1fs_sb *m0_fop_to_sb(struct m0_fop *fop)
5997 {
5998  struct m0_io_fop *iofop;
5999  struct io_req_fop *irfop;
6000  struct io_request *ioreq;
6001 
6002  iofop = container_of(fop, struct m0_io_fop, if_fop);
6003  irfop = bob_of(iofop, struct io_req_fop, irf_iofop, &iofop_bobtype);
6004  ioreq = bob_of(irfop->irf_tioreq->ti_nwxfer, struct io_request,
6006  return file_to_sb(ioreq->ir_file);
6007 }
6008 
6009 static void io_bottom_half(struct m0_sm_group *grp, struct m0_sm_ast *ast)
6010 {
6011  struct io_req_fop *irfop;
6012  struct io_request *req;
6013  struct target_ioreq *tioreq;
6014  struct nw_xfer_request *xfer;
6015  struct m0_io_fop *iofop;
6016  struct m0_fop *reply_fop = NULL;
6017  struct m0_rpc_item *req_item;
6018  struct m0_rpc_item *reply_item;
6019  struct m0_fop_cob_rw_reply *rw_reply;
6020  struct m0_reqh_service_ctx *ctx;
6021  struct m0t1fs_inode *inode;
6022  struct m0t1fs_sb *csb;
6023  struct m0_be_tx_remid *remid;
6024  uint64_t actual_bytes = 0;
6025  int rc;
6026 
6027  M0_ENTRY("sm_group %p sm_ast %p", grp, ast);
6028  M0_PRE(grp != NULL);
6029  M0_PRE(ast != NULL);
6030 
6031  irfop = bob_of(ast, struct io_req_fop, irf_ast, &iofop_bobtype);
6032  tioreq = irfop->irf_tioreq;
6033  req = bob_of(tioreq->ti_nwxfer, struct io_request, ir_nwxfer,
6034  &ioreq_bobtype);
6035  xfer = tioreq->ti_nwxfer;
6036 
6037  M0_ASSERT(xfer == &req->ir_nwxfer);
6038  M0_ASSERT(M0_IN(irfop->irf_pattr, (PA_DATA, PA_PARITY)));
6042  IRS_FAILED)));
6043  M0_ASSERT(req->ir_file != NULL);
6044 
6045  iofop = &irfop->irf_iofop;
6046  req_item = &iofop->if_fop.f_item;
6047  reply_item = req_item->ri_reply;
6048  M0_LOG(M0_DEBUG, "[%p] nxr_iofop_nr %llu, nxr_rdbulk_nr %llu, "
6049  "req item %p[%u], ri_error %d", req,
6050  (unsigned long long)m0_atomic64_get(&xfer->nxr_iofop_nr),
6051  (unsigned long long)m0_atomic64_get(&xfer->nxr_rdbulk_nr),
6052  req_item, req_item->ri_type->rit_opcode, req_item->ri_error);
6053 
6054  rc = req_item->ri_error;
6055  if (reply_item != NULL) {
6056  rc = rc ?: m0_rpc_item_generic_reply_rc(reply_item);
6057  }
6058  if (rc < 0 || reply_item == NULL) {
6059  M0_ASSERT(ergo(reply_item == NULL, rc != 0));
6060  M0_LOG(M0_ERROR, "[%p] item %p, rc=%d", req, req_item, rc);
6061  goto ref_dec;
6062  }
6063 
6064  reply_fop = m0_rpc_item_to_fop(reply_item);
6066 
6067  rw_reply = io_rw_rep_get(reply_fop);
6068  rc = rw_reply->rwr_rc;
6069  remid = &rw_reply->rwr_mod_rep.fmr_remid;
6070  req->ir_sns_state = rw_reply->rwr_repair_done;
6071  M0_LOG(M0_DEBUG, "[%p] item %p[%u], reply received = %d, "
6072  "sns state = %d", req, req_item,
6073  req_item->ri_type->rit_opcode, rc, req->ir_sns_state);
6074 
6075  irfop->irf_reply_rc = rc;
6076 
6077  /* update pending transaction number */
6080  csb = M0T1FS_SB(inode->ci_inode.i_sb);
6081  m0_mutex_lock(&csb->csb_confc_state.cus_lock);
6082  if (csb->csb_confc_state.cus_state != M0_CC_READY) {
6083  m0_mutex_unlock(&csb->csb_confc_state.cus_lock);
6084  rc = M0_ERR(-ESTALE);
6085  goto ref_dec;
6086  }
6087  m0_mutex_unlock(&csb->csb_confc_state.cus_lock);
6089  actual_bytes = rw_reply->rwr_count;
6090 
6091 ref_dec:
6092  /* The IO did not complete successfully (for whatever reason),
6093  * so clear the read bulk count. */
6094  if (rc < 0 && m0_is_read_fop(&iofop->if_fop))
6096  m0_rpc_bulk_buf_length(&iofop->if_rbulk));
6097  if (tioreq->ti_rc == 0)
6098  tioreq->ti_rc = rc;
6099 
6100  /* For a stale conf cache, override the error. */
6101  if (rc == -ESTALE || (xfer->nxr_rc == 0 && rc != 0)) {
6102  xfer->nxr_rc = rc;
6103  M0_LOG(M0_ERROR, "[%p][type=%d] rc %d, tioreq->ti_rc %d, "
6104  "nwxfer rc = %d @"FID_F,
6105  req, req->ir_type, rc, tioreq->ti_rc,
6106  xfer->nxr_rc, FID_P(&tioreq->ti_fid));
6107  }
6108 
6109  if (irfop->irf_pattr == PA_DATA)
6110  tioreq->ti_databytes += iofop->if_rbulk.rb_bytes;
6111  else
6112  tioreq->ti_parbytes += iofop->if_rbulk.rb_bytes;
6113 
6114  M0_LOG(M0_INFO, "[%p] fop %p, Returned no of bytes = %llu, "
6115  "expected = %llu", req, &iofop->if_fop, actual_bytes,
6116  iofop->if_rbulk.rb_bytes);
6117  /* Drop reference on request and reply fop. */
6118  m0_fop_put0_lock(&iofop->if_fop);
6120  m0_atomic64_dec(&file_to_sb(req->ir_file)->csb_pending_io_nr);
6121 
6122  m0_mutex_lock(&xfer->nxr_lock);
6123  m0_atomic64_dec(&xfer->nxr_iofop_nr);
6124  if (should_req_sm_complete(req)) {
6128  }
6129  m0_mutex_unlock(&xfer->nxr_lock);
6130 
6131  M0_LOG(M0_DEBUG, "[%p] item %p, ref %llu, "FID_F", Pending fops %llu, "
6132  "Pending rdbulk %llu", req, req_item,
6133  (unsigned long long)m0_ref_read(&iofop->if_fop.f_ref),
6134  FID_P(&tioreq->ti_fid), m0_atomic64_get(&xfer->nxr_iofop_nr),
6135  m0_atomic64_get(&xfer->nxr_rdbulk_nr));
6136  M0_LEAVE();
6137 }
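/*
 * The rc bookkeeping above follows a "first error wins" rule (with -ESTALE
 * additionally allowed to override, as noted in the code): an error is
 * latched only if none has been recorded yet. Minimal illustration of the
 * basic rule:
 */
static void sk_record_rc(int *stored_rc, int rc)
{
        if (*stored_rc == 0 && rc != 0)
                *stored_rc = rc;
}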
6138 
6139 static int nw_xfer_req_dispatch(struct nw_xfer_request *xfer)
6140 {
6141  int rc = 0;
6142  struct io_req_fop *irfop;
6143  struct io_request *req;
6144  struct target_ioreq *ti;
6145  struct m0t1fs_sb *csb;
6146  uint64_t nr_dispatched = 0;
6147  int post_error = 0;
6148  int ri_error;
6149 
6150  M0_ENTRY();
6151 
6152  M0_PRE(xfer != NULL);
6153  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
6154 
6155  M0_LOG(M0_DEBUG, "[%p]", req);
6157  csb = req->ir_file->f_path.mnt->mnt_sb->s_fs_info;
6158  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
6159  if (ti->ti_state != M0_PNDS_ONLINE) {
6160  M0_LOG(M0_INFO, "[%p] Skipped iofops prepare for "FID_F,
6161  req, FID_P(&ti->ti_fid));
6162  continue;
6163  }
6164  if (target_ioreq_type_get(ti) == TI_COB_CREATE &&
6166  rc = ti->ti_ops->tio_cc_fops_prepare(ti);
6167  if (rc != 0)
6168  return M0_ERR_INFO(rc, "[%p] cob create fop "
6169  "failed", req);
6170  continue;
6171  }
6172  rc = ti->ti_ops->tio_iofops_prepare(ti, PA_DATA);
6173  if (rc != 0)
6174  return M0_ERR_INFO(rc, "[%p] data fop failed", req);
6175 
6176  rc = ti->ti_ops->tio_iofops_prepare(ti, PA_PARITY);
6177  if (rc != 0)
6178  return M0_ERR_INFO(rc, "[%p] parity fop failed", req);
6179  } m0_htable_endfor;
6180 
6181  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
6182  /* Skips the target device if it is not online. */
6183  if (ti->ti_state != M0_PNDS_ONLINE) {
6184  M0_LOG(M0_INFO, "[%p] Skipped device "FID_F,
6185  req, FID_P(&ti->ti_fid));
6186  continue;
6187  }
6188  M0_LOG(M0_DEBUG, "[%p] Before Submitting fops for device "FID_F
6189  ", fops length of ti %u, total fops nr %llu", req,
6190  FID_P(&ti->ti_fid),
6191  (int)iofops_tlist_length(&ti->ti_iofops),
6192  m0_atomic64_get(&xfer->nxr_iofop_nr));
6193 
6194  if (target_ioreq_type_get(ti) == TI_COB_CREATE &&
6196  /*
6197  * An error returned by rpc post has been ignored.
6198  * It will be handled in the respective bottom half.
6199  */
6201  continue;
6202  }
6203  m0_tl_for (iofops, &ti->ti_iofops, irfop) {
6204  rc = iofop_async_submit(&irfop->irf_iofop,
6205  ti->ti_session);
6206  ri_error = irfop->irf_iofop.if_fop.f_item.ri_error;
6207  M0_LOG(M0_DEBUG, "[%p] Submitted fops for device "
6208  FID_F"@%p, item %p, fops nr=%llu, rc=%d, "
6209  "ri_error=%d", req, FID_P(&ti->ti_fid), irfop,
6210  &irfop->irf_iofop.if_fop.f_item,
6211  m0_atomic64_get(&xfer->nxr_iofop_nr), rc,
6212  ri_error);
6213  if (rc != 0)
6214  goto out;
6215 
6217  csb_pending_io_nr);
6218  if (ri_error == 0)
6219  M0_CNT_INC(nr_dispatched);
6220  else if (post_error == 0)
6221  post_error = ri_error;
6222  } m0_tl_endfor;
6223 
6224  } m0_htable_endfor;
6225 
6226 out:
6227  if (rc == 0 && nr_dispatched == 0 && post_error == 0) {
6228  /* No fop has been dispatched.
6229  *
6230  * This can happen in dgmode reading:
6231  * in 'parity verify' mode the whole parity group, data and
6232  * parity units alike, has already been read from the
6233  * ioservices. If some units failed to read, no extra unit
6234  * needs to be read; the units needed for recovery are ready.
6235  */
6237  M0_ASSERT(req->ir_type == IRT_READ && csb->csb_verify);
6239  } else if (rc == 0)
6240  xfer->nxr_state = NXS_INFLIGHT;
6241  M0_LOG(M0_DEBUG, "[%p] nxr_iofop_nr %llu, nxr_rdbulk_nr %llu, "
6242  "nr_dispatched %llu", req,
6243  (unsigned long long)m0_atomic64_get(&xfer->nxr_iofop_nr),
6244  (unsigned long long)m0_atomic64_get(&xfer->nxr_rdbulk_nr),
6245  (unsigned long long)nr_dispatched);
6246 
6247  return M0_RC(rc);
6248 }
6249 
6250 static void nw_xfer_req_complete(struct nw_xfer_request *xfer, bool rmw)
6251 {
6252  struct io_request *req;
6253  struct target_ioreq *ti;
6254  struct io_req_fop *irfop;
6255  struct m0_fop *fop;
6256  struct m0_rpc_item *item;
6257  struct m0t1fs_inode *inode;
6258  struct m0t1fs_sb *csb;
6259 
6260  M0_ENTRY("nw_xfer_request %p, rmw %s", xfer,
6261  rmw ? (char *)"true" : (char *)"false");
6262  M0_PRE(xfer != NULL);
6263 
6264  xfer->nxr_state = NXS_COMPLETE;
6265  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
6267  csb = M0T1FS_SB(inode->ci_inode.i_sb);
6268 
6269  M0_LOG(M0_DEBUG, "[%p] nxr_iofop_nr %llu, nxr_rdbulk_nr %llu, "
6270  "rmw %s", req,
6271  (unsigned long long)m0_atomic64_get(&xfer->nxr_iofop_nr),
6272  (unsigned long long)m0_atomic64_get(&xfer->nxr_rdbulk_nr),
6273  rmw ? (char *)"true" : (char *)"false");
6274 
6275  m0_htable_for(tioreqht, ti, &xfer->nxr_tioreqs_hash) {
6276  /* Maintains only the first error encountered. */
6277  if (xfer->nxr_rc == 0) {
6278  xfer->nxr_rc = ti->ti_rc;
6279  M0_LOG(M0_DEBUG, "[%p] nwxfer rc = %d",
6280  req, xfer->nxr_rc);
6281  }
6282 
6283  xfer->nxr_bytes += ti->ti_databytes;
6284  ti->ti_databytes = 0;
6285 
6286  if (csb->csb_oostore && ti->ti_req_type == TI_COB_CREATE &&
6289  continue;
6290  }
6291  m0_tl_teardown(iofops, &ti->ti_iofops, irfop) {
6292  fop = &irfop->irf_iofop.if_fop;
6294  M0_LOG(M0_DEBUG, "[%p] fop %p, ref %llu, "
6295  "item %p[%u], ri_error %d, ri_state %d",
6296  req, fop,
6297  (unsigned long long)m0_ref_read(&fop->f_ref),
6299  item->ri_sm.sm_state);
6300 
6301  /* Maintains only the first error encountered. */
6302  if (xfer->nxr_rc == 0 &&
6304  xfer->nxr_rc = item->ri_error;
6305  M0_LOG(M0_DEBUG, "[%p] nwxfer rc = %d",
6306  req, xfer->nxr_rc);
6307  }
6308 
6311  item->ri_rmachine != NULL));
6312  if (item->ri_rmachine == NULL) {
6313  M0_ASSERT(ti->ti_session != NULL);
6316  }
6317 
6318  M0_LOG(M0_DEBUG, "[%p] item %p, target fid "
6319  FID_F"fop %p, "
6320  "ref %llu", req, item, FID_P(&ti->ti_fid), fop,
6321  (unsigned long long)m0_ref_read(&fop->f_ref));
6323  }
6324  } m0_htable_endfor;
6325 
6326  M0_LOG(M0_INFO, "[%p] Number of bytes %s = %llu",
6327  req, req->ir_type == IRT_READ? "read" : "written",
6328  xfer->nxr_bytes);
6329 
6330  M0_LOG(M0_DEBUG, "[%p] nxr_rc %d, nxr_iofop_nr %llu, "
6331  "nxr_rdbulk_nr %llu", req, xfer->nxr_rc,
6332  (unsigned long long)m0_atomic64_get(&xfer->nxr_iofop_nr),
6333  (unsigned long long)m0_atomic64_get(&xfer->nxr_rdbulk_nr));
6334  M0_ASSERT(ergo(xfer->nxr_rc == 0, nw_xfer_request_invariant(xfer)));
6335 
6336  /*
6337  * This function is invoked from four states: IRS_READ_COMPLETE,
6338  * IRS_WRITE_COMPLETE, IRS_DEGRADED_READING and IRS_DEGRADED_WRITING.
6339  * The state change below applies only to healthy-mode IO, that is,
6340  * to IRS_READ_COMPLETE and IRS_WRITE_COMPLETE.
6341  */
6342  if (M0_IN(ioreq_sm_state(req),
6344  if (!rmw)
6346  else if (ioreq_sm_state(req) == IRS_READ_COMPLETE)
6347  xfer->nxr_bytes = 0;
6348  }
6349  req->ir_rc = xfer->nxr_rc;
6350  M0_LEAVE();
6351 }
6352 
6358 static int io_req_fop_dgmode_read(struct io_req_fop *irfop)
6359 {
6360  int rc;
6361  uint32_t cnt;
6362  uint32_t seg;
6363  uint32_t seg_nr;
6364  uint64_t grpid;
6365  uint64_t pgcur = 0;
6366  m0_bindex_t *index;
6367  struct io_request *req;
6368  struct m0_fop *fop;
6369  struct m0_rpc_bulk *rbulk;
6370  struct pargrp_iomap *map = NULL;
6371  struct m0_rpc_bulk_buf *rbuf;
6372 
6373  M0_PRE(irfop != NULL);
6374 
6375  req = bob_of(irfop->irf_tioreq->ti_nwxfer, struct io_request,
6376  ir_nwxfer, &ioreq_bobtype);
6377  rbulk = &irfop->irf_iofop.if_rbulk;
6378  fop = &irfop->irf_iofop.if_fop;
6379 
6380  M0_ENTRY("[%p] target fid "FID_F", fop %p, %p[%u] ", req,
6381  FID_P(&irfop->irf_tioreq->ti_fid), fop,
6382  &fop->f_item, m0_fop_opcode(fop));
6383 
6384  m0_tl_for (rpcbulk, &rbulk->rb_buflist, rbuf) {
6385 
6386  index = rbuf->bb_zerovec.z_index;
6388 
6389  for (seg = 0; seg < seg_nr; ) {
6390 
6391  grpid = pargrp_id_find(index[seg], req, irfop);
6392  for (cnt = 1, ++seg; seg < seg_nr; ++seg) {
6393 
6394  M0_ASSERT(ergo(seg > 0, index[seg] >
6395  index[seg - 1]));
6396  M0_ASSERT((index[seg] & ~PAGE_MASK) == 0);
6397 
6398  if (grpid ==
6399  pargrp_id_find(index[seg], req, irfop))
6400  ++cnt;
6401  else
6402  break;
6403  }
6404  ioreq_pgiomap_find(req, grpid, &pgcur, &map);
6405  M0_ASSERT(map != NULL);
6406  rc = map->pi_ops->pi_dgmode_process(map,
6407  irfop->irf_tioreq, &index[seg - cnt],
6408  cnt);
6409  if (rc != 0)
6410  return M0_ERR_INFO(rc, "[%p] fop %p, %p[%u] "
6411  "Parity group dgmode process failed",
6412  req, fop, &fop->f_item,
6413  m0_fop_opcode(fop));
6414  }
6415  } m0_tl_endfor;
6416  return M0_RC(0);
6417 }
6418 
6419 /*
6420  * Used to precompute the io fop size while adding rpc bulk buffers and
6421  * data buffers (see the standalone sketch after io_di_size() below).
6422  */
6423 static inline uint32_t io_desc_size(struct m0_net_domain *ndom)
6424 {
6425  return
6426  /* size of variables ci_nr and nbd_len */
6429  /* size of nbd_data */
6431 }
6432 
6433 static inline uint32_t io_seg_size(void)
6434 {
6435  return sizeof(struct m0_ioseg);
6436 }
6437 
6438 static uint32_t io_di_size(const struct io_request *req)
6439 {
6440  struct m0_file *file;
6441 
6442  file = &m0t1fs_file_to_m0inode(req->ir_file)->ci_flock;
6443  if (file->fi_di_ops->do_out_shift(file) == 0)
6444  return 0;
6446 }
6447 
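io_desc_size(), io_seg_size() and io_di_size() above feed the size budget used while an io fop is assembled: each new rpc bulk buffer costs one on-wire descriptor, and every page added costs one io segment plus its data-integrity payload, all checked against the maxsize cap passed to bulk_buffer_add(). A self-contained sketch of that accounting, with made-up byte counts standing in for the real descriptor, segment and di sizes:

#include <stdio.h>

/* Illustrative stand-ins for io_desc_size(), io_seg_size() and io_di_size();
 * the real values are queried from the network domain and the file's di ops. */
enum { DESC_SIZE = 64, SEG_SIZE = 24, DI_SIZE = 8, MAXSIZE = 4096 };

int main(void)
{
        unsigned fop_size = 256;   /* current serialised size of the io fop */
        unsigned delta    = 0;     /* growth from buffers/segments added    */
        unsigned segs     = 0;

        delta += DESC_SIZE;        /* one new rpc bulk buffer was added     */
        while (fop_size + delta + SEG_SIZE + DI_SIZE < MAXSIZE) {
                delta += SEG_SIZE + DI_SIZE;   /* one more page fits        */
                ++segs;
        }
        printf("pages that fit in this fop: %u (delta = %u bytes)\n",
               segs, delta);
        return 0;
}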
6448 static int bulk_buffer_add(struct io_req_fop *irfop,
6449  struct m0_net_domain *dom,
6450  struct m0_rpc_bulk_buf **rbuf,
6451  uint32_t *delta,
6452  uint32_t maxsize)
6453 {
6454  int rc;
6455  int seg_nr;
6456  struct io_request *req;
6457  struct m0_indexvec_varr *ivv;
6458 
6459  M0_PRE(irfop != NULL);
6460  M0_PRE(dom != NULL);
6461  M0_PRE(rbuf != NULL);
6462  M0_PRE(delta != NULL);
6463  M0_PRE(maxsize > 0);
6464  M0_ENTRY("io_req_fop %p net_domain %p delta_size %d",
6465  irfop, dom, *delta);
6466 
6467  req = bob_of(irfop->irf_tioreq->ti_nwxfer, struct io_request,
6468  ir_nwxfer, &ioreq_bobtype);
6469 
6470  if (M0_IN(ioreq_sm_state(req), (IRS_READING, IRS_WRITING))) {
6471  ivv = &irfop->irf_tioreq->ti_ivv;
6472  } else {
6473  ivv = &irfop->irf_tioreq->ti_dgvec->dr_ivec_varr;
6474  }
6475 
6477  V_SEG_NR(ivv));
6478  *delta += io_desc_size(dom);
6479 
6480  if (m0_io_fop_size_get(&irfop->irf_iofop.if_fop) + *delta < maxsize) {
6481 
6483  dom, NULL, rbuf);
6484  if (rc != 0) {
6485  *delta -= io_desc_size(dom);
6486  return M0_ERR_INFO(rc, "[%p] Failed to add "
6487  "rpc_bulk_buffer", req);
6488  }
6489  } else {
6490  rc = M0_ERR(-ENOSPC);
6491  *delta -= io_desc_size(dom);
6492  }
6493 
6494  M0_POST(ergo(rc == 0, *rbuf != NULL));
6495  return M0_RC(rc);
6496 }
6497 
6498 static void cc_fop_release(struct m0_ref *ref)
6499 {
6500  struct m0_fop *fop;
6501 
6502  M0_ENTRY();
6503  fop = container_of(ref, struct m0_fop, f_ref);
6504  m0_fop_fini(fop);
6505  M0_LEAVE();
6506 }
6507 
6508 static int target_cob_create_fop_prepare(struct target_ioreq *ti)
6509 {
6510  struct m0_fop *fop;
6511  struct m0_fop_cob_common *common;
6512  struct io_request *req;
6513  int rc;
6514 
6516  fop = &ti->ti_cc_fop.crf_fop;
6519  if (rc != 0) {
6520  m0_fop_fini(fop);
6521  goto out;
6522  }
6523  ti->ti_cc_fop_inited = true;
6527  fop->f_item.ri_nr_sent_max = M0T1FS_RPC_MAX_RETRIES;
6528  fop->f_item.ri_resend_interval = M0T1FS_RPC_RESEND_INTERVAL;
6529  req = bob_of(ti->ti_nwxfer, struct io_request, ir_nwxfer,
6530  &ioreq_bobtype);
6531  common = m0_cobfop_common_get(fop);
6532  common->c_gobfid = *file_to_fid(req->ir_file);
6533  common->c_cobfid = ti->ti_fid;
6534  common->c_pver = m0t1fs_file_to_m0inode(req->ir_file)->ci_pver;
6535  common->c_cob_type = M0_COB_IO;
6536  common->c_cob_idx = m0_fid_cob_device_id(&ti->ti_fid);
6537  common->c_flags |= M0_IO_FLAG_CROW;
6538  common->c_body.b_pver = m0t1fs_file_to_m0inode(req->ir_file)->ci_pver;
6539  common->c_body.b_nlink = 1;
6540  common->c_body.b_valid |= M0_COB_PVER;
6541  common->c_body.b_valid |= M0_COB_NLINK;
6542  common->c_body.b_valid |= M0_COB_LID;
6543  common->c_body.b_lid = m0t1fs_file_to_m0inode(req->ir_file)->ci_layout_id;
6545 
6546 out:
6547  return M0_RC(rc);
6548 }
6549 
6550 static int target_ioreq_iofops_prepare(struct target_ioreq *ti,
6551  enum page_attr filter)
6552 {
6553  int rc = 0;
6554  uint32_t seg = 0;
6555  /* Number of segments in one m0_rpc_bulk_buf structure. */
6556  uint32_t bbsegs;
6557  uint32_t maxsize;
6558  uint32_t delta;
6559  enum page_attr rw;
6560  struct m0_varr *pattr;
6561  struct m0_indexvec_varr *bvec;
6562  struct io_request *req;
6563  struct m0_indexvec_varr *ivv = NULL;
6564  struct io_req_fop *irfop;
6565  struct m0_net_domain *ndom;
6566  struct m0_rpc_bulk_buf *rbuf;
6567  struct m0_io_fop *iofop;
6568  struct m0_fop_cob_rw *rw_fop;
6569  struct nw_xfer_request *xfer;
6570 
6572  M0_PRE(M0_IN(filter, (PA_DATA, PA_PARITY)));
6573 
6574  xfer = ti->ti_nwxfer;
6575  req = bob_of(xfer, struct io_request, ir_nwxfer, &ioreq_bobtype);
6576 
6577  M0_ASSERT(M0_IN(ioreq_sm_state(req),
6580 
6581  M0_ENTRY("[%p] prepare io fops for target ioreq %p filter 0x%x, tfid "
6582  FID_F, req, ti, filter, FID_P(&ti->ti_fid));
6583 
6585  if (rc != 0 && rc != -ECANCELED)
6586  return M0_ERR(rc);
6587 
6588  if (M0_IN(ioreq_sm_state(req), (IRS_READING, IRS_WRITING))) {
6589  ivv = &ti->ti_ivv;
6590  bvec = &ti->ti_bufvec;
6591  pattr = &ti->ti_pageattrs;
6592  } else {
6593  if (ti->ti_dgvec == NULL) {
6594  return M0_RC(0);
6595  }
6596  ivv = &ti->ti_dgvec->dr_ivec_varr;
6597  bvec = &ti->ti_dgvec->dr_bufvec;
6598  pattr = &ti->ti_dgvec->dr_pageattrs;
6599  }
6600 
6605  PA_READ;
6607 
6608  while (seg < V_SEG_NR(ivv)) {
6609 
6610  delta = 0;
6611  bbsegs = 0;
6612 
6613  M0_LOG(M0_DEBUG, "[%p] seg=%u@%u pageattr=0x%x, filter=0x%x, "
6614  "rw=0x%x",
6615  req, seg, V_SEG_NR(ivv),
6616  PA(pattr, seg), filter, rw);
6617 
6618  if (!(PA(pattr, seg) & filter) || !(PA(pattr, seg) & rw)) {
6619  M0_LOG(M0_DEBUG, "[%p] skipping, pageattr = 0x%x, "
6620  "filter = 0x%x, rw = 0x%x",
6621  req, PA(pattr, seg), filter, rw);
6622  ++seg;
6623  continue;
6624  }
6625  M0_ALLOC_PTR(irfop);
6626  if (irfop == NULL) {
6627  rc = M0_ERR(-ENOMEM);
6628  goto err;
6629  }
6630  rc = io_req_fop_init(irfop, ti, filter);
6631  if (rc != 0) {
6632  m0_free(irfop);
6633  goto err;
6634  }
6635  ++iommstats.a_io_req_fop_nr;
6636 
6637  iofop = &irfop->irf_iofop;
6638  rw_fop = io_rw_get(&iofop->if_fop);
6639 
6640  rc = bulk_buffer_add(irfop, ndom, &rbuf, &delta, maxsize);
6641  if (rc != 0) {
6642  io_req_fop_fini(irfop);
6643  m0_free(irfop);
6644  goto err;
6645  }
6646  delta += io_seg_size();
6647 
6648  /*
6649  * Add io segments and the io descriptor only if they fit within
6650  * the permitted size.
6651  */
6652  while (seg < V_SEG_NR(ivv) &&
6653  m0_io_fop_size_get(&iofop->if_fop) + delta < maxsize) {
6654 
6655  M0_LOG(M0_DEBUG, "[%p] adding: seg=%u@%u pa=0x%x, "
6656  "filter=0x%x, rw=0x%x", req, seg,
6657  V_SEG_NR(ivv),
6658  PA(pattr, seg), filter, rw);
6659 
6660  /*
6661  * Adds a page to rpc bulk buffer only if it passes
6662  * through the filter.
6663  */
6664  if ((PA(pattr, seg) & rw) && (PA(pattr, seg) & filter)) {
6665  delta += io_seg_size() + io_di_size(req);
6666 
6668  V_ADDR (bvec, seg),
6669  V_COUNT(ivv, seg),
6670  V_INDEX(ivv, seg),
6671  ndom);
6672 
6673  if (rc == -EMSGSIZE) {
6674 
6675  /*
6676  * Finalise the number of segments in the
6677  * current m0_rpc_bulk_buf structure.
6678  */
6679  rbuf->bb_nbuf->nb_buffer.ov_vec.v_nr =
6680  bbsegs;
6681  rbuf->bb_zerovec.z_bvec.ov_vec.v_nr =
6682  bbsegs;
6683  bbsegs = 0;
6684 
6685  delta -= io_seg_size() -
6686  io_di_size(req);
6687  rc = bulk_buffer_add(irfop, ndom,
6688  &rbuf, &delta, maxsize);
6689  if (rc == -ENOSPC)
6690  break;
6691  else if (rc != 0)
6692  goto fini_fop;
6693 
6694  /*
6695  * The current bulk buffer is full, so a new
6696  * bulk buffer has been added above; the same
6697  * segment is now retried against the new
6698  * buffer (see the sketch after this function).
6699  */
6700  continue;
6701  } else if (rc == 0)
6702  ++bbsegs;
6703  }
6704  ++seg;
6705  }
6706 
6707  if (m0_io_fop_byte_count(iofop) == 0) {
6708  irfop_fini(irfop);
6709  continue;
6710  }
6711 
6712  rbuf->bb_nbuf->nb_buffer.ov_vec.v_nr = bbsegs;
6713  rbuf->bb_zerovec.z_bvec.ov_vec.v_nr = bbsegs;
6714 
6715  rw_fop->crw_fid = ti->ti_fid;
6716  rw_fop->crw_index = ti->ti_obj;
6717  rw_fop->crw_pver =
6718  m0t1fs_file_to_m0inode(req->ir_file)->ci_pver;
6719  rw_fop->crw_lid = m0t1fs_file_to_m0inode(req->ir_file)->ci_layout_id;
6720 
6721  rc = m0_io_fop_prepare(&iofop->if_fop);
6722  if (rc != 0)
6723  goto fini_fop;
6724 
6725  if (m0_is_read_fop(&iofop->if_fop))
6728  &iofop->if_rbulk));
6729 
6730  m0_atomic64_inc(&xfer->nxr_iofop_nr);
6731  iofops_tlist_add(&ti->ti_iofops, irfop);
6732 
6733  M0_LOG(M0_DEBUG, "[%p] fop=%p bulk=%p (%s) @"FID_F
6734  " pending io fops = %llu, pending read bulks = %llu "
6735  "list_len=%d",
6736  req, &iofop->if_fop, &iofop->if_rbulk,
6737  m0_is_read_fop(&iofop->if_fop) ? "r" : "w",
6738  FID_P(&ti->ti_fid),
6739  m0_atomic64_get(&xfer->nxr_iofop_nr),
6741  (int)iofops_tlist_length(&ti->ti_iofops));
6742  }
6743 
6744  return M0_RC(0);
6745 fini_fop:
6746  irfop_fini(irfop);
6747 err:
6748  m0_tl_teardown(iofops, &ti->ti_iofops, irfop) {
6749  irfop_fini(irfop);
6750  }
6751 
6752  return M0_ERR_INFO(rc, "[%p] iofops_prepare failed", req);
6753 }
6754 
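The -EMSGSIZE branch above is a seal-and-continue pattern: when the current rpc bulk buffer cannot take another page, its vector length is fixed to the segments actually filled, bulk_buffer_add() obtains a fresh buffer, and the same segment is retried against it. A minimal standalone sketch of that control flow; the buffer capacity and the buf_add() helper are hypothetical, not Motr APIs:

#include <errno.h>
#include <stdio.h>

enum { BUF_CAP = 4 };                 /* hypothetical segments per bulk buffer */

struct buf { int v_nr; };             /* stands in for ov_vec.v_nr             */

/* Pretend to add one more segment; a full buffer reports -EMSGSIZE. */
static int buf_add(const struct buf *b, int filled)
{
        (void)b;
        return filled < BUF_CAP ? 0 : -EMSGSIZE;
}

int main(void)
{
        struct buf bufs[8] = {{0}};
        int        cur = 0, bbsegs = 0;

        for (int seg = 0; seg < 10; ) {
                if (buf_add(&bufs[cur], bbsegs) == -EMSGSIZE) {
                        bufs[cur].v_nr = bbsegs;  /* seal the current buffer  */
                        ++cur;                    /* start a new bulk buffer  */
                        bbsegs = 0;
                        continue;                 /* retry the same segment   */
                }
                ++bbsegs;                         /* segment accepted         */
                ++seg;
        }
        bufs[cur].v_nr = bbsegs;
        printf("segments: 10, buffers used: %d\n", cur + 1);
        return 0;
}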
6755 const struct inode_operations m0t1fs_reg_inode_operations = {
6756  .setattr = m0t1fs_setattr,
6757  .getattr = m0t1fs_getattr,
6758 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,9,0)
6759  .setxattr = m0t1fs_setxattr,
6760  .getxattr = m0t1fs_getxattr,
6761  .removexattr = m0t1fs_removexattr,
6762 #endif
6763  .listxattr = m0t1fs_listxattr,
6764 };
6765 
6766 
6767 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0)
6768 static ssize_t m0t1fs_direct_IO(struct kiocb *kcb,
6769  struct iov_iter *from)
6770 {
6771  struct m0_indexvec_varr *ivv;
6772  ssize_t retval;
6773  loff_t size;
6774  int seg;
6775  int rw;
6776 
6778  M0_ENTRY();
6779  rw = iov_iter_rw(from);
6780  M0_LOG(M0_DEBUG, "m0t1fs_direct_IO: rw=%s pos=%lld seg_nr=%lu "
6781  "addr=%p len=%lu", rw == READ ? "READ" : "WRITE",
6782  (long long)kcb->ki_pos, from->nr_segs, from->iov->iov_base,
6783  from->iov->iov_len);
6784 
6785  M0_PRE(M0_IN(rw, (READ, WRITE)));
6786 
6787  size = i_size_read(m0t1fs_file_to_inode(kcb->ki_filp));
6788  ivv = indexvec_create(from->nr_segs, from->iov, kcb->ki_pos);
6789  if (ivv == NULL)
6790  return M0_ERR(-ENOMEM);
6791  if (rw == READ) {
6792  /* Truncate the vector so the read does not extend beyond EOF (see the sketch following m0t1fs_direct_IO()). */
6793  for (seg = 0; seg < V_SEG_NR(ivv); ++seg)
6794  if (v_seg_endpos(ivv, seg) > size) {
6795  V_SEG_NR(ivv) = seg + 1;
6796  V_COUNT(ivv, seg) = size - V_INDEX(ivv, seg);
6797  break;
6798  }
6799  }
6800 
6801  retval = m0t1fs_aio(kcb, from->iov, ivv, rw == READ ? IRT_READ : IRT_WRITE);
6802 
6803  /*
6804  * m0t1fs_direct_IO() must process all requested data or return an error.
6805  * Otherwise the generic kernel code would fall back to unimplemented
6806  * callbacks (e.g. write_begin()) to continue the I/O as buffered I/O.
6807  */
6808  M0_ASSERT_INFO(retval < 0 || retval == indexvec_varr_count(ivv),
6809  "%" PRIi64 " != %" PRIi64, (int64_t)retval,
6810  indexvec_varr_count(ivv));
6811 
6812  m0_indexvec_varr_free(ivv);
6813  m0_free(ivv);
6814  M0_LEAVE();
6815  return retval;
6816 }
6817 
6818 
6819 #else
6820 static ssize_t m0t1fs_direct_IO(int rw,
6821  struct kiocb *kcb,
6822  const struct iovec *iov,
6823  loff_t pos,
6824  unsigned long seg_nr)
6825 {
6826  struct m0_indexvec_varr *ivv;
6827  ssize_t retval;
6828  loff_t size;
6829  int seg;
6830 
6832  M0_ENTRY();
6833  M0_LOG(M0_DEBUG, "m0t1fs_direct_IO: rw=%s pos=%lld seg_nr=%lu "
6834  "addr=%p len=%lu", rw == READ ? "READ" : "WRITE",
6835  (long long)pos, seg_nr, iov->iov_base, iov->iov_len);
6836 
6837  M0_PRE(M0_IN(rw, (READ, WRITE)));
6838 
6839  size = i_size_read(m0t1fs_file_to_inode(kcb->ki_filp));
6840  ivv = indexvec_create(seg_nr, iov, pos);
6841  if (ivv == NULL)
6842  return M0_ERR(-ENOMEM);
6843  if (rw == READ) {
6844  /* Truncate the vector so the read does not extend beyond EOF (see the sketch following m0t1fs_direct_IO()). */
6845  for (seg = 0; seg < V_SEG_NR(ivv); ++seg)
6846  if (v_seg_endpos(ivv, seg) > size) {
6847  V_SEG_NR(ivv) = seg + 1;
6848  V_COUNT(ivv, seg) = size - V_INDEX(ivv, seg);
6849  break;
6850  }
6851  }
6852 
6853  retval = m0t1fs_aio(kcb, iov, ivv, rw == READ ? IRT_READ : IRT_WRITE);
6854 
6855  /*
6856  * m0t1fs_direct_IO() must process all requested data or return an error.
6857  * Otherwise the generic kernel code would fall back to unimplemented
6858  * callbacks (e.g. write_begin()) to continue the I/O as buffered I/O.
6859  */
6860  M0_ASSERT_INFO(retval < 0 || retval == indexvec_varr_count(ivv),
6861  "%" PRIi64 " != %" PRIi64, (int64_t)retval,
6862  indexvec_varr_count(ivv));
6863 
6864  m0_indexvec_varr_free(ivv);
6865  m0_free(ivv);
6866  M0_LEAVE();
6867  return retval;
6868 }
6869 #endif
6870 
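Both m0t1fs_direct_IO() variants above clip the index vector at the current file size before issuing a read, so a request that straddles EOF is shortened rather than rejected, and the assertion afterwards verifies that the whole (possibly clipped) request was processed. A standalone sketch of the same truncation on a plain (index, count) segment array, where index + count plays the role of v_seg_endpos():

#include <stdint.h>
#include <stdio.h>

struct seg { uint64_t index, count; };

/* Keep segments up to the first one that crosses 'size' and clip its count. */
static void clip_at_eof(struct seg *v, uint32_t *nr, uint64_t size)
{
        for (uint32_t i = 0; i < *nr; ++i)
                if (v[i].index + v[i].count > size) {
                        *nr = i + 1;
                        v[i].count = size - v[i].index;
                        break;
                }
}

int main(void)
{
        /* File size 10 KiB, read request covering [8 KiB, 16 KiB). */
        struct seg v[] = { { 8 * 1024, 4 * 1024 }, { 12 * 1024, 4 * 1024 } };
        uint32_t   nr  = 2;

        clip_at_eof(v, &nr, 10 * 1024);
        printf("segments kept: %u, last count: %llu bytes\n",
               (unsigned)nr, (unsigned long long)v[nr - 1].count);  /* 1, 2048 */
        return 0;
}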
6871 const struct address_space_operations m0t1fs_aops = {
6872  .direct_IO = m0t1fs_direct_IO,
6873 };
6874 
6875 #undef M0_TRACE_SUBSYSTEM
M0_INTERNAL long m0t1fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
Definition: ioctl.c:36
ssize_t m0t1fs_listxattr(struct dentry *dentry, char *buffer, size_t size)
Definition: dir.c:538
static void m0_atomic64_inc(struct m0_atomic64 *a)
uint32_t b_nlink
Definition: md_fops.h:76
M0_INTERNAL int m0_rpc_post(struct m0_rpc_item *item)
Definition: rpc.c:63
static int io_request_init(struct io_request *req, struct file *file, const struct iovec *iov, struct m0_indexvec_varr *ivv, enum io_req_type rw)
Definition: file.c:4283
static ssize_t aio_read(struct kiocb *kcb, struct iov_iter *from)
Definition: file.c:5359
static int ioreq_file_lock(struct io_request *req)
Definition: file.c:3954
static int user_data_copy(struct pargrp_iomap *map, m0_bindex_t start, m0_bindex_t end, struct iov_iter *it, enum copy_direction dir, enum page_attr filter)
Definition: file.c:1294
uint32_t rit_opcode
Definition: item.h:474
uint32_t m0_fop_opcode(const struct m0_fop *fop)
Definition: fop.c:225
static uint32_t iomap_dgmode_recov_prepare(struct pargrp_iomap *map, uint8_t *failed)
Definition: file.c:3012
uint64_t crw_lid
Definition: io_fops.h:392
static size_t nr
Definition: dump.c:1505
M0_INTERNAL void m0_chan_wait(struct m0_clink *link)
Definition: chan.c:336
uint64_t c_flags
Definition: io_fops.h:475
enum sns_repair_state ir_sns_state
M0_INTERNAL bool m0_ivec_varr_cursor_move_to(struct m0_ivec_varr_cursor *cur, m0_bindex_t dest)
Definition: vec.c:1250
uint64_t ir_iomap_nr
m0_time_t ri_resend_interval
Definition: item.h:144
uint64_t rwr_count
Definition: io_fops.h:322
static int ioreq_dgmode_recover(struct io_request *req)
Definition: file.c:3570
#define M0_PRE(cond)
#define M0_ALLOC_ARR(arr, nr)
Definition: memory.h:84
M0_TL_DECLARE(rpcbulk, M0_INTERNAL, struct m0_rpc_bulk_buf)
#define V_INDEX(ivec, i)
Definition: file.c:395
M0_INTERNAL void m0_sm_fail(struct m0_sm *mach, int fail_state, int32_t rc)
Definition: sm.c:468
static const struct io_request_ops ioreq_oostore_ops
Definition: file.c:961
static struct m0_fid target_fid(const struct io_request *req, struct m0_pdclust_tgt_addr *tgt)
Definition: file.c:668
M0_INTERNAL m0_bcount_t m0_ext_length(const struct m0_ext *ext)
Definition: ext.c:42
M0_INTERNAL m0_bcount_t m0_io_fop_byte_count(struct m0_io_fop *iofop)
Definition: io_fops.c:1925
static m0_bindex_t seg_set(struct pargrp_iomap *map, uint32_t seg, struct m0_ivec_varr_cursor *cur, m0_bindex_t grpend)
Definition: file.c:2329
#define COUNT(ivec, i)
Definition: file.c:392
M0_INTERNAL void m0_mutex_unlock(struct m0_mutex *mutex)
Definition: mutex.c:66
static bool target_ioreq_invariant(struct target_ioreq *ti)
Definition: file.c:1174
static const struct io_request_ops ioreq_ops
Definition: file.c:947
M0_INTERNAL struct m0t1fs_sb * m0_fop_to_sb(struct m0_fop *fop)
Definition: file.c:5996
M0_INTERNAL int m0t1fs_inode_layout_init(struct m0t1fs_inode *ci)
Definition: inode.c:570
int m0t1fs_removexattr(struct dentry *dentry, const char *name)
Definition: dir.c:550
M0_INTERNAL struct m0_fop_cob_common * m0_cobfop_common_get(struct m0_fop *fop)
Definition: io_fops.c:990
#define m0_htable_for(name, var, htable)
Definition: hash.h:483
struct m0_fop crf_fop
static void parity_page_pos_get(struct pargrp_iomap *map, m0_bindex_t index, uint32_t *row, uint32_t *col)
Definition: file.c:744
m0_bindex_t e_end
Definition: ext.h:40
static uint32_t seg_nr
Definition: net.c:119
int const char const void size_t int flags
Definition: dir.c:328
struct m0_layout * li_l
Definition: layout.h:590
uint64_t sa_group
Definition: pdclust.h:241
static m0_bcount_t seg_endpos(const struct m0_indexvec *ivec, uint32_t i)
Definition: file.c:420
static const uint64_t k1
Definition: hash_fnc.c:34
uint32_t b_valid
Definition: md_fops.h:71
#define NULL
Definition: misc.h:38
M0_INTERNAL void m0_clink_init(struct m0_clink *link, m0_chan_cb_t cb)
Definition: chan.c:201
map
Definition: processor.c:112
int(* iro_parity_recalc)(struct io_request *req)
static int dgmode_rwvec_alloc_init(struct target_ioreq *ti)
Definition: file.c:3275
uint64_t pa_unit_size
Definition: pdclust.h:118
struct m0_atomic64 nxr_rdbulk_nr
struct m0_indexvec_varr pi_ivv
M0_INTERNAL int m0_rpc_bulk_store(struct m0_rpc_bulk *rbulk, const struct m0_rpc_conn *conn, struct m0_net_buf_desc_data *to_desc, const struct m0_net_buffer_callbacks *bulk_cb)
Definition: bulk.c:520
M0_INTERNAL void m0_clink_del_lock(struct m0_clink *link)
Definition: chan.c:293
struct m0_pool_version * l_pver
Definition: layout.h:261
static void io_req_fop_fini(struct io_req_fop *fop)
Definition: file.c:5003
static void pargrp_iomap_fini(struct pargrp_iomap *map)
Definition: file.c:1881
static struct buffer * cur(struct m0_addb2_mach *mach, m0_bcount_t space)
Definition: addb2.c:791
uint32_t crw_index
Definition: io_fops.h:386
struct m0_bufvec nb_buffer
Definition: net.h:1322
int(* iro_dgmode_write)(struct io_request *req, bool rmw)
Definition: idx_mock.c:52
m0_bindex_t * z_index
Definition: vec.h:516
struct m0_buf db_auxbuf
#define ergo(a, b)
Definition: misc.h:293
static int ioreq_iomaps_prepare(struct io_request *req)
Definition: file.c:3187
uint32_t rwr_repair_done
Definition: io_fops.h:331
Definition: storage.c:103
uint32_t ci_nr
Definition: vec.h:618
void(* sa_cb)(struct m0_sm_group *grp, struct m0_sm_ast *)
Definition: sm.h:506
static bool is_page_read(struct data_buf *dbuf)
Definition: file.c:565
static int target_cob_create_fop_prepare(struct target_ioreq *ti)
Definition: file.c:6508
#define M0_MEMBER_SIZE(type, member)
Definition: misc.h:62
Definition: sm.h:350
const m0_time_t M0_TIME_NEVER
Definition: time.c:108
void * b_addr
Definition: buf.h:39
M0_INTERNAL struct m0_pool_version * m0_pool_version_find(struct m0_pools_common *pc, const struct m0_fid *id)
Definition: pool.c:586
static struct io_request req
Definition: file.c:100
uint32_t ir_dgmap_nr
struct m0_file file
Definition: di.c:36
uint32_t pa_N
Definition: pdclust.h:104
static struct m0_sm_group * grp
Definition: bytecount.c:38
M0_INTERNAL void m0_fop_init(struct m0_fop *fop, struct m0_fop_type *fopt, void *data, void(*fop_release)(struct m0_ref *))
Definition: fop.c:78
M0_INTERNAL int m0_rpc_bulk_buf_databuf_add(struct m0_rpc_bulk_buf *rbuf, void *buf, m0_bcount_t count, m0_bindex_t index, struct m0_net_domain *netdom)
Definition: bulk.c:331
struct data_buf *** pi_paritybufs
struct m0_poolmach pv_mach
Definition: pool.h:133
static void data_buf_fini(struct data_buf *buf)
Definition: file.c:1157
static int nw_xfer_tioreq_map(struct nw_xfer_request *xfer, const struct m0_pdclust_src_addr *src, struct m0_pdclust_tgt_addr *tgt, struct target_ioreq **tio)
Definition: file.c:4504
#define M0_LOG(level,...)
Definition: trace.h:167
M0_LEAVE()
struct m0_sm_ast crf_ast
static void nw_xfer_req_complete(struct nw_xfer_request *xfer, bool rmw)
Definition: file.c:6250
M0_INTERNAL void m0_sm_ast_post(struct m0_sm_group *grp, struct m0_sm_ast *ast)
Definition: sm.c:135
static uint32_t layout_k(const struct m0_pdclust_layout *play)
Definition: file.c:520
static ssize_t file_aio_read(struct kiocb *kcb, struct iov_iter *from)
Definition: file.c:5528
static void m0_atomic64_sub(struct m0_atomic64 *a, int64_t num)
int(* nxo_tioreq_map)(struct nw_xfer_request *xfer, const struct m0_pdclust_src_addr *src, struct m0_pdclust_tgt_addr *tgt, struct target_ioreq **tio)
static struct m0t1fs_sb * file_to_sb(const struct file *file)
Definition: file.c:482
M0_INTERNAL const struct m0_fid * m0t1fs_inode_fid(const struct m0t1fs_inode *ci)
Definition: inode.c:61
struct m0_layout_instance pi_base
Definition: pdclust.h:173
uint32_t pa_K
Definition: pdclust.h:107
M0_INTERNAL int m0_sns_repair_spare_map(struct m0_poolmach *pm, const struct m0_fid *fid, struct m0_pdclust_layout *pl, struct m0_pdclust_instance *pi, uint64_t group, uint64_t unit, uint32_t *spare_slot_out, uint32_t *spare_slot_out_prev)
uint64_t(* do_out_shift)(const struct m0_file *file)
Definition: di.h:109
struct m0_vec ov_vec
Definition: vec.h:147
struct m0_chan rb_chan
Definition: bulk.h:258
static m0_bcount_t v_seg_endpos(struct m0_indexvec_varr *ivec, uint32_t i)
Definition: file.c:428
static ssize_t aio_write(struct kiocb *kcb, struct iov_iter *from)
Definition: file.c:5218
static const struct m0_bob_type ioreq_bobtype
Definition: file.c:340
static bool m0_is_po2(uint64_t val)
Definition: arith.h:153
struct m0_rpc_bulk if_rbulk
Definition: io_fops.h:175
M0_INTERNAL void m0_buf_init(struct m0_buf *buf, void *data, uint32_t nob)
Definition: buf.c:37
struct m0_sm ri_sm
Definition: item.h:181
void(* nxo_complete)(struct nw_xfer_request *xfer, bool rmw)
struct m0_bufvec data
Definition: di.c:40
const struct address_space_operations m0t1fs_aops
Definition: file.c:6871
static uint32_t io_seg_size(void)
Definition: file.c:6433
M0_INTERNAL m0_bindex_t m0_ivec_varr_cursor_conti(const struct m0_ivec_varr_cursor *cur, m0_bindex_t dest)
Definition: vec.c:1271
#define V_ADDR(bv, i)
Definition: file.c:396
uint64_t ta_obj
Definition: pdclust.h:256
M0_INTERNAL m0_bcount_t m0_ivec_varr_cursor_step(const struct m0_ivec_varr_cursor *cur)
Definition: vec.c:1224
static void buf_page_free(struct m0_buf *buf)
Definition: file.c:4812
int(* pi_dgmode_recover)(struct pargrp_iomap *map)
static void seg_idx_inc_round(struct pargrp_iomap *map, uint32_t seg, uint64_t sz)
Definition: file.c:2341
enum target_ioreq_type ti_req_type
struct m0_indexvec_varr ti_bufvec
#define PA(pa, i)
Definition: file.c:400
static int sum
Definition: rwlock.c:53
struct m0_net_domain * ntm_dom
Definition: net.h:853
int32_t ri_error
Definition: item.h:161
struct m0_net_buf_desc_data * id_descs
Definition: io_fops.h:311
void * m0_fop_data(const struct m0_fop *fop)
Definition: fop.c:219
uint32_t nbd_len
uint32_t c_cob_type
Definition: io_fops.h:472
static struct m0_be_emap_cursor it
Definition: extmap.c:46
#define m0_varr_endfor
Definition: varr.h:264
M0_HT_DESCR_DEFINE(tioreqht, "Hash of target_ioreq objects", static, struct target_ioreq, ti_link, ti_magic, M0_T1FS_TIOREQ_MAGIC, M0_T1FS_TLIST_HEAD_MAGIC, ti_fid.f_container, tioreqs_hash_func, tioreq_key_eq)
M0_INTERNAL void m0_file_lock(struct m0_rm_owner *owner, struct m0_rm_incoming *req)
Definition: file.c:522
static struct m0_bob_type iofop_bobtype
Definition: file.c:339
uint64_t m0_bindex_t
Definition: types.h:80
uint64_t ti_obj
struct m0_varr ti_pageattrs
#define M0_BITS(...)
Definition: misc.h:236
struct m0_fid c_cobfid
Definition: io_fops.h:463
uint64_t m0_bcount_t
Definition: types.h:77
Definition: sm.h:504
static void io_rpc_item_cb(struct m0_rpc_item *item)
Definition: file.c:5938
M0_INTERNAL int m0_poolmach_device_state(struct m0_poolmach *pm, uint32_t device_index, enum m0_pool_nd_state *state_out)
Definition: pool_machine.c:816
M0_INTERNAL int m0_parity_math_recover(struct m0_parity_math *math, struct m0_buf *data, struct m0_buf *parity, struct m0_buf *fails, enum m0_parity_linsys_algo algo)
Definition: parity_math.c:383
int m0t1fs_flush(struct file *file, fl_owner_t id)
Definition: file.c:5554
#define PAGE_SIZE
Definition: lnet_ut.c:277
static int void * buf
Definition: dir.c:1019
static uint64_t round_up(uint64_t val, uint64_t size)
Definition: file.c:711
#define container_of(ptr, type, member)
Definition: misc.h:33
struct m0_rm_credit rin_want
Definition: rm.h:1450
static struct m0_rpc_session session
Definition: formation2.c:38
#define M0_SET0(obj)
Definition: misc.h:64
M0_INTERNAL void m0_mutex_lock(struct m0_mutex *mutex)
Definition: mutex.c:49
static void ioreq_sm_state_set(struct io_request *req, int state)
Definition: file.c:1039
M0_ADDB2_ADD(M0_AVI_FS_CREATE, new_fid.f_container, new_fid.f_key, mode, rc)
static int pargrp_iomap_parity_verify(struct pargrp_iomap *map)
Definition: file.c:1409
m0_bcount_t nbe_length
Definition: net.h:1226
int(* iro_parity_verify)(struct io_request *req)
M0_INTERNAL int m0_parity_math_diff(struct m0_parity_math *math, struct m0_buf *old, struct m0_buf *new, struct m0_buf *parity, uint32_t index)
Definition: parity_math.c:371
static int io_req_fop_init(struct io_req_fop *fop, struct target_ioreq *ti, enum page_attr pattr)
Definition: file.c:4955
struct m0_net_buffer * nbe_buffer
Definition: net.h:1194
M0_INTERNAL int m0_fid_cmp(const struct m0_fid *fid0, const struct m0_fid *fid1)
Definition: fid.c:170
struct m0t1fs_sb * csb
Definition: dir.c:330
struct m0_sm_ast irf_ast
struct m0_fid crw_pver
Definition: io_fops.h:389
static int io_req_fop_dgmode_read(struct io_req_fop *irfop)
Definition: file.c:6358
uint64_t bt_magix
Definition: bob.h:77
#define M0_SWAP(v0, v1)
Definition: arith.h:207
static struct m0_rpc_item * item
Definition: item.c:56
struct m0_pdclust_attr pl_attr
Definition: pdclust.h:150
static void databufs_set_dgw_mode(struct pargrp_iomap *iomap, struct m0_ext *ext)
Definition: file.c:3351
struct target_ioreq * irf_tioreq
const char * bt_name
Definition: bob.h:73
static struct inode * iomap_to_inode(const struct pargrp_iomap *map)
Definition: file.c:467
Definition: sock.c:887
static m0_bcount_t count
Definition: xcode.c:167
M0_INTERNAL uint64_t m0_round_up(uint64_t val, uint64_t size)
Definition: misc.c:181
static ssize_t file_aio_write(struct kiocb *kcb, struct iov_iter *from)
Definition: file.c:5332
static int bulk_buffer_add(struct io_req_fop *irfop, struct m0_net_domain *dom, struct m0_rpc_bulk_buf **rbuf, uint32_t *delta, uint32_t maxsize)
Definition: file.c:6448
struct inode * inode
Definition: dir.c:624
M0_INTERNAL bool m0_tlist_is_empty(const struct m0_tl_descr *d, const struct m0_tl *list)
Definition: tlist.c:96
M0_INTERNAL void m0_rpc_bulk_buflist_empty(struct m0_rpc_bulk *rbulk)
Definition: bulk.c:279
static int ioreq_sm_timedwait(struct io_request *req, uint64_t state)
Definition: file.c:3546
struct target_ioreq * dr_tioreq
enum m0_pool_nd_state ti_state
#define m0_tl_endfor
Definition: tlist.h:700
M0_INTERNAL int m0_sm_timedwait(struct m0_sm *mach, uint64_t states, m0_time_t deadline)
Definition: sm.c:387
struct m0_fid fid
Definition: di.c:46
M0_INTERNAL uint64_t m0_round_down(uint64_t val, uint64_t size)
Definition: misc.c:187
static int pargrp_iomap_dgmode_recover(struct pargrp_iomap *map)
Definition: file.c:3040
return M0_RC(rc)
M0_INTERNAL void m0_parity_math_calculate(struct m0_parity_math *math, struct m0_buf *data, struct m0_buf *parity)
Definition: parity_math.c:362
m0_bcount_t ir_copied_nr
#define M0_ASSERT_EX(cond)
static uint32_t unit_size
Definition: layout.c:53
#define M0_ENTRY(...)
Definition: trace.h:170
static uint32_t io_di_size(const struct io_request *req)
Definition: file.c:6438
M0_INTERNAL int m0_pagesize_get(void)
Definition: memory.c:233
Definition: buf.h:37
static struct m0_sm_ast ast[NR]
Definition: locality.c:44
uint64_t osr_xid
Definition: onwire.h:105
M0_INTERNAL void m0_sm_group_unlock(struct m0_sm_group *grp)
Definition: sm.c:96
M0_INTERNAL bool m0t1fs_inode_bob_check(struct m0t1fs_inode *bob)
int32_t m0_rpc_item_generic_reply_rc(const struct m0_rpc_item *reply)
Definition: fom_generic.c:81
Definition: vec.h:625
Definition: filter.py:1
static const struct m0_sm_conf io_sm_conf
Definition: file.c:1025
static struct m0t1fs_inode m0inode
Definition: fsync.c:87
static char * addr
Definition: node_k.c:37
void m0_fop_put0_lock(struct m0_fop *fop)
Definition: fop.c:212
int i
Definition: dir.c:1033
void m0_fop_rpc_machine_set(struct m0_fop *fop, struct m0_rpc_machine *mach)
Definition: fop.c:351
M0_INTERNAL m0_bcount_t m0_rpc_session_get_max_item_payload_size(const struct m0_rpc_session *session)
Definition: session.c:775
struct m0_sm rin_sm
Definition: rm.h:1436
m0_pdclust_unit_type
Definition: pdclust.h:89
enum page_attr db_flags
#define PRIu64
Definition: types.h:58
M0_INTERNAL int m0_indexvec_varr_alloc(struct m0_indexvec_varr *ivec, uint32_t len)
Definition: vec.c:1136
struct m0_rpc_machine * c_rpc_machine
Definition: conn.h:278
struct m0_fid crw_fid
Definition: io_fops.h:383
static m0_bindex_t gfile_offset(m0_bindex_t toff, const struct pargrp_iomap *map, const struct m0_pdclust_layout *play, const struct m0_pdclust_src_addr *src)
Definition: file.c:648
static uint32_t rows_nr(struct m0_pdclust_layout *play)
Definition: file.c:691
static void cc_fop_release(struct m0_ref *ref)
Definition: file.c:6498
int32_t nbe_status
Definition: net.h:1218
M0_INTERNAL bool m0_ext_is_valid(const struct m0_ext *ext)
Definition: ext.c:90
static int ioreq_dgmode_read(struct io_request *req, bool rmw)
Definition: file.c:3807
struct m0_rpc_machine * m0_fop_rpc_machine(const struct m0_fop *fop)
Definition: fop.c:359
#define M0_ERR_INFO(rc, fmt,...)
Definition: trace.h:215
int(* nxo_distribute)(struct nw_xfer_request *xfer)
uint64_t ti_parbytes
static int io_spare_map(const struct pargrp_iomap *map, const struct m0_pdclust_src_addr *src, uint32_t *spare_slot, uint32_t *spare_slot_prev, enum m0_pool_nd_state *eff_state)
Definition: file.c:2667
static void client_passive_recv(const struct m0_net_buffer_event *evt)
Definition: file.c:5606
return M0_ERR(-EOPNOTSUPP)
struct io_mem_stats iommstats
Definition: file.c:322
static void cc_rpc_item_cb(struct m0_rpc_item *item)
Definition: file.c:5835
static void target_ioreq_type_set(struct target_ioreq *ti, enum target_ioreq_type type)
Definition: file.c:584
void * sa_datum
Definition: sm.h:508
M0_INTERNAL void m0_rpc_machine_unlock(struct m0_rpc_machine *machine)
Definition: rpc_machine.c:558
M0_INTERNAL struct m0_fop_cob_rw_reply * io_rw_rep_get(struct m0_fop *fop)
Definition: io_fops.c:1056
struct m0_fop if_fop
Definition: io_fops.h:172
void * b_addr
Definition: buf.h:231
M0_INTERNAL void m0_rpc_bulk_default_cb(const struct m0_net_buffer_event *evt)
Definition: bulk.c:140
Definition: trace.h:482
static void mark_page_as_read_failed(struct pargrp_iomap *map, uint32_t row, uint32_t col, enum page_attr page_type)
Definition: file.c:2701
Definition: cnt.h:36
static m0_bindex_t data_page_offset_get(struct pargrp_iomap *map, uint32_t row, uint32_t col)
Definition: file.c:767
int(* pi_populate)(struct pargrp_iomap *iomap, struct m0_ivec_varr_cursor *cursor)
void m0_addb2_push(uint64_t id, int n, const uint64_t *value)
Definition: addb2.c:412
static void ioreq_no_unlock(struct io_request *req)
Definition: file.c:3987
M0_INTERNAL m0_bindex_t m0_ivec_varr_cursor_index(const struct m0_ivec_varr_cursor *cur)
Definition: vec.c:1237
M0_INTERNAL struct m0_file * m0_fop_to_file(struct m0_fop *fop)
Definition: file.c:5982
Definition: refs.h:34
#define m0_tl_teardown(name, head, obj)
Definition: tlist.h:708
int(* tio_cc_fops_prepare)(struct target_ioreq *ti)
struct m0_fid pv_id
Definition: pool.h:113
static bool io_request_invariant(struct io_request *req)
Definition: file.c:1057
struct m0_net_buffer * bb_nbuf
Definition: bulk.h:177
struct m0_parity_math pi_math
Definition: pdclust.h:223
M0_INTERNAL struct m0t1fs_inode * m0t1fs_inode_to_m0inode(const struct inode *inode)
Definition: file.c:462
static int ioreq_no_lock(struct io_request *req)
Definition: file.c:3982
enum pargrp_iomap_state pi_state
#define m0_free0(pptr)
Definition: memory.h:77
void(* tio_seg_add)(struct target_ioreq *ti, const struct m0_pdclust_src_addr *src, const struct m0_pdclust_tgt_addr *tgt, m0_bindex_t gob_offset, m0_bcount_t count, struct pargrp_iomap *map)
static uint64_t page_nr(m0_bcount_t size)
Definition: file.c:492
M0_INTERNAL size_t m0_io_fop_size_get(struct m0_fop *fop)
Definition: io_fops.c:1589
struct m0_net_transfer_mc rm_tm
Definition: rpc_machine.h:88
m0_bcount_t b_nob
Definition: buf.h:38
struct m0_io_descs crw_desc
Definition: io_fops.h:398
static uint64_t page_id(m0_bindex_t offset)
Definition: file.c:686
#define M0_ASSERT(cond)
const char * scf_name
Definition: sm.h:352
struct m0_buf db_buf
struct m0t1fs_mdop mo
Definition: dir.c:332
struct page * db_page
struct nw_xfer_request ir_nwxfer
M0_THREAD_ENTER
Definition: dir.c:336
struct m0_fid pver
Definition: idx_dix.c:74
struct m0_rpc_item_header2 ri_header
Definition: item.h:193
void m0_sm_state_set(struct m0_sm *mach, int state)
Definition: sm.c:478
struct m0_rpc_machine * m0_fop_session_machine(const struct m0_rpc_session *s)
Definition: fop.c:452
uint32_t c_cob_idx
Definition: io_fops.h:469
m0_pool_nd_state
Definition: pool_machine.h:57
M0_INTERNAL struct m0t1fs_inode * m0t1fs_file_to_m0inode(const struct file *file)
Definition: file.c:444
static void data_buf_dealloc_fini(struct data_buf *buf)
Definition: file.c:4822
static struct m0_pdclust_instance * pdlayout_instance(const struct m0_layout_instance *li)
Definition: file.c:504
const struct nw_xfer_ops * nxr_ops
M0_INTERNAL bool m0_is_io_fop_rep(const struct m0_fop *fop)
Definition: io_fops.c:945
uint64_t ta_frame
Definition: pdclust.h:254
static struct m0_fop reply_fop
Definition: fsync.c:64
struct m0_sm ir_sm
#define M0_ADDB2_OBJ(obj)
Definition: addb2.h:276
#define m0_htable_forall(name, var, htable,...)
Definition: hash.h:465
M0_INTERNAL int m0t1fs_ref_get_lock(struct m0t1fs_sb *csb)
Definition: super.c:722
static ssize_t file_dio_write(struct kiocb *kcb, struct iov_iter *from)
Definition: file.c:5171
struct m0t1fs_inode * ci
Definition: dir.c:622
static void ioreq_file_unlock(struct io_request *req)
Definition: file.c:3975
#define bob_of(ptr, type, field, bt)
Definition: bob.h:140
static void m0_atomic64_dec(struct m0_atomic64 *a)
M0_INTERNAL struct m0_poolmach * m0t1fs_file_to_poolmach(const struct file *file)
Definition: file.c:457
struct m0_atomic64 nxr_ccfop_nr
static int unit_state(const struct m0_pdclust_src_addr *src, const struct io_request *req, enum m0_pool_nd_state *state)
Definition: file.c:2645
static struct m0_bufvec bvec
Definition: xcode.c:169
static void dgmode_rwvec_dealloc_fini(struct dgmode_rwvec *dg)
Definition: file.c:3335
int m0t1fs_setxattr(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) int m0t1fs_setxattr(struct dentry *dentry
M0_INTERNAL int m0_varr_init(struct m0_varr *arr, uint64_t nr, size_t size, size_t bufsize)
Definition: varr.c:114
int32_t rin_rc
Definition: rm.h:1446
static int ioreq_parity_recalc(struct io_request *req)
Definition: file.c:1649
int m0t1fs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
Definition: fsync.c:354
static struct m0_stob_domain * dom
Definition: storage.c:38
struct m0_varr dr_pageattrs
void(* iro_iomaps_destroy)(struct io_request *req)
uint64_t pi_grpid
void * nb_app_private
Definition: net.h:1477
uint64_t b_lid
Definition: md_fops.h:85
M0_INTERNAL struct m0_pdclust_layout * m0_layout_to_pdl(const struct m0_layout *l)
Definition: pdclust.c:382
struct m0_fop * m0_fop_get(struct m0_fop *fop)
Definition: fop.c:161
const struct m0_rpc_item_type * ri_type
Definition: item.h:200
M0_INTERNAL ssize_t m0t1fs_aio(struct kiocb *kcb, const struct iovec *iov, struct m0_indexvec_varr *ivv, enum io_req_type rw)
Definition: file.c:5056
static uint64_t layout_unit_size(const struct m0_pdclust_layout *play)
Definition: file.c:525
struct m0_fid c_gobfid
Definition: io_fops.h:458
struct m0_rpc_item * ri_reply
Definition: item.h:163
void * m0_alloc(size_t size)
Definition: memory.c:126
struct m0_semaphore m0t1fs_cpus_sem
Definition: m0t1fs.c:44
struct m0_fop_mod_rep rwr_mod_rep
Definition: io_fops.h:337
struct m0_sm_group * sm_grp
Definition: sm.h:321
M0_INTERNAL uint32_t m0_fid_cob_device_id(const struct m0_fid *cob_fid)
Definition: fid_convert.c:81
M0_INTERNAL void m0_mutex_init(struct m0_mutex *mutex)
Definition: mutex.c:35
struct m0_fid b_pver
Definition: md_fops.h:88
uint64_t f_container
Definition: fid.h:39
enum pargrp_iomap_rmwtype pi_rtype
uint64_t ri_nr_sent_max
Definition: item.h:146
#define M0_POST(cond)
struct m0_0vec bb_zerovec
Definition: bulk.h:179
Definition: xcode.h:73
target_ioreq_type
static bool nw_xfer_request_invariant(const struct nw_xfer_request *xfer)
Definition: file.c:1090
M0_TL_DEFINE(iofops, static, struct io_req_fop)
int(* pi_dgmode_postprocess)(struct pargrp_iomap *map)
static void ioreq_failed_fini(struct io_request *req, int rc)
Definition: file.c:5042
static struct m0_sm_group * file_to_smgroup(const struct file *file)
Definition: file.c:487
M0_INTERNAL int m0t1fs_setattr(struct dentry *dentry, struct iattr *attr)
Definition: dir.c:1586
struct m0_varr iv_count
Definition: vec.h:708
uint32_t v_nr
Definition: vec.h:51
static int nw_xfer_req_dispatch(struct nw_xfer_request *xfer)
Definition: file.c:6139
static int pargrp_iomap_select_ro_rr(struct pargrp_iomap *map, m0_bcount_t data_pages_nr, m0_bcount_t parity_pages_nr)
Definition: file.c:2471
m0_net_buffer_cb_proc_t nbc_cb[M0_NET_QT_NR]
Definition: net.h:1272
static uint64_t tolerance_of_level(struct io_request *req, uint64_t lv)
Definition: file.c:3597
static bool pargrp_iomap_spans_seg(struct pargrp_iomap *map, m0_bindex_t index, m0_bcount_t count)
Definition: file.c:1931
static m0_bindex_t offset
Definition: dump.c:173
static void pargrp_src_addr(m0_bindex_t index, const struct io_request *req, const struct target_ioreq *tio_req, struct m0_pdclust_src_addr *src)
Definition: file.c:621
M0_INTERNAL void m0_fd_bwd_map(struct m0_pdclust_instance *pi, const struct m0_pdclust_tgt_addr *tgt, struct m0_pdclust_src_addr *src)
Definition: fd.c:959
static int pargrp_iomap_paritybufs_alloc(struct pargrp_iomap *map)
Definition: file.c:2275
static const struct m0_bob_type nwxfer_bobtype
Definition: file.c:342
int(* pi_parity_verify)(struct pargrp_iomap *map)
struct m0_indexvec_varr dr_bufvec
struct m0_htable nxr_tioreqs_hash
M0_INTERNAL int m0_fop_data_alloc(struct m0_fop *fop)
Definition: fop.c:70
static void target_ioreq_fini(struct target_ioreq *ti)
Definition: file.c:4692
M0_INTERNAL void m0_fop_fini(struct m0_fop *fop)
Definition: fop.c:135
struct m0_rpc_session * ti_session
struct m0_indexvec_varr ir_ivv
static bool pargrp_iomap_invariant(struct pargrp_iomap *map)
Definition: file.c:1185
static struct m0_clink clink[RDWR_REQUEST_MAX]
static uint64_t min64u(uint64_t a, uint64_t b)
Definition: arith.h:66
struct m0_tl ti_iofops
static int pargrp_iomap_populate_pi_ivec(struct pargrp_iomap *map, struct m0_ivec_varr_cursor *cursor, bool rmw)
Definition: file.c:2364
static int pargrp_iomap_dgmode_postprocess(struct pargrp_iomap *map)
Definition: file.c:2861
void(* iro_file_unlock)(struct io_request *req)
struct m0_indexvec_varr dr_ivec_varr
M0_INTERNAL int m0t1fs_cob_setattr(struct inode *inode, struct m0t1fs_mdop *mo)
Definition: dir.c:2477
static void page_pos_get(struct pargrp_iomap *map, m0_bindex_t index, uint32_t *row, uint32_t *col)
Definition: file.c:725
static struct fdmi_ctx ctx
Definition: main.c:80
M0_INTERNAL void m0_indexvec_varr_free(struct m0_indexvec_varr *ivec)
Definition: vec.c:1160
#define FID_P(f)
Definition: fid.h:77
static uint64_t data_size(const struct m0_pdclust_layout *play)
Definition: file.c:550
void m0_addb2_pop(uint64_t id)
Definition: addb2.c:440
static const struct m0_rpc_item_ops io_item_ops
Definition: file.c:810
M0_TL_DESCR_DECLARE(rpcbulk, M0_EXTERN)
const struct target_ioreq_ops * ti_ops
int(* iro_dgmode_recover)(struct io_request *req)
static const uint64_t k2
Definition: hash_fnc.c:35
static void irfop_fini(struct io_req_fop *irfop)
Definition: file.c:5028
static uint64_t round_down(uint64_t val, uint64_t size)
Definition: file.c:697
static int nw_xfer_io_distribute(struct nw_xfer_request *xfer)
Definition: file.c:3394
M0_INTERNAL const char * m0_rpc_item_opname(const struct m0_rpc_item *item)
Definition: item.c:1195
struct m0_bufvec z_bvec
Definition: vec.h:514
static uint32_t layout_n(const struct m0_pdclust_layout *play)
Definition: file.c:515
static int ioreq_parity_verify(struct io_request *req)
Definition: file.c:1611
const struct iovec * ir_iovec
static ssize_t m0t1fs_direct_IO(struct kiocb *kcb, struct iov_iter *from)
Definition: file.c:6768
copy_direction
static struct m0_pdclust_layout * pdlayout_get(const struct io_request *req)
Definition: file.c:510
static int64_t m0_atomic64_get(const struct m0_atomic64 *a)
static int pargrp_iomap_dgmode_process(struct pargrp_iomap *map, struct target_ioreq *tio, m0_bindex_t *index, uint32_t count)
Definition: file.c:2759
void(* rio_replied)(struct m0_rpc_item *item)
Definition: item.h:300
static int pargrp_iomap_parity_recalc(struct pargrp_iomap *map)
Definition: file.c:1502
#define m0_forall(var, nr,...)
Definition: misc.h:112
uint64_t sa_unit
Definition: pdclust.h:243
uint32_t sd_flags
Definition: sm.h:378
static int ioreq_dgmode_write(struct io_request *req, bool rmw)
Definition: file.c:3711
M0_INTERNAL int m0_rpc_session_validate(struct m0_rpc_session *session)
Definition: session.c:573
const struct m0_net_buffer_callbacks client_buf_bulk_cb
Definition: file.c:5672
struct m0_fop_type m0_fop_cob_readv_fopt
Definition: io_fops.c:71
M0_INTERNAL void m0_ivec_varr_cursor_init(struct m0_ivec_varr_cursor *cur, struct m0_indexvec_varr *ivec)
Definition: vec.c:1183
M0_INTERNAL size_t m0_rpc_bulk_buf_length(struct m0_rpc_bulk *rbulk)
Definition: bulk.c:550
static const struct m0_bob_type tioreq_bobtype
Definition: file.c:338
#define PRIu32
Definition: types.h:66
uint64_t ti_databytes
M0_INTERNAL size_t m0_rpc_bulk_store_del_unqueued(struct m0_rpc_bulk *rbulk)
Definition: bulk.c:190
static bool should_spare_be_mapped(struct io_request *req, enum m0_pool_nd_state dev_state)
Definition: file.c:4491
static int pargrp_iomap_readold_auxbuf_alloc(struct pargrp_iomap *map)
Definition: file.c:2099
struct m0_pdclust_tgt_addr tgt
Definition: fd.c:110
static uint8_t fail[DATA_UNIT_COUNT_MAX+PARITY_UNIT_COUNT_MAX]
static const struct m0_rpc_item_ops cc_item_ops
Definition: file.c:814
static struct m0_parity_math * parity_math(struct io_request *req)
Definition: file.c:555
static void paritybufs_set_dgw_mode(struct pargrp_iomap *iomap, struct m0_pdclust_layout *play, uint64_t unit)
Definition: file.c:3373
M0_INTERNAL int64_t m0_ref_read(const struct m0_ref *ref)
Definition: refs.c:44
M0_BOB_DEFINE(static, &tioreq_bobtype, target_ioreq)
static void cc_bottom_half(struct m0_sm_group *grp, struct m0_sm_ast *ast)
Definition: file.c:5860
static const struct m0_bob_type pgiomap_bobtype
Definition: file.c:341
static void io_bottom_half(struct m0_sm_group *grp, struct m0_sm_ast *ast)
Definition: file.c:6009
M0_INTERNAL void m0_varr_fini(struct m0_varr *arr)
Definition: varr.c:486
M0_INTERNAL void m0_file_unlock(struct m0_rm_incoming *req)
Definition: file.c:540
#define M0_CNT_INC(cnt)
Definition: arith.h:226
static int ioreq_iosm_handle(struct io_request *req)
Definition: file.c:4002
static int pargrp_iomap_seg_process(struct pargrp_iomap *map, uint64_t seg, bool rmw)
Definition: file.c:1965
#define indexvec_varr_dump(ivec)
Definition: file.c:411
static void nw_xfer_request_fini(struct nw_xfer_request *xfer)
Definition: file.c:1234
#define M0_FI_ENABLED(tag)
Definition: finject.h:231
struct m0_ref f_ref
Definition: fop.h:81
Definition: ext.h:37
static int ioreq_iomaps_parity_groups_cal(struct io_request *req)
Definition: file.c:3130
Definition: fid.h:38
static bool tioreq_key_eq(const void *key1, const void *key2)
Definition: file.c:600
uint64_t f_key
Definition: fid.h:40
m0_bindex_t e_start
Definition: ext.h:39
M0_INTERNAL void m0_sm_init(struct m0_sm *mach, const struct m0_sm_conf *conf, uint32_t state, struct m0_sm_group *grp)
Definition: sm.c:313
#define M0_IS0(obj)
Definition: misc.h:70
M0_INTERNAL void m0_rpc_machine_lock(struct m0_rpc_machine *machine)
Definition: rpc_machine.c:551
static struct m0_rpc_session * target_session(struct io_request *req, struct m0_fid tfid)
Definition: file.c:679
struct m0_fid ti_fid
static uint64_t indexvec_varr_count(struct m0_indexvec_varr *varr)
Definition: file.c:535
static struct m0_layout_instance * layout_instance(const struct io_request *req)
Definition: file.c:498
static bool io_req_fop_invariant(const struct io_req_fop *fop)
Definition: file.c:1165
static int pargrp_iomap_init(struct pargrp_iomap *map, struct io_request *req, uint64_t grpid)
Definition: file.c:1795
#define M0_ALLOC_PTR(ptr)
Definition: memory.h:86
struct cc_req_fop ti_cc_fop
M0_INTERNAL void m0_clink_add(struct m0_chan *chan, struct m0_clink *link)
Definition: chan.c:228
static int ioreq_user_data_copy(struct io_request *req, enum copy_direction dir, enum page_attr filter)
Definition: file.c:1700
static bool pargrp_iomap_invariant_nr(struct io_request *req)
Definition: file.c:1201
const struct m0_rpc_item_ops * ri_ops
Definition: item.h:149
static void ioreq_sm_state_set_nolock(struct io_request *req, int state)
Definition: file.c:1049
#define PRIi64
Definition: types.h:59
int(* nxo_dispatch)(struct nw_xfer_request *xfer)
int(* iro_dgmode_read)(struct io_request *req, bool rmw)
struct m0_mutex nxr_lock
M0_INTERNAL m0_bcount_t m0_net_domain_get_max_buffer_desc_size(struct m0_net_domain *dom)
const struct m0_uint128 m0_rm_m0t1fs_group
Definition: inode.c:59
enum nw_xfer_state nxr_state
struct m0_indexvec_varr ti_ivv
static uint64_t pargrp_iomap_auxbuf_alloc(struct pargrp_iomap *map, uint32_t row, uint32_t col)
Definition: file.c:2076
struct m0_rpc_session * ri_session
Definition: item.h:147
static int target_ioreq_init(struct target_ioreq *ti, struct nw_xfer_request *xfer, const struct m0_fid *cobfid, uint64_t ta_obj, struct m0_rpc_session *session, uint64_t size)
Definition: file.c:4615
static bool data_buf_invariant_nr(const struct pargrp_iomap *map)
Definition: file.c:1118
static void io_req_fop_release(struct m0_ref *ref)
Definition: file.c:5731
static void target_ioreq_seg_add(struct target_ioreq *ti, const struct m0_pdclust_src_addr *src, const struct m0_pdclust_tgt_addr *tgt, m0_bindex_t gob_offset, m0_bcount_t count, struct pargrp_iomap *map)
Definition: file.c:4841
struct m0_fop_type m0_fop_cob_create_fopt
Definition: io_fops.c:75
struct m0_rpc_item * m0_fop_to_rpc_item(const struct m0_fop *fop)
Definition: fop.c:337
static void ioreq_sm_failed(struct io_request *req, int rc)
Definition: file.c:1031
static int pargrp_iomap_populate(struct pargrp_iomap *map, struct m0_ivec_varr_cursor *cursor)
Definition: file.c:2506
M0_INTERNAL enum m0_pdclust_unit_type m0_pdclust_unit_classify(const struct m0_pdclust_layout *pl, int unit)
Definition: pdclust.c:425
static void nw_xfer_request_init(struct nw_xfer_request *xfer)
Definition: file.c:1207
M0_TL_DESCR_DEFINE(iofops, "List of IO fops", static, struct io_req_fop, irf_link, irf_magic, M0_T1FS_IOFOP_MAGIC, M0_T1FS_TIOREQ_MAGIC)
m0_bcount_t size
Definition: di.c:39
static uint64_t parity_units_page_nr(const struct m0_pdclust_layout *play)
Definition: file.c:530
page_attr
#define _0C(exp)
Definition: assert.h:311
static int start(struct m0_fom *fom)
Definition: trigger_fom.c:321
#define V_COUNT(ivec, i)
Definition: file.c:397
struct data_buf *** pi_databufs
M0_INTERNAL void m0_mutex_fini(struct m0_mutex *mutex)
Definition: mutex.c:42
M0_INTERNAL void m0_clink_fini(struct m0_clink *link)
Definition: chan.c:208
m0_bcount_t rb_bytes
Definition: bulk.h:260
static int nw_xfer_tioreq_get(struct nw_xfer_request *xfer, const struct m0_fid *fid, uint64_t ta_obj, struct m0_rpc_session *session, uint64_t size, struct target_ioreq **out)
Definition: file.c:4738
void m0_fop_put_lock(struct m0_fop *fop)
Definition: fop.c:198
M0_INTERNAL bool m0_rpc_bulk_is_empty(struct m0_rpc_bulk *rbulk)
Definition: bulk.c:539
static void ioreq_pgiomap_find(struct io_request *req, uint64_t grpid, uint64_t *cursor, struct pargrp_iomap **out)
Definition: file.c:1675
const struct io_request_ops * ir_ops
struct m0_atomic64 nxr_iofop_nr
static struct m0_fop * fop
Definition: item.c:57
#define INDEX(ivec, i)
Definition: file.c:391
static void user_page_unmap(struct data_buf *dbuf, bool set_dirty)
Definition: file.c:1284
M0_INTERNAL int32_t m0_net_domain_get_max_buffer_segments(struct m0_net_domain *dom)
static void indexvec_sort(struct m0_indexvec_varr *ivec)
Definition: file.c:1768
struct m0_io_fop irf_iofop
M0_INTERNAL void m0_sm_group_lock(struct m0_sm_group *grp)
Definition: sm.c:83
struct pargrp_iomap ** ir_iomaps
struct m0_fop * m0_rpc_item_to_fop(const struct m0_rpc_item *item)
Definition: fop.c:345
static int target_ioreq_iofops_prepare(struct target_ioreq *ti, enum page_attr filter)
Definition: file.c:6550
static const struct m0_bob_type dtbuf_bobtype
Definition: file.c:343
int(* pi_parity_recalc)(struct pargrp_iomap *map)
M0_INTERNAL void m0_rm_owner_unlock(struct m0_rm_owner *owner)
Definition: rm.c:603
M0_INTERNAL int user_page_map(struct data_buf *dbuf, unsigned long user_addr)
Definition: file.c:1247
static uint64_t group_id(m0_bindex_t index, m0_bcount_t dtsize)
Definition: file.c:560
M0_INTERNAL struct m0_rpc_session * m0t1fs_container_id_to_session(const struct m0_pool_version *pver, uint64_t container_id)
Definition: super.c:166
static struct m0_be_seg * seg
Definition: btree.c:40
int(* iro_iomaps_prepare)(struct io_request *req)
M0_INTERNAL void iov_iter_advance(struct iov_iter *i, size_t bytes)
static uint32_t ioreq_sm_state(const struct io_request *req)
Definition: file.c:975
struct m0_fid c_pver
Definition: io_fops.h:466
#define M0_ASSERT_INFO(cond, fmt,...)
#define m0_varr_for(arr, type, idx, obj)
Definition: varr.h:259
struct m0_tl rb_buflist
Definition: bulk.h:256
#define V_SEG_NR(ivec)
Definition: file.c:398
static void data_buf_init(struct data_buf *buf, void *addr, uint64_t flags)
Definition: file.c:1146
static uint64_t tioreqs_hash_func(const struct m0_htable *htable, const void *k)
Definition: file.c:593
M0_INTERNAL void m0_io_fop_fini(struct m0_io_fop *iofop)
Definition: io_fops.c:897
M0_INTERNAL int m0_io_fop_init(struct m0_io_fop *iofop, const struct m0_fid *gfid, struct m0_fop_type *ftype, void(*fop_release)(struct m0_ref *))
Definition: io_fops.c:865
static void device_state_reset(struct nw_xfer_request *xfer, bool rmw)
Definition: file.c:3990
static const struct pargrp_iomap_ops iomap_ops
Definition: file.c:876
static bool is_session_marked(struct io_request *req, struct m0_rpc_session *session)
Definition: file.c:3615
M0_INTERNAL void m0_ext_intersection(const struct m0_ext *e0, const struct m0_ext *e1, struct m0_ext *result)
Definition: ext.c:81
M0_INTERNAL struct m0_pdclust_instance * m0_layout_instance_to_pdi(const struct m0_layout_instance *li)
Definition: pdclust.c:400
struct inode * dir
Definition: dir.c:1028
M0_INTERNAL void m0_semaphore_down(struct m0_semaphore *semaphore)
Definition: semaphore.c:49
Definition: nucleus.c:42
struct nw_xfer_request * ti_nwxfer
struct m0_rm_incoming ir_in
io_req_state
#define out(...)
Definition: gen.c:41
M0_INTERNAL void m0_rpc_bulk_store_del(struct m0_rpc_bulk *rbulk)
Definition: bulk.c:215
Definition: file.h:81
M0_INTERNAL bool m0_is_read_fop(const struct m0_fop *fop)
Definition: io_fops.c:916
int type
Definition: dir.c:1031
static uint32_t target_ioreq_type_get(struct target_ioreq *ti)
Definition: file.c:579
static uint64_t pargrp_id_find(m0_bindex_t index, const struct io_request *req, const struct io_req_fop *ir_fop)
Definition: file.c:638
M0_INTERNAL void m0_rm_owner_lock(struct m0_rm_owner *owner)
Definition: rm.c:592
struct m0_fid gfid
Definition: dir.c:626
M0_INTERNAL void m0_semaphore_up(struct m0_semaphore *semaphore)
Definition: semaphore.c:65
struct m0_uint128 cr_group_id
Definition: rm.h:506
static void seg_align(struct pargrp_iomap *map, uint32_t seg, m0_bindex_t end, uint64_t sz)
Definition: file.c:2351
M0_INTERNAL struct m0_fop_cob_rw * io_rw_get(struct m0_fop *fop)
Definition: io_fops.c:1037
static bool should_req_sm_complete(struct io_request *req)
Definition: file.c:5923
static int32_t min32(int32_t a, int32_t b)
Definition: arith.h:36
struct target_ioreq * db_tioreq
M0_INTERNAL bool m0_fid_is_valid(const struct m0_fid *fid)
Definition: fid.c:96
static uint64_t iomap_page_nr(struct pargrp_iomap *map)
Definition: file.c:545
M0_INTERNAL void m0_fd_fwd_map(struct m0_pdclust_instance *pi, const struct m0_pdclust_src_addr *src, struct m0_pdclust_tgt_addr *tgt)
Definition: fd.c:838
static void ioreq_iomaps_destroy(struct io_request *req)
Definition: file.c:3255
M0_INTERNAL int m0_io_fop_prepare(struct m0_fop *fop)
Definition: io_fops.c:1513
struct m0_rpc_machine * ri_rmachine
Definition: item.h:160
int(* iro_iosm_handle)(struct io_request *req)
Definition: varr.h:121
#define M0_PRE_EX(cond)
static struct m0_dtm_oper_descr reply
Definition: transmit.c:94
static void m0_atomic64_add(struct m0_atomic64 *a, int64_t num)
M0_INTERNAL int m0_rpc_bulk_buf_add(struct m0_rpc_bulk *rbulk, uint32_t segs_nr, m0_bcount_t length, struct m0_net_domain *netdom, struct m0_net_buffer *nb, struct m0_rpc_bulk_buf **out)
Definition: bulk.c:291
const struct inode_operations m0t1fs_reg_inode_operations
Definition: file.c:6755
M0_INTERNAL struct m0_pool_version * m0t1fs_file_to_pver(const struct file *file)
Definition: file.c:449
uint64_t s_session_id
Definition: session.h:309
struct m0_fop_type m0_fop_cob_writev_fopt
Definition: io_fops.c:72
static const struct target_ioreq_ops tioreq_ops
Definition: file.c:907
#define m0_tl_for(name, head, obj)
Definition: tlist.h:695
void m0_free(void *data)
Definition: memory.c:146
static int device_check(struct io_request *req)
Definition: file.c:3641
#define m0_htable_endfor
Definition: hash.h:491
uint64_t * ir_failed_session
struct m0_rpc_item f_item
Definition: fop.h:84
struct m0_fop_cob c_body
Definition: io_fops.h:454
int(* iro_user_data_copy)(struct io_request *req, enum copy_direction dir, enum page_attr filter)
uint32_t sm_state
Definition: sm.h:307
static int iofop_async_submit(struct m0_io_fop *iofop, struct m0_rpc_session *session)
Definition: file.c:5681
static bool is_pver_dud(uint32_t fdev_nr, uint32_t dev_k, uint32_t fsvc_nr, uint32_t svc_k)
Definition: file.c:3699
struct m0_pdclust_src_addr src
Definition: fd.c:108
static bool data_buf_invariant(const struct data_buf *db)
Definition: file.c:1110
struct dgmode_rwvec * ti_dgvec
M0_INTERNAL void m0_bob_type_tlist_init(struct m0_bob_type *bt, const struct m0_tl_descr *td)
Definition: bob.c:41
struct file * ir_file
int32_t rc
Definition: trigger_fop.h:47
uint64_t h_bucket_nr
Definition: hash.h:178
static uint32_t io_desc_size(struct m0_net_domain *ndom)
Definition: file.c:6423
M0_INTERNAL struct m0t1fs_sb * m0inode_to_sb(const struct m0t1fs_inode *m0inode)
Definition: file.c:472
#define ARRAY_SIZE(a)
Definition: misc.h:45
const struct pargrp_iomap_ops * pi_ops
M0_INTERNAL void m0t1fs_ref_put_lock(struct m0t1fs_sb *csb)
Definition: super.c:749
#define M0_POST_EX(cond)
#define offsetof(typ, memb)
Definition: misc.h:29
const struct file_operations m0t1fs_reg_file_operations
Definition: file.c:5586
M0_INTERNAL void m0_poolmach_gob2cob(struct m0_poolmach *pm, const struct m0_fid *gfid, uint32_t idx, struct m0_fid *cob_fid)
struct m0_rpc_conn * s_conn
Definition: session.h:312
M0_HT_DEFINE(tioreqht, static, struct target_ioreq, uint64_t)
static void io_request_fini(struct io_request *req)
Definition: file.c:4388
struct m0_be_tx_remid fmr_remid
Definition: wire.h:80
static uint64_t target_offset(uint64_t frame, struct m0_pdclust_layout *play, m0_bindex_t gob_offset)
Definition: file.c:571
M0_INTERNAL int m0t1fs_size_update(struct dentry *dentry, uint64_t newsize)
Definition: dir.c:1525
static struct m0_sm_state_descr io_states[]
Definition: file.c:980
int(* tio_iofops_prepare)(struct target_ioreq *ti, enum page_attr filter)
static uint8_t parity[DATA_UNIT_COUNT_MAX][UNIT_BUFF_SIZE_MAX]
Definition: fop.h:80
void m0t1fs_fsync_record_update(struct m0_reqh_service_ctx *service, struct m0t1fs_sb *csb, struct m0t1fs_inode *inode, struct m0_be_tx_remid *btr)
Definition: fsync.c:397
static const struct m0_fid * file_to_fid(const struct file *file)
Definition: file.c:477
struct m0_mutex rb_mutex
Definition: bulk.h:251
enum page_attr irf_pattr
static uint64_t max64u(uint64_t a, uint64_t b)
Definition: arith.h:71
const struct m0_di_ops * fi_di_ops
Definition: file.h:92
static struct m0_addb2_frame_header last
Definition: storage.c:93
int m0t1fs_getxattr(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size)
ssize_t m0t1fs_getxattr(struct dentry *dentry
static int pargrp_iomap_readrest(struct pargrp_iomap *map)
Definition: file.c:2203
static struct target_ioreq * target_ioreq_locate(struct nw_xfer_request *xfer, const struct m0_fid *fid)
Definition: file.c:4722
#define FID_F
Definition: fid.h:75
static struct m0_indexvec_varr * indexvec_create(unsigned long seg_nr, const struct iovec *iov, loff_t pos)
Definition: file.c:5129
M0_INTERNAL bool m0_ivec_varr_cursor_move(struct m0_ivec_varr_cursor *cur, m0_bcount_t count)
Definition: vec.c:1198
enum io_req_type ir_type
static void m0_atomic64_set(struct m0_atomic64 *a, int64_t num)
struct m0_fop * rep_fop
Definition: dir.c:334
static uint64_t pargrp_iomap_fullpages_count(struct pargrp_iomap *map)
Definition: file.c:2051
static struct data_buf * data_buf_alloc_init(enum page_attr pattr)
Definition: file.c:4785
int(* iro_file_lock)(struct io_request *req)
M0_INTERNAL struct inode * m0t1fs_file_to_inode(const struct file *file)
Definition: file.c:435
static const struct nw_xfer_ops xfer_ops
Definition: file.c:837
static int pargrp_iomap_pages_mark_as_failed(struct pargrp_iomap *map, enum m0_pdclust_unit_type type)
Definition: file.c:2583
#define m0_tl_forall(name, var, head,...)
Definition: tlist.h:735
static int pargrp_iomap_databuf_alloc(struct pargrp_iomap *map, uint32_t row, uint32_t col)
Definition: file.c:1951
M0_INTERNAL struct m0_reqh_service_ctx * m0_reqh_service_ctx_from_session(struct m0_rpc_session *session)
M0_INTERNAL void io_bob_tlists_init(void)
Definition: file.c:790
struct m0_indexvec_varr * vc_ivv
Definition: vec.h:718
M0_INTERNAL void m0_sm_fini(struct m0_sm *mach)
Definition: sm.c:331