| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2011 | 
|  | 3 | * Boaz Harrosh <bharrosh@panasas.com> | 
|  | 4 | * | 
|  | 5 | * Public Declarations of the ORE API | 
|  | 6 | * | 
|  | 7 | * This file is part of the ORE (Object Raid Engine) library. | 
|  | 8 | * | 
|  | 9 | * ORE is free software; you can redistribute it and/or modify | 
|  | 10 | * it under the terms of the GNU General Public License version 2 as published | 
|  | 11 | * by the Free Software Foundation. (GPL v2) | 
|  | 12 | * | 
|  | 13 | * ORE is distributed in the hope that it will be useful, | 
|  | 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 16 | * GNU General Public License for more details. | 
|  | 17 | * | 
|  | 18 | * You should have received a copy of the GNU General Public License | 
|  | 19 | * along with the ORE; if not, write to the Free Software | 
|  | 20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA | 
|  | 21 | */ | 
|  | 22 | #ifndef __ORE_H__ | 
|  | 23 | #define __ORE_H__ | 
|  | 24 |  | 
|  | 25 | #include <scsi/osd_initiator.h> | 
|  | 26 | #include <scsi/osd_attributes.h> | 
|  | 27 | #include <scsi/osd_sec.h> | 
|  | 28 | #include <linux/pnfs_osd_xdr.h> | 
| Paul Gortmaker | 187f188 | 2011-11-23 20:12:59 -0500 | [diff] [blame] | 29 | #include <linux/bug.h> | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 30 |  | 
|  | 31 | struct ore_comp { | 
|  | 32 | struct osd_obj_id	obj; | 
|  | 33 | u8			cred[OSD_CAP_LEN]; | 
|  | 34 | }; | 
|  | 35 |  | 
|  | 36 | struct ore_layout { | 
|  | 37 | /* Our way of looking at the data_map */ | 
| Boaz Harrosh | 8d2d83a | 2011-08-10 14:15:02 -0700 | [diff] [blame] | 38 | enum pnfs_osd_raid_algorithm4 | 
|  | 39 | raid_algorithm; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 40 | unsigned stripe_unit; | 
|  | 41 | unsigned mirrors_p1; | 
|  | 42 |  | 
|  | 43 | unsigned group_width; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 44 | unsigned parity; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 45 | u64	 group_depth; | 
|  | 46 | unsigned group_count; | 
| Boaz Harrosh | 5a51c0c | 2011-09-28 13:18:45 +0300 | [diff] [blame] | 47 |  | 
|  | 48 | /* Cached often needed calculations filled in by | 
|  | 49 | * ore_verify_layout | 
|  | 50 | */ | 
|  | 51 | unsigned long max_io_length;	/* Max length that should be passed to | 
|  | 52 | * ore_get_rw_state | 
|  | 53 | */ | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 54 | }; | 
|  | 55 |  | 
/*
 * struct ore_dev - ORE's handle on a single device.
 *
 * Holds only the osd_dev pointer. It is meant to be embedded in a larger,
 * user-defined per-device structure (see the @ods member of
 * struct ore_components).
 */
struct ore_dev {
	struct osd_dev	*od;
};
|  | 59 |  | 
/*
 * struct ore_components - the set of components and devices of an object.
 *
 * Logical device numbers handled by this structure start at @first_dev and
 * there are @numdevs of them; @ods is indexed relative to @first_dev.
 */
struct ore_components {
	unsigned	first_dev;		/* First logical device no    */
	unsigned	numdevs;		/* Num of devices in array    */

	/*
	 * If @single_comp == EC_SINGLE_COMP, @comps points to a single
	 * component. Otherwise there are @numdevs components.
	 */
	enum EC_COMP_USAGE {
		EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
	}		single_comp;
	struct ore_comp	*comps;

	/*
	 * Array of pointers to struct ore_dev. Users will usually have these
	 * point to a bigger struct which contains an "ore_dev ored" member,
	 * and use container_of(oc->ods[i], struct foo_dev, ored) to access
	 * the bigger structure.
	 */
	struct ore_dev	**ods;
};
|  | 78 |  | 
| Boaz Harrosh | d866d87 | 2011-09-28 14:43:09 +0300 | [diff] [blame] | 79 | /* ore_comp_dev Recievies a logical device index */ | 
|  | 80 | static inline struct osd_dev *ore_comp_dev( | 
|  | 81 | const struct ore_components *oc, unsigned i) | 
|  | 82 | { | 
| Boaz Harrosh | 3bd9856 | 2011-09-28 12:04:23 +0300 | [diff] [blame] | 83 | BUG_ON((i < oc->first_dev) || (oc->first_dev + oc->numdevs <= i)); | 
|  | 84 | return oc->ods[i - oc->first_dev]->od; | 
| Boaz Harrosh | d866d87 | 2011-09-28 14:43:09 +0300 | [diff] [blame] | 85 | } | 
|  | 86 |  | 
|  | 87 | static inline void ore_comp_set_dev( | 
|  | 88 | struct ore_components *oc, unsigned i, struct osd_dev *od) | 
|  | 89 | { | 
| Boaz Harrosh | 3bd9856 | 2011-09-28 12:04:23 +0300 | [diff] [blame] | 90 | oc->ods[i - oc->first_dev]->od = od; | 
| Boaz Harrosh | d866d87 | 2011-09-28 14:43:09 +0300 | [diff] [blame] | 91 | } | 
|  | 92 |  | 
| Boaz Harrosh | eb507bc | 2011-08-10 14:17:28 -0700 | [diff] [blame] | 93 | struct ore_striping_info { | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 94 | u64 offset; | 
| Boaz Harrosh | eb507bc | 2011-08-10 14:17:28 -0700 | [diff] [blame] | 95 | u64 obj_offset; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 96 | u64 length; | 
|  | 97 | u64 first_stripe_start; /* only used in raid writes */ | 
| Boaz Harrosh | eb507bc | 2011-08-10 14:17:28 -0700 | [diff] [blame] | 98 | u64 M; /* for truncate */ | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 99 | unsigned bytes_in_stripe; | 
| Boaz Harrosh | eb507bc | 2011-08-10 14:17:28 -0700 | [diff] [blame] | 100 | unsigned dev; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 101 | unsigned par_dev; | 
| Boaz Harrosh | eb507bc | 2011-08-10 14:17:28 -0700 | [diff] [blame] | 102 | unsigned unit_off; | 
| Boaz Harrosh | 769ba8d | 2011-10-14 15:33:51 +0200 | [diff] [blame] | 103 | unsigned cur_pg; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 104 | unsigned cur_comp; | 
| Boaz Harrosh | eb507bc | 2011-08-10 14:17:28 -0700 | [diff] [blame] | 105 | }; | 
|  | 106 |  | 
/* Forward declaration; the callback type below only needs a pointer to it */
struct ore_io_state;

/*
 * ore_io_done_fn - callback type stored in ore_io_state::done.
 *
 * @ios:     the IO state the callback is invoked for.
 * @private: opaque caller pointer (presumably ios->private - confirm in
 *           ore.c).
 */
typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private);
| Boaz Harrosh | 769ba8d | 2011-10-14 15:33:51 +0200 | [diff] [blame] | 109 | struct _ore_r4w_op { | 
|  | 110 | /* @Priv given here is passed ios->private */ | 
|  | 111 | struct page * (*get_page)(void *priv, u64 page_index, bool *uptodate); | 
|  | 112 | void (*put_page)(void *priv, struct page *page); | 
|  | 113 | }; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 114 |  | 
|  | 115 | struct ore_io_state { | 
|  | 116 | struct kref		kref; | 
| Boaz Harrosh | 9826075 | 2011-10-02 15:32:50 +0200 | [diff] [blame] | 117 | struct ore_striping_info si; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 118 |  | 
|  | 119 | void			*private; | 
|  | 120 | ore_io_done_fn	done; | 
|  | 121 |  | 
|  | 122 | struct ore_layout	*layout; | 
| Boaz Harrosh | 5bf696d | 2011-09-28 11:39:59 +0300 | [diff] [blame] | 123 | struct ore_components	*oc; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 124 |  | 
|  | 125 | /* Global read/write IO*/ | 
|  | 126 | loff_t			offset; | 
|  | 127 | unsigned long		length; | 
|  | 128 | void			*kern_buff; | 
|  | 129 |  | 
|  | 130 | struct page		**pages; | 
|  | 131 | unsigned		nr_pages; | 
|  | 132 | unsigned		pgbase; | 
|  | 133 | unsigned		pages_consumed; | 
|  | 134 |  | 
|  | 135 | /* Attributes */ | 
|  | 136 | unsigned		in_attr_len; | 
|  | 137 | struct osd_attr		*in_attr; | 
|  | 138 | unsigned		out_attr_len; | 
|  | 139 | struct osd_attr		*out_attr; | 
|  | 140 |  | 
|  | 141 | bool			reading; | 
|  | 142 |  | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 143 | /* House keeping of Parity pages */ | 
|  | 144 | bool			extra_part_alloc; | 
|  | 145 | struct page		**parity_pages; | 
|  | 146 | unsigned		max_par_pages; | 
|  | 147 | unsigned		cur_par_page; | 
|  | 148 | unsigned		sgs_per_dev; | 
| Boaz Harrosh | 769ba8d | 2011-10-14 15:33:51 +0200 | [diff] [blame] | 149 | struct __stripe_pages_2d *sp2d; | 
|  | 150 | struct ore_io_state	 *ios_read_4_write; | 
|  | 151 | const struct _ore_r4w_op *r4w; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 152 |  | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 153 | /* Variable array of size numdevs */ | 
|  | 154 | unsigned numdevs; | 
|  | 155 | struct ore_per_dev_state { | 
|  | 156 | struct osd_request *or; | 
|  | 157 | struct bio *bio; | 
|  | 158 | loff_t offset; | 
|  | 159 | unsigned length; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 160 | unsigned last_sgs_total; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 161 | unsigned dev; | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 162 | struct osd_sg_entry *sglist; | 
|  | 163 | unsigned cur_sg; | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 164 | } per_dev[]; | 
|  | 165 | }; | 
|  | 166 |  | 
|  | 167 | static inline unsigned ore_io_state_size(unsigned numdevs) | 
|  | 168 | { | 
|  | 169 | return sizeof(struct ore_io_state) + | 
|  | 170 | sizeof(struct ore_per_dev_state) * numdevs; | 
|  | 171 | } | 
|  | 172 |  | 
|  | 173 | /* ore.c */ | 
| Boaz Harrosh | 5a51c0c | 2011-09-28 13:18:45 +0300 | [diff] [blame] | 174 | int ore_verify_layout(unsigned total_comps, struct ore_layout *layout); | 
| Boaz Harrosh | 611d7a5 | 2011-10-04 14:20:17 +0200 | [diff] [blame] | 175 | void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, | 
| Boaz Harrosh | a1fec1d | 2011-10-12 18:42:22 +0200 | [diff] [blame] | 176 | u64 length, struct ore_striping_info *si); | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 177 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, | 
|  | 178 | bool is_reading, u64 offset, u64 length, | 
|  | 179 | struct ore_io_state **ios); | 
|  | 180 | int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, | 
|  | 181 | struct ore_io_state **ios); | 
|  | 182 | void ore_put_io_state(struct ore_io_state *ios); | 
|  | 183 |  | 
| Boaz Harrosh | 4b46c9f | 2011-09-28 13:25:50 +0300 | [diff] [blame] | 184 | typedef void (*ore_on_dev_error)(struct ore_io_state *ios, struct ore_dev *od, | 
|  | 185 | unsigned dev_index, enum osd_err_priority oep, | 
|  | 186 | u64 dev_offset, u64  dev_len); | 
|  | 187 | int ore_check_io(struct ore_io_state *ios, ore_on_dev_error rep); | 
| Boaz Harrosh | 8ff660a | 2011-08-06 19:26:31 -0700 | [diff] [blame] | 188 |  | 
|  | 189 | int ore_create(struct ore_io_state *ios); | 
|  | 190 | int ore_remove(struct ore_io_state *ios); | 
|  | 191 | int ore_write(struct ore_io_state *ios); | 
|  | 192 | int ore_read(struct ore_io_state *ios); | 
|  | 193 | int ore_truncate(struct ore_layout *layout, struct ore_components *comps, | 
|  | 194 | u64 size); | 
|  | 195 |  | 
|  | 196 | int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr); | 
|  | 197 |  | 
|  | 198 | extern const struct osd_attr g_attr_logical_length; | 
|  | 199 |  | 
|  | 200 | #endif |