David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 2 | /* Handle fileserver selection and rotation. |
| 3 | * |
| 4 | * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. |
| 5 | * Written by David Howells (dhowells@redhat.com) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | #include <linux/kernel.h> |
| 9 | #include <linux/slab.h> |
| 10 | #include <linux/fs.h> |
| 11 | #include <linux/sched.h> |
| 12 | #include <linux/delay.h> |
| 13 | #include <linux/sched/signal.h> |
| 14 | #include "internal.h" |
| 15 | #include "afs_fs.h" |
| 16 | |
| 17 | /* |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 18 | * Begin an operation on the fileserver. |
| 19 | * |
| 20 | * Fileserver operations are serialised on the server by vnode, so we serialise |
| 21 | * them here also using the io_lock. |
| 22 | */ |
| 23 | bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 24 | struct key *key, bool intr) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 25 | { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 26 | memset(fc, 0, sizeof(*fc)); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 27 | fc->vnode = vnode; |
| 28 | fc->key = key; |
| 29 | fc->ac.error = SHRT_MAX; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 30 | fc->error = -EDESTADDRREQ; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 31 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 32 | if (intr) { |
| 33 | fc->flags |= AFS_FS_CURSOR_INTR; |
| 34 | if (mutex_lock_interruptible(&vnode->io_lock) < 0) { |
| 35 | fc->error = -EINTR; |
| 36 | fc->flags |= AFS_FS_CURSOR_STOP; |
| 37 | return false; |
| 38 | } |
| 39 | } else { |
| 40 | mutex_lock(&vnode->io_lock); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 41 | } |
| 42 | |
| 43 | if (vnode->lock_state != AFS_VNODE_LOCK_NONE) |
| 44 | fc->flags |= AFS_FS_CURSOR_CUR_ONLY; |
| 45 | return true; |
| 46 | } |
| 47 | |
| 48 | /* |
| 49 | * Begin iteration through a server list, starting with the vnode's last used |
| 50 | * server if possible, or the last recorded good server if not. |
| 51 | */ |
| 52 | static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, |
| 53 | struct afs_vnode *vnode) |
| 54 | { |
| 55 | struct afs_cb_interest *cbi; |
| 56 | int i; |
| 57 | |
| 58 | read_lock(&vnode->volume->servers_lock); |
| 59 | fc->server_list = afs_get_serverlist(vnode->volume->servers); |
| 60 | read_unlock(&vnode->volume->servers_lock); |
| 61 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 62 | fc->untried = (1UL << fc->server_list->nr_servers) - 1; |
| 63 | fc->index = READ_ONCE(fc->server_list->preferred); |
| 64 | |
| 65 | cbi = rcu_dereference_protected(vnode->cb_interest, |
| 66 | lockdep_is_held(&vnode->io_lock)); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 67 | if (cbi) { |
| 68 | /* See if the vnode's preferred record is still available */ |
| 69 | for (i = 0; i < fc->server_list->nr_servers; i++) { |
| 70 | if (fc->server_list->servers[i].cb_interest == cbi) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 71 | fc->index = i; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 72 | goto found_interest; |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | /* If we have a lock outstanding on a server that's no longer |
| 77 | * serving this vnode, then we can't switch to another server |
| 78 | * and have to return an error. |
| 79 | */ |
| 80 | if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 81 | fc->error = -ESTALE; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 82 | return false; |
| 83 | } |
| 84 | |
| 85 | /* Note that the callback promise is effectively broken */ |
| 86 | write_seqlock(&vnode->cb_lock); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 87 | ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest)); |
| 88 | rcu_assign_pointer(vnode->cb_interest, NULL); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 89 | if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) |
| 90 | vnode->cb_break++; |
| 91 | write_sequnlock(&vnode->cb_lock); |
| 92 | |
| 93 | afs_put_cb_interest(afs_v2net(vnode), cbi); |
| 94 | cbi = NULL; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 95 | } |
| 96 | |
| 97 | found_interest: |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 98 | return true; |
| 99 | } |
| 100 | |
| 101 | /* |
| 102 | * Post volume busy note. |
| 103 | */ |
| 104 | static void afs_busy(struct afs_volume *volume, u32 abort_code) |
| 105 | { |
| 106 | const char *m; |
| 107 | |
| 108 | switch (abort_code) { |
| 109 | case VOFFLINE: m = "offline"; break; |
| 110 | case VRESTARTING: m = "restarting"; break; |
| 111 | case VSALVAGING: m = "being salvaged"; break; |
| 112 | default: m = "busy"; break; |
| 113 | } |
| 114 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 115 | pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 116 | } |
| 117 | |
| 118 | /* |
| 119 | * Sleep and retry the operation to the same fileserver. |
| 120 | */ |
| 121 | static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) |
| 122 | { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 123 | if (fc->flags & AFS_FS_CURSOR_INTR) { |
| 124 | msleep_interruptible(1000); |
| 125 | if (signal_pending(current)) { |
| 126 | fc->error = -ERESTARTSYS; |
| 127 | return false; |
| 128 | } |
| 129 | } else { |
| 130 | msleep(1000); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 131 | } |
| 132 | |
| 133 | return true; |
| 134 | } |
| 135 | |
| 136 | /* |
| 137 | * Select the fileserver to use. May be called multiple times to rotate |
| 138 | * through the fileservers. |
| 139 | */ |
| 140 | bool afs_select_fileserver(struct afs_fs_cursor *fc) |
| 141 | { |
| 142 | struct afs_addr_list *alist; |
| 143 | struct afs_server *server; |
| 144 | struct afs_vnode *vnode = fc->vnode; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 145 | struct afs_error e; |
| 146 | u32 rtt; |
| 147 | int error = fc->ac.error, i; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 148 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 149 | _enter("%lx[%d],%lx[%d],%d,%d", |
| 150 | fc->untried, fc->index, |
| 151 | fc->ac.tried, fc->ac.index, |
| 152 | error, fc->ac.abort_code); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 153 | |
| 154 | if (fc->flags & AFS_FS_CURSOR_STOP) { |
| 155 | _leave(" = f [stopped]"); |
| 156 | return false; |
| 157 | } |
| 158 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 159 | fc->nr_iterations++; |
| 160 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 161 | /* Evaluate the result of the previous operation, if there was one. */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 162 | switch (error) { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 163 | case SHRT_MAX: |
| 164 | goto start; |
| 165 | |
| 166 | case 0: |
| 167 | default: |
| 168 | /* Success or local failure. Stop. */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 169 | fc->error = error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 170 | fc->flags |= AFS_FS_CURSOR_STOP; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 171 | _leave(" = f [okay/local %d]", error); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 172 | return false; |
| 173 | |
| 174 | case -ECONNABORTED: |
| 175 | /* The far side rejected the operation on some grounds. This |
| 176 | * might involve the server being busy or the volume having been moved. |
| 177 | */ |
| 178 | switch (fc->ac.abort_code) { |
| 179 | case VNOVOL: |
| 180 | /* This fileserver doesn't know about the volume. |
| 181 | * - May indicate that the VL is wrong - retry once and compare |
| 182 | * the results. |
| 183 | * - May indicate that the fileserver couldn't attach to the vol. |
| 184 | */ |
| 185 | if (fc->flags & AFS_FS_CURSOR_VNOVOL) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 186 | fc->error = -EREMOTEIO; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 187 | goto next_server; |
| 188 | } |
| 189 | |
| 190 | write_lock(&vnode->volume->servers_lock); |
| 191 | fc->server_list->vnovol_mask |= 1 << fc->index; |
| 192 | write_unlock(&vnode->volume->servers_lock); |
| 193 | |
| 194 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 195 | error = afs_check_volume_status(vnode->volume, fc->key); |
| 196 | if (error < 0) |
| 197 | goto failed_set_error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 198 | |
| 199 | if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 200 | fc->error = -ENOMEDIUM; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 201 | goto failed; |
| 202 | } |
| 203 | |
| 204 | /* If the server list didn't change, then assume that |
| 205 | * it's the fileserver having trouble. |
| 206 | */ |
| 207 | if (vnode->volume->servers == fc->server_list) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 208 | fc->error = -EREMOTEIO; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 209 | goto next_server; |
| 210 | } |
| 211 | |
| 212 | /* Try again */ |
| 213 | fc->flags |= AFS_FS_CURSOR_VNOVOL; |
| 214 | _leave(" = t [vnovol]"); |
| 215 | return true; |
| 216 | |
| 217 | case VSALVAGE: /* TODO: Should this return an error or iterate? */ |
| 218 | case VVOLEXISTS: |
| 219 | case VNOSERVICE: |
| 220 | case VONLINE: |
| 221 | case VDISKFULL: |
| 222 | case VOVERQUOTA: |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 223 | fc->error = afs_abort_to_error(fc->ac.abort_code); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 224 | goto next_server; |
| 225 | |
| 226 | case VOFFLINE: |
| 227 | if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { |
| 228 | afs_busy(vnode->volume, fc->ac.abort_code); |
| 229 | clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); |
| 230 | } |
| 231 | if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 232 | fc->error = -EADV; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 233 | goto failed; |
| 234 | } |
| 235 | if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 236 | fc->error = -ESTALE; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 237 | goto failed; |
| 238 | } |
| 239 | goto busy; |
| 240 | |
| 241 | case VSALVAGING: |
| 242 | case VRESTARTING: |
| 243 | case VBUSY: |
| 244 | /* Retry after going round all the servers unless we |
| 245 | * have a file lock we need to maintain. |
| 246 | */ |
| 247 | if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 248 | fc->error = -EBUSY; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 249 | goto failed; |
| 250 | } |
| 251 | if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { |
| 252 | afs_busy(vnode->volume, fc->ac.abort_code); |
| 253 | clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); |
| 254 | } |
| 255 | busy: |
| 256 | if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { |
| 257 | if (!afs_sleep_and_retry(fc)) |
| 258 | goto failed; |
| 259 | |
| 260 | /* Retry with same server & address */ |
| 261 | _leave(" = t [vbusy]"); |
| 262 | return true; |
| 263 | } |
| 264 | |
| 265 | fc->flags |= AFS_FS_CURSOR_VBUSY; |
| 266 | goto next_server; |
| 267 | |
| 268 | case VMOVED: |
| 269 | /* The volume migrated to another server. We consider |
| 270 | * consider all locks and callbacks broken and request |
| 271 | * an update from the VLDB. |
| 272 | * |
| 273 | * We also limit the number of VMOVED hops we will |
| 274 | * honour, just in case someone sets up a loop. |
| 275 | */ |
| 276 | if (fc->flags & AFS_FS_CURSOR_VMOVED) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 277 | fc->error = -EREMOTEIO; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 278 | goto failed; |
| 279 | } |
| 280 | fc->flags |= AFS_FS_CURSOR_VMOVED; |
| 281 | |
| 282 | set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); |
| 283 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 284 | error = afs_check_volume_status(vnode->volume, fc->key); |
| 285 | if (error < 0) |
| 286 | goto failed_set_error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 287 | |
| 288 | /* If the server list didn't change, then the VLDB is |
| 289 | * out of sync with the fileservers. This is hopefully |
| 290 | * a temporary condition, however, so we don't want to |
| 291 | * permanently block access to the file. |
| 292 | * |
| 293 | * TODO: Try other fileservers if we can. |
| 294 | * |
| 295 | * TODO: Retry a few times with sleeps. |
| 296 | */ |
| 297 | if (vnode->volume->servers == fc->server_list) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 298 | fc->error = -ENOMEDIUM; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 299 | goto failed; |
| 300 | } |
| 301 | |
| 302 | goto restart_from_beginning; |
| 303 | |
| 304 | default: |
| 305 | clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); |
| 306 | clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 307 | fc->error = afs_abort_to_error(fc->ac.abort_code); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 308 | goto failed; |
| 309 | } |
| 310 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 311 | case -ETIMEDOUT: |
| 312 | case -ETIME: |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 313 | if (fc->error != -EDESTADDRREQ) |
| 314 | goto iterate_address; |
| 315 | /* Fall through */ |
| 316 | case -ERFKILL: |
| 317 | case -EADDRNOTAVAIL: |
| 318 | case -ENETUNREACH: |
| 319 | case -EHOSTUNREACH: |
| 320 | case -EHOSTDOWN: |
| 321 | case -ECONNREFUSED: |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 322 | _debug("no conn"); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 323 | fc->error = error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 324 | goto iterate_address; |
| 325 | |
| 326 | case -ECONNRESET: |
| 327 | _debug("call reset"); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 328 | fc->error = error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 329 | goto failed; |
| 330 | } |
| 331 | |
| 332 | restart_from_beginning: |
| 333 | _debug("restart"); |
| 334 | afs_end_cursor(&fc->ac); |
| 335 | afs_put_cb_interest(afs_v2net(vnode), fc->cbi); |
| 336 | fc->cbi = NULL; |
| 337 | afs_put_serverlist(afs_v2net(vnode), fc->server_list); |
| 338 | fc->server_list = NULL; |
| 339 | start: |
| 340 | _debug("start"); |
| 341 | /* See if we need to do an update of the volume record. Note that the |
| 342 | * volume may have moved or even have been deleted. |
| 343 | */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 344 | error = afs_check_volume_status(vnode->volume, fc->key); |
| 345 | if (error < 0) |
| 346 | goto failed_set_error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 347 | |
| 348 | if (!afs_start_fs_iteration(fc, vnode)) |
| 349 | goto failed; |
| 350 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 351 | _debug("__ VOL %llx __", vnode->volume->vid); |
| 352 | error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list); |
| 353 | if (error < 0) |
| 354 | goto failed_set_error; |
| 355 | |
| 356 | pick_server: |
| 357 | _debug("pick [%lx]", fc->untried); |
| 358 | |
| 359 | error = afs_wait_for_fs_probes(fc->server_list, fc->untried); |
| 360 | if (error < 0) |
| 361 | goto failed_set_error; |
| 362 | |
| 363 | /* Pick the untried server with the lowest RTT. If we have outstanding |
| 364 | * callbacks, we stick with the server we're already using if we can. |
| 365 | */ |
| 366 | if (fc->cbi) { |
| 367 | _debug("cbi %u", fc->index); |
| 368 | if (test_bit(fc->index, &fc->untried)) |
| 369 | goto selected_server; |
| 370 | afs_put_cb_interest(afs_v2net(vnode), fc->cbi); |
| 371 | fc->cbi = NULL; |
| 372 | _debug("nocbi"); |
| 373 | } |
| 374 | |
| 375 | fc->index = -1; |
| 376 | rtt = U32_MAX; |
| 377 | for (i = 0; i < fc->server_list->nr_servers; i++) { |
| 378 | struct afs_server *s = fc->server_list->servers[i].server; |
| 379 | |
| 380 | if (!test_bit(i, &fc->untried) || !s->probe.responded) |
| 381 | continue; |
| 382 | if (s->probe.rtt < rtt) { |
| 383 | fc->index = i; |
| 384 | rtt = s->probe.rtt; |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | if (fc->index == -1) |
| 389 | goto no_more_servers; |
| 390 | |
| 391 | selected_server: |
| 392 | _debug("use %d", fc->index); |
| 393 | __clear_bit(fc->index, &fc->untried); |
| 394 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 395 | /* We're starting on a different fileserver from the list. We need to |
| 396 | * check it, create a callback intercept, find its address list and |
| 397 | * probe its capabilities before we use it. |
| 398 | */ |
| 399 | ASSERTCMP(fc->ac.alist, ==, NULL); |
| 400 | server = fc->server_list->servers[fc->index].server; |
| 401 | |
| 402 | if (!afs_check_server_record(fc, server)) |
| 403 | goto failed; |
| 404 | |
| 405 | _debug("USING SERVER: %pU", &server->uuid); |
| 406 | |
| 407 | /* Make sure we've got a callback interest record for this server. We |
| 408 | * have to link it in before we send the request as we can be sent a |
| 409 | * break request before we've finished decoding the reply and |
| 410 | * installing the vnode. |
| 411 | */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 412 | error = afs_register_server_cb_interest(vnode, fc->server_list, |
| 413 | fc->index); |
| 414 | if (error < 0) |
| 415 | goto failed_set_error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 416 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 417 | fc->cbi = afs_get_cb_interest( |
| 418 | rcu_dereference_protected(vnode->cb_interest, |
| 419 | lockdep_is_held(&vnode->io_lock))); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 420 | |
| 421 | read_lock(&server->fs_lock); |
| 422 | alist = rcu_dereference_protected(server->addresses, |
| 423 | lockdep_is_held(&server->fs_lock)); |
| 424 | afs_get_addrlist(alist); |
| 425 | read_unlock(&server->fs_lock); |
| 426 | |
| 427 | memset(&fc->ac, 0, sizeof(fc->ac)); |
| 428 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 429 | if (!fc->ac.alist) |
| 430 | fc->ac.alist = alist; |
| 431 | else |
| 432 | afs_put_addrlist(alist); |
| 433 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 434 | fc->ac.index = -1; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 435 | |
| 436 | iterate_address: |
| 437 | ASSERT(fc->ac.alist); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 438 | /* Iterate over the current server's address list to try and find an |
| 439 | * address on which it will respond to us. |
| 440 | */ |
| 441 | if (!afs_iterate_addresses(&fc->ac)) |
| 442 | goto next_server; |
| 443 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 444 | _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs); |
| 445 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 446 | _leave(" = t"); |
| 447 | return true; |
| 448 | |
| 449 | next_server: |
| 450 | _debug("next"); |
| 451 | afs_end_cursor(&fc->ac); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 452 | goto pick_server; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 453 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 454 | no_more_servers: |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 455 | /* That's all the servers poked to no good effect. Try again if some |
| 456 | * of them were busy. |
| 457 | */ |
| 458 | if (fc->flags & AFS_FS_CURSOR_VBUSY) |
| 459 | goto restart_from_beginning; |
| 460 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 461 | e.error = -EDESTADDRREQ; |
| 462 | e.responded = false; |
| 463 | for (i = 0; i < fc->server_list->nr_servers; i++) { |
| 464 | struct afs_server *s = fc->server_list->servers[i].server; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 465 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 466 | afs_prioritise_error(&e, READ_ONCE(s->probe.error), |
| 467 | s->probe.abort_code); |
| 468 | } |
| 469 | |
| 470 | error = e.error; |
| 471 | |
| 472 | failed_set_error: |
| 473 | fc->error = error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 474 | failed: |
| 475 | fc->flags |= AFS_FS_CURSOR_STOP; |
| 476 | afs_end_cursor(&fc->ac); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 477 | _leave(" = f [failed %d]", fc->error); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 478 | return false; |
| 479 | } |
| 480 | |
/*
 * Select the same fileserver we used for a vnode before and only that
 * fileserver.  We use this when we have a lock on that file, which is backed
 * only by the fileserver we obtained it from.
 *
 * On the first call (fc->ac.error == SHRT_MAX) the address cursor is set up
 * from the server behind the vnode's callback interest.  On later calls the
 * previous result is evaluated: connection-type errors move on to the next
 * address, anything else stops the operation.
 *
 * Returns true if there is an address to try, false otherwise.
 */
bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_vnode *vnode = fc->vnode;
	struct afs_cb_interest *cbi;
	struct afs_addr_list *alist;
	int error = fc->ac.error;

	_enter("");

	cbi = rcu_dereference_protected(vnode->cb_interest,
					lockdep_is_held(&vnode->io_lock));

	if (error == SHRT_MAX) {
		/* First call: there must be a server to stick to. */
		if (!cbi)
			goto stale;

		fc->cbi = afs_get_cb_interest(cbi);

		read_lock(&cbi->server->fs_lock);
		alist = rcu_dereference_protected(cbi->server->addresses,
						  lockdep_is_held(&cbi->server->fs_lock));
		afs_get_addrlist(alist);
		read_unlock(&cbi->server->fs_lock);
		if (!alist)
			goto stale;

		memset(&fc->ac, 0, sizeof(fc->ac));
		fc->ac.alist = alist;
		fc->ac.index = -1;
		goto try_address;
	}

	switch (error) {
	case -ECONNABORTED:
		fc->error = afs_abort_to_error(fc->ac.abort_code);
		fc->flags |= AFS_FS_CURSOR_STOP;
		_leave(" = f [abort]");
		return false;

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		_debug("no conn");
		fc->error = error;
		break;

	default:
		/* Success or local failure.  Stop. */
		fc->error = error;
		fc->flags |= AFS_FS_CURSOR_STOP;
		_leave(" = f [okay/local %d]", error);
		return false;
	}

try_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	if (!afs_iterate_addresses(&fc->ac)) {
		afs_end_cursor(&fc->ac);
		return false;
	}

	_leave(" = t");
	return true;

stale:
	fc->error = -ESTALE;
	fc->flags |= AFS_FS_CURSOR_STOP;
	return false;
}
| 563 | |
| 564 | /* |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 565 | * Dump cursor state in the case of the error being EDESTADDRREQ. |
| 566 | */ |
| 567 | static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) |
| 568 | { |
| 569 | static int count; |
| 570 | int i; |
| 571 | |
| 572 | if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) |
| 573 | return; |
| 574 | count++; |
| 575 | |
| 576 | rcu_read_lock(); |
| 577 | |
| 578 | pr_notice("EDESTADDR occurred\n"); |
| 579 | pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", |
| 580 | fc->cb_break, fc->cb_break_2, fc->flags, fc->error); |
| 581 | pr_notice("FC: ut=%lx ix=%d ni=%u\n", |
| 582 | fc->untried, fc->index, fc->nr_iterations); |
| 583 | |
| 584 | if (fc->server_list) { |
| 585 | const struct afs_server_list *sl = fc->server_list; |
| 586 | pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", |
| 587 | sl->nr_servers, sl->preferred, sl->vnovol_mask); |
| 588 | for (i = 0; i < sl->nr_servers; i++) { |
| 589 | const struct afs_server *s = sl->servers[i].server; |
| 590 | pr_notice("FC: server fl=%lx av=%u %pU\n", |
| 591 | s->flags, s->addr_version, &s->uuid); |
| 592 | if (s->addresses) { |
| 593 | const struct afs_addr_list *a = |
| 594 | rcu_dereference(s->addresses); |
| 595 | pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n", |
| 596 | a->version, |
| 597 | a->nr_ipv4, a->nr_addrs, a->max_addrs, |
| 598 | a->preferred); |
| 599 | pr_notice("FC: - pr=%lx R=%lx F=%lx\n", |
| 600 | a->probed, a->responded, a->failed); |
| 601 | if (a == fc->ac.alist) |
| 602 | pr_notice("FC: - current\n"); |
| 603 | } |
| 604 | } |
| 605 | } |
| 606 | |
| 607 | pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", |
| 608 | fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error, |
| 609 | fc->ac.responded, fc->ac.nr_iterations); |
| 610 | rcu_read_unlock(); |
| 611 | } |
| 612 | |
| 613 | /* |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 614 | * Tidy up a filesystem cursor and unlock the vnode. |
| 615 | */ |
| 616 | int afs_end_vnode_operation(struct afs_fs_cursor *fc) |
| 617 | { |
| 618 | struct afs_net *net = afs_v2net(fc->vnode); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 619 | |
| 620 | if (fc->error == -EDESTADDRREQ || |
| 621 | fc->error == -EADDRNOTAVAIL || |
| 622 | fc->error == -ENETUNREACH || |
| 623 | fc->error == -EHOSTUNREACH) |
| 624 | afs_dump_edestaddrreq(fc); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 625 | |
| 626 | mutex_unlock(&fc->vnode->io_lock); |
| 627 | |
| 628 | afs_end_cursor(&fc->ac); |
| 629 | afs_put_cb_interest(net, fc->cbi); |
| 630 | afs_put_serverlist(net, fc->server_list); |
| 631 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 632 | if (fc->error == -ECONNABORTED) |
| 633 | fc->error = afs_abort_to_error(fc->ac.abort_code); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 634 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 635 | return fc->error; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 636 | } |