Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/init/Kconfig b/init/Kconfig
index f23e90d..13685bf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -6,19 +6,39 @@
default "/lib/modules/$(shell,uname -r)/.config"
default "/etc/kernel-config"
default "/boot/config-$(shell,uname -r)"
- default ARCH_DEFCONFIG
- default "arch/$(ARCH)/defconfig"
+ default "arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)"
+
+config CC_VERSION_TEXT
+ string
+ default "$(CC_VERSION_TEXT)"
+ help
+ This is used in unclear ways:
+
+ - Re-run Kconfig when the compiler is updated
+ The 'default' property references the environment variable,
+ CC_VERSION_TEXT so it is recorded in include/config/auto.conf.cmd.
+ When the compiler is updated, Kconfig will be invoked.
+
+ - Ensure full rebuild when the compier is updated
+ include/linux/kconfig.h contains this option in the comment line so
+ fixdep adds include/config/cc/version/text.h into the auto-generated
+ dependency. When the compiler is updated, syncconfig will touch it
+ and then every file will be rebuilt.
config CC_IS_GCC
- def_bool $(success,$(CC) --version | head -n 1 | grep -q gcc)
+ def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q gcc)
config GCC_VERSION
int
default $(shell,$(srctree)/scripts/gcc-version.sh $(CC)) if CC_IS_GCC
default 0
+config LD_VERSION
+ int
+ default $(shell,$(LD) --version | $(srctree)/scripts/ld-version.sh)
+
config CC_IS_CLANG
- def_bool $(success,$(CC) --version | head -n 1 | grep -q clang)
+ def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q clang)
config LD_IS_LLD
def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD)
@@ -27,12 +47,27 @@
int
default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
+config LLD_VERSION
+ int
+ default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
+
config CC_CAN_LINK
- def_bool $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
+ bool
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag))
+
+config CC_CAN_LINK_STATIC
+ bool
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag) -static) if 64BIT
+ default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag) -static)
config CC_HAS_ASM_GOTO
def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC))
+config CC_HAS_ASM_GOTO_OUTPUT
+ depends on CC_HAS_ASM_GOTO
+ def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
+
config TOOLS_SUPPORT_RELR
def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
@@ -46,7 +81,7 @@
config IRQ_WORK
bool
-config BUILDTIME_EXTABLE_SORT
+config BUILDTIME_TABLE_SORT
bool
config THREAD_INFO_IN_TASK
@@ -133,13 +168,13 @@
which is done within the script "scripts/setlocalversion".)
config BUILD_SALT
- string "Build ID Salt"
- default ""
- help
- The build ID is used to link binaries and their debug info. Setting
- this option will use the value in the calculation of the build id.
- This is mostly useful for distributions which want to ensure the
- build is unique between builds. It's safe to leave the default.
+ string "Build ID Salt"
+ default ""
+ help
+ The build ID is used to link binaries and their debug info. Setting
+ this option will use the value in the calculation of the build id.
+ This is mostly useful for distributions which want to ensure the
+ build is unique between builds. It's safe to leave the default.
config HAVE_KERNEL_GZIP
bool
@@ -159,13 +194,16 @@
config HAVE_KERNEL_LZ4
bool
+config HAVE_KERNEL_ZSTD
+ bool
+
config HAVE_KERNEL_UNCOMPRESSED
bool
choice
prompt "Kernel compression mode"
default KERNEL_GZIP
- depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED
+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD || HAVE_KERNEL_UNCOMPRESSED
help
The linux kernel is a kind of self-extracting executable.
Several compression algorithms are available, which differ
@@ -244,6 +282,16 @@
is about 8% bigger than LZO. But the decompression speed is
faster than LZO.
+config KERNEL_ZSTD
+ bool "ZSTD"
+ depends on HAVE_KERNEL_ZSTD
+ help
+ ZSTD is a compression algorithm targeting intermediate compression
+ with fast decompression speed. It will compress better than GZIP and
+ decompress around the same speed as LZO, but slower than LZ4. You
+ will need at least 192 KB RAM or more for booting. The zstd command
+ line tool is required for compression.
+
config KERNEL_UNCOMPRESSED
bool "None"
depends on HAVE_KERNEL_UNCOMPRESSED
@@ -256,6 +304,16 @@
endchoice
+config DEFAULT_INIT
+ string "Default init path"
+ default ""
+ help
+ This option determines the default init for the system if no init=
+ option is passed on the kernel command line. If the requested path is
+ not present, we will still then move on to attempting further
+ locations (e.g. /sbin/init, etc). If this is empty, we will just use
+ the fallback list when init= is not passed.
+
config DEFAULT_HOSTNAME
string "Default hostname"
default "(none)"
@@ -284,7 +342,7 @@
config SYSVIPC
bool "System V IPC"
- ---help---
+ help
Inter Process Communication is a suite of library functions and
system calls which let processes (running programs) synchronize and
exchange information. It is generally considered to be a good thing,
@@ -306,7 +364,7 @@
config POSIX_MQUEUE
bool "POSIX Message Queues"
depends on NET
- ---help---
+ help
POSIX variant of message queues is a part of IPC. In POSIX message
queues every message has a priority which decides about succession
of receiving it by a process. If you want to compile and run
@@ -325,6 +383,18 @@
depends on SYSCTL
default y
+config WATCH_QUEUE
+ bool "General notification queue"
+ default n
+ help
+
+ This is a general notification queue for the kernel to pass events to
+ userspace by splicing them into pipes. It can be used in conjunction
+ with watches for key/keyring change notifications and device
+ notifications.
+
+ See Documentation/watch_queue.rst
+
config CROSS_MEMORY_ATTACH
bool "Enable process_vm_readv/writev syscalls"
depends on MMU
@@ -437,6 +507,25 @@
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
depends on SMP
+config SCHED_THERMAL_PRESSURE
+ bool
+ default y if ARM && ARM_CPU_TOPOLOGY
+ default y if ARM64
+ depends on SMP
+ depends on CPU_FREQ_THERMAL
+ help
+ Select this option to enable thermal pressure accounting in the
+ scheduler. Thermal pressure is the value conveyed to the scheduler
+ that reflects the reduction in CPU compute capacity resulted from
+ thermal throttling. Thermal throttling occurs when the performance of
+ a CPU is capped due to high operating temperatures.
+
+ If selected, the scheduler will be able to balance tasks accordingly,
+ i.e. put less load on throttled CPUs than on non/less throttled ones.
+
+ This requires the architecture to implement
+ arch_set_thermal_pressure() and arch_get_thermal_pressure().
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER
@@ -568,7 +657,7 @@
config IKCONFIG
tristate "Kernel .config support"
- ---help---
+ help
This option enables the complete Linux kernel ".config" file
contents to be saved in the kernel. It provides documentation
of which kernel options are used in a running kernel or in an
@@ -581,7 +670,7 @@
config IKCONFIG_PROC
bool "Enable access to .config through /proc/config.gz"
depends on IKCONFIG && PROC_FS
- ---help---
+ help
This option enables access to the kernel configuration file
through /proc/config.gz.
@@ -633,7 +722,7 @@
with more CPUs. Therefore this value is used only when the sum of
contributions is greater than the half of the default kernel ring
buffer as defined by LOG_BUF_SHIFT. The default values are set
- so that more than 64 CPUs are needed to trigger the allocation.
+ so that more than 16 CPUs are needed to trigger the allocation.
Also this option is ignored when "log_buf_len" kernel parameter is
used as it forces an exact (power of two) size of the ring buffer.
@@ -753,6 +842,9 @@
config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
bool
+config CC_HAS_INT128
+ def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
+
#
# For architectures that know their GCC __int128 support is sound
#
@@ -802,7 +894,7 @@
if CGROUPS
config PAGE_COUNTER
- bool
+ bool
config MEMCG
bool "Memory controller"
@@ -812,24 +904,9 @@
Provides control over the memory footprint of tasks in a cgroup.
config MEMCG_SWAP
- bool "Swap controller"
+ bool
depends on MEMCG && SWAP
- help
- Provides control over the swap space consumed by tasks in a cgroup.
-
-config MEMCG_SWAP_ENABLED
- bool "Swap controller enabled by default"
- depends on MEMCG_SWAP
default y
- help
- Memory Resource Controller Swap Extension comes with its price in
- a bigger memory consumption. General purpose distribution kernels
- which want to enable the feature but keep it disabled by default
- and let the user enable it by swapaccount=1 boot command line
- parameter should have this option unselected.
- For those who want to have the feature enabled by default should
- select this option (if, for some reason, they need to disable it
- then swapaccount=0 does the trick).
config MEMCG_KMEM
bool
@@ -840,7 +917,7 @@
bool "IO controller"
depends on BLOCK
default n
- ---help---
+ help
Generic block IO controller cgroup interface. This is the common
cgroup interface which should be used by various IO controlling
policies.
@@ -853,7 +930,7 @@
This option only enables generic Block IO controller infrastructure.
One needs to also enable actual IO controlling logic/policy. For
enabling proportional weight division of disk bandwidth in CFQ, set
- CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
+ CONFIG_BFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
CONFIG_BLK_DEV_THROTTLING=y.
See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.
@@ -1010,7 +1087,8 @@
help
This option extends the perf per-cpu mode to restrict monitoring
to threads which belong to the cgroup specified and run on the
- designated cpu.
+ designated cpu. Or this can be used to have cgroup ID in samples
+ so that it can monitor performance events among cgroups.
Say N if unsure.
@@ -1064,6 +1142,14 @@
In this namespace tasks see different info provided with the
uname() system call
+config TIME_NS
+ bool "TIME namespace"
+ depends on GENERIC_VDSO_TIME_NS
+ default y
+ help
+ In this namespace boottime and monotonic clocks can be set.
+ The time will keep going with the same pace.
+
config IPC_NS
bool "IPC namespace"
depends on (SYSVIPC || POSIX_MQUEUE)
@@ -1107,6 +1193,7 @@
config CHECKPOINT_RESTORE
bool "Checkpoint/restore support"
select PROC_CHILDREN
+ select KCMP
default n
help
Enables additional kernel features in a sake of checkpoint/restore.
@@ -1199,6 +1286,18 @@
endif
+config BOOT_CONFIG
+ bool "Boot config support"
+ select BLK_DEV_INITRD
+ help
+ Extra boot config allows system admin to pass a config file as
+ complemental extension of kernel cmdline when booting.
+ The boot config file must be attached at the end of initramfs
+ with checksum, size and magic word.
+ See <file:Documentation/admin-guide/bootconfig.rst> for details.
+
+ If unsure, say Y.
+
choice
prompt "Compiler optimization level"
default CC_OPTIMIZE_FOR_PERFORMANCE
@@ -1239,7 +1338,6 @@
bool "Dead code and data elimination (EXPERIMENTAL)"
depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
depends on EXPERT
- depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800)
depends on $(cc-option,-ffunction-sections -fdata-sections)
depends on $(ld-option,--gc-sections)
help
@@ -1254,6 +1352,12 @@
present. This option is not well tested yet, so use at your
own risk.
+config LD_ORPHAN_WARN
+ def_bool y
+ depends on ARCH_WANT_LD_ORPHAN_WARN
+ depends on !LD_IS_LLD || LLD_VERSION >= 110000
+ depends on $(ld-option,--orphan-handling=warn)
+
config SYSCTL
bool
@@ -1293,9 +1397,9 @@
select DEBUG_KERNEL
help
This option allows certain base kernel options and settings
- to be disabled or tweaked. This is for specialized
- environments which can tolerate a "non-standard" kernel.
- Only use this if you really know what you are doing.
+ to be disabled or tweaked. This is for specialized
+ environments which can tolerate a "non-standard" kernel.
+ Only use this if you really know what you are doing.
config UID16
bool "Enable 16-bit UID system calls" if EXPERT
@@ -1321,7 +1425,7 @@
config SGETMASK_SYSCALL
bool "sgetmask/ssetmask syscalls support" if EXPERT
def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
- ---help---
+ help
sys_sgetmask and sys_ssetmask are obsolete system calls
no longer supported in libc but still enabled by default in some
architectures.
@@ -1331,30 +1435,13 @@
config SYSFS_SYSCALL
bool "Sysfs syscall support" if EXPERT
default y
- ---help---
+ help
sys_sysfs is an obsolete system call no longer supported in libc.
Note that disabling this option is more secure but might break
compatibility with some systems.
If unsure say Y here.
-config SYSCTL_SYSCALL
- bool "Sysctl syscall support" if EXPERT
- depends on PROC_SYSCTL
- default n
- select SYSCTL
- ---help---
- sys_sysctl uses binary paths that have been found challenging
- to properly maintain and use. The interface in /proc/sys
- using paths with ascii names is now the primary path to this
- information.
-
- Almost nothing using the binary sysctl interface so if you are
- trying to save some space it is probably safe to disable this,
- making your kernel marginally smaller.
-
- If unsure say N here.
-
config FHANDLE
bool "open by fhandle syscalls" if EXPERT
select EXPORTFS
@@ -1405,11 +1492,11 @@
bool "BUG() support" if EXPERT
default y
help
- Disabling this option eliminates support for BUG and WARN, reducing
- the size of your kernel image and potentially quietly ignoring
- numerous fatal conditions. You should only consider disabling this
- option for embedded systems with no facilities for reporting errors.
- Just say Y.
+ Disabling this option eliminates support for BUG and WARN, reducing
+ the size of your kernel image and potentially quietly ignoring
+ numerous fatal conditions. You should only consider disabling this
+ option for embedded systems with no facilities for reporting errors.
+ Just say Y.
config ELF_CORE
depends on COREDUMP
@@ -1425,8 +1512,8 @@
select I8253_LOCK
default y
help
- This option allows to disable the internal PC-Speaker
- support, saving some memory.
+ This option allows to disable the internal PC-Speaker
+ support, saving some memory.
config BASE_FULL
default y
@@ -1513,7 +1600,7 @@
config IO_URING
bool "Enable IO uring support" if EXPERT
- select ANON_INODES
+ select IO_WQ
default y
help
This option enables support for the io_uring interface, enabling
@@ -1530,6 +1617,11 @@
applications use these syscalls, you can disable this option to save
space.
+config HAVE_ARCH_USERFAULTFD_WP
+ bool
+ help
+ Arch has userfaultfd write protection support
+
config MEMBARRIER
bool "Enable membarrier() system call" if EXPERT
default y
@@ -1543,29 +1635,29 @@
If unsure, say Y.
config KALLSYMS
- bool "Load all symbols for debugging/ksymoops" if EXPERT
- default y
- help
- Say Y here to let the kernel print out symbolic crash information and
- symbolic stack backtraces. This increases the size of the kernel
- somewhat, as all symbols have to be loaded into the kernel image.
+ bool "Load all symbols for debugging/ksymoops" if EXPERT
+ default y
+ help
+ Say Y here to let the kernel print out symbolic crash information and
+ symbolic stack backtraces. This increases the size of the kernel
+ somewhat, as all symbols have to be loaded into the kernel image.
config KALLSYMS_ALL
bool "Include all symbols in kallsyms"
depends on DEBUG_KERNEL && KALLSYMS
help
- Normally kallsyms only contains the symbols of functions for nicer
- OOPS messages and backtraces (i.e., symbols from the text and inittext
- sections). This is sufficient for most cases. And only in very rare
- cases (e.g., when a debugger is used) all symbols are required (e.g.,
- names of variables from the data sections, etc).
+ Normally kallsyms only contains the symbols of functions for nicer
+ OOPS messages and backtraces (i.e., symbols from the text and inittext
+ sections). This is sufficient for most cases. And only in very rare
+ cases (e.g., when a debugger is used) all symbols are required (e.g.,
+ names of variables from the data sections, etc).
- This option makes sure that all symbols are loaded into the kernel
- image (i.e., symbols from all sections) in cost of increased kernel
- size (depending on the kernel configuration, it may be 300KiB or
- something like this).
+ This option makes sure that all symbols are loaded into the kernel
+ image (i.e., symbols from all sections) in cost of increased kernel
+ size (depending on the kernel configuration, it may be 300KiB or
+ something like this).
- Say N unless you really need all symbols.
+ Say N unless you really need all symbols.
config KALLSYMS_ABSOLUTE_PERCPU
bool
@@ -1593,15 +1685,32 @@
# end of the "standard kernel features (expert users)" menu
# syscall, maps, verifier
+
+config BPF_LSM
+ bool "LSM Instrumentation with BPF"
+ depends on BPF_EVENTS
+ depends on BPF_SYSCALL
+ depends on SECURITY
+ depends on BPF_JIT
+ help
+ Enables instrumentation of the security hooks with eBPF programs for
+ implementing dynamic MAC and Audit Policies.
+
+ If you are unsure how to answer this question, answer N.
+
config BPF_SYSCALL
bool "Enable bpf() system call"
select BPF
select IRQ_WORK
+ select TASKS_TRACE_RCU
default n
help
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.
+config ARCH_WANT_DEFAULT_BPF_JIT
+ bool
+
config BPF_JIT_ALWAYS_ON
bool "Permanently enable BPF JIT and remove BPF interpreter"
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
@@ -1609,6 +1718,22 @@
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter
+config BPF_JIT_DEFAULT_ON
+ def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+ depends on HAVE_EBPF_JIT && BPF_JIT
+
+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no other transition to
+ 0 is possible anymore).
+
+source "kernel/bpf/preload/Kconfig"
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
depends on MMU
@@ -1622,6 +1747,16 @@
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool
+config KCMP
+ bool "Enable kcmp() system call" if EXPERT
+ help
+ Enable the kernel resource comparison system call. It provides
+ user-space with the ability to compare two processes to see if they
+ share a common resource, such as a file descriptor or even virtual
+ memory space.
+
+ If unsure, say N.
+
config RSEQ
bool "Enable rseq() system call" if EXPERT
default y
@@ -1708,12 +1843,12 @@
depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
select PERF_USE_VMALLOC
help
- Use vmalloc memory to back perf mmap() buffers.
+ Use vmalloc memory to back perf mmap() buffers.
- Mostly useful for debugging the vmalloc code on platforms
- that don't require it.
+ Mostly useful for debugging the vmalloc code on platforms
+ that don't require it.
- Say N if unsure.
+ Say N if unsure.
endmenu
@@ -1812,9 +1947,8 @@
command line.
config SLAB_FREELIST_RANDOM
- default n
+ bool "Randomize slab freelist"
depends on SLAB || SLUB
- bool "SLAB freelist randomization"
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
@@ -1822,12 +1956,14 @@
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
- depends on SLUB
+ depends on SLAB || SLUB
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
sacrifices to harden the kernel slab allocator against common
- freelist exploit methods.
+ freelist exploit methods. Some slab implementations have more
+ sanity-checking than others. This option is most effective with
+ CONFIG_SLUB.
config SHUFFLE_PAGE_ALLOCATOR
bool "Page allocator randomization"
@@ -1884,7 +2020,7 @@
userspace. Since that isn't generally a problem on no-MMU systems,
it is normally safe to say Y here.
- See Documentation/nommu-mmap.txt for more information.
+ See Documentation/admin-guide/mm/nommu-mmap.rst for more information.
config SYSTEM_DATA_VERIFICATION
def_bool n
@@ -2178,6 +2314,19 @@
If unsure, or if you need to build out-of-tree modules, say N.
+config UNUSED_KSYMS_WHITELIST
+ string "Whitelist of symbols to keep in ksymtab"
+ depends on TRIM_UNUSED_KSYMS
+ help
+ By default, all unused exported symbols will be un-exported from the
+ build when TRIM_UNUSED_KSYMS is selected.
+
+ UNUSED_KSYMS_WHITELIST allows to whitelist symbols that must be kept
+ exported at all times, even in absence of in-tree users. The value to
+ set here is the path to a text file containing the list of symbols,
+ one per line. The path can be absolute, or relative to the kernel
+ source tree.
+
endif # MODULES
config MODULES_TREE_LOOKUP
@@ -2212,6 +2361,9 @@
source "kernel/Kconfig.locks"
+config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ bool
+
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
bool
diff --git a/init/Makefile b/init/Makefile
index 6246a06..6bc37f6 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -18,7 +18,6 @@
mounts-y := do_mounts.o
mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o
mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o
-mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o
# dependencies on generated files need to be listed explicitly
$(obj)/version.o: include/generated/compile.h
@@ -35,4 +34,4 @@
@$($(quiet)chk_compile.h)
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" \
- "$(CONFIG_PREEMPT_RT)" "$(CC) $(KBUILD_CFLAGS)"
+ "$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 9634ecf..b5f9604 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -23,12 +23,11 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <linux/raid/detect.h>
#include <uapi/linux/mount.h>
#include "do_mounts.h"
-int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
-
int root_mountflags = MS_RDONLY | MS_SILENT;
static char * __initdata root_device_name;
static char __initdata saved_root_name[64];
@@ -38,7 +37,7 @@
static int __init load_ramdisk(char *str)
{
- rd_doload = simple_strtol(str,NULL,0) & 3;
+ pr_warn("ignoring the deprecated load_ramdisk= option\n");
return 1;
}
__setup("load_ramdisk=", load_ramdisk);
@@ -212,6 +211,7 @@
* a colon.
* 9) PARTLABEL=<name> with name being the GPT partition label.
* MSDOS partitions do not support labels!
+ * 10) /dev/cifs represents Root_CIFS (0xfe)
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -268,6 +268,9 @@
res = Root_NFS;
if (strcmp(name, "nfs") == 0)
goto done;
+ res = Root_CIFS;
+ if (strcmp(name, "cifs") == 0)
+ goto done;
res = Root_RAM0;
if (strcmp(name, "ram") == 0)
goto done;
@@ -383,14 +386,29 @@
*s = '\0';
}
-static int __init do_mount_root(char *name, char *fs, int flags, void *data)
+static int __init do_mount_root(const char *name, const char *fs,
+ const int flags, const void *data)
{
struct super_block *s;
- int err = ksys_mount(name, "/root", fs, flags, data);
- if (err)
- return err;
+ struct page *p = NULL;
+ char *data_page = NULL;
+ int ret;
- ksys_chdir("/root");
+ if (data) {
+ /* init_mount() requires a full page as fifth argument */
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ data_page = page_address(p);
+ /* zero-pad. init_mount() will make sure it's terminated */
+ strncpy(data_page, data, PAGE_SIZE);
+ }
+
+ ret = init_mount(name, "/root", fs, flags, data_page);
+ if (ret)
+ goto out;
+
+ init_chdir("/root");
s = current->fs->pwd.dentry->d_sb;
ROOT_DEV = s->s_dev;
printk(KERN_INFO
@@ -398,7 +416,11 @@
s->s_type->name,
sb_rdonly(s) ? " readonly" : "",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
- return 0;
+
+out:
+ if (p)
+ put_page(p);
+ return ret;
}
void __init mount_block_root(char *name, int flags)
@@ -406,12 +428,10 @@
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
char *p;
-#ifdef CONFIG_BLOCK
char b[BDEVNAME_SIZE];
-#else
- const char *b = name;
-#endif
+ scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
get_fs_names(fs_names);
retry:
for (p = fs_names; *p; p += strlen(p)+1) {
@@ -428,9 +448,6 @@
* and bad superblock on root device.
* and give them a list of the available devices
*/
-#ifdef CONFIG_BLOCK
- __bdevname(ROOT_DEV, b);
-#endif
printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
root_device_name, b, err);
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
@@ -453,9 +470,6 @@
for (p = fs_names; *p; p += strlen(p)+1)
printk(" %s", p);
printk("\n");
-#ifdef CONFIG_BLOCK
- __bdevname(ROOT_DEV, b);
-#endif
panic("VFS: Unable to mount root fs on %s", b);
out:
put_page(page);
@@ -501,33 +515,39 @@
}
#endif
-#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
-void __init change_floppy(char *fmt, ...)
+#ifdef CONFIG_CIFS_ROOT
+
+extern int cifs_root_data(char **dev, char **opts);
+
+#define CIFSROOT_TIMEOUT_MIN 5
+#define CIFSROOT_TIMEOUT_MAX 30
+#define CIFSROOT_RETRY_MAX 5
+
+static int __init mount_cifs_root(void)
{
- struct termios termios;
- char buf[80];
- char c;
- int fd;
- va_list args;
- va_start(args, fmt);
- vsprintf(buf, fmt, args);
- va_end(args);
- fd = ksys_open("/dev/root", O_RDWR | O_NDELAY, 0);
- if (fd >= 0) {
- ksys_ioctl(fd, FDEJECT, 0);
- ksys_close(fd);
+ char *root_dev, *root_data;
+ unsigned int timeout;
+ int try, err;
+
+ err = cifs_root_data(&root_dev, &root_data);
+ if (err != 0)
+ return 0;
+
+ timeout = CIFSROOT_TIMEOUT_MIN;
+ for (try = 1; ; try++) {
+ err = do_mount_root(root_dev, "cifs", root_mountflags,
+ root_data);
+ if (err == 0)
+ return 1;
+ if (try > CIFSROOT_RETRY_MAX)
+ break;
+
+ ssleep(timeout);
+ timeout <<= 1;
+ if (timeout > CIFSROOT_TIMEOUT_MAX)
+ timeout = CIFSROOT_TIMEOUT_MAX;
}
- printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf);
- fd = ksys_open("/dev/console", O_RDWR, 0);
- if (fd >= 0) {
- ksys_ioctl(fd, TCGETS, (long)&termios);
- termios.c_lflag &= ~ICANON;
- ksys_ioctl(fd, TCSETSF, (long)&termios);
- ksys_read(fd, &c, 1);
- termios.c_lflag |= ICANON;
- ksys_ioctl(fd, TCSETSF, (long)&termios);
- ksys_close(fd);
- }
+ return 0;
}
#endif
@@ -535,23 +555,16 @@
{
#ifdef CONFIG_ROOT_NFS
if (ROOT_DEV == Root_NFS) {
- if (mount_nfs_root())
- return;
-
- printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
- ROOT_DEV = Root_FD0;
+ if (!mount_nfs_root())
+ printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n");
+ return;
}
#endif
-#ifdef CONFIG_BLK_DEV_FD
- if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
- /* rd_doload is 2 for a dual initrd/ramload setup */
- if (rd_doload==2) {
- if (rd_load_disk(1)) {
- ROOT_DEV = Root_RAM1;
- root_device_name = NULL;
- }
- } else
- change_floppy("root floppy");
+#ifdef CONFIG_CIFS_ROOT
+ if (ROOT_DEV == Root_CIFS) {
+ if (!mount_cifs_root())
+ printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n");
+ return;
}
#endif
#ifdef CONFIG_BLOCK
@@ -570,8 +583,6 @@
*/
void __init prepare_namespace(void)
{
- int is_floppy;
-
if (root_delay) {
printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
@@ -614,16 +625,11 @@
async_synchronize_full();
}
- is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
-
- if (is_floppy && rd_doload && rd_load_disk(0))
- ROOT_DEV = Root_RAM0;
-
mount_root();
out:
- devtmpfs_mount("dev");
- ksys_mount(".", "/", NULL, MS_MOVE, NULL);
- ksys_chroot(".");
+ devtmpfs_mount();
+ init_mount(".", "/", NULL, MS_MOVE, NULL);
+ init_chroot(".");
}
static bool is_tmpfs;
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 0bb0806..7a29ac3 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -8,26 +8,16 @@
#include <linux/mount.h>
#include <linux/major.h>
#include <linux/root_dev.h>
+#include <linux/init_syscalls.h>
-void change_floppy(char *fmt, ...);
void mount_block_root(char *name, int flags);
void mount_root(void);
extern int root_mountflags;
-static inline int create_dev(char *name, dev_t dev)
+static inline __init int create_dev(char *name, dev_t dev)
{
- ksys_unlink(name);
- return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
-}
-
-static inline u32 bstat(char *name)
-{
- struct kstat stat;
- if (vfs_stat(name, &stat) != 0)
- return 0;
- if (!S_ISBLK(stat.mode))
- return 0;
- return stat.rdev;
+ init_unlink(name);
+ return init_mknod(name, S_IFBLK | 0600, new_encode_dev(dev));
}
#ifdef CONFIG_BLK_DEV_RAM
@@ -51,13 +41,3 @@
static inline bool initrd_load(void) { return false; }
#endif
-
-#ifdef CONFIG_BLK_DEV_MD
-
-void md_run_setup(void);
-
-#else
-
-static inline void md_run_setup(void) {}
-
-#endif
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a9c6cc5..533d81e 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -28,7 +28,7 @@
__setup("noinitrd", no_initrd);
-static int __init early_initrd(char *p)
+static int __init early_initrdmem(char *p)
{
phys_addr_t start;
unsigned long size;
@@ -43,19 +43,22 @@
}
return 0;
}
+early_param("initrdmem", early_initrdmem);
+
+static int __init early_initrd(char *p)
+{
+ return early_initrdmem(p);
+}
early_param("initrd", early_initrd);
-static int init_linuxrc(struct subprocess_info *info, struct cred *new)
+static int __init init_linuxrc(struct subprocess_info *info, struct cred *new)
{
ksys_unshare(CLONE_FS | CLONE_FILES);
- /* stdin/stdout/stderr for /linuxrc */
- ksys_open("/dev/console", O_RDWR, 0);
- ksys_dup(0);
- ksys_dup(0);
+ console_on_rootfs();
/* move initrd over / and chdir/chroot in initrd root */
- ksys_chdir("/root");
- ksys_mount(".", "/", NULL, MS_MOVE, NULL);
- ksys_chroot(".");
+ init_chdir("/root");
+ init_mount(".", "/", NULL, MS_MOVE, NULL);
+ init_chroot(".");
ksys_setsid();
return 0;
}
@@ -67,12 +70,14 @@
extern char *envp_init[];
int error;
+ pr_warn("using deprecated initrd support, will be removed in 2021.\n");
+
real_root_dev = new_encode_dev(ROOT_DEV);
create_dev("/dev/root.old", Root_RAM0);
/* mount initrd on rootfs' /root */
mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
- ksys_mkdir("/old", 0700);
- ksys_chdir("/old");
+ init_mkdir("/old", 0700);
+ init_chdir("/old");
/*
* In case that a resume from disk is carried out by linuxrc or one of
@@ -89,39 +94,30 @@
current->flags &= ~PF_FREEZER_SKIP;
/* move initrd to rootfs' /old */
- ksys_mount("..", ".", NULL, MS_MOVE, NULL);
+ init_mount("..", ".", NULL, MS_MOVE, NULL);
/* switch root and cwd back to / of rootfs */
- ksys_chroot("..");
+ init_chroot("..");
if (new_decode_dev(real_root_dev) == Root_RAM0) {
- ksys_chdir("/old");
+ init_chdir("/old");
return;
}
- ksys_chdir("/");
+ init_chdir("/");
ROOT_DEV = new_decode_dev(real_root_dev);
mount_root();
printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
- error = ksys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
+ error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
if (!error)
printk("okay\n");
else {
- int fd = ksys_open("/dev/root.old", O_RDWR, 0);
if (error == -ENOENT)
printk("/initrd does not exist. Ignored.\n");
else
printk("failed\n");
printk(KERN_NOTICE "Unmounting old root\n");
- ksys_umount("/old", MNT_DETACH);
- printk(KERN_NOTICE "Trying to free ramdisk memory ... ");
- if (fd < 0) {
- error = fd;
- } else {
- error = ksys_ioctl(fd, BLKFLSBUF, 0);
- ksys_close(fd);
- }
- printk(!error ? "okay\n" : "failed\n");
+ init_umount("/old", MNT_DETACH);
}
}
@@ -136,11 +132,11 @@
* mounted in the normal path.
*/
if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
- ksys_unlink("/initrd.image");
+ init_unlink("/initrd.image");
handle_initrd();
return true;
}
}
- ksys_unlink("/initrd.image");
+ init_unlink("/initrd.image");
return false;
}
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
deleted file mode 100644
index b840315..0000000
--- a/init/do_mounts_md.c
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/delay.h>
-#include <linux/raid/md_u.h>
-#include <linux/raid/md_p.h>
-
-#include "do_mounts.h"
-
-/*
- * When md (and any require personalities) are compiled into the kernel
- * (not a module), arrays can be assembles are boot time using with AUTODETECT
- * where specially marked partitions are registered with md_autodetect_dev(),
- * and with MD_BOOT where devices to be collected are given on the boot line
- * with md=.....
- * The code for that is here.
- */
-
-#ifdef CONFIG_MD_AUTODETECT
-static int __initdata raid_noautodetect;
-#else
-static int __initdata raid_noautodetect=1;
-#endif
-static int __initdata raid_autopart;
-
-static struct {
- int minor;
- int partitioned;
- int level;
- int chunk;
- char *device_names;
-} md_setup_args[256] __initdata;
-
-static int md_setup_ents __initdata;
-
-/*
- * Parse the command-line parameters given our kernel, but do not
- * actually try to invoke the MD device now; that is handled by
- * md_setup_drive after the low-level disk drivers have initialised.
- *
- * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
- * assigns the task of parsing integer arguments to the
- * invoked program now). Added ability to initialise all
- * the MD devices (by specifying multiple "md=" lines)
- * instead of just one. -- KTK
- * 18May2000: Added support for persistent-superblock arrays:
- * md=n,0,factor,fault,device-list uses RAID0 for device n
- * md=n,-1,factor,fault,device-list uses LINEAR for device n
- * md=n,device-list reads a RAID superblock from the devices
- * elements in device-list are read by name_to_kdev_t so can be
- * a hex number or something like /dev/hda1 /dev/sdb
- * 2001-06-03: Dave Cinege <dcinege@psychosis.com>
- * Shifted name_to_kdev_t() and related operations to md_set_drive()
- * for later execution. Rewrote section to make devfs compatible.
- */
-static int __init md_setup(char *str)
-{
- int minor, level, factor, fault, partitioned = 0;
- char *pername = "";
- char *str1;
- int ent;
-
- if (*str == 'd') {
- partitioned = 1;
- str++;
- }
- if (get_option(&str, &minor) != 2) { /* MD Number */
- printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
- return 0;
- }
- str1 = str;
- for (ent=0 ; ent< md_setup_ents ; ent++)
- if (md_setup_args[ent].minor == minor &&
- md_setup_args[ent].partitioned == partitioned) {
- printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
- "Replacing previous definition.\n", partitioned?"d":"", minor);
- break;
- }
- if (ent >= ARRAY_SIZE(md_setup_args)) {
- printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor);
- return 0;
- }
- if (ent >= md_setup_ents)
- md_setup_ents++;
- switch (get_option(&str, &level)) { /* RAID level */
- case 2: /* could be 0 or -1.. */
- if (level == 0 || level == LEVEL_LINEAR) {
- if (get_option(&str, &factor) != 2 || /* Chunk Size */
- get_option(&str, &fault) != 2) {
- printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
- return 0;
- }
- md_setup_args[ent].level = level;
- md_setup_args[ent].chunk = 1 << (factor+12);
- if (level == LEVEL_LINEAR)
- pername = "linear";
- else
- pername = "raid0";
- break;
- }
- /* FALL THROUGH */
- case 1: /* the first device is numeric */
- str = str1;
- /* FALL THROUGH */
- case 0:
- md_setup_args[ent].level = LEVEL_NONE;
- pername="super-block";
- }
-
- printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
- minor, pername, str);
- md_setup_args[ent].device_names = str;
- md_setup_args[ent].partitioned = partitioned;
- md_setup_args[ent].minor = minor;
-
- return 1;
-}
-
-static void __init md_setup_drive(void)
-{
- int minor, i, ent, partitioned;
- dev_t dev;
- dev_t devices[MD_SB_DISKS+1];
-
- for (ent = 0; ent < md_setup_ents ; ent++) {
- int fd;
- int err = 0;
- char *devname;
- mdu_disk_info_t dinfo;
- char name[16];
-
- minor = md_setup_args[ent].minor;
- partitioned = md_setup_args[ent].partitioned;
- devname = md_setup_args[ent].device_names;
-
- sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
- if (partitioned)
- dev = MKDEV(mdp_major, minor << MdpMinorShift);
- else
- dev = MKDEV(MD_MAJOR, minor);
- create_dev(name, dev);
- for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
- char *p;
- char comp_name[64];
- u32 rdev;
-
- p = strchr(devname, ',');
- if (p)
- *p++ = 0;
-
- dev = name_to_dev_t(devname);
- if (strncmp(devname, "/dev/", 5) == 0)
- devname += 5;
- snprintf(comp_name, 63, "/dev/%s", devname);
- rdev = bstat(comp_name);
- if (rdev)
- dev = new_decode_dev(rdev);
- if (!dev) {
- printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
- break;
- }
-
- devices[i] = dev;
-
- devname = p;
- }
- devices[i] = 0;
-
- if (!i)
- continue;
-
- printk(KERN_INFO "md: Loading md%s%d: %s\n",
- partitioned ? "_d" : "", minor,
- md_setup_args[ent].device_names);
-
- fd = ksys_open(name, 0, 0);
- if (fd < 0) {
- printk(KERN_ERR "md: open failed - cannot start "
- "array %s\n", name);
- continue;
- }
- if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
- printk(KERN_WARNING
- "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
- minor);
- ksys_close(fd);
- continue;
- }
-
- if (md_setup_args[ent].level != LEVEL_NONE) {
- /* non-persistent */
- mdu_array_info_t ainfo;
- ainfo.level = md_setup_args[ent].level;
- ainfo.size = 0;
- ainfo.nr_disks =0;
- ainfo.raid_disks =0;
- while (devices[ainfo.raid_disks])
- ainfo.raid_disks++;
- ainfo.md_minor =minor;
- ainfo.not_persistent = 1;
-
- ainfo.state = (1 << MD_SB_CLEAN);
- ainfo.layout = 0;
- ainfo.chunk_size = md_setup_args[ent].chunk;
- err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
- for (i = 0; !err && i <= MD_SB_DISKS; i++) {
- dev = devices[i];
- if (!dev)
- break;
- dinfo.number = i;
- dinfo.raid_disk = i;
- dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
- dinfo.major = MAJOR(dev);
- dinfo.minor = MINOR(dev);
- err = ksys_ioctl(fd, ADD_NEW_DISK,
- (long)&dinfo);
- }
- } else {
- /* persistent */
- for (i = 0; i <= MD_SB_DISKS; i++) {
- dev = devices[i];
- if (!dev)
- break;
- dinfo.major = MAJOR(dev);
- dinfo.minor = MINOR(dev);
- ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
- }
- }
- if (!err)
- err = ksys_ioctl(fd, RUN_ARRAY, 0);
- if (err)
- printk(KERN_WARNING "md: starting md%d failed\n", minor);
- else {
- /* reread the partition table.
- * I (neilb) and not sure why this is needed, but I cannot
- * boot a kernel with devfs compiled in from partitioned md
- * array without it
- */
- ksys_close(fd);
- fd = ksys_open(name, 0, 0);
- ksys_ioctl(fd, BLKRRPART, 0);
- }
- ksys_close(fd);
- }
-}
-
-static int __init raid_setup(char *str)
-{
- int len, pos;
-
- len = strlen(str) + 1;
- pos = 0;
-
- while (pos < len) {
- char *comma = strchr(str+pos, ',');
- int wlen;
- if (comma)
- wlen = (comma-str)-pos;
- else wlen = (len-1)-pos;
-
- if (!strncmp(str, "noautodetect", wlen))
- raid_noautodetect = 1;
- if (!strncmp(str, "autodetect", wlen))
- raid_noautodetect = 0;
- if (strncmp(str, "partitionable", wlen)==0)
- raid_autopart = 1;
- if (strncmp(str, "part", wlen)==0)
- raid_autopart = 1;
- pos += wlen+1;
- }
- return 1;
-}
-
-__setup("raid=", raid_setup);
-__setup("md=", md_setup);
-
-static void __init autodetect_raid(void)
-{
- int fd;
-
- /*
- * Since we don't want to detect and use half a raid array, we need to
- * wait for the known devices to complete their probing
- */
- printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n");
- printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");
-
- wait_for_device_probe();
-
- fd = ksys_open("/dev/md0", 0, 0);
- if (fd >= 0) {
- ksys_ioctl(fd, RAID_AUTORUN, raid_autopart);
- ksys_close(fd);
- }
-}
-
-void __init md_run_setup(void)
-{
- create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
-
- if (raid_noautodetect)
- printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
- else
- autodetect_raid();
- md_setup_drive();
-}
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 32fb049..ac021ae 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -14,12 +14,12 @@
#include <linux/decompress/generic.h>
-
-int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
+static struct file *in_file, *out_file;
+static loff_t in_pos, out_pos;
static int __init prompt_ramdisk(char *str)
{
- rd_prompt = simple_strtol(str,NULL,0) & 1;
+ pr_warn("ignoring the deprecated prompt_ramdisk= option\n");
return 1;
}
__setup("prompt_ramdisk=", prompt_ramdisk);
@@ -33,7 +33,7 @@
}
__setup("ramdisk_start=", ramdisk_start_setup);
-static int __init crd_load(int in_fd, int out_fd, decompress_fn deco);
+static int __init crd_load(decompress_fn deco);
/*
* This routine tries to find a RAM disk image to load, and returns the
@@ -55,7 +55,8 @@
* lz4
*/
static int __init
-identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
+identify_ramdisk_image(struct file *file, loff_t pos,
+ decompress_fn *decompressor)
{
const int size = 512;
struct minix_super_block *minixsb;
@@ -66,6 +67,7 @@
unsigned char *buf;
const char *compress_name;
unsigned long n;
+ int start_block = rd_image_start;
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
@@ -80,8 +82,8 @@
/*
* Read block 0 to test for compressed kernel
*/
- ksys_lseek(fd, start_block * BLOCK_SIZE, 0);
- ksys_read(fd, buf, size);
+ pos = start_block * BLOCK_SIZE;
+ kernel_read(file, buf, size, &pos);
*decompressor = decompress_method(buf, size, &compress_name);
if (compress_name) {
@@ -126,8 +128,8 @@
/*
* Read 512 bytes further to check if cramfs is padded
*/
- ksys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
- ksys_read(fd, buf, size);
+ pos = start_block * BLOCK_SIZE + 0x200;
+ kernel_read(file, buf, size, &pos);
if (cramfsb->magic == CRAMFS_MAGIC) {
printk(KERN_NOTICE
@@ -140,8 +142,8 @@
/*
* Read block 1 to test for minix and ext2 superblock
*/
- ksys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
- ksys_read(fd, buf, size);
+ pos = (start_block + 1) * BLOCK_SIZE;
+ kernel_read(file, buf, size, &pos);
/* Try minix */
if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
@@ -168,17 +170,24 @@
start_block);
done:
- ksys_lseek(fd, start_block * BLOCK_SIZE, 0);
kfree(buf);
return nblocks;
}
+static unsigned long nr_blocks(struct file *file)
+{
+ struct inode *inode = file->f_mapping->host;
+
+ if (!S_ISBLK(inode->i_mode))
+ return 0;
+ return i_size_read(inode) >> 10;
+}
+
int __init rd_load_image(char *from)
{
int res = 0;
- int in_fd, out_fd;
unsigned long rd_blocks, devblocks;
- int nblocks, i, disk;
+ int nblocks, i;
char *buf = NULL;
unsigned short rotate = 0;
decompress_fn decompressor = NULL;
@@ -186,20 +195,21 @@
char rotator[4] = { '|' , '/' , '-' , '\\' };
#endif
- out_fd = ksys_open("/dev/ram", O_RDWR, 0);
- if (out_fd < 0)
+ out_file = filp_open("/dev/ram", O_RDWR, 0);
+ if (IS_ERR(out_file))
goto out;
- in_fd = ksys_open(from, O_RDONLY, 0);
- if (in_fd < 0)
+ in_file = filp_open(from, O_RDONLY, 0);
+ if (IS_ERR(in_file))
goto noclose_input;
- nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor);
+ in_pos = rd_image_start * BLOCK_SIZE;
+ nblocks = identify_ramdisk_image(in_file, in_pos, &decompressor);
if (nblocks < 0)
goto done;
if (nblocks == 0) {
- if (crd_load(in_fd, out_fd, decompressor) == 0)
+ if (crd_load(decompressor) == 0)
goto successful_load;
goto done;
}
@@ -208,11 +218,7 @@
* NOTE NOTE: nblocks is not actually blocks but
* the number of kibibytes of data to load into a ramdisk.
*/
- if (ksys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0)
- rd_blocks = 0;
- else
- rd_blocks >>= 1;
-
+ rd_blocks = nr_blocks(out_file);
if (nblocks > rd_blocks) {
printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n",
nblocks, rd_blocks);
@@ -222,13 +228,10 @@
/*
* OK, time to copy in the data
*/
- if (ksys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0)
- devblocks = 0;
- else
- devblocks >>= 1;
-
if (strcmp(from, "/initrd.image") == 0)
devblocks = nblocks;
+ else
+ devblocks = nr_blocks(in_file);
if (devblocks == 0) {
printk(KERN_ERR "RAMDISK: could not determine device size\n");
@@ -243,24 +246,15 @@
printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
- for (i = 0, disk = 1; i < nblocks; i++) {
+ for (i = 0; i < nblocks; i++) {
if (i && (i % devblocks == 0)) {
- pr_cont("done disk #%d.\n", disk++);
+ pr_cont("done disk #1.\n");
rotate = 0;
- if (ksys_close(in_fd)) {
- printk("Error closing the disk.\n");
- goto noclose_input;
- }
- change_floppy("disk #%d", disk);
- in_fd = ksys_open(from, O_RDONLY, 0);
- if (in_fd < 0) {
- printk("Error opening disk.\n");
- goto noclose_input;
- }
- printk("Loading disk #%d... ", disk);
+ fput(in_file);
+ break;
}
- ksys_read(in_fd, buf, BLOCK_SIZE);
- ksys_write(out_fd, buf, BLOCK_SIZE);
+ kernel_read(in_file, buf, BLOCK_SIZE, &in_pos);
+ kernel_write(out_file, buf, BLOCK_SIZE, &out_pos);
#if !defined(CONFIG_S390)
if (!(i % 16)) {
pr_cont("%c\b", rotator[rotate & 0x3]);
@@ -273,19 +267,17 @@
successful_load:
res = 1;
done:
- ksys_close(in_fd);
+ fput(in_file);
noclose_input:
- ksys_close(out_fd);
+ fput(out_file);
out:
kfree(buf);
- ksys_unlink("/dev/ram");
+ init_unlink("/dev/ram");
return res;
}
int __init rd_load_disk(int n)
{
- if (rd_prompt)
- change_floppy("root floppy disk to be loaded into RAM disk");
create_dev("/dev/root", ROOT_DEV);
create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n));
return rd_load_image("/dev/root");
@@ -293,11 +285,10 @@
static int exit_code;
static int decompress_error;
-static int crd_infd, crd_outfd;
static long __init compr_fill(void *buf, unsigned long len)
{
- long r = ksys_read(crd_infd, buf, len);
+ long r = kernel_read(in_file, buf, len, &in_pos);
if (r < 0)
printk(KERN_ERR "RAMDISK: error while reading compressed data");
else if (r == 0)
@@ -307,7 +298,7 @@
static long __init compr_flush(void *window, unsigned long outcnt)
{
- long written = ksys_write(crd_outfd, window, outcnt);
+ long written = kernel_write(out_file, window, outcnt, &out_pos);
if (written != outcnt) {
if (decompress_error == 0)
printk(KERN_ERR
@@ -326,11 +317,9 @@
decompress_error = 1;
}
-static int __init crd_load(int in_fd, int out_fd, decompress_fn deco)
+static int __init crd_load(decompress_fn deco)
{
int result;
- crd_infd = in_fd;
- crd_outfd = out_fd;
if (!deco) {
pr_emerg("Invalid ramdisk decompression routine. "
diff --git a/init/init_task.c b/init/init_task.c
index 5d8359c..5fa18ed 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,8 +11,8 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>
-#include <asm/pgtable.h>
#include <linux/uaccess.h>
static struct signal_struct init_signals = {
@@ -50,6 +50,13 @@
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
+ __init_task_data = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Set up the first task table, touch at your own risk!. Base=0,
* limit=0x1fffff (=2MB)
@@ -58,6 +65,7 @@
#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
__init_task_data
#endif
+ __aligned(L1_CACHE_BYTES)
= {
#ifdef CONFIG_THREAD_INFO_IN_TASK
.thread_info = INIT_THREAD_INFO(init_task),
@@ -106,6 +114,9 @@
.thread = INIT_THREAD,
.fs = &init_fs,
.files = &init_files,
+#ifdef CONFIG_IO_URING
+ .io_uring = NULL,
+#endif
.signal = &init_signals,
.sighand = &init_sighand,
.nsproxy = &init_nsproxy,
@@ -141,8 +152,14 @@
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
#endif
+#ifdef CONFIG_TASKS_TRACE_RCU
+ .trc_reader_nesting = 0,
+ .trc_reader_special.s = 0,
+ .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
+#endif
#ifdef CONFIG_CPUSETS
- .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
+ .mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
+ &init_task.alloc_lock),
#endif
#ifdef CONFIG_RT_MUTEXES
.pi_waiters = RB_ROOT_CACHED,
@@ -162,6 +179,16 @@
#ifdef CONFIG_KASAN
.kasan_depth = 1,
#endif
+#ifdef CONFIG_KCSAN
+ .kcsan_ctx = {
+ .disable_count = 0,
+ .atomic_next = 0,
+ .atomic_nest_count = 0,
+ .in_flat_atomic = false,
+ .access_mask = 0,
+ .scoped_accesses = {LIST_POISON1, NULL},
+ },
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
.softirqs_enabled = 1,
#endif
@@ -183,6 +210,9 @@
#ifdef CONFIG_SECURITY
.security = NULL,
#endif
+#ifdef CONFIG_SECCOMP_FILTER
+ .seccomp = { .filter_count = ATOMIC_INIT(0) },
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/init/initramfs.c b/init/initramfs.c
index 00a3279..55b74d7 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -10,14 +10,18 @@
#include <linux/syscalls.h>
#include <linux/utime.h>
#include <linux/file.h>
+#include <linux/memblock.h>
+#include <linux/namei.h>
+#include <linux/init_syscalls.h>
-static ssize_t __init xwrite(int fd, const char *p, size_t count)
+static ssize_t __init xwrite(struct file *file, const char *p, size_t count,
+ loff_t *pos)
{
ssize_t out = 0;
/* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
while (count) {
- ssize_t rv = ksys_write(fd, p, count);
+ ssize_t rv = kernel_write(file, p, count, pos);
if (rv < 0) {
if (rv == -EINTR || rv == -EAGAIN)
@@ -107,8 +111,7 @@
t[0].tv_nsec = 0;
t[1].tv_sec = mtime;
t[1].tv_nsec = 0;
-
- return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW);
+ return init_utimes(filename, t);
}
static __initdata LIST_HEAD(dir_list);
@@ -199,7 +202,6 @@
byte_count -= n;
}
-static __initdata char *vcollected;
static __initdata char *collected;
static long remains __initdata;
static __initdata char *collect;
@@ -295,11 +297,12 @@
{
struct kstat st;
- if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) {
+ if (!init_stat(path, &st, AT_SYMLINK_NOFOLLOW) &&
+ (st.mode ^ fmode) & S_IFMT) {
if (S_ISDIR(st.mode))
- ksys_rmdir(path);
+ init_rmdir(path);
else
- ksys_unlink(path);
+ init_unlink(path);
}
}
@@ -309,13 +312,14 @@
char *old = find_link(major, minor, ino, mode, collected);
if (old) {
clean_path(collected, 0);
- return (ksys_link(old, collected) < 0) ? -1 : 1;
+ return (init_link(old, collected) < 0) ? -1 : 1;
}
}
return 0;
}
-static __initdata int wfd;
+static __initdata struct file *wfile;
+static __initdata loff_t wfile_pos;
static int __init do_name(void)
{
@@ -332,28 +336,28 @@
int openflags = O_WRONLY|O_CREAT;
if (ml != 1)
openflags |= O_TRUNC;
- wfd = ksys_open(collected, openflags, mode);
+ wfile = filp_open(collected, openflags, mode);
+ if (IS_ERR(wfile))
+ return 0;
+ wfile_pos = 0;
- if (wfd >= 0) {
- ksys_fchown(wfd, uid, gid);
- ksys_fchmod(wfd, mode);
- if (body_len)
- ksys_ftruncate(wfd, body_len);
- vcollected = kstrdup(collected, GFP_KERNEL);
- state = CopyFile;
- }
+ vfs_fchown(wfile, uid, gid);
+ vfs_fchmod(wfile, mode);
+ if (body_len)
+ vfs_truncate(&wfile->f_path, body_len);
+ state = CopyFile;
}
} else if (S_ISDIR(mode)) {
- ksys_mkdir(collected, mode);
- ksys_chown(collected, uid, gid);
- ksys_chmod(collected, mode);
+ init_mkdir(collected, mode);
+ init_chown(collected, uid, gid, 0);
+ init_chmod(collected, mode);
dir_add(collected, mtime);
} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
S_ISFIFO(mode) || S_ISSOCK(mode)) {
if (maybe_link() == 0) {
- ksys_mknod(collected, mode, rdev);
- ksys_chown(collected, uid, gid);
- ksys_chmod(collected, mode);
+ init_mknod(collected, mode, rdev);
+ init_chown(collected, uid, gid, 0);
+ init_chmod(collected, mode);
do_utime(collected, mtime);
}
}
@@ -363,16 +367,20 @@
static int __init do_copy(void)
{
if (byte_count >= body_len) {
- if (xwrite(wfd, victim, body_len) != body_len)
+ struct timespec64 t[2] = { };
+ if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
error("write error");
- ksys_close(wfd);
- do_utime(vcollected, mtime);
- kfree(vcollected);
+
+ t[0].tv_sec = mtime;
+ t[1].tv_sec = mtime;
+ vfs_utimes(&wfile->f_path, t);
+
+ fput(wfile);
eat(body_len);
state = SkipIt;
return 0;
} else {
- if (xwrite(wfd, victim, byte_count) != byte_count)
+ if (xwrite(wfile, victim, byte_count, &wfile_pos) != byte_count)
error("write error");
body_len -= byte_count;
eat(byte_count);
@@ -384,8 +392,8 @@
{
collected[N_ALIGN(name_len) + body_len] = '\0';
clean_path(collected, 0);
- ksys_symlink(collected + N_ALIGN(name_len), collected);
- ksys_lchown(collected, uid, gid);
+ init_symlink(collected + N_ALIGN(name_len), collected);
+ init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW);
do_utime(collected, mtime);
state = SkipIt;
next_state = Reset;
@@ -529,6 +537,13 @@
void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
{
+#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
+ unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
+ unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
+
+ memblock_free(__pa(aligned_start), aligned_end - aligned_start);
+#endif
+
free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
"initrd");
}
@@ -564,82 +579,26 @@
#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_BLK_DEV_RAM
-#define BUF_SIZE 1024
-static void __init clean_rootfs(void)
-{
- int fd;
- void *buf;
- struct linux_dirent64 *dirp;
- int num;
-
- fd = ksys_open("/", O_RDONLY, 0);
- WARN_ON(fd < 0);
- if (fd < 0)
- return;
- buf = kzalloc(BUF_SIZE, GFP_KERNEL);
- WARN_ON(!buf);
- if (!buf) {
- ksys_close(fd);
- return;
- }
-
- dirp = buf;
- num = ksys_getdents64(fd, dirp, BUF_SIZE);
- while (num > 0) {
- while (num > 0) {
- struct kstat st;
- int ret;
-
- ret = vfs_lstat(dirp->d_name, &st);
- WARN_ON_ONCE(ret);
- if (!ret) {
- if (S_ISDIR(st.mode))
- ksys_rmdir(dirp->d_name);
- else
- ksys_unlink(dirp->d_name);
- }
-
- num -= dirp->d_reclen;
- dirp = (void *)dirp + dirp->d_reclen;
- }
- dirp = buf;
- memset(buf, 0, BUF_SIZE);
- num = ksys_getdents64(fd, dirp, BUF_SIZE);
- }
-
- ksys_close(fd);
- kfree(buf);
-}
-#else
-static inline void clean_rootfs(void)
-{
-}
-#endif /* CONFIG_BLK_DEV_RAM */
-
-#ifdef CONFIG_BLK_DEV_RAM
static void __init populate_initrd_image(char *err)
{
ssize_t written;
- int fd;
+ struct file *file;
+ loff_t pos = 0;
unpack_to_rootfs(__initramfs_start, __initramfs_size);
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
- fd = ksys_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
- if (fd < 0)
+ file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ if (IS_ERR(file))
return;
- written = xwrite(fd, (char *)initrd_start, initrd_end - initrd_start);
+ written = xwrite(file, (char *)initrd_start, initrd_end - initrd_start,
+ &pos);
if (written != initrd_end - initrd_start)
pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
written, initrd_end - initrd_start);
- ksys_close(fd);
-}
-#else
-static void __init populate_initrd_image(char *err)
-{
- printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+ fput(file);
}
#endif /* CONFIG_BLK_DEV_RAM */
@@ -660,8 +619,11 @@
err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
if (err) {
- clean_rootfs();
+#ifdef CONFIG_BLK_DEV_RAM
populate_initrd_image(err);
+#else
+ printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+#endif
}
done:
diff --git a/init/main.c b/init/main.c
index e6a1fb1..4fe58ed 100644
--- a/init/main.c
+++ b/init/main.c
@@ -28,6 +28,7 @@
#include <linux/initrd.h>
#include <linux/memblock.h>
#include <linux/acpi.h>
+#include <linux/bootconfig.h>
#include <linux/console.h>
#include <linux/nmi.h>
#include <linux/percpu.h>
@@ -63,8 +64,9 @@
#include <linux/debugobjects.h>
#include <linux/lockdep.h>
#include <linux/kmemleak.h>
+#include <linux/padata.h>
#include <linux/pid_namespace.h>
-#include <linux/device.h>
+#include <linux/device/driver.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/sched/init.h>
@@ -94,6 +96,8 @@
#include <linux/rodata_test.h>
#include <linux/jump_label.h>
#include <linux/mem_encrypt.h>
+#include <linux/kcsan.h>
+#include <linux/init_syscalls.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -104,6 +108,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/initcall.h>
+#include <kunit/test.h>
+
static int kernel_init(void *);
extern void init_IRQ(void);
@@ -137,11 +143,22 @@
char *saved_command_line;
/* Command line for parameter parsing */
static char *static_command_line;
-/* Command line for per-initcall parameter parsing */
-static char *initcall_command_line;
+/* Untouched extra command line */
+static char *extra_command_line;
+/* Extra init arguments */
+static char *extra_init_args;
+
+#ifdef CONFIG_BOOT_CONFIG
+/* Is bootconfig on command line? */
+static bool bootconfig_found;
+static bool initargs_found;
+#else
+# define bootconfig_found false
+# define initargs_found false
+#endif
static char *execute_command;
-static char *ramdisk_execute_command;
+static char *ramdisk_execute_command = "/init";
/*
* Used to generate warnings if static_key manipulation functions are used
@@ -246,10 +263,234 @@
early_param("loglevel", loglevel);
-/* Change NUL term back to "=", to make "param" the whole string. */
-static int __init repair_env_string(char *param, char *val,
+#ifdef CONFIG_BLK_DEV_INITRD
+static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
+{
+ u32 size, csum;
+ char *data;
+ u32 *hdr;
+ int i;
+
+ if (!initrd_end)
+ return NULL;
+
+ data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+ /*
+ * Since Grub may align the size of initrd to 4, we must
+ * check the preceding 3 bytes as well.
+ */
+ for (i = 0; i < 4; i++) {
+ if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+ goto found;
+ data--;
+ }
+ return NULL;
+
+found:
+ hdr = (u32 *)(data - 8);
+ size = le32_to_cpu(hdr[0]);
+ csum = le32_to_cpu(hdr[1]);
+
+ data = ((void *)hdr) - size;
+ if ((unsigned long)data < initrd_start) {
+ pr_err("bootconfig size %d is greater than initrd size %ld\n",
+ size, initrd_end - initrd_start);
+ return NULL;
+ }
+
+ /* Remove bootconfig from initramfs/initrd */
+ initrd_end = (unsigned long)data;
+ if (_size)
+ *_size = size;
+ if (_csum)
+ *_csum = csum;
+
+ return data;
+}
+#else
+static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
+{
+ return NULL;
+}
+#endif
+
+#ifdef CONFIG_BOOT_CONFIG
+
+static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
+
+#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
+
+static int __init xbc_snprint_cmdline(char *buf, size_t size,
+ struct xbc_node *root)
+{
+ struct xbc_node *knode, *vnode;
+ char *end = buf + size;
+ const char *val;
+ int ret;
+
+ xbc_node_for_each_key_value(root, knode, val) {
+ ret = xbc_node_compose_key_after(root, knode,
+ xbc_namebuf, XBC_KEYLEN_MAX);
+ if (ret < 0)
+ return ret;
+
+ vnode = xbc_node_get_child(knode);
+ if (!vnode) {
+ ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ continue;
+ }
+ xbc_array_for_each_value(vnode, val) {
+ ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
+ xbc_namebuf, val);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ }
+ }
+
+ return buf - (end - size);
+}
+#undef rest
+
+/* Make an extra command line under given key word */
+static char * __init xbc_make_cmdline(const char *key)
+{
+ struct xbc_node *root;
+ char *new_cmdline;
+ int ret, len = 0;
+
+ root = xbc_find_node(key);
+ if (!root)
+ return NULL;
+
+ /* Count required buffer size */
+ len = xbc_snprint_cmdline(NULL, 0, root);
+ if (len <= 0)
+ return NULL;
+
+ new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES);
+ if (!new_cmdline) {
+ pr_err("Failed to allocate memory for extra kernel cmdline.\n");
+ return NULL;
+ }
+
+ ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
+ if (ret < 0 || ret > len) {
+ pr_err("Failed to print extra kernel cmdline.\n");
+ memblock_free(__pa(new_cmdline), len + 1);
+ return NULL;
+ }
+
+ return new_cmdline;
+}
+
+static u32 boot_config_checksum(unsigned char *p, u32 size)
+{
+ u32 ret = 0;
+
+ while (size--)
+ ret += *p++;
+
+ return ret;
+}
+
+static int __init bootconfig_params(char *param, char *val,
const char *unused, void *arg)
{
+ if (strcmp(param, "bootconfig") == 0) {
+ bootconfig_found = true;
+ }
+ return 0;
+}
+
+static void __init setup_boot_config(const char *cmdline)
+{
+ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
+ const char *msg;
+ int pos;
+ u32 size, csum;
+ char *data, *copy, *err;
+ int ret;
+
+ /* Cut out the bootconfig data even if we have no bootconfig option */
+ data = get_boot_config_from_initrd(&size, &csum);
+
+ strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
+ bootconfig_params);
+
+ if (IS_ERR(err) || !bootconfig_found)
+ return;
+
+ /* parse_args() stops at '--' and returns an address */
+ if (err)
+ initargs_found = true;
+
+ if (!data) {
+ pr_err("'bootconfig' found on command line, but no bootconfig found\n");
+ return;
+ }
+
+ if (size >= XBC_DATA_MAX) {
+ pr_err("bootconfig size %d greater than max size %d\n",
+ size, XBC_DATA_MAX);
+ return;
+ }
+
+ if (boot_config_checksum((unsigned char *)data, size) != csum) {
+ pr_err("bootconfig checksum failed\n");
+ return;
+ }
+
+ copy = memblock_alloc(size + 1, SMP_CACHE_BYTES);
+ if (!copy) {
+ pr_err("Failed to allocate memory for bootconfig\n");
+ return;
+ }
+
+ memcpy(copy, data, size);
+ copy[size] = '\0';
+
+ ret = xbc_init(copy, &msg, &pos);
+ if (ret < 0) {
+ if (pos < 0)
+ pr_err("Failed to init bootconfig: %s.\n", msg);
+ else
+ pr_err("Failed to parse bootconfig: %s at %d.\n",
+ msg, pos);
+ } else {
+ pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret);
+ /* keys starting with "kernel." are passed via cmdline */
+ extra_command_line = xbc_make_cmdline("kernel");
+ /* Also, "init." keys are init arguments */
+ extra_init_args = xbc_make_cmdline("init");
+ }
+ return;
+}
+
+#else
+
+static void __init setup_boot_config(const char *cmdline)
+{
+ /* Remove bootconfig data from initrd */
+ get_boot_config_from_initrd(NULL, NULL);
+}
+
+static int __init warn_bootconfig(char *str)
+{
+ pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
+ return 0;
+}
+early_param("bootconfig", warn_bootconfig);
+
+#endif
+
+/* Change NUL term back to "=", to make "param" the whole string. */
+static void __init repair_env_string(char *param, char *val)
+{
if (val) {
/* param=val or param="val"? */
if (val == param+strlen(param)+1)
@@ -257,11 +498,9 @@
else if (val == param+strlen(param)+2) {
val[-2] = '=';
memmove(val-1, val, strlen(val)+1);
- val--;
} else
BUG();
}
- return 0;
}
/* Anything after -- gets handed straight to init. */
@@ -273,7 +512,7 @@
if (panic_later)
return 0;
- repair_env_string(param, val, unused, NULL);
+ repair_env_string(param, val);
for (i = 0; argv_init[i]; i++) {
if (i == MAX_INIT_ARGS) {
@@ -293,14 +532,16 @@
static int __init unknown_bootoption(char *param, char *val,
const char *unused, void *arg)
{
- repair_env_string(param, val, unused, NULL);
+ size_t len = strlen(param);
+
+ repair_env_string(param, val);
/* Handle obsolete-style parameters */
if (obsolete_checksetup(param))
return 0;
/* Unused module parameter. */
- if (strchr(param, '.') && (!val || strchr(param, '.') < val))
+ if (strnchr(param, len, '.'))
return 0;
if (panic_later)
@@ -314,7 +555,7 @@
panic_later = "env";
panic_param = param;
}
- if (!strncmp(param, envp_init[i], val - param))
+ if (!strncmp(param, envp_init[i], len+1))
break;
}
envp_init[i] = param;
@@ -375,22 +616,51 @@
*/
static void __init setup_command_line(char *command_line)
{
- size_t len = strlen(boot_command_line) + 1;
+ size_t len, xlen = 0, ilen = 0;
- saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+ if (extra_command_line)
+ xlen = strlen(extra_command_line);
+ if (extra_init_args)
+ ilen = strlen(extra_init_args) + 4; /* for " -- " */
+
+ len = xlen + strlen(boot_command_line) + 1;
+
+ saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES);
if (!saved_command_line)
- panic("%s: Failed to allocate %zu bytes\n", __func__, len);
-
- initcall_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
- if (!initcall_command_line)
- panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
if (!static_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len);
- strcpy(saved_command_line, boot_command_line);
- strcpy(static_command_line, command_line);
+ if (xlen) {
+ /*
+ * We have to put extra_command_line before boot command
+ * lines because there could be dashes (separator of init
+ * command line) in the command lines.
+ */
+ strcpy(saved_command_line, extra_command_line);
+ strcpy(static_command_line, extra_command_line);
+ }
+ strcpy(saved_command_line + xlen, boot_command_line);
+ strcpy(static_command_line + xlen, command_line);
+
+ if (ilen) {
+ /*
+ * Append supplemental init boot args to saved_command_line
+ * so that user can check what command line options passed
+ * to init.
+ */
+ len = strlen(saved_command_line);
+ if (initargs_found) {
+ saved_command_line[len++] = ' ';
+ } else {
+ strcpy(saved_command_line + len, " -- ");
+ len += 4;
+ }
+
+ strcpy(saved_command_line + len, extra_init_args);
+ }
}
/*
@@ -526,14 +796,16 @@
{
const char *stack;
- if (IS_ENABLED(CONFIG_INIT_STACK_ALL))
- stack = "all";
+ if (IS_ENABLED(CONFIG_INIT_STACK_ALL_PATTERN))
+ stack = "all(pattern)";
+ else if (IS_ENABLED(CONFIG_INIT_STACK_ALL_ZERO))
+ stack = "all(zero)";
else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL))
- stack = "byref_all";
+ stack = "byref_all(zero)";
else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF))
- stack = "byref";
+ stack = "byref(zero)";
else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER))
- stack = "__user";
+ stack = "__user(zero)";
else
stack = "off";
@@ -574,7 +846,7 @@
rest_init();
}
-asmlinkage __visible void __init start_kernel(void)
+asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
{
char *command_line;
char *after_dashes;
@@ -597,6 +869,7 @@
pr_notice("%s", linux_banner);
early_security_init();
setup_arch(&command_line);
+ setup_boot_config(command_line);
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
@@ -606,7 +879,7 @@
build_all_zonelists(NULL);
page_alloc_init();
- pr_notice("Kernel command line: %s\n", boot_command_line);
+ pr_notice("Kernel command line: %s\n", saved_command_line);
/* parameters may set static keys */
jump_label_init();
parse_early_param();
@@ -617,6 +890,9 @@
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
+ if (extra_init_args)
+ parse_args("Setting extra init args", extra_init_args,
+ NULL, 0, -1, -1, NULL, set_init_arg);
/*
* These use large bootmem allocations and must precede
@@ -639,11 +915,7 @@
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
- /*
- * Disable preemption - early bootup scheduling is extremely
- * fragile until we cpu_idle() for the first time.
- */
- preempt_disable();
+
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
@@ -780,6 +1052,7 @@
acpi_subsystem_init();
arch_post_acpi_subsys_init();
sfi_init_late();
+ kcsan_init();
/* Do the rest non-__init'ed, we're now alive */
arch_call_rest_init();
@@ -993,16 +1266,21 @@
"late",
};
-static void __init do_initcall_level(int level)
+static int __init ignore_unknown_bootoption(char *param, char *val,
+ const char *unused, void *arg)
+{
+ return 0;
+}
+
+static void __init do_initcall_level(int level, char *command_line)
{
initcall_entry_t *fn;
- strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
- initcall_command_line, __start___param,
+ command_line, __start___param,
__stop___param - __start___param,
level, level,
- NULL, &repair_env_string);
+ NULL, ignore_unknown_bootoption);
trace_initcall_level(initcall_level_names[level]);
for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
@@ -1012,9 +1290,20 @@
static void __init do_initcalls(void)
{
int level;
+ size_t len = strlen(saved_command_line) + 1;
+ char *command_line;
- for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
- do_initcall_level(level);
+ command_line = kzalloc(len, GFP_KERNEL);
+ if (!command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+ for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
+ /* Parser modifies command_line, restore it each time */
+ strcpy(command_line, saved_command_line);
+ do_initcall_level(level, command_line);
+ }
+
+ kfree(command_line);
}
/*
@@ -1045,11 +1334,17 @@
static int run_init_process(const char *init_filename)
{
+ const char *const *p;
+
argv_init[0] = init_filename;
pr_info("Run %s as init process\n", init_filename);
- return do_execve(getname_kernel(init_filename),
- (const char __user *const __user *)argv_init,
- (const char __user *const __user *)envp_init);
+ pr_debug(" with arguments:\n");
+ for (p = argv_init; *p; p++)
+ pr_debug(" %s\n", *p);
+ pr_debug(" with environment:\n");
+ for (p = envp_init; *p; p++)
+ pr_debug(" %s\n", *p);
+ return kernel_execve(init_filename, argv_init, envp_init);
}
static int try_to_run_init_process(const char *init_filename)
@@ -1093,6 +1388,11 @@
} else
pr_info("Kernel memory protection disabled.\n");
}
+#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
+static inline void mark_readonly(void)
+{
+ pr_warn("Kernel memory protection not selected by kernel config.\n");
+}
#else
static inline void mark_readonly(void)
{
@@ -1114,6 +1414,7 @@
async_synchronize_full();
kprobe_free_init_mem();
ftrace_free_init_mem();
+ kgdb_free_init_mem();
free_initmem();
mark_readonly();
@@ -1128,6 +1429,8 @@
rcu_end_inkernel_boot();
+ do_sysctl_args();
+
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
@@ -1149,6 +1452,16 @@
panic("Requested init %s failed (error %d).",
execute_command, ret);
}
+
+ if (CONFIG_DEFAULT_INIT[0] != '\0') {
+ ret = run_init_process(CONFIG_DEFAULT_INIT);
+ if (ret)
+ pr_err("Default init %s failed (error %d)\n",
+ CONFIG_DEFAULT_INIT, ret);
+ else
+ return 0;
+ }
+
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
@@ -1159,6 +1472,21 @@
"See Linux Documentation/admin-guide/init.rst for guidance.");
}
+/* Open /dev/console, for stdin/stdout/stderr, this should never fail */
+void __init console_on_rootfs(void)
+{
+ struct file *file = filp_open("/dev/console", O_RDWR, 0);
+
+ if (IS_ERR(file)) {
+ pr_err("Warning: unable to open an initial console.\n");
+ return;
+ }
+ init_dup(file);
+ init_dup(file);
+ init_dup(file);
+ fput(file);
+}
+
static noinline void __init kernel_init_freeable(void)
{
/*
@@ -1182,34 +1510,29 @@
init_mm_internals();
+ rcu_init_tasks_generic();
do_pre_smp_initcalls();
lockup_detector_init();
smp_init();
sched_init_smp();
+ padata_init();
page_alloc_init_late();
/* Initialize page ext after all struct pages are initialized. */
page_ext_init();
do_basic_setup();
- /* Open the /dev/console on the rootfs, this should never fail */
- if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- pr_err("Warning: unable to open an initial console.\n");
+ kunit_run_all_tests();
- (void) ksys_dup(0);
- (void) ksys_dup(0);
+ console_on_rootfs();
+
/*
* check if there is an early userspace init. If yes, let it do all
* the work
*/
-
- if (!ramdisk_execute_command)
- ramdisk_execute_command = "/init";
-
- if (ksys_access((const char __user *)
- ramdisk_execute_command, 0) != 0) {
+ if (init_eaccess(ramdisk_execute_command) != 0) {
ramdisk_execute_command = NULL;
prepare_namespace();
}
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
index fa9cdfa..3d62b07 100644
--- a/init/noinitramfs.c
+++ b/init/noinitramfs.c
@@ -9,6 +9,7 @@
#include <linux/stat.h>
#include <linux/kdev_t.h>
#include <linux/syscalls.h>
+#include <linux/init_syscalls.h>
/*
* Create a simple rootfs that is similar to the default initramfs
@@ -17,17 +18,16 @@
{
int err;
- err = ksys_mkdir((const char __user __force *) "/dev", 0755);
+ err = init_mkdir("/dev", 0755);
if (err < 0)
goto out;
- err = ksys_mknod((const char __user __force *) "/dev/console",
- S_IFCHR | S_IRUSR | S_IWUSR,
+ err = init_mknod("/dev/console", S_IFCHR | S_IRUSR | S_IWUSR,
new_encode_dev(MKDEV(5, 1)));
if (err < 0)
goto out;
- err = ksys_mkdir((const char __user __force *) "/root", 0700);
+ err = init_mkdir("/root", 0700);
if (err < 0)
goto out;