# Build a freestanding 32-bit x86 kernel (myos.bin) and wrap it in a bootable
# GRUB ISO (myos.iso).  `make run` boots the ISO in QEMU.

.DELETE_ON_ERROR:
.PHONY: all clean run

ASFLAGS := --32
CXXFLAGS := -m32 -ggdb -g3 -O0 -fno-pic -ffreestanding -fno-rtti -fno-exceptions -std=c++23
LDFLAGS := -m32 -static -T link.ld -ffreestanding -nostdlib
# Emit a .d file next to every C++ object so header edits trigger rebuilds.
DEPFLAGS := -MMD -MP
# -s -S: open a gdb stub on :1234 and wait for the debugger before running.
# Append `-d int` to have QEMU log every interrupt.
QEMUFLAGS := -monitor stdio -no-reboot -s -S

ASFILES := boot.s
# The last entry must NOT end in a backslash, otherwise the following
# assignment is folded into this list.
# NOTE(review): kernel.cpp is assumed to be the translation unit that defines
# kernel_main (called from boot.s); confirm the file name.
CXXFILES := gdt.cpp \
            idt.cpp \
            memory.cpp \
            multiboot.cpp \
            pic.cpp \
            pit.cpp \
            tasking.cpp \
            vgaterminal.cpp \
            kernel.cpp

# patsubst only rewrites the suffix; subst would rewrite ".s"/".cpp" anywhere
# in a file name.
OBJS := $(patsubst %.s,%.o,$(ASFILES)) \
        $(patsubst %.cpp,%.o,$(CXXFILES))
DEPS := $(patsubst %.cpp,%.d,$(CXXFILES))

all: myos.iso

myos.iso: myos.bin iso/boot/grub/grub.cfg
	@echo " ISO " $@
	@cp $< iso/boot/
	@grub-mkrescue -o $@ iso/

myos.bin: $(OBJS) link.ld
	@echo " LD " $@
	@$(CXX) $(LDFLAGS) -o $@ $(OBJS)

%.o: %.s
	@echo " AS " $<
	@$(AS) $(ASFLAGS) -c $< -o $@

%.o: %.cpp
	@echo " CXX " $<
	@$(CXX) $(CXXFLAGS) $(DEPFLAGS) -c $< -o $@

clean:
	@echo " CLEAN"
	@$(RM) $(OBJS) $(DEPS) myos.bin myos.iso iso/boot/myos.bin

run: myos.iso
	@echo " QEMU"
	@qemu-system-i386 -cdrom $< $(QEMUFLAGS)

# Pull in the compiler-generated header dependencies (absent on first build).
-include $(DEPS)


/* Declare constants for the multiboot header. */
.set MAGIC, 0xE85250D6	/* Multiboot2 header magic */
.set FLAGS, 0x0		/* architecture field: 0 = 32-bit protected-mode i386 */

/*
Multiboot2 header. The fixed part is four 32-bit fields: magic, architecture,
header_length (the whole header including all tags) and a checksum that makes
the four fields sum to zero modulo 2^32. The length is computed from labels so
it stays correct when tags are added or removed.
"a" marks the section allocatable so it is actually loaded with the image.
*/
.section .multiboot2, "a"
.align 8
header_start:
.int MAGIC
.int FLAGS
.int header_end - header_start
.int 0x100000000 - (MAGIC + FLAGS + (header_end - header_start))
/* info request: type 1, flags 0, size 12, asking for tag 4 (basic meminfo) */
.align 8
.hword 1, 0
.int 12
.int 4
/* end tag: type 0, flags 0, size 8 */
.align 8
.hword 0, 0
.int 8
header_end:
/*
The multiboot standard does not define the value of the stack pointer register
(esp) and it is up to the kernel to provide a stack. This allocates room for a
small stack by creating a symbol at the bottom of it, then allocating 16384
bytes for it, and finally creating a symbol at the top. The stack grows
downwards on x86. The stack lives in .bss, which is a nobits section, so the
kernel file is smaller because it does not contain an uninitialized stack.
The stack on x86 must be 16-byte aligned according to the System V ABI
standard and de-facto extensions. The compiler will assume the stack is
properly aligned and failure to align the stack will result in undefined
behavior.
*/
.section .bss
.align 16
stack_bottom:
.skip 16384 # 16 KiB
stack_top:
/*
The linker script specifies _start as the entry point to the kernel and the
bootloader will jump to this position once the kernel has been loaded. It
doesn't make sense to return from this function as the bootloader is gone.
*/
.section .text
.global _start
.type _start, @function
_start:
	/*
	The bootloader has loaded us into 32-bit protected mode on a x86
	machine. Interrupts are disabled. Paging is disabled. The processor
	state is as defined in the multiboot standard. The kernel has full
	control of the CPU. The kernel can only make use of hardware features
	and any code it provides as part of itself. There's no printf
	function, unless the kernel provides its own <stdio.h> header and a
	printf implementation. There are no security restrictions, no
	safeguards, no debugging mechanisms, only what the kernel provides
	itself. It has absolute and complete power over the
	machine.
	*/

	/*
	Save the values the bootloader left in eax (magic) and ebx (info
	pointer) into the C++ globals before anything clobbers the registers.
	*/
	mov %eax, multiboot_magic
	mov %ebx, multiboot_ptr

	/*
	To set up a stack, we set the esp register to point to the top of the
	stack (as it grows downwards on x86 systems). This is necessarily done
	in assembly as languages such as C cannot function without a stack.
	*/
	mov $stack_top, %esp

	/*
	This is a good place to initialize crucial processor state before the
	high-level kernel is entered. It's best to minimize the early
	environment where crucial features are offline. Note that the
	processor is not fully initialized yet: Features such as floating
	point instructions and instruction set extensions are not initialized
	yet. The GDT should be loaded here. Paging should be enabled here.
	C++ features such as global constructors and exceptions will require
	runtime support to work as well.
	*/

	/*
	Run the global constructors: call every function pointer stored
	between __init_array_start and __init_array_end (both provided by the
	linker script). eax is the iterator; it is saved around each call
	because it is a caller-saved register.
	*/
	mov $__init_array_start, %eax
.again:
	cmp $__init_array_end, %eax
	je .next
	push %eax
	call *(%eax)
	pop %eax
	add $0x4, %eax
	jmp .again
.next:

	/*
	Enter the high-level kernel. The ABI requires the stack is 16-byte
	aligned at the time of the call instruction (which afterwards pushes
	the return pointer of size 4 bytes). The stack was originally 16-byte
	aligned above and every push in the constructor loop has been popped
	again, so the alignment has been preserved and the call is well
	defined.
	*/
	call kernel_main

	/*
	If the system has nothing more to do, put the computer into an
	infinite loop. To do that:
	1) Disable interrupts with cli (clear interrupt enable in eflags).
	   The bootloader hands over with interrupts disabled, but the kernel
	   may have enabled them before returning from kernel_main (which is
	   sort of nonsensical to do).
	2) Wait for the next interrupt to arrive with hlt (halt instruction).
	   Since they are disabled, this will lock up the computer.
	3) Jump to the hlt instruction if it ever wakes up due to a
	   non-maskable interrupt occurring or due to system management mode.
	*/
	cli
1:	hlt
	jmp 1b

/*
Set the size of the _start symbol to the current location '.' minus its start.
This is useful when debugging or when you implement call tracing.
*/
.size _start, . - _start

#include <array>
#include <cstdint>
// One segment descriptor, spelled out as bit-fields in declaration order.
// The field widths below add up to 64 bits; `packed` is applied so no padding
// is inserted between them. Field order is the layout, so do not reorder.
struct gdt_entry_bits {
std::uint32_t limit_low : 16; // low 16 bits of the segment limit
std::uint32_t base_low : 24; // low 24 bits of the segment base
std::uint32_t accessed : 1;
std::uint32_t read_write : 1; // readable for code, writable for data
std::uint32_t conforming_expand_down : 1; // conforming for code, expand down for data
std::uint32_t code : 1; // 1 for code, 0 for data
std::uint32_t code_data_segment : 1; // should be 1 for everything but TSS and LDT
std::uint32_t DPL : 2; // privilege level
std::uint32_t present : 1;
std::uint32_t limit_high : 4; // high 4 bits of the segment limit
std::uint32_t available : 1; // only used in software; has no effect on hardware
std::uint32_t long_mode : 1;
std::uint32_t big : 1; // 32-bit opcodes for code, uint32_t stack for data
std::uint32_t gran : 1; // 1 to use 4k page addressing, 0 for byte addressing
std::uint32_t base_high : 8; // high 8 bits of the segment base
} __attribute__((packed));
// Flat-model descriptor table. gdt_initialize() loads CS with selector 0x08
// and the data segment registers with 0x10, so the code descriptor must sit
// at index 1 and the data descriptor at index 2; index 0 is the all-zero
// null descriptor.
constinit static const std::array<gdt_entry_bits, 3> gdt {{
    /* null = */ {},
    /* kernel_code = */ {
        .limit_low = 0xFFFF,
        .base_low = 0x0000,
        .accessed = 0,
        .read_write = 1,
        .conforming_expand_down = 0,
        .code = 1,
        .code_data_segment = 1,
        .DPL = 0,
        .present = 1,
        .limit_high = 0xF,
        .available = 0,
        .long_mode = 0,
        .big = 1,
        .gran = 1,
        .base_high = 0x00
    },
    /* kernel_data = */ {
        .limit_low = 0xFFFF,
        .base_low = 0x0000,
        .accessed = 0,
        .read_write = 1,
        .conforming_expand_down = 0,
        .code = 0,
        .code_data_segment = 1,
        .DPL = 0,
        .present = 1,
        .limit_high = 0xF,
        .available = 0,
        .long_mode = 0,
        .big = 1,
        .gran = 1,
        .base_high = 0x00
    }
}};
void gdt_initialize()
auto gdtr = reinterpret_cast<std::uint64_t>(gdt.data());
gdtr <<= 16;
gdtr |= gdt.size() * sizeof(gdt[0]);
asm volatile(R"(
lgdt %0
pushl $0x8
push $.setcs
ljmp *(%%esp)
add $8, %%esp
mov $0x10, %%eax
mov %%eax, %%ds
mov %%eax, %%es
mov %%eax, %%fs
mov %%eax, %%gs
mov %%eax, %%ss
)" :: "m"(gdtr));

#ifndef GDT_HPP
#define GDT_HPP
// Loads the kernel's descriptor table with lgdt and reloads the segment
// registers (CS = 0x08, data segments = 0x10).
void gdt_initialize();
#endif // GDT_HPP

#include "idt.hpp"
#include "portio.hpp"
#include "textoutput.hpp"
#include <array>
#include <cstdint>
#include <utility>
// Kernel-wide text sink; declared here, defined in another translation unit.
extern TextOutput& term;
// Values for idt_entry_bits::gate_type. Only IntrGate32 is referenced by the
// code visible in this file.
static constexpr std::uint8_t TaskGate = 0x5;
static constexpr std::uint8_t IntrGate16 = 0x6;
static constexpr std::uint8_t TrapGate16 = 0x7;
static constexpr std::uint8_t IntrGate32 = 0xE;
static constexpr std::uint8_t TrapGate32 = 0xF;
// One interrupt gate descriptor, spelled out as bit-fields in declaration
// order. The widths add up to 64 bits; `packed` is applied so no padding is
// inserted. Field order is the layout, so do not reorder.
struct idt_entry_bits {
std::uint32_t offset_low : 16; // low half of the handler address
std::uint32_t segment_selector : 16; // code segment selector used for the handler
std::uint32_t rsvd : 8;
std::uint32_t gate_type : 4; // one of the *Gate* constants
std::uint32_t rsvd2 : 1;
std::uint32_t dpl : 2;
std::uint32_t present : 1;
std::uint32_t offset_high : 16; // high half of the handler address
} __attribute__((packed));
// Per-vector handlers installed through idt_register_callback(); a null
// entry means "no handler".
static std::array<Callback, 48> callbacks;

// Common C++ entry point called by every interrupt stub.
// `regs` is taken by value, i.e. it is the register image the stub pushed
// onto the stack immediately before the call.
extern "C"
void interruptGeneralHandler(Registers regs)
{
    const auto& inum = regs.inum;

    // Vectors 32 and up are acknowledged by writing 0x20 to port 0x20, and
    // additionally to port 0xA0 for vectors 40 and up.
    if (inum >= 32) {
        if (inum >= 40)
            outb(0xA0, 0x20);
        outb(0x20, 0x20);
    }

    // Dispatch to the registered callback, if there is one.
    if (inum < callbacks.size()) {
        if (auto cb = callbacks[inum]; cb)
            cb(regs);
    }
}
// Compile-time generator for the IDT entry of vector N: stub() is the raw
// entry point and `entry` is the descriptor pointing at it.
// NOTE(review): this block appears to be missing lines in this view (braces,
// and possibly instructions inside the asm); the comments below describe only
// what is visible.
template<std::size_t N>
struct StubEntry
// Vectors for which this code does NOT push a placeholder error code.
static constexpr bool HasError = N == 8 || (N >= 10 && N <= 14) || N == 17 || N == 30;
static void stub() {
// Push a dummy 0 for vectors without an error code, so that the slot
// Registers::error reads is always present.
if constexpr (!HasError)
asm volatile("push $0x0");
// Push the vector number (read back as Registers::inum), call the common
// handler, then pop the vector number and skip 4 more bytes of stack.
// NOTE(review): nothing visible here saves the general-purpose registers that
// Registers declares between inum and error, nor returns from the interrupt —
// confirm against the full source.
asm volatile(R"(
push %0
call interruptGeneralHandler
pop %%eax
add $0x4, %%esp
)" :: "i"(N));
// Combine the three arguments into one selector value: gdt_idx is OR-ed in
// unshifted, bit 2 is set when useLdt is true, and the low two bits of rpl
// are OR-ed in.
static constexpr std::uint32_t segment(std::uint16_t gdt_idx, bool useLdt, std::uint16_t rpl) {
return gdt_idx | (useLdt ? 0x4 : 0x0) | (rpl & 0x3);
// Present, DPL 0, 32-bit interrupt gate through selector 0x8, with the
// address of stub() split across offset_low/offset_high.
idt_entry_bits entry = {
.offset_low = (uint32_t)stub & 0xFFFF,
.segment_selector = segment(0x8, false, 0),
.gate_type = IntrGate32,
.dpl = 0,
.present = 1,
.offset_high = (uint32_t)stub >> 16
// Lets a StubEntry be used directly where an idt_entry_bits is expected.
operator idt_entry_bits() const noexcept {
return entry;
// The interrupt descriptor table: 256 slots, the leading ones filled from
// StubEntry<ints> for each index in the supplied index sequence.
// NOTE(review): the closing line that invokes this lambda is not visible
// here, so the number of populated vectors cannot be confirmed.
static auto idt =
[]<std::size_t... ints>(std::index_sequence<ints...>) {
return std::array<idt_entry_bits, 256> { StubEntry<ints>()... };
void idt_initialize()
auto idtr = reinterpret_cast<std::uint64_t>(idt.data());
idtr <<= 16;
idtr |= idt.size() * sizeof(idt[0]);
asm volatile("lidt %0" :: "m"(idtr));
void idt_register_callback(std::size_t num, Callback cb)
if (num < callbacks.size())
callbacks[num] = cb;

#ifndef IDT_HPP
#define IDT_HPP
#include <cstddef>
#include <cstdint>
// Register image handed to interrupt callbacks. The member order is the
// in-memory order, lowest address first; `packed` is applied so no padding
// is inserted between the thirteen 32-bit fields.
struct Registers
{
    std::uint32_t inum;                                   // interrupt vector number
    std::uint32_t edi, esi, ebp, esp, ebx, edx, ecx, eax; // general-purpose registers
    std::uint32_t error;                                  // error code (or placeholder)
    std::uint32_t eip, cs, eflags;                        // interrupted context
} __attribute__((packed));
// Signature of a per-vector interrupt handler.
using Callback = void (*)(const Registers&);
// Loads the interrupt descriptor table with lidt.
void idt_initialize();
// Installs `cb` for vector `num`; out-of-range vectors are ignored.
void idt_register_callback(std::size_t num, Callback cb);
#endif // IDT_HPP

# Boot the kernel image using the Multiboot2 protocol.
menuentry "myos" {
	multiboot2 /boot/myos.bin
}

#include "gdt.hpp"
#include "idt.hpp"
#include "memory.hpp"
#include "multiboot.hpp"
#include "pic.hpp"
#include "pit.hpp"
#include "tasking.hpp"
#include "vgaterminal.hpp"
// The one VGA text console instance.
static VGATerminal vga;
// Kernel-wide text sink; other translation units reach it through
// `extern TextOutput& term;`.
TextOutput& term = vga;
// High-level kernel entry point, called from _start in boot.s.
// NOTE(review): this function appears to be missing lines in this view
// (braces and, presumably, the subsystem initialisation calls); the comments
// below describe only what is visible.
extern "C"
void kernel_main(void)
term.write("Clyne's kernel, v2024\n\n");
// Without valid boot information there is nothing to do: spin forever.
if (!multiboot_initialize())
for (;;);
// Vector 14 handler: report the fault and stop.
idt_register_callback(14, [](const Registers& regs) {
term.write("Page fault! eip=");
for (;;);
// Enable interrupts.
asm volatile("sti");
term.write("Tasking enabled.\n");
// Start a second task that loops forever, with a 256-byte stack.
tasking_spawn([] {
for (;;)
}, 256);
// The initial task never returns either.
for (;;)
// Freestanding memmove for code the compiler lowers to a memmove call.
// Uses the standard C signature (returns `dst`) and copies in whichever
// direction is safe when the two ranges overlap.
extern "C"
void *memmove(void *dst, const void *src, size_t sz) {
    auto *d = static_cast<unsigned char *>(dst);
    const auto *s = static_cast<const unsigned char *>(src);

    if (d < s) {
        // Destination starts below the source: copy front to back.
        while (sz--)
            *d++ = *s++;
    } else if (d > s) {
        // Destination starts above the source: copy back to front so bytes
        // are read before they are overwritten.
        d += sz;
        s += sz;
        while (sz--)
            *--d = *--s;
    }
    return dst;
}

/* The bootloader will look at this image and start execution at the symbol
   designated as the entry point. */
ENTRY(_start)

/* Tell where the various sections of the object files will be put in the final
   kernel image. */
SECTIONS
{
	/* It used to be universally recommended to use 1M as a start offset,
	   as it was effectively guaranteed to be available under BIOS systems.
	   However, UEFI has made things more complicated, and experimental data
	   strongly suggests that 2M is a safer place to load. In 2016, a new
	   feature was introduced to the multiboot2 spec to inform bootloaders
	   that a kernel can be loaded anywhere within a range of addresses and
	   will be able to relocate itself to run from such a loader-selected
	   address, in order to give the loader freedom in selecting a span of
	   memory which is verified to be available by the firmware, in order to
	   work around this issue. This does not use that feature, so 2M was
	   chosen as a safer option than the traditional 1M. */
	. = 2M;

	/* First put the multiboot header, as it is required to be put very early
	   in the image or the bootloader won't recognize the file format.
	   Next we'll put the .text section. The .text.* pattern also collects
	   the per-function sections emitted for templates and inline code. */
	.text BLOCK(4K) : ALIGN(4K)
	{
		KEEP(*(.multiboot2))
		*(.text .text.*)
	}

	/* Read-only data. */
	.rodata BLOCK(4K) : ALIGN(4K)
	{
		*(.rodata .rodata.*)
	}

	/* Pointers to the global constructors. _start walks the range
	   [__init_array_start, __init_array_end) and calls each entry. */
	.init_array :
	{
		__init_array_start = .;
		KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*)))
		KEEP(*(.init_array))
		__init_array_end = .;
	}

	/* Read-write data (initialized) */
	.data BLOCK(4K) : ALIGN(4K)
	{
		*(.data .data.*)
	}

	/* Read-write data (uninitialized) and stack */
	.bss BLOCK(4K) : ALIGN(4K)
	{
		*(COMMON)
		*(.bss .bss.*)
	}

	/* The compiler may produce other sections, by default it will put them in
	   a segment with the same name. Simply add stuff here as needed. */
	.note :
	{
		*(.note .note.*)
	}
}

#include "textoutput.hpp"
#include <array>
#include <cstdint>
// One 32-bit page-directory entry.
struct PageDirectory
{
    // Value of an entry that maps nothing: bit 0 (present) clear, bit 1 set.
    static constexpr std::uint32_t NotPresent = 0x2;

    // Default: an entry that is not present.
    PageDirectory(): value(NotPresent) {}

    // Entry referring to the table at `addr`, with the low two bits set
    // (the same `| 3` "r/w, present" flags used for page-table entries).
    // The cast goes through uintptr_t so it is well-formed regardless of the
    // pointer width; only the low 32 bits are kept.
    PageDirectory(void *addr)
        : value(static_cast<std::uint32_t>(reinterpret_cast<std::uintptr_t>(addr)) | 3) {}

    std::uint32_t value;
};
static_assert(sizeof(PageDirectory) == sizeof(std::uint32_t));
// Byte counts of lower/upper memory, defined in another translation unit;
// memory_alloc() decrements them as memory is handed out.
extern std::uint32_t lowerMem;
extern std::uint32_t upperMem;
// Kernel-wide text sink.
extern TextOutput& term;

// Next free address in each region (bump pointers).
static std::uintptr_t lowerFree = 0x400;
static std::uintptr_t upperFree = 0x100000;

// Paging structures. Their addresses are combined with flag bits in the low
// bits of an entry (see `| 3`) and loaded into cr3, so both tables must start
// on a 4 KiB boundary.
alignas(4096) static std::array<PageDirectory, 1024> pageDirectory;
alignas(4096) static std::array<std::uint32_t, 1024> pageTable;
void memory_initialize()
lowerMem -= 1024;
const auto totalKb = (lowerMem + upperMem) / 1024u;
term.write("Claiming ");
term.write(" kB for allocations...\n");
std::uint32_t addr = 0;
for (auto& p : pageTable) {
p = addr | 3; // supervisor, r/w, present
addr += 0x1000;
pageDirectory[0] = PageDirectory(pageTable.data());
asm volatile(R"(
mov %%eax, %%cr3
mov %%cr0, %%eax
or $0x80000000, %%eax
mov %%eax, %%cr0
)" :: "a"(pageDirectory.data()));
term.write("Paging enabled.\n");
static void *memory_alloc(std::size_t size)
void *ret = nullptr;
if (lowerMem > size) {
ret = reinterpret_cast<void *>(lowerFree);
lowerFree += size;
lowerMem -= size;
} else if (upperMem > size) {
ret = reinterpret_cast<void *>(upperFree);
upperFree += size;
upperMem -= size;
} else {
// Uh oh!
term.write("!!! Kernel allocation failed !!!");
return ret;
void *operator new(std::size_t size)
return memory_alloc(size);
void *operator new[](std::size_t size)
return memory_alloc(size);

#ifndef MEMORY_HPP
#define MEMORY_HPP

// Prints the amount of memory available for allocation, identity-maps the
// first 4 MiB and enables paging.
void memory_initialize();

#endif // MEMORY_HPP

#include "textoutput.hpp"
#include <cstdint>
extern TextOutput& term;
// Written by _start in boot.s from eax and ebx before any C++ code runs.
std::uint32_t multiboot_magic;
std::uint32_t *multiboot_ptr;
// Lower/upper memory sizes in bytes, filled in by multiboot_initialize().
std::uint32_t lowerMem = 0;
std::uint32_t upperMem = 0;
bool multiboot_initialize()
if (multiboot_magic != 0x36d76289) {
term.write("Not multiboot!");
return false;
term.write("Found multiboot headers: ");
auto ptr = multiboot_ptr + 2;
while (ptr[0] != 0 && ptr[1] != 8) {
term.write(", ");
if (ptr[0] == 4) {
lowerMem = ptr[2] * 1024;
upperMem = ptr[3] * 1024;
auto next = reinterpret_cast<std::uintptr_t>(ptr);
next += ptr[1];
next = (next + 7) & ~7;
ptr = reinterpret_cast<std::uint32_t *>(next);
return true;

#ifndef MULTIBOOT_HPP
#define MULTIBOOT_HPP

// Checks the boot magic and records the lower/upper memory sizes.
// Returns false when the kernel was not started with the expected magic.
bool multiboot_initialize();

#endif // MULTIBOOT_HPP

#include "pic.hpp"
#include "portio.hpp"
#define PIC1 0x20 /* IO base address for master PIC */
#define PIC2 0xA0 /* IO base address for slave PIC */
#define PIC1_DATA (PIC1+1)
#define PIC2_DATA (PIC2+1)
#define PIC_EOI 0x20 /* End-of-interrupt command code */
#define ICW1_ICW4 0x01 /* Indicates that ICW4 will be present */
#define ICW1_SINGLE 0x02 /* Single (cascade) mode */
#define ICW1_INTERVAL4 0x04 /* Call address interval 4 (8) */
#define ICW1_LEVEL 0x08 /* Level triggered (edge) mode */
#define ICW1_INIT 0x10 /* Initialization - required! */
#define ICW4_8086 0x01 /* 8086/88 (MCS-80/85) mode */
#define ICW4_AUTO 0x02 /* Auto (normal) EOI */
#define ICW4_BUF_SLAVE 0x08 /* Buffered mode/slave */
#define ICW4_BUF_MASTER 0x0C /* Buffered mode/master */
#define ICW4_SFNM 0x10 /* Special fully nested (not) */
void pic_initialize()
constexpr int offset1 = 0x20, offset2 = 0x28;
std::uint8_t a1, a2;
a1 = inb(PIC1_DATA); // save masks
a2 = inb(PIC2_DATA);
outb(PIC1_COMMAND, ICW1_INIT | ICW1_ICW4); // starts the initialization sequence (in cascade mode)
outb(PIC1_DATA, offset1); // ICW2: Master PIC vector offset
outb(PIC2_DATA, offset2); // ICW2: Slave PIC vector offset
outb(PIC1_DATA, 4); // ICW3: tell Master PIC that there is a slave PIC at IRQ2 (0000 0100)
outb(PIC2_DATA, 2); // ICW3: tell Slave PIC its cascade identity (0000 0010)
outb(PIC1_DATA, ICW4_8086); // ICW4: have the PICs use 8086 mode (and not 8080 mode)
outb(PIC2_DATA, ICW4_8086);
outb(PIC1_DATA, a1); // restore saved masks.
outb(PIC2_DATA, a2);
void pic_eoi(std::uint8_t irq)
if (irq >= 8)

#ifndef PIC_HPP
#define PIC_HPP
#include <cstdint>
/* Reinitialize the PIC controllers, moving their vector offsets away from the
   defaults of 8h and 70h. The new offsets are fixed inside the implementation
   (0x20 for the master, 0x28 for the slave). */
void pic_initialize();
/* Signal end-of-interrupt for IRQ line `irq`. */
void pic_eoi(std::uint8_t irq);
#endif // PIC_HPP

#include "pit.hpp"
#include "idt.hpp"
#include "portio.hpp"
#include "tasking.hpp"
static volatile std::uint32_t ticks = 0;
static void timer_callback(const Registers& regs)
ticks = ticks + 1;
void pit_initialize(std::uint32_t frequency)
// Firstly, register our timer callback.
idt_register_callback(32, timer_callback);
// The value we send to the PIT is the value to divide it's input clock
// (1193180 Hz) by, to get our required frequency. Important to note is
// that the divisor must be small enough to fit into 16-bits.
auto divisor = 1193180 / frequency;
// Send the command byte.
outb(0x43, 0x36);
// Send the frequency divisor.
outb(0x40, divisor & 0xFF);
outb(0x40, (divisor >> 8) & 0xFF);
void pit_busy_wait(std::int32_t tks)
const auto end = ticks + tks;
while (end - ticks > 0)
asm volatile("nop");

#ifndef PIT_HPP
#define PIT_HPP
#include <cstdint>
// Registers the tick handler on vector 32 and programs the timer divisor for
// the requested frequency (in Hz).
void pit_initialize(std::uint32_t frequency);
// Busy-loops until `tks` timer ticks have been counted.
void pit_busy_wait(std::int32_t tks);
#endif // PIT_HPP

#ifndef PORTIO_HPP
#define PORTIO_HPP
#include <cstdint>
// Write the byte `val` to I/O port `port`.
inline void outb(std::uint16_t port, std::uint8_t val)
{
    asm volatile("outb %b0, %w1" :: "a"(val), "Nd"(port) : "memory");
}
// Read and return one byte from I/O port `port`.
inline std::uint8_t inb(std::uint16_t port)
{
    std::uint8_t val;
    asm volatile("inb %w1, %b0" : "=a"(val) : "Nd"(port) : "memory");
    return val;
}
inline void io_wait()
outb(0x80, 0);
#endif // PORTIO_HPP

#include "tasking.hpp"
#include <array>
struct Task
Registers regs;
bool valid = false;
// Fixed pool of task slots.
static std::array<Task, 4> tasks;
// Index of the running task; -1 until tasking_initialize() has run.
static int current = -1;
// Round-robin task switch. `regs` is the register image of the interrupted
// task; on return it holds the image of the task to resume.
// Does nothing until tasking_initialize() has run.
void schedule(Registers& regs)
{
    if (current < 0)
        return;

    // Remember where the outgoing task was interrupted.
    tasks[current].regs = regs;

    // Advance to the next slot in use, wrapping around. The outgoing task's
    // own slot is in use, so the search always terminates.
    do {
        if (++current >= static_cast<int>(tasks.size()))
            current = 0;
    } while (!tasks[current].valid);

    regs = tasks[current].regs;
}
void tasking_initialize()
tasks[0].valid = true;
current = 0;
asm volatile("int $0x20");
bool tasking_spawn(void (*entry)(), unsigned ssize)
int i = -1;
for (i = 0; i < tasks.size(); ++i) {
if (!tasks[i].valid)
if (i < 0)
return false;
tasks[i] = Task();
auto& r = tasks[i].regs;
auto stack = reinterpret_cast<std::uint32_t>(new std::uint8_t[ssize]);
r.ebp = stack + ssize;
r.esp = r.ebp;
r.eip = reinterpret_cast<std::uint32_t>(entry);
r.cs = 0x8;
r.eflags = tasks[current].regs.eflags;
tasks[i].valid = true;
return true;

#ifndef TASKING_HPP
#define TASKING_HPP

#include "idt.hpp"

// Turns the running code into the first task and starts scheduling.
void tasking_initialize();
// Creates a task running `entry` on a new stack of `ssize` bytes; returns
// false when the task could not be created.
bool tasking_spawn(void (*entry)(), unsigned ssize);
// Saves the interrupted task's registers from `regs` and replaces them with
// those of the next task to run.
void schedule(Registers& regs);

#endif // TASKING_HPP

#ifndef TEXTOUTPUT_HPP
#define TEXTOUTPUT_HPP

// Abstract character sink. Implementations provide write(char); the string
// and integer overloads are built on top of it.
class TextOutput
{
public:
    // Emit a single character.
    virtual void write(char c) noexcept = 0;

    // Emit a NUL-terminated string; a null pointer prints nothing.
    void write(const char *s) noexcept {
        if (s) {
            while (*s)
                write(*s++);
        }
    }

    // Emit a signed integer in decimal.
    void write(int n) noexcept {
        // Work on the magnitude as an unsigned value: indexing the digit
        // table with a negative remainder would read out of bounds, and
        // negating the most negative int would overflow.
        auto magnitude = static_cast<unsigned>(n);
        if (n < 0) {
            write('-');
            magnitude = 0u - magnitude;
        }
        write(magnitude);
    }

    // Emit an unsigned integer in decimal.
    void write(unsigned n) noexcept {
        // Digits are produced least-significant first, so the buffer is
        // filled from the end towards the front.
        char buf[32];
        auto ptr = buf + sizeof(buf);

        *--ptr = '\0';
        do {
            *--ptr = "0123456789"[n % 10];
            n /= 10;
        } while (n);

        write(ptr);
    }
};

#endif // TEXTOUTPUT_HPP

#include "portio.hpp"
#include "vgaterminal.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
void VGATerminal::write(char c) noexcept
switch (c) {
case '\n':
offset += Width;
case '\r':
offset -= offset % Width;
void VGATerminal::put(char c) noexcept
std::uint16_t cell = c
| (std::to_underlying(foreground) << 8)
| (std::to_underlying(background) << 12);
auto ptr = reinterpret_cast<std::uint16_t *>(Videoram);
ptr[offset++] = cell;
void VGATerminal::checkpos() noexcept
if (offset >= Width * Height) {
auto ptr = reinterpret_cast<std::uint16_t *>(Videoram);
const auto end = ptr + Width * Height;
std::copy(ptr + Width, end, ptr);
std::fill(end - Width, end, 0);
offset = Width * Height - Width;
void VGATerminal::updatecursor() const noexcept
outb(0x03d4, 0x0f);
outb(0x03d5, static_cast<std::uint8_t>(offset));
outb(0x03d4, 0x0e);
outb(0x03d5, static_cast<std::uint8_t>(offset >> 8));

#ifndef VGATERMINAL_HPP
#define VGATERMINAL_HPP

#include "textoutput.hpp"
#include <cstddef>
#include <cstdint>

// TextOutput implementation that draws into the 80x25 text buffer at
// 0xB8000.
class VGATerminal : public TextOutput
{
public:
    // Colour indices used in the cell attribute.
    // NOTE(review): only Black and LightGray are referenced in the reviewed
    // text; the remaining enumerators follow the standard 16-colour text
    // palette order — confirm the names against the original.
    enum class Color : std::uint8_t
    {
        Black = 0,
        Blue,
        Green,
        Cyan,
        Red,
        Magenta,
        Brown,
        LightGray,
        DarkGray,
        LightBlue,
        LightGreen,
        LightCyan,
        LightRed,
        LightMagenta,
        Yellow,
        White
    };
    using enum Color;

    // Emit one character at the cursor position.
    virtual void write(char c) noexcept final;

private:
    static constexpr std::uintptr_t Videoram = 0xB8000;
    static constexpr unsigned Width = 80;
    static constexpr unsigned Height = 25;

    unsigned offset = 0;            // cursor position as a cell index
    Color foreground = LightGray;
    Color background = Black;

    void put(char c) noexcept;          // store a cell and advance the cursor
    void checkpos() noexcept;           // scroll when the cursor leaves the screen
    void updatecursor() const noexcept; // sync the hardware cursor with `offset`
};

#endif // VGATERMINAL_HPP