langref: document modules, root source files, etc

ziglang · Feb 1, 2025 · f4a67b5 · f4a67b5
1 parent 8f35e7e
commit f4a67b5
Show file tree

Hide file tree

Showing 6 changed files with 245 additions and 13 deletions.
diff --git a/doc/langref.html.in b/doc/langref.html.in
@@ -381,7 +381,7 @@
       In this case, the {#syntax#}!{#endsyntax#} may be omitted from the return
       type of <code>main</code> because no errors are returned from the function.
       </p>
-      {#see_also|Values|Tuples|@import|Errors|Root Source File|Source Encoding|try#}
+      {#see_also|Values|Tuples|@import|Errors|Entry Point|Source Encoding|try#}
       {#header_close#}
       {#header_open|Comments#}
       <p>
@@ -823,7 +823,7 @@
         <kbd>zig test</kbd> is a tool that creates and runs a test build. By default, it builds and runs an
         executable program using the <em>default test runner</em> provided by the {#link|Zig Standard Library#}
         as its main entry point. During the build, {#syntax#}test{#endsyntax#} declarations found while
-        {#link|resolving|Root Source File#} the given Zig source file are included for the default test runner
+        {#link|resolving|File and Declaration Discovery#} the given Zig source file are included for the default test runner
         to run and report on.
       </p>
       <aside>
@@ -5223,7 +5223,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
         <li>From library code, calling the programmer's panic function if they exposed one in the root source file.</li>
         <li>When mixing C and Zig code, calling the canonical panic implementation across multiple .o files.</li>
       </ul>
-      {#see_also|Root Source File#}
+      {#see_also|Panic Handler#}
       {#header_close#}
 
       {#header_open|@popCount#}
@@ -5966,12 +5966,12 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
       at compile-time, it falls into one of two categories.
       </p>
       <p>
-      Some Illegal Behavior is <it>safety-checked</it>: this means that the compiler will insert "safety checks"
+      Some Illegal Behavior is <em>safety-checked</em>: this means that the compiler will insert "safety checks"
       anywhere that the Illegal Behavior may occur at runtime, to determine whether it is about to happen. If it
       is, the safety check "fails", which triggers a panic.
       </p>
       <p>
-      All other Illegal Behavior is <it>unchecked</it>, meaning the compiler is unable to insert safety checks for
+      All other Illegal Behavior is <em>unchecked</em>, meaning the compiler is unable to insert safety checks for
       it. If Unchecked Illegal Behavior is invoked at runtime, anything can happen: usually that will be some kind of
       crash, but the optimizer is free to make Unchecked Illegal Behavior do anything, such as calling arbitrary functions
       or clobbering arbitrary data. This is similar to the concept of "undefined behavior" in some other languages. Note that
@@ -6481,14 +6481,155 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
       {#builtin#}
       {#see_also|Build Mode#}
       {#header_close#}
-      {#header_open|Root Source File#}
-      <p>TODO: explain how root source file finds other files</p>
-      <p>TODO: pub fn main</p>
-      <p>TODO: pub fn panic</p>
-      <p>TODO: if linking with libc you can use export fn main</p>
-      <p>TODO: order independent top level declarations</p>
-      <p>TODO: lazy analysis</p>
-      <p>TODO: using comptime { _ = @import() }</p>
+      {#header_open|Compilation Model#}
+      <p>
+      A Zig compilation is separated into <em>modules</em>. Each module is a collection of Zig source files,
+      one of which is the module's <em>root source file</em>. Each module can <em>depend</em> on any number of
+      other modules, forming a directed graph (dependency loops between modules are allowed). If module A
+      depends on module B, then any Zig source file in module A can import the <em>root source file</em> of
+      module B using {#syntax#}@import{#endsyntax#} with the module's name. In essence, a module acts as an
+      alias to import a Zig source file (which might exist in a completely separate part of the filesystem).
+      </p>
+      <p>
+      A simple Zig program compiled with <code>zig build-exe</code> has two key modules: the one containing your
+      code, known as the "main" or "root" module, and the standard library. Your module <em>depends on</em>
+      the standard library module under the name "std", which is what allows you to write
+      {#syntax#}@import("std"){#endsyntax#}! In fact, every single module in a Zig compilation -- including
+      the standard library itself -- implicitly depends on the standard library module under the name "std".
+      </p>
+      <p>
+      The "root module" (the one provided by you in the <code>zig build-exe</code> example) has a special
+      property. Like the standard library, it is implicitly made available to all modules (including itself),
+      this time under the name "root". So, {#syntax#}@import("root"){#endsyntax#} will always be equivalent to
+      {#syntax#}@import{#endsyntax#} of your "main" source file (often, but not necessarily, named
+      <code>main.zig</code>).
+      </p>
+      {#header_open|Source File Structs#}
+      <p>
+      Every Zig source file is implicitly a {#syntax#}struct{#endsyntax#} declaration; you can imagine that
+      the file's contents are literally surrounded by {#syntax#}struct { ... }{#endsyntax#}. This means that
+      as well as declarations, the top level of a file is permitted to contain fields:
+      </p>
+      {#code|TopLevelFields.zig#}
+      <p>
+      Such files can be instantiated just like any other {#syntax#}struct{#endsyntax#} type. A file's "root
+      struct type" can be referred to within that file using {#link|@This#}.
+      </p>
+      {#header_close#}
+      {#header_open|File and Declaration Discovery#}
+      <p>
+      Zig places importance on the concept of whether any piece of code is <em>semantically analyzed</em>; in
+      essence, whether the compiler "looks at" it. What code is analyzed is based on what files and
+      declarations are "discovered" from a certain point. This process of "discovery" is based on a simple set
+      of recursive rules:
+      </p>
+      <ul>
+        <li>If a call to {#syntax#}@import{#endsyntax#} is analyzed, the file being imported is analyzed.</li>
+        <li>If a type (including a file) is analyzed, all {#syntax#}comptime{#endsyntax#}, {#syntax#}usingnamespace{#endsyntax#}, and {#syntax#}export{#endsyntax#} declarations within it are analyzed.</li>
+        <li>If a type (including a file) is analyzed, and the compilation is for a {#link|test|Zig Test#}, and the module the type is within is the root module of the compilation, then all {#syntax#}test{#endsyntax#} declarations within it are also analyzed.</li>
+        <li>If a reference to a named declaration (i.e. a usage of it) is analyzed, the declaration being referenced is analyzed. Declarations are order-independent, so this reference may be above or below the declaration being referenced, or even in another file entirely.</li>
+      </ul>
+      <p>
+      That's it! Those rules define how Zig files and declarations are discovered. All that remains is to
+      understand where this process <em>starts</em>.
+      </p>
+      <p>
+      The answer to that is the root of the standard library: every Zig compilation begins by analyzing the
+      file <code>lib/std/std.zig</code>. This file contains a {#syntax#}comptime{#endsyntax#} declaration
+      which imports <code>lib/std/start.zig</code>, and that file in turn uses
+      {#syntax#}@import("root"){#endsyntax#} to reference the "root module"; so, the file you provide as your
+      main module's root source file is effectively also a root, because the standard library will always
+      reference it.
+      </p>
+      <p>
+      It is often desirable to make sure that certain declarations -- particularly {#syntax#}test{#endsyntax#}
+      or {#syntax#}export{#endsyntax#} declarations -- are discovered. Based on the above rules, a common
+      strategy for this is to use {#syntax#}@import{#endsyntax#} within a {#syntax#}comptime{#endsyntax#} or
+      {#syntax#}test{#endsyntax#} block:
+      </p>
+      {#syntax_block|zig|force_file_discovery.zig#}
+comptime {
+    // This will ensure that the file 'api.zig' is always discovered (as long as this file is discovered).
+    // It is useful if 'api.zig' contains important exported declarations.
+    _ = @import("api.zig");
+
+    // We could also have a file which contains declarations we only want to export depending on a comptime
+    // condition. In that case, we can use an `if` statement here:
+    if (builtin.os.tag == .windows) {
+        _ = @import("windows_api.zig");
+    }
+}
+
+test {
+    // This will ensure that the file 'tests.zig' is always discovered (as long as this file is discovered),
+    // if this compilation is a test. It is useful if 'tests.zig' contains tests we want to ensure are run.
+    _ = @import("tests.zig");
+
+    // We could also have a file which contains tests we only want to run depending on a comptime condition.
+    // In that case, we can use an `if` statement here:
+    if (builtin.os.tag == .windows) {
+        _ = @import("windows_tests.zig");
+    }
+}
+
+const builtin = @import("builtin");
+      {#end_syntax_block#}
+      {#header_close#}
+      {#header_open|Special Root Declarations#}
+      <p>
+      Because the root module's root source file is always accessible using
+      {#syntax#}@import("root"){#endsyntax#}, it is sometimes used by libraries -- including the Zig Standard
+      Library -- as a place for the program to expose some "global" information to that library. The Zig
+      Standard Library will look for several declarations in this file.
+      </p>
+      {#header_open|Entry Point#}
+      <p>
+      When building an executable, the most important thing to be looked up in this file is the program's
+      <em>entry point</em>. Most commonly, this is a function named {#syntax#}main{#endsyntax#}, which
+      {#syntax#}std.start{#endsyntax#} will call just after performing important initialization work.
+      </p>
+      <p>
+      Alternatively, the presence of a declaration named {#syntax#}_start{#endsyntax#} (for instance,
+      {#syntax#}pub const _start = {};{#endsyntax#}) will disable the default {#syntax#}std.start{#endsyntax#}
+      logic, allowing your root source file to export a low-level entry point as needed.
+      </p>
+      {#code|entry_point.zig#}
+      <p>
+      If the Zig compilation links libc, the {#syntax#}main{#endsyntax#} function can optionally be an
+      {#syntax#}export fn{#endsyntax#} which matches the signature of the C <code>main</code> function:
+      </p>
+      {#code|libc_export_entry_point.zig#}
+      <p>
+      {#syntax#}std.start{#endsyntax#} may also use other entry point declarations in certain situations, such
+      as {#syntax#}wWinMain{#endsyntax#} or {#syntax#}EfiMain{#endsyntax#}. Refer to the
+      <code>lib/std/start.zig</code> logic for details of these declarations.
+      </p>
+      {#header_close#}
+      {#header_open|Standard Library Options#}
+      <p>
+      The standard library also looks for a declaration in the root module's root source file named
+      {#syntax#}std_options{#endsyntax#}. If present, this declaration is expected to be a struct of type
+      {#syntax#}std.Options{#endsyntax#}, and allows the program to customize some standard library
+      functionality, such as the {#syntax#}std.log{#endsyntax#} implementation.
+      </p>
+      {#code|std_options.zig#}
+      {#header_close#}
+      {#header_open|Panic Handler#}
+      <p>
+      The Zig Standard Library looks for a declaration named {#syntax#}panic{#endsyntax#} in the root module's
+      root source file. If present, it is expected to be a namespace (container type) with declarations
+      providing different panic handlers.
+      </p>
+      <p>
+      See {#syntax#}std.debug.simple_panic{#endsyntax#} for a basic implementation of this namespace.
+      </p>
+      <p>
+      Overriding how the panic handler actually outputs messages, but keeping the formatted safety panics
+      which are enabled by default, can be easily achieved with {#syntax#}std.debug.FullPanic{#endsyntax#}:
+      </p>
+      {#code|panic_handler.zig#}
+      {#header_close#}
+      {#header_close#}
       {#header_close#}
       {#header_open|Zig Build System#}
       <p>

diff --git a/doc/langref/TopLevelFields.zig b/doc/langref/TopLevelFields.zig
@@ -0,0 +1,18 @@
+//! Because this file contains fields, it is a type which is intended to be instantiated, and so
+//! is named in TitleCase instead of snake_case by convention.
+
+foo: u32,
+bar: u64,
+
+/// `@This()` can be used to refer to this struct type. In files with fields, it is quite common to name the type
+/// here, so it can be easily referenced by other declarations.
+const TopLevelFields = @This();
+
+pub fn init(val: u32) TopLevelFields {
+    return .{
+        .foo = val,
+        .bar = val * 10,
+    };
+}
+
+// syntax
diff --git a/doc/langref/entry_point.zig b/doc/langref/entry_point.zig
@@ -0,0 +1,20 @@
+/// `std.start` imports this file using `@import("root")`, and uses this declaration as the program's
+/// user-provided entry point. It can return any of the following types:
+/// * `void`
+/// * `E!void`, for any error set `E`
+/// * `u8`
+/// * `E!u8`, for any error set `E`
+/// Returning a `void` value from this function will exit with code 0.
+/// Returning a `u8` value from this function will exit with the given status code.
+/// Returning an error value from this function will print an Error Return Trace and exit with code 1.
+pub fn main() void {
+    std.debug.print("Hello, World!\n", .{});
+}
+
+// If uncommented, this declaration would suppress the usual std.start logic, causing
+// the `main` declaration above to be ignored.
+//pub const _start = {};
+
+const std = @import("std");
+
+// exe=succeed
diff --git a/doc/langref/libc_export_entry_point.zig b/doc/langref/libc_export_entry_point.zig
@@ -0,0 +1,10 @@
+pub export fn main(argc: c_int, argv: [*]const [*:0]const u8) c_int {
+    const args = argv[0..@intCast(argc)];
+    std.debug.print("Hello! argv[0] is '{s}'\n", .{args[0]});
+    return 0;
+}
+
+const std = @import("std");
+
+// exe=succeed
+// link_libc
diff --git a/doc/langref/panic_handler.zig b/doc/langref/panic_handler.zig
@@ -0,0 +1,18 @@
+pub fn main() void {
+    @setRuntimeSafety(true);
+    var x: u8 = 255;
+    // Let's overflow this integer!
+    x += 1;
+}
+
+pub const panic = std.debug.FullPanic(myPanic);
+
+fn myPanic(msg: []const u8, first_trace_addr: ?usize) noreturn {
+    _ = first_trace_addr;
+    std.debug.print("Panic! {s}\n", .{msg});
+    std.process.exit(1);
+}
+
+const std = @import("std");
+
+// exe=fail
diff --git a/doc/langref/std_options.zig b/doc/langref/std_options.zig
@@ -0,0 +1,25 @@
+/// The standard library looks for a root source file declaration named `std_options`; its presence allows the
+/// program to override certain behaviors. For a full list of available options, see `std.Options`.
+pub const std_options: std.Options = .{
+    // By default, in safe build modes, the standard library will attach a segfault handler to the program to
+    // print a helpful stack trace if a segmentation fault occurs. Here, we can disable this, or even enable
+    // it in unsafe build modes.
+    .enable_segfault_handler = true,
+    // This is the logging function used by `std.log`.
+    .logFn = myLogFn,
+};
+
+fn myLogFn(
+    comptime level: std.log.Level,
+    comptime scope: @Type(.enum_literal),
+    comptime format: []const u8,
+    args: anytype,
+) void {
+    // We could do anything we want here!
+    // ...but actually, let's just call the default implementation.
+    std.log.defaultLog(level, scope, format, args);
+}
+
+const std = @import("std");
+
+// syntax