From 0b6adfde1ef9fe84654ccf8578c92ad31d099673 Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Wed, 24 Apr 2024 12:04:52 -0700 Subject: [PATCH] write DETAILS files for each major example --- GUIDE.txt | 39 +++- clang/build_win32_before_main.bat | 2 +- clang/build_win32_before_main_v2.bat | 18 ++ linux_before_main/DETAILS.txt | 9 + linux_before_main/linux_before_main.c | 15 +- linux_linking/DETAILS.txt | 117 +++++++++++ linux_linking/linux_main.c | 2 +- win32_before_main/DETAILS.txt | 39 ++++ win32_before_main/win32_before_main.c | 20 +- win32_linking/DETAILS.txt | 41 ++++ xlist/DETAILS.txt | 266 ++++++++++++++++++++++++++ xlist/base.c | 2 +- xlist/base.h | 14 +- 13 files changed, 566 insertions(+), 18 deletions(-) create mode 100644 clang/build_win32_before_main_v2.bat create mode 100644 linux_before_main/DETAILS.txt create mode 100644 linux_linking/DETAILS.txt create mode 100644 win32_before_main/DETAILS.txt create mode 100644 win32_linking/DETAILS.txt create mode 100644 xlist/DETAILS.txt diff --git a/GUIDE.txt b/GUIDE.txt index 41f4bb3..706afc1 100644 --- a/GUIDE.txt +++ b/GUIDE.txt @@ -1,11 +1,42 @@ +#################################### Hello #################################### + The main goal of this investigation is to organize shared data and code across multiple binary files. This is especially important for something like a base -layer that will be used in a program that supports hot-reloading or plugins. +layer that is used in a program that supports hot-reloading and/or plugins. -Each isolated example in this repository explores a way to set up the base -layer, plugin, and main program. +This repository contains examples of source and build details needed to achieve +dynamic linking (shared data and code) on Windows, and Linux, with the cl, gcc, +and clang compilers. -The examples: +To start exploring an example navigate into the folder of the example and run +the build script from your command line. 
It should generate an executable in +a build/ folder. Each example also has a DETAILS.txt with info about the +details that go into the example's structure and the expected output of the +example program. + + + +################################### Topics #################################### + +Looking for a specific topic? This index tells you which example to jump to. + +exporting symbols -> win32_linking, linux_linking +load-time import symbols -> win32_linking, linux_linking +run-time linking symbols -> win32_linking, linux_linking +building a .dll file -> win32_linking +building a .so file -> linux_linking +module initialization -> win32_before_main, linux_before_main +cl build line options -> win32_linking +gcc build line options -> linux_linking +linux load-time search paths -> linux_linking +clang build line options -> clang +abstracted base layer -> xlist + + + +################################ The Examples ################################# + +An explanation of the main ideas in each example. *_linking - Concrete examples for each operating system showing how to setup and use various types of dynamic linking. In these diff --git a/clang/build_win32_before_main.bat b/clang/build_win32_before_main.bat index 8aaa05d..4dbcb45 100644 --- a/clang/build_win32_before_main.bat +++ b/clang/build_win32_before_main.bat @@ -11,4 +11,4 @@ cd build REM: Build -clang %src%\win32_before_main.c +clang %src%\win32_before_main.c -o win32_before_main.exe diff --git a/clang/build_win32_before_main_v2.bat b/clang/build_win32_before_main_v2.bat new file mode 100644 index 0000000..89e7a01 --- /dev/null +++ b/clang/build_win32_before_main_v2.bat @@ -0,0 +1,18 @@ +@echo off + +REM: It turns out the gcc syntax __attribute__((constructor)) works +REM: on clang even for windows builds. You can run this script to +REM: see for yourself. + +REM: Path setup + +cd ..\linux_before_main +SET src=%cd% +cd .. 
+if not exist "build\" mkdir build +cd build + + + +REM: Build +clang %src%\linux_before_main.c -o win32_before_main_v2.exe diff --git a/linux_before_main/DETAILS.txt b/linux_before_main/DETAILS.txt new file mode 100644 index 0000000..7d55ba5 --- /dev/null +++ b/linux_before_main/DETAILS.txt @@ -0,0 +1,9 @@ +gcc makes this really easy with a straightforward compiler extension. All we +have to do is write a regular `void f(void)` function and mark it with: + +__attribute__((constructor)) + + + +Literally that's it. Someone tell Microsoft how cool this is! + diff --git a/linux_before_main/linux_before_main.c b/linux_before_main/linux_before_main.c index 0ca3736..b2e881d 100644 --- a/linux_before_main/linux_before_main.c +++ b/linux_before_main/linux_before_main.c @@ -11,4 +11,17 @@ static void run_before_main_func(void){ int main(){ printf("x = %d\n", x); return(0); -} \ No newline at end of file +} + + +#if 0 +// wrapped in a macro: + +#define BEFORE_MAIN(n) \ +__attribute__((constructor)) static void n(void) + +BEFORE_MAIN(before_main_rule){ + // do work here +} + +#endif diff --git a/linux_linking/DETAILS.txt b/linux_linking/DETAILS.txt new file mode 100644 index 0000000..4903d82 --- /dev/null +++ b/linux_linking/DETAILS.txt @@ -0,0 +1,117 @@ +linux_main.c defines the executable linux_main.exe + it depends on load-time linking with linux_base.so + it tries to perform run-time linking with linux_plugin.so + +linux_base.c defines the binary linux_base.so + +linux_plugin.c defines the binary linux_plugin.so + it depends on load-time linking with linux_base.so + +The build script has to build linux_base.c first because it needs the + results of that build to setup the load-time linking in the other builds. + The linux_base.so is used to resolve load-time imported symbols. + +The program has a shared data structure 'int x' in the 'base' layer that is + read and modified from both the 'main' layer and 'plugin' layer. 
+ +The expected output if the plugin loads successfully is: + +``` +x = 0 +provided by plugin: { + x = 1 + x = 2 +} +x = 3 +``` + +The expected output if the plugin is not found is: + +``` +x = 0 +x = 1 +``` + +The default Linux search paths for loading binaries do not include the +current directory of the process, or the path to the executable binary file. +It is possible to get it to behave like Windows binary loading, but some +extra steps need to be taken. (Load-Time Search Paths) (Run-Time Search Paths) + + +########################### Load-Time Search Paths ############################ + +For load-time binary dependencies, we can actually bake extra search paths +into a binary. GCC's options do not cover this, but there is a backdoor in +GCC for talking right to the underlying linker (ld). + +The backdoor is the option -Wl (lowercase L). The syntax of this option is a +little unusual. As soon as a space occurs the backdoor is closed, so the +entire option has to be specified without any spaces. Since we need spaces, +the backdoor lets us use commas. It will remove the commas and replace them +with spaces before passing the command on to ld. It looks something like this: + +gcc ... -Wl,-option,value ... + +The specific option we want to pass through this way is -rpath. This option +tells the linker to bake a path into the search paths of the binary, so the +syntax for specifying a path this way with the backdoor syntax is: + +gcc ... -Wl,-rpath,loadpath ... + +In order to get the same behavior as we have on Windows, we want the path to +be relative to the binary itself. This can be done using the special syntax +'$ORIGIN/' as the path. But there is a problem here too. The dollar sign +already has a meaning in the shell, so to actually pass a raw dollar sign we +actually have to escape it with a backslash. Putting it all together the option +looks like this: + +gcc ... -Wl,-rpath,\$ORIGIN/ ... + +If that seems like a lot, that's because it is A LOT. 
+ +It's also pretty atypical to do things this way on Linux where the system tries +to have a specific place to put all the different pieces of executables. In +particular you might try to put the executable in a 'bin/' folder and the +shared object binaries in a 'lib/' folder. Then you would use the binary +relative path like this: + +gcc ... -Wl,-rpath,\$ORIGIN/../lib ... + + +############################ Run-Time Search Paths ############################ + +The search paths for dlopen only include the system binary paths by default. +This matters if we are calling dlopen like this: + +dlopen("mylayer.so", flags) + +In this case the search paths will not include any binary relative rules or +current directory relative rules. + + +We can use $ORIGIN like we did in the load-time case to specify paths relative +to the calling binary: + +dlopen("$ORIGIN/mylayer.so", flags) + +Since this version is not just a base file name, the system search paths are +ignored and the path indicated by $ORIGIN is inspected directly. + + +We can also use . to specify the current directory like we do on the command +line when we call a script or run an executable in the current directory: + +dlopen("./mylayer.so", flags) + +In this case the system will look exactly in the current directory. + + +Finally we can use full paths to specify a directory without ambiguity: + +dlopen("/home/username/pluginproject/mylayer.so", flags) + +The nice thing about this option is that it means we can perform our own +search on the file system, and assemble a full path to get exactly what we want +if we have to. 
+ + diff --git a/linux_linking/linux_main.c b/linux_linking/linux_main.c index e2095fc..4cb4c8e 100644 --- a/linux_linking/linux_main.c +++ b/linux_linking/linux_main.c @@ -21,7 +21,7 @@ int main(){ // to call a function with run-time linking, we must manually load and link it - void *module = dlopen("./linux_plugin.so", RTLD_NOW); + void *module = dlopen("$ORIGIN/linux_plugin.so", RTLD_NOW); if (module != 0){ GET_PROC(plugin_func, module, "plugin_func"); } diff --git a/win32_before_main/DETAILS.txt b/win32_before_main/DETAILS.txt new file mode 100644 index 0000000..2f6b79e --- /dev/null +++ b/win32_before_main/DETAILS.txt @@ -0,0 +1,39 @@ +Being able to run code 'before main' isn't just a magic trick. In a situation +where there may be more than one layer with dynamic linking and more than one +.dll (plugin system for instance), the maintenance burden of setting up each +layer in a central DllMain or main is significant enough to be a burden. + +It is possible to get this effect on Windows through the CL compiler, but it +would be a stretch to say that it is "supported". The way I show here works +by relying on the fact that a special section does exist that contains function +pointers that run before 'main' or 'DllMain'. We can use CL's compiler +extensions to add a function pointer to that section just by declaring it as a +global variable and marking it up with: + +__declspec(allocate(".CRT$XCU")) + +If you look up this method on the internet, you will find claims that under +certain types of whole-program optimization, this won't work. In particular +this happens if you use the option /GL in CL. + +This happens because the global variable appears to be unused from the +perspective of the compiler & linker. Since it is never directly referenced, +there is no C-level semantical reason to think this global variable is doing +anything. + +However, in this example I show how we can still make it work. 
We have to +make sure the linker won't eliminate the global variable that we are trying to +place into the ".CRT$XCU" section. I achieve this by marking it as an export +symbol. Export symbols can't be eliminated even if they aren't used locally. + +From what I've seen in testing, this works as desired, even with the /GL option. + +IMPORTANT RESTRICTION: Because this creates an export symbol, each time we use +this within a binary it must have a unique name. Generally I would recommend +naming each before-main symbol by scoping it to the layer where it exists. + + +CLANG NOTE: Interestingly, clang can build this, but it can also use the +__attribute__((constructor)) extension on Windows, which is a lot closer to +"supporting" this feature. I suspect that when I am building with clang I will +prefer to go with this option most of the time. diff --git a/win32_before_main/win32_before_main.c b/win32_before_main/win32_before_main.c index 481afbd..e6d0903 100644 --- a/win32_before_main/win32_before_main.c +++ b/win32_before_main/win32_before_main.c @@ -7,7 +7,7 @@ static void run_before_main_func(void); // set the before-main execution function pointer __declspec(allocate(".CRT$XCU")) -__pragma(comment(linker, "/INCLUDE:run_before_main_ptr")) +__declspec(dllexport) void (*run_before_main_ptr)(void) = run_before_main_func; // define the "before main" function @@ -21,4 +21,20 @@ static void run_before_main_func(void){ int main(){ printf("x = %d\n", x); return(0); -} \ No newline at end of file +} + + +#if 0 +// wrapped in a macro: + +#define BEFORE_MAIN(n) static void n(void); \ +__declspec(allocate(".CRT$XCU")) \ +__declspec(dllexport) \ +void (*n##__)(void) = n; \ +static void n(void) + +BEFORE_MAIN(before_main_rule){ + // do work here +} + +#endif diff --git a/win32_linking/DETAILS.txt b/win32_linking/DETAILS.txt new file mode 100644 index 0000000..db4bc33 --- /dev/null +++ b/win32_linking/DETAILS.txt @@ -0,0 +1,41 @@ +win32_main.c defines the executable win32_main.exe + it 
depends on load-time linking with win32_base.dll + it tries to perform run-time linking with win32_plugin.dll + +win32_base.c defines the binary win32_base.dll + +win32_plugin.c defines the binary win32_plugin.dll + it depends on load-time linking with win32_base.dll + +The build script has to build win32_base.c first because it needs the + results of that build to setup the load-time linking in the other builds. + The win32_base.lib that is generated along with win32_base.dll is used + to resolve load-time imported symbols. + +The program has a shared data structure 'int x' in the 'base' layer that is + read and modified from both the 'main' layer and 'plugin' layer. + +The expected output if the plugin loads successfully is: + +``` +x = 0 +provided by plugin: { + x = 1 + x = 2 +} +x = 3 +``` + +The expected output if the plugin is not found is: + +``` +x = 0 +x = 1 +``` + +You should be able to relocate the plugin and use a full path to it and still +get the first result. As long as the load-time dependency win32_base.dll is +with the executable, it will load. It can be found in some other paths, but +you cannot specify the search paths manually, so keeping it with the executable +is the simplest option. + diff --git a/xlist/DETAILS.txt b/xlist/DETAILS.txt new file mode 100644 index 0000000..9d86f1e --- /dev/null +++ b/xlist/DETAILS.txt @@ -0,0 +1,266 @@ +In this example I link everything through run-time linking. The upsides to this +are that everything dealing with the linking is in my code so I can tweak it +or debug it directly, and I don't have a mix of load-time and run-time linking, +making the wrangling of keyword abstraction and linker options simpler. + +There are some downsides too, and in this example I show how I can mitigate +these downsides pretty well. + +The two big downsides I address in this example are: + 1. Symbol declaration and binding gets more difficult in C + 2. 
Each binary requires some dynamic initialization + +Finally after going over these problems in detail, I will present some details +of the solution I use in this example. + +Like the *_linking examples, the expected output if the plugin loads +successfully is: + +``` +x = 0 +provided by plugin: { + x = 1 + x = 2 +} +x = 3 +``` + +And the expected output if the plugin is not found is: + +``` +x = 0 +x = 1 +``` + + + +############################# Symbol Declaration ############################## + +The symbol declaration problem requires a bit of setup to fully appreciate. + +Normally in C you think of your program code as header & implementation, or +declaration & definition. + +So we might have a header file like: + +`layer.h` + +``` +void* layer_a(int x); +void layer_b(void *a, void *m); +int layer_c(void *a); +``` + +And then an implementation file like: + +`layer.c` + +``` +void* layer_a(int x){ + // ... +} + +void layer_b(void *a, void *m){ + // ... +} + +int layer_c(void *a){ + // ... +} +``` + +Whether we are linking these statically (unity build) or across object files +(classic build) it's pretty easy, the header just gets included at all usage +and implementation sites as it is. + +When we transition to linking across binaries, we hit a problem. The reason we +hit a problem is that with run-time linking we need to be directing our +calls through function pointers instead of through functions. 
+ + +## Solution: reroute from function to function pointer ## + +One way to handle this is to keep the header and provide a different implementation +file: + +`layer.dynamic.c` + +``` +void* layer_a(int x){ + return(layer_funcs->layer_a(x)); +} + +void layer_b(void *a, void *m){ + layer_funcs->layer_b(a, m); +} + +int layer_c(void *a){ + return(layer_funcs->layer_c(a)); +} +``` + +Then on the side that defines the symbols, we still use `layer.c` but in any +binary that wants to load the symbols, we would use `layer.dynamic.c` which +reroutes the normal function calls to function pointers. + +This `*.dynamic.c` file is pretty fatty though - requiring several lines of +pattern duplication for each function in the layer. + +A metaprogramming system can help here if you want to go that route, but +putting another build program in the mix isn't exactly lightweight either. + + +## Solution: call through function pointer table ## + +Another option is to say that the place where the issue will pop out is at +all the usage sites. Any code written as a user of the layer will switch from +calling the layer like this `layer_foo( ... )` to `layer->foo( ... )`. + +In theory this eliminates maintenance work, but oh boy are we in for it the +first time we realize we have a helper that wants to work in both the context +of the layer's user AND the layer's definer. At that point we're either +duplicating the helper, which leads us to minimize the richness of helpers we +develop around the layer, or we put them in some kind of unifying +wrapper, which is the problem we were trying to solve in the first place. + + +## Solution: global function pointers ## + +A third way to handle this is to define a new version of the header: + +`layer.dynamic.h` + +``` +void* (*layer_a)(int x) = 0; +void (*layer_b)(void *a, void *m) = 0; +int (*layer_c)(void *a) = 0; +``` + +In this version each function symbol that the user wants to see gets replaced +with a global function pointer. 
+ +Now each usage site can look like a function usage site. We still have some +maintenance burden increase like in the `*.dynamic.c` but not as much. What's +really nice about this version is we can actually generate this from an xlist. +We can't so easily use an xlist in the other case because the pattern expansion +is a little too heterogeneous. + +This is essentially what I do in this example. I generate the function pointers +from an xlist. I don't have a separate `base.dynamic.h` though, I just +put both versions of the function symbols in `base.h` and use the preprocessor +to select one or the other. This way they can easily have shared type +definitions and constants. + + +## Conclusion: Symbol Declaration ## + +So the symbol declaration problem is really about deciding how to provide the +declarations that allow us to refer to symbols that get resolved dynamically. + +This is only a problem for run-time linking because with load-time linking we +can just create regular function symbols with some special mark up. It would +be nice if C had anticipated this and gave us a better way to define these +run-time resolved symbols with the same basic syntax we use for regular +function symbols. But alas, that's not how it is. + + +########################### Dynamic Initialization ############################ + +The dynamic initialization problem is about how to maintain the run-time +linking code. 
+ +Imagine we have a 'base' layer like this: + +`base.h` + +``` +void base_a(void); +void base_b(void); +void base_c(void); +``` + +If we just maintain the run-time linking with brute force our run-time linking +code would look *something* like this: + +``` +void base_init(void){ + Library *library = library_open("base"); + GET_PROC(base_a, library, "base_a"); + GET_PROC(base_b, library, "base_b"); + GET_PROC(base_c, library, "base_c"); +} +``` + +The exact details depend on how you've solved the symbol declaration problem +and on the operating system APIs for loading and linking binaries. + +We can easily clean up the maintenance burden of this part with an xlist, or +by using a single GET_PROC which then passes through a function pointer table +with the rest of the layer's functions. + +The other part of the dynamic initialization problem is deciding how we will ensure +the initialization actually gets done. + +For instance, let's look at the layers used in this example: 'base', 'plugin', and +'main'. Both 'plugin' and 'main' are users of 'base'. 'main' is responsible for +loading 'plugin' if it wants to, and for proceeding gracefully if the 'plugin' +is missing. + +We need to ensure `base_init` gets called in each binary. + +## Solution: manual initialization ## + +For 'main' we would just say it's the responsibility of the entry point to call +`base_init`. + +For 'plugin' we have two options. The first option is that when 'main' loads +the 'plugin' layer it is responsible for reaching into the module, finding its +`base_init` function and calling it. The second option is that the 'plugin' +module has an on-load entry point that calls `base_init`. + +None of these options are "broken" but they do require some extra +hand shaking and protocol designing between all these binaries. 
+ +## Solution: automatic initialization ## + +Another option is to have the 'base' layer itself provide the code that does +all of the initialization automatically for the users of the 'base' layer. In +order to do this 'base' will need to be able to write something like an +"on-load hook". A function that gets called automatically when the binary +loads. The code that defines the 'base' layer will only insert this hook into +binaries that are trying to run-time link to the 'base' layer definitions. + +In the *_before_main examples I show how it is actually possible to do this in +C, although the details are admittedly sketchy in the case of Windows with the +CL compiler. + +This basically lets us emulate the automatic linking provided by load-time +linking, but as a downside, it means we have less flexibility about how the +layer gets loaded. + + +################################## Solution ################################### + +The big idea of my solution is to use an xlist to minimize maintenance burden +without bringing in a whole cloth code generator. + +I put all the 'base' layer functions that will be run-time linked into an +xlist file `base.xlist.h`. + +I also put the normal style of symbol definition list in `base.h`. Technically +I don't need this, I could just generate it from the xlist, but then I don't +have any "normal" looking version of the function declarations. The xlist is +highly reusable, suitable for almost every purpose, but it is not very +readable. Users of my code should be able to just skim some natural looking +C code with comments and formatting to understand the code they are using. + +I setup a function pointer table `BASE_Funcs` so that I only have to export +one symbol from the 'base' layer implementor. That symbol fills and exposes +the function pointer table for run-time linking. 
+ +When the base layer is included in a binary that is not the implementor the +'base' layer generates a before-main hook to load the 'base' layer and +perform the run-time linking. + +Thanks to this design neither 'main' nor 'plugin' have to do anything to +start using the 'base' layer except to include it. diff --git a/xlist/base.c b/xlist/base.c index 2162ef8..773ed21 100644 --- a/xlist/base.c +++ b/xlist/base.c @@ -53,7 +53,7 @@ BEFORE_MAIN(base_dynamic_user_init){ } } #elif OS_LINUX - void *module = dlopen("./base.so", RTLD_NOW); + void *module = dlopen("$ORIGIN/base.so", RTLD_NOW); if (module != 0){ BASE_ExportFuncs *base_export_functions = (BASE_ExportFuncs*)dlsym(module, "base_export_functions"); if (base_export_functions != 0){ diff --git a/xlist/base.h b/xlist/base.h index 064656c..ebeaf63 100644 --- a/xlist/base.h +++ b/xlist/base.h @@ -27,14 +27,16 @@ #endif +// before-main abstraction + #if OS_WINDOWS # pragma section(".CRT$XCU", read) -# define BEFORE_MAIN(n) static void n(void); \ -__declspec(allocate(".CRT$XCU")) \ -__pragma(comment(linker, "/INCLUDE:" #n "__")) \ -void (*n##__)(void) = n; \ +# define BEFORE_MAIN(n) static void n(void); \ +__declspec(allocate(".CRT$XCU")) \ +__declspec(dllexport) \ +void (*n##__)(void) = n; \ static void n(void) #elif OS_LINUX @@ -46,10 +48,6 @@ __attribute__((constructor)) static void n(void) # error BEFORE_MAIN missing for this OS #endif -// base layer types - -typedef void BASE_Library; - // base symbols shared