Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
pugs
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
code
pugs
Commits
7db6a40e
Commit
7db6a40e
authored
8 months ago
by
Stéphane Del Pino
Browse files
Options
Downloads
Patches
Plain Diff
Update user documentation
Describe shortly checkpoint/resume use
parent
7341451d
No related branches found
No related tags found
1 merge request
!199
Integrate checkpointing
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
cmake/PugsDoc.cmake
+5
-0
5 additions, 0 deletions
cmake/PugsDoc.cmake
doc/lisp/build-doc-config.el
+1
-1
1 addition, 1 deletion
doc/lisp/build-doc-config.el
doc/userdoc.org
+196
-1
196 additions, 1 deletion
doc/userdoc.org
with
203 additions
and
2 deletions
.gitignore
+
1
−
0
View file @
7db6a40e
...
@@ -24,3 +24,4 @@ GTAGS
...
@@ -24,3 +24,4 @@ GTAGS
/doc/lisp/.fltk/
/doc/lisp/.fltk/
/doc/*.msh
/doc/*.msh
/doc/*.vtu
/doc/*.vtu
/doc/checkpoint.h5
This diff is collapsed.
Click to expand it.
cmake/PugsDoc.cmake
+
5
−
0
View file @
7db6a40e
...
@@ -37,6 +37,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
...
@@ -37,6 +37,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
COMMAND
COMMAND
${
CMAKE_COMMAND
}
-E env
${
CMAKE_COMMAND
}
-E env
PUGS=
${
PUGS_BINARY_DIR
}
/pugs
PUGS=
${
PUGS_BINARY_DIR
}
/pugs
PUGS_CHECKPOINT=
${
PUGS_BINARY_DIR
}
/pugs_checkpoint
HOME=
${
PUGS_SOURCE_DIR
}
/doc/lisp
HOME=
${
PUGS_SOURCE_DIR
}
/doc/lisp
PUGS_SOURCE_DIR=
${
PUGS_SOURCE_DIR
}
PUGS_SOURCE_DIR=
${
PUGS_SOURCE_DIR
}
PUGS_BINARY_DIR=
${
PUGS_BINARY_DIR
}
PUGS_BINARY_DIR=
${
PUGS_BINARY_DIR
}
...
@@ -51,6 +52,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
...
@@ -51,6 +52,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
COMMAND
COMMAND
${
CMAKE_COMMAND
}
-E env
${
CMAKE_COMMAND
}
-E env
PUGS=
${
PUGS_BINARY_DIR
}
/pugs
PUGS=
${
PUGS_BINARY_DIR
}
/pugs
PUGS_CHECKPOINT=
${
PUGS_BINARY_DIR
}
/pugs_checkpoint
HOME=
${
PUGS_SOURCE_DIR
}
/doc/lisp
HOME=
${
PUGS_SOURCE_DIR
}
/doc/lisp
PUGS_SOURCE_DIR=
${
PUGS_SOURCE_DIR
}
PUGS_SOURCE_DIR=
${
PUGS_SOURCE_DIR
}
PUGS_BINARY_DIR=
${
PUGS_BINARY_DIR
}
PUGS_BINARY_DIR=
${
PUGS_BINARY_DIR
}
...
@@ -59,6 +61,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
...
@@ -59,6 +61,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
"
${
PUGS_SOURCE_DIR
}
/doc/userdoc.org"
"
${
PUGS_SOURCE_DIR
}
/doc/userdoc.org"
"
${
PUGS_SOURCE_DIR
}
/doc/lisp/userdoc-html.el"
"
${
PUGS_SOURCE_DIR
}
/doc/lisp/userdoc-html.el"
pugs
pugs
pugs_checkpoint
pugsdoc-dir
pugsdoc-dir
pugsdoc-download-elpa
pugsdoc-download-elpa
${
ORG_GENERATOR_FILES
}
${
ORG_GENERATOR_FILES
}
...
@@ -77,6 +80,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
...
@@ -77,6 +80,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
COMMAND
COMMAND
${
CMAKE_COMMAND
}
-E env
${
CMAKE_COMMAND
}
-E env
PUGS=
${
PUGS_BINARY_DIR
}
/pugs
PUGS=
${
PUGS_BINARY_DIR
}
/pugs
PUGS_CHECKPOINT=
${
PUGS_BINARY_DIR
}
/pugs_checkpoint
HOME=
${
PUGS_SOURCE_DIR
}
/doc/lisp
HOME=
${
PUGS_SOURCE_DIR
}
/doc/lisp
PUGS_SOURCE_DIR=
${
PUGS_SOURCE_DIR
}
PUGS_SOURCE_DIR=
${
PUGS_SOURCE_DIR
}
PUGS_BINARY_DIR=
${
PUGS_BINARY_DIR
}
PUGS_BINARY_DIR=
${
PUGS_BINARY_DIR
}
...
@@ -87,6 +91,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
...
@@ -87,6 +91,7 @@ if (EMACS AND GNUPLOT_FOUND AND GMSH)
"
${
PUGS_SOURCE_DIR
}
/tools/pgs-pygments.sh"
"
${
PUGS_SOURCE_DIR
}
/tools/pgs-pygments.sh"
"
${
PUGS_SOURCE_DIR
}
/tools/pgs-pygments.py"
"
${
PUGS_SOURCE_DIR
}
/tools/pgs-pygments.py"
pugs
pugs
pugs_checkpoint
pugsdoc-dir
pugsdoc-dir
pugsdoc-download-elpa
pugsdoc-download-elpa
${
ORG_GENERATOR_FILES
}
${
ORG_GENERATOR_FILES
}
...
...
This diff is collapsed.
Click to expand it.
doc/lisp/build-doc-config.el
+
1
−
1
View file @
7db6a40e
...
@@ -51,7 +51,7 @@
...
@@ -51,7 +51,7 @@
'
(
org-latex-listings
'minted
))
'
(
org-latex-listings
'minted
))
(
setq
org-latex-minted-options
(
setq
org-latex-minted-options
'
((
"linenos=true"
)
(
"breaklines"
)))
'
((
"linenos=true"
)
(
"breaklines"
)
(
"autogobble"
)
))
(
defun
org-export-output-file-name-modified
(
orig-fun
extension
&optional
subtreep
pub-dir
)
(
defun
org-export-output-file-name-modified
(
orig-fun
extension
&optional
subtreep
pub-dir
)
(
unless
pub-dir
(
unless
pub-dir
...
...
This diff is collapsed.
Click to expand it.
doc/userdoc.org
+
196
−
1
View file @
7db6a40e
...
@@ -32,6 +32,12 @@
...
@@ -32,6 +32,12 @@
#+LATEX_HEADER_EXTRA: \usepackage{mathpazo}
#+LATEX_HEADER_EXTRA: \usepackage{mathpazo}
#+LATEX_HEADER_EXTRA: \usepackage{inconsolata}
#+LATEX_HEADER_EXTRA: \usepackage{inconsolata}
#+LATEX_HEADER_EXTRA: %Patch accsupp to avoid copying line numbers when copying from listing
#+LATEX_HEADER_EXTRA: \usepackage{accsupp}
#+LATEX_HEADER_EXTRA: \newcommand\emptyaccsupp[1]{\BeginAccSupp{ActualText={}}#1\EndAccSupp{}}
#+LATEX_HEADER_EXTRA: \let\theHFancyVerbLine\theFancyVerbLine
#+LATEX_HEADER_EXTRA: \def\theFancyVerbLine{\rmfamily\tiny\emptyaccsupp{\arabic{FancyVerbLine}}}
#+LATEX_HEADER_EXTRA: \BeforeBeginEnvironment{tabular}{\rowcolors[]{2}{orange!5}{orange!10}}
#+LATEX_HEADER_EXTRA: \BeforeBeginEnvironment{tabular}{\rowcolors[]{2}{orange!5}{orange!10}}
#+LATEX_HEADER_EXTRA: \BeforeBeginEnvironment{minted}{\begin{mdframed}[linecolor=blue,backgroundcolor=blue!10]}
#+LATEX_HEADER_EXTRA: \BeforeBeginEnvironment{minted}{\begin{mdframed}[linecolor=blue,backgroundcolor=blue!10]}
#+LATEX_HEADER_EXTRA: \AfterEndEnvironment{minted}{\end{mdframed}}
#+LATEX_HEADER_EXTRA: \AfterEndEnvironment{minted}{\end{mdframed}}
...
@@ -2541,11 +2547,200 @@ to files for instance) as we will see below.
...
@@ -2541,11 +2547,200 @@ to files for instance) as we will see below.
**** ~core~ provided functions
**** ~core~ provided functions
***** ~exit: Z -> void~
***** execution control functions
Here are functions that allow one to control the execution of the
script. These can stop the execution if some conditions are met or
create checkpoints that may be used to stop and then resume the
execution.
****** ~exit: Z -> void~
This function interrupts the execution of the script. The integer (~Z~)
This function interrupts the execution of the script. The integer (~Z~)
value is the code that is returned when ~pugs~ exits.
value is the code that is returned when ~pugs~ exits.
#+NAME: exit-function
#+BEGIN_SRC pugs :exports both :results output
for (let i:N, i = 1; i < 10; ++i) {
cout << "i = " << i << "\n";
if (i==3) {
exit(0);
}
}
#+END_SRC
The output shows that the execution is interrupted (in a clean way) if
the condition (here ~i == 3~) is met.
#+RESULTS: exit-function
****** ~stop: void -> B~
This is an interactive function that is used to notify the code to
stop its execution. It returns ~true~ if one of the following stopping
conditions is met.
- If a ~stop~ file is created *after* the beginning of the execution in
the *execution directory*. This file can be created using the ~touch~
command for instance
#+BEGIN_SRC shell :exports code
touch stop
#+END_SRC
- If ~pugs~ was compiled using ~Slurm~ support (this can be checked using
the command ~./pugs -v~) then during batch execution the ~stop~ function
will also return ~true~ as soon as the remaining execution time is
less than $150s$.
In the following example the function ~stop~ always returns ~false~ since
none of the above conditions are satisfied.
#+NAME: stop-function
#+BEGIN_SRC pugs :exports both :results output
for (let i:N, i = 1; i <= 5; ++i) {
cout << "i = " << i << "\n";
if (stop()) {
exit(0);
}
}
#+END_SRC
The output shows that the execution reaches ~i == 5~.
#+RESULTS: stop-function
****** ~checkpoint: void -> void~
This function creates a checkpoint that can be used as a starting
point for another execution. This function can be placed anywhere in
the script and possibly multiple times. The checkpoint storage file is
~checkpoint.h5~ and it is created in the current execution directory.
*This functionality requires ~pugs~ to be compiled with ~HDF5~ support*.
The following example creates two checkpoints.
#+NAME: checkpoint-function
#+BEGIN_SRC pugs :exports both :results output
let n:N, n = 4;
cout << "n = " << n << "\n";
checkpoint();
let x:R^3, x = [1, 2, 3];
cout << "x = " << x << "\n";
checkpoint();
#+END_SRC
The output displays the creation of two checkpoints in the file ~checkpoint.h5~.
#+RESULTS: checkpoint-function
The command
#+BEGIN_SRC shell :exports code
pugs_checkpoint --info checkpoint.h5
#+END_SRC
displays a simple execution state for each checkpoint stored in the
file in order to help the user choose the appropriate resuming
checkpoint.
#+NAME: checkpoint-info
#+BEGIN_SRC shell :exports results :results output
${PUGS_CHECKPOINT} --no-color --info checkpoint.h5
#+END_SRC
#+RESULTS: checkpoint-info
One can notice that the ~x~ variable does not appear in the first
checkpoint since it was not created at this point. Also two pointers
are defined:
- ~last_checkpoint~ that points to the most recent checkpoint,
and
- ~resuming_checkpoint~ that indicates which checkpoint will be used when
resuming.
Proceeding with this example, one can change the resuming checkpoint
with the command
#+BEGIN_SRC shell :exports code
pugs_checkpoint --resume-from 0 checkpoint.h5
#+END_SRC
which displays
#+NAME: checkpoint-resume-from
#+BEGIN_SRC shell :exports results :results output
${PUGS_CHECKPOINT} --no-color --resume-from 0 checkpoint.h5
#+END_SRC
#+RESULTS: checkpoint-resume-from
Running again
#+BEGIN_SRC shell :exports code
pugs_checkpoint --info checkpoint.h5
#+END_SRC
now prints
#+NAME: checkpoint-info0
#+BEGIN_SRC shell :exports results :results output
${PUGS_CHECKPOINT} --no-color --info checkpoint.h5
#+END_SRC
#+RESULTS: checkpoint-info0
One notices that ~resuming_checkpoint~ now points to ~checkpoint_0~.
It is now possible to resume the execution using the command
#+BEGIN_SRC shell :exports code
pugs --resume checkpoint.h5
#+END_SRC
The output is now
#+NAME: checkpoint-resume0
#+BEGIN_SRC shell :exports results :results output
${PUGS} --no-exec-stat --no-preamble --no-color --threads=1 --resume checkpoint.h5
#+END_SRC
#+RESULTS: checkpoint-resume0
Observe that the random seed is reset to the value that was stored at
checkpoint 0.
Also, each checkpoint whose number is greater than the resuming
checkpoint (here 0) is removed when the next checkpoint (here 1) is
written.
#+BEGIN_note
One may have noticed that the script file that is used for resuming is
actually stored in the ~checkpoint.h5~ file: the script is not a command
line argument.
For now, it is not possible to *simply* modify the script while
resuming. This is done on purpose since it remains unclear if such a
dangerous functionality should be made easy. Indeed, allowing
script modifications may lead to undefined behaviors.
#+END_note
#+BEGIN_warning
The number of ~MPI~ ranks *cannot* be changed when resuming.
This is not likely to change since it is a very specific
functionality. However, a post-processing tool rewriting checkpoints
may be developed to achieve it, but it is not a priority.
#+END_warning
****** ~checkpoint_and_exit: void -> void~
This is an advanced version of the ~checkpoint~ function that causes a
clean ~exit~ of the code after writing a checkpoint.
The typical use case for this function is batch execution, where it is
generally triggered by the ~stop~ function.
We give here a simple example.
#+NAME: checkpoint-exit-function
#+BEGIN_SRC pugs :exports both :results output
for (let i:N, i = 1; i<=10; ++i) {
cout << "i = " << i << "\n";
if (i==5) {
checkpoint_and_exit();
}
}
#+END_SRC
The output is
#+RESULTS: checkpoint-exit-function
Then one can resume the execution as previously by
#+BEGIN_SRC shell :exports code
pugs --resume checkpoint.h5
#+END_SRC
It gives the output
#+NAME: checkpoint-resume1
#+BEGIN_SRC shell :exports results :results output
${PUGS} --no-exec-stat --no-preamble --no-color --threads=1 --resume checkpoint.h5
#+END_SRC
#+RESULTS: checkpoint-resume1
# Clean-up
#+BEGIN_SRC shell :exports results :results none
/bin/rm -f checkpoint.h5
#+END_SRC
***** ~getAvailableModules: void -> string~
***** ~getAvailableModules: void -> string~
This function that is used in the preamble of this section is a
This function that is used in the preamble of this section is a
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment