Commit 3c4c553f authored by ph

up

parent 6f10f941
......@@ -2903,7 +2903,7 @@ status open
\noindent
\align center
\begin_inset Tabular
<lyxtabular version="3" rows="13" columns="7">
<lyxtabular version="3" rows="13" columns="8">
<features tabularvalignment="middle">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
......@@ -2912,6 +2912,7 @@ status open
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
......@@ -2925,6 +2926,15 @@ status open
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
CU
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
precision
\end_layout
......@@ -2989,7 +2999,16 @@ efficiency
\begin_inset Text
\begin_layout Plain Layout
1 CPU (AMD EPYC 2.0 GHz)
AMD
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
1
\end_layout
\end_inset
......@@ -3054,7 +3073,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
24 CPU (AMD EPYC 2.0 GHz)
AMD
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
24
\end_layout
\end_inset
......@@ -3119,7 +3147,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
2x8 CPU (Intel Xeon 1.7 GHz) POCL driver
Intel (pocl)
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
16
\end_layout
\end_inset
......@@ -3184,7 +3221,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
2x8 CPU (Intel Xeon 1.7 GHz)
Intel
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
16
\end_layout
\end_inset
......@@ -3249,7 +3295,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
2x8 CPU (Intel Xeon 1.7 GHz)
Intel
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
16
\end_layout
\end_inset
......@@ -3314,7 +3369,16 @@ float64
\begin_inset Text
\begin_layout Plain Layout
GPU (NVIDIA GTX 1660)
GTX
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
22
\end_layout
\end_inset
......@@ -3379,7 +3443,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
GPU (NVIDIA Quadro P6000)
Quadro
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
30
\end_layout
\end_inset
......@@ -3444,7 +3517,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
GPU (NVIDIA Quadro P6000)
Quadro
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
30
\end_layout
\end_inset
......@@ -3509,7 +3591,16 @@ float64
\begin_inset Text
\begin_layout Plain Layout
GPU (NVIDIA Tesla V100)
V100
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
80
\end_layout
\end_inset
......@@ -3574,7 +3665,16 @@ float32
\begin_inset Text
\begin_layout Plain Layout
GPU (NVIDIA Tesla V100)
V100
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
80
\end_layout
\end_inset
......@@ -3639,14 +3739,19 @@ float64
\begin_inset Text
\begin_layout Plain Layout
\begin_inset Formula $L^{1}$
\end_inset
error
\begin_inset Formula $e_{N}$
\end_inset
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\end_layout
\end_inset
......@@ -3711,12 +3816,20 @@ float64
\begin_inset Text
\begin_layout Plain Layout
conv.
rate
\begin_inset Formula $\beta$
\end_inset
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\end_layout
\end_inset
......
......@@ -889,35 +889,35 @@ of magnitude (see for instance \cite{helluy2016asynchronous}).
\begin{table}
\centering{}%
\begin{tabular}{|c|c|c|c|c|c|c|}
\hline
& precision & N=128 & N=256 & N=512 & N=1024 & ``efficiency''\tabularnewline
\hline
1 CPU (AMD EPYC 2.0 GHz) & float32 & 11.9 s & 159 s & 621 s & 6396 s & 1\tabularnewline
\hline
\hline
24 CPU (AMD EPYC 2.0 GHz) & float32 & 1.01 s & 9.4 s & 153 s & 1380 s & 5\tabularnewline
\hline
2x8 CPU (Intel Xeon 1.7 GHz) POCL driver & float32 & 2.30 s & 17.3 s & 96.6 s & 644 s & 10\tabularnewline
\hline
2x8 CPU (Intel Xeon 1.7 GHz) & float32 & 0.75 s & 3.93 s & 32 s & 226 s & 30\tabularnewline
\hline
2x8 CPU (Intel Xeon 1.7 GHz) & float64 & 0.82 s & 5.62 s & 53 s & 315 s & 20\tabularnewline
\hline
GPU (NVIDIA GTX 1660) & float32 & 0.04 s & 0.31s & 2.46 s & 19.48 s & 330\tabularnewline
\hline
GPU (NVIDIA Quadro P6000) & float32 & 0.017 s & 0.15 s & 1.06 s & 8.25 s & 780\tabularnewline
\hline
GPU (NVIDIA Quadro P6000) & float64 & 0.15 s & 0.81 s & 5.67 s & 45.53 s & 140\tabularnewline
\hline
GPU (NVIDIA Tesla V100) & float32 & 0.015 s & 0.084 s & 0.54 s & 3.93 s & 1600\tabularnewline
\hline
GPU (NVIDIA Tesla V100) & float64 & 0.031 s & 0.21 s & 1.17 s & 8.35 s & 770\tabularnewline
\hline
$L^{1}$ error $e_{N}$ & float64 & 0.05067625 & 0.013039866 & 0.003265470 & 0.00081652 & \tabularnewline
\hline
conv. rate $\beta$ & float64 & - & 1.96 & 1.99 & 2.00 & \tabularnewline
\hline
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
& CU & precision & N=128 & N=256 & N=512 & N=1024 & ``efficiency''\tabularnewline
\hline
AMD & 1 & float32 & 11.9 s & 159 s & 621 s & 6396 s & 1\tabularnewline
\hline
\hline
AMD & 24 & float32 & 1.01 s & 9.4 s & 153 s & 1380 s & 5\tabularnewline
\hline
Intel (pocl) & 16 & float32 & 2.30 s & 17.3 s & 96.6 s & 644 s & 10\tabularnewline
\hline
Intel & 16 & float32 & 0.75 s & 3.93 s & 32 s & 226 s & 30\tabularnewline
\hline
Intel & 16 & float64 & 0.82 s & 5.62 s & 53 s & 315 s & 20\tabularnewline
\hline
GTX & 22 & float32 & 0.04 s & 0.31 s & 2.46 s & 19.48 s & 330\tabularnewline
\hline
Quadro & 30 & float32 & 0.017 s & 0.15 s & 1.06 s & 8.25 s & 780\tabularnewline
\hline
Quadro & 30 & float64 & 0.15 s & 0.81 s & 5.67 s & 45.53 s & 140\tabularnewline
\hline
V100 & 80 & float32 & 0.015 s & 0.084 s & 0.54 s & 3.93 s & 1600\tabularnewline
\hline
V100 & 80 & float64 & 0.031 s & 0.21 s & 1.17 s & 8.35 s & 770\tabularnewline
\hline
$e_{N}$ & & float64 & 0.05067625 & 0.013039866 & 0.003265470 & 0.00081652 & \tabularnewline
\hline
$\beta$ & & float64 & - & 1.96 & 1.99 & 2.00 & \tabularnewline
\hline
\end{tabular}\caption{Convergence and performance study\label{tab:Convergence-and-performance}.
Some tests are done in single precision (float32) and others in double
precision (float64). The ``efficiency'' is a comparison for N=1024
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment