diff --git a/shared/n-acd/.cherryci/ci-test b/shared/n-acd/.cherryci/ci-test
new file mode 100755
index 0000000000..71f3457037
--- /dev/null
+++ b/shared/n-acd/.cherryci/ci-test
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -e
+
+rm -Rf "./ci-build"
+mkdir "./ci-build"
+cd "./ci-build"
+
+${CHERRY_LIB_MESONSETUP} . "${CHERRY_LIB_SRCDIR}" ${N_ACD_CONF}
+${CHERRY_LIB_NINJABUILD}
+sudo ${CHERRY_LIB_MESONTEST}
+# valgrind tests are skipped, since valgrind does not support bpf(2)
diff --git a/shared/n-acd/.cherryci/matrix b/shared/n-acd/.cherryci/matrix
new file mode 100755
index 0000000000..0b5da37c74
--- /dev/null
+++ b/shared/n-acd/.cherryci/matrix
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+set -e
+
+CHERRY_MATRIX+=("export N_ACD_CONF=-Debpf=false ${CHERRY_LIB_M_DEFAULT[*]}")
diff --git a/shared/n-acd/.editorconfig b/shared/n-acd/.editorconfig
index b41176962d..b10bb4f3f8 100644
--- a/shared/n-acd/.editorconfig
+++ b/shared/n-acd/.editorconfig
@@ -1,16 +1,11 @@
-# http://EditorConfig.org
-
-# top-most EditorConfig file
root = true
-# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
-# match config files, set indent to spaces with width of eight
[*.{c,h}]
indent_style = space
indent_size = 8
diff --git a/shared/n-acd/.gitmodules b/shared/n-acd/.gitmodules
index ec8b866d2f..d73d05a267 100644
--- a/shared/n-acd/.gitmodules
+++ b/shared/n-acd/.gitmodules
@@ -4,3 +4,6 @@
[submodule "subprojects/c-siphash"]
path = subprojects/c-siphash
url = https://github.com/c-util/c-siphash.git
+[submodule "subprojects/c-rbtree"]
+ path = subprojects/c-rbtree
+ url = https://github.com/c-util/c-rbtree.git
diff --git a/shared/n-acd/.travis.yml b/shared/n-acd/.travis.yml
index ed0bcf38c4..99a7bb9461 100644
--- a/shared/n-acd/.travis.yml
+++ b/shared/n-acd/.travis.yml
@@ -1,18 +1,21 @@
-dist: trusty
-sudo: required
os: linux
+dist: trusty
language: c
-compiler:
- - gcc
- - clang
-install:
- - curl -L "https://github.com/ninja-build/ninja/releases/download/v1.7.2/ninja-linux.zip" -o "ninja-linux.zip"
- - sudo unzip "ninja-linux.zip" -d "/usr/local/bin"
- - sudo chmod 755 "/usr/local/bin/ninja"
- - pip3 install meson
+services:
+ - docker
-script:
- - meson "build"
- - ninja -C "build"
- - sudo MESON_TESTTHREADS=64 ninja -C "build" test
+before_install:
+ - curl -O -L "https://raw.githubusercontent.com/cherry-pick/cherry-images/v1/scripts/vmrun"
+ - curl -O -L "https://raw.githubusercontent.com/cherry-pick/cherry-ci/v1/scripts/cherryci"
+ - chmod +x "./vmrun" "./cherryci"
+
+jobs:
+ include:
+ - stage: test
+ script:
+ - ./vmrun -- ../src/cherryci -d ../src/.cherryci -s c-util -m
+ - script:
+ - ./vmrun -T armv7hl -- ../src/cherryci -d ../src/.cherryci -s c-util
+ - script:
+ - ./vmrun -T i686 -- ../src/cherryci -d ../src/.cherryci -s c-util
diff --git a/shared/n-acd/AUTHORS b/shared/n-acd/AUTHORS
new file mode 100644
index 0000000000..89ee27d233
--- /dev/null
+++ b/shared/n-acd/AUTHORS
@@ -0,0 +1,39 @@
+LICENSE:
+ This project is dual-licensed under both the Apache License, Version
+ 2.0, and the GNU Lesser General Public License, Version 2.1+.
+
+AUTHORS-ASL:
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+AUTHORS-LGPL:
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; If not, see http://www.gnu.org/licenses/.
+
+COPYRIGHT: (ordered alphabetically)
+ Copyright (C) 2015-2018 Red Hat, Inc.
+
+AUTHORS: (ordered alphabetically)
+ Beniamino Galvani
+ David Herrmann
+ Thomas Haller
+ Tom Gundersen
diff --git a/shared/n-acd/AUTHORS-ASL b/shared/n-acd/AUTHORS-ASL
new file mode 100644
index 0000000000..5d501a7284
--- /dev/null
+++ b/shared/n-acd/AUTHORS-ASL
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright {yyyy} {name of copyright owner}
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/shared/n-acd/AUTHORS-LGPL b/shared/n-acd/AUTHORS-LGPL
new file mode 100644
index 0000000000..4362b49151
--- /dev/null
+++ b/shared/n-acd/AUTHORS-LGPL
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/shared/n-acd/COPYING b/shared/n-acd/COPYING
deleted file mode 100644
index 81c0566b88..0000000000
--- a/shared/n-acd/COPYING
+++ /dev/null
@@ -1,19 +0,0 @@
-LICENSE:
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
-COPYRIGHT: (ordered alphabetically)
- Copyright (C) 2015-2017 Red Hat, Inc.
-
-AUTHORS: (ordered alphabetically)
- David Herrmann
- Tom Gundersen
diff --git a/shared/n-acd/LICENSE b/shared/n-acd/LICENSE
deleted file mode 100644
index 5d501a7284..0000000000
--- a/shared/n-acd/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
-5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
-
-APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
-Copyright {yyyy} {name of copyright owner}
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/shared/n-acd/LICENSE b/shared/n-acd/LICENSE
new file mode 120000
index 0000000000..da24c5e4a6
--- /dev/null
+++ b/shared/n-acd/LICENSE
@@ -0,0 +1 @@
+AUTHORS-ASL
\ No newline at end of file
diff --git a/shared/n-acd/NEWS b/shared/n-acd/NEWS
new file mode 100644
index 0000000000..bb06abbc5a
--- /dev/null
+++ b/shared/n-acd/NEWS
@@ -0,0 +1,21 @@
+n-acd - IPv4 Address Conflict Detection
+
+CHANGES WITH 1:
+
+ * Initial release of n-acd. This project implements the IPv4 Address
+ Conflict Detection standard as defined in RFC-5227. The state machine
+ is implemented in a shared library and provides a stable ISO-C11 API.
+ The implementation is linux-only and relies heavily on the API
+ behavior of recent linux kernel releases.
+
+ * Compared to the pre-releases, this release supports many parallel
+ probes on a single n-acd context. This reduces the number of
+ allocated network resources to O(1), based on the number of running
+ parallel probes.
+
+ * The n-acd project is now dual-licensed: ASL-2.0 and LGPL-2.1+
+
+ Contributions from: Beniamino Galvani, David Herrmann, Thomas Haller,
+ Tom Gundersen
+
+ - Tübingen, 2018-08-08
diff --git a/shared/n-acd/README b/shared/n-acd/README
index 4077cba05e..b88d31c713 100644
--- a/shared/n-acd/README
+++ b/shared/n-acd/README
@@ -8,7 +8,7 @@ ABOUT:
kernel releases.
DETAILS:
- https://github.com/nettools/n-acd/wiki
+ https://nettools.github.io/n-acd
BUG REPORTS:
https://github.com/nettools/n-acd/issues
@@ -20,14 +20,18 @@ GIT:
GITWEB:
https://github.com/nettools/n-acd
+MAILINGLIST:
+ https://groups.google.com/forum/#!forum/nettools-devel
+
LICENSE:
- Apache Software License 2.0 (LICENSE)
- See COPYING for details.
+ Apache Software License 2.0
+ Lesser General Public License 2.1+
+ See AUTHORS for details.
REQUIREMENTS:
The requirements for n-acd are:
- Linux kernel >= 3.0
+ Linux kernel >= 3.19
libc (e.g., glibc >= 2.16)
At build-time, the following software is required:
@@ -36,15 +40,15 @@ REQUIREMENTS:
pkg-config >= 0.29
INSTALL:
- The meson build-system is used for n-acd. Contact upstream
+ The meson build-system is used for this project. Consult upstream
documentation for detailed help. In most situations the following
- commands are sufficient to build and install n-acd from source:
+ commands are sufficient to build and install from source:
$ mkdir build
$ cd build
- $ meson setup . ..
+ $ meson setup ..
$ ninja
- $ ninja test
+ $ meson test
# ninja install
- No custom configuration options are available.
+ For custom configuration options see meson_options.txt.
diff --git a/shared/n-acd/meson.build b/shared/n-acd/meson.build
index da923c288d..a05164c048 100644
--- a/shared/n-acd/meson.build
+++ b/shared/n-acd/meson.build
@@ -1,19 +1,25 @@
-project('n-acd',
+project(
+ 'n-acd',
'c',
version: '1',
license: 'Apache',
default_options: [
- 'buildtype=release',
'c_std=c11',
- ])
+ ],
+)
+project_description = 'IPv4 Address Conflict Detection'
add_project_arguments('-D_GNU_SOURCE', language: 'c')
mod_pkgconfig = import('pkgconfig')
sub_clist = subproject('c-list')
+sub_crbtree = subproject('c-rbtree')
sub_csiphash = subproject('c-siphash')
dep_clist = sub_clist.get_variable('libclist_dep')
+dep_crbtree = sub_crbtree.get_variable('libcrbtree_dep')
dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep')
+use_ebpf = get_option('ebpf')
+
subdir('src')
diff --git a/shared/n-acd/meson_options.txt b/shared/n-acd/meson_options.txt
new file mode 100644
index 0000000000..b024ee1d4c
--- /dev/null
+++ b/shared/n-acd/meson_options.txt
@@ -0,0 +1 @@
+option('ebpf', type: 'boolean', value: true, description: 'Enable eBPF packet filtering')
diff --git a/shared/n-acd/src/libnacd.sym b/shared/n-acd/src/libnacd.sym
index c9bd487533..f85e13acf9 100644
--- a/shared/n-acd/src/libnacd.sym
+++ b/shared/n-acd/src/libnacd.sym
@@ -1,13 +1,28 @@
-LIBNACD_1 {
+LIBNACD_2 {
global:
+ n_acd_config_new;
+ n_acd_config_free;
+ n_acd_config_set_ifindex;
+ n_acd_config_set_transport;
+ n_acd_config_set_mac;
+
+ n_acd_probe_config_new;
+ n_acd_probe_config_free;
+ n_acd_probe_config_set_ip;
+ n_acd_probe_config_set_timeout;
+
n_acd_new;
- n_acd_free;
+ n_acd_ref;
+ n_acd_unref;
n_acd_get_fd;
n_acd_dispatch;
n_acd_pop_event;
- n_acd_start;
- n_acd_stop;
- n_acd_announce;
+ n_acd_probe;
+
+ n_acd_probe_free;
+ n_acd_probe_set_userdata;
+ n_acd_probe_get_userdata;
+ n_acd_probe_announce;
local:
*;
};
diff --git a/shared/n-acd/src/meson.build b/shared/n-acd/src/meson.build
index ba09d1323b..0a405f9c4d 100644
--- a/shared/n-acd/src/meson.build
+++ b/shared/n-acd/src/meson.build
@@ -1,76 +1,94 @@
#
# target: libnacd.so
-# We build both, a static and a shared library. We want our tests to get access
-# to internals, so we link them statically.
#
-libnacd_private = static_library('nacd-private',
- ['n-acd.c'],
- c_args: [
- '-fvisibility=hidden',
- '-fno-common'
- ],
- dependencies: [
- dep_clist,
- dep_csiphash,
- ],
- pic: true)
-install_headers('n-acd.h')
libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym')
-libnacd_shared = shared_library('nacd',
- dependencies: dep_csiphash,
- objects: libnacd_private.extract_all_objects(),
- install: true,
- soversion: 0,
- link_depends: libnacd_symfile,
- link_args: [
- '-Wl,--no-undefined',
- '-Wl,--version-script=@0@'.format(libnacd_symfile)
- ])
-mod_pkgconfig.generate(libraries: libnacd_shared,
- version: meson.project_version(),
- name: 'libnacd',
- filebase: 'libnacd',
- description: 'IPv4 Address Conflict Detection')
-#
-# target: test-api
-# The test-api program explicitly links against the shared library, since it
-# tests for symbol visibility.
-#
+libnacd_deps = [
+ dep_clist,
+ dep_crbtree,
+ dep_csiphash,
+]
-test_api = executable('test-api',
- ['test-api.c'],
- link_with: libnacd_shared)
-test('API Symbol Visibility', test_api)
+libnacd_sources = [
+ 'n-acd.c',
+ 'n-acd-probe.c',
+ 'util/timer.c',
+]
+
+if use_ebpf
+ libnacd_sources += [
+ 'n-acd-bpf.c',
+ ]
+else
+ libnacd_sources += [
+ 'n-acd-bpf-fallback.c',
+ ]
+endif
+
+libnacd_private = static_library(
+ 'nacd-private',
+ libnacd_sources,
+ c_args: [
+ '-fvisibility=hidden',
+ '-fno-common'
+ ],
+ dependencies: libnacd_deps,
+ pic: true,
+)
+
+libnacd_shared = shared_library(
+ 'nacd',
+ objects: libnacd_private.extract_all_objects(),
+ dependencies: libnacd_deps,
+ install: not meson.is_subproject(),
+ soversion: 0,
+ link_depends: libnacd_symfile,
+ link_args: [
+ '-Wl,--no-undefined',
+ '-Wl,--version-script=@0@'.format(libnacd_symfile)
+ ],
+)
+
+libnacd_dep = declare_dependency(
+ include_directories: include_directories('.'),
+ link_with: libnacd_private,
+ dependencies: libnacd_deps,
+ version: meson.project_version(),
+)
+
+if not meson.is_subproject()
+ install_headers('n-acd.h')
+
+ mod_pkgconfig.generate(
+ libraries: libnacd_shared,
+ version: meson.project_version(),
+ name: 'libnacd',
+ filebase: 'libnacd',
+ description: project_description,
+ )
+endif
#
# target: test-*
-# All other tests are listed here. They link against the static library, so
-# they can access internals for verification.
#
-test_basic = executable('test-basic',
- ['test-basic.c'],
- link_with: libnacd_private)
-test('Basic API Behavior', test_basic)
+test_api = executable('test-api', ['test-api.c'], link_with: libnacd_shared)
+test('API Symbol Visibility', test_api)
-test_loopback = executable('test-loopback',
- ['test-loopback.c'],
- link_with: libnacd_private)
+if use_ebpf
+ test_bpf = executable('test-bpf', ['test-bpf.c'], dependencies: libnacd_dep)
+ test('eBPF socket filtering', test_bpf)
+endif
+
+test_loopback = executable('test-loopback', ['test-loopback.c'], dependencies: libnacd_dep)
test('Echo Suppression via Loopback', test_loopback)
-test_twice = executable('test-twice',
- ['test-twice.c'],
- link_with: libnacd_private)
-test('Two ACD in Parallel', test_twice)
+test_timer = executable('test-timer', ['util/test-timer.c'], dependencies: libnacd_dep)
+test('Timer helper', test_timer)
-test_unplug = executable('test-unplug',
- ['test-unplug.c'],
- link_with: libnacd_private)
-test('Async Interface Hotplug', test_unplug)
+#test_unplug = executable('test-unplug', ['test-unplug.c'], dependencies: libnacd_dep)
+#test('Async Interface Hotplug', test_unplug)
-test_unused = executable('test-unsed',
- ['test-unused.c'],
- link_with: libnacd_private)
-test('Unconflicted ACD', test_unused)
+test_veth = executable('test-veth', ['test-veth.c'], dependencies: libnacd_dep)
+test('Parallel ACD instances', test_veth)
diff --git a/shared/n-acd/src/n-acd-bpf-fallback.c b/shared/n-acd/src/n-acd-bpf-fallback.c
new file mode 100644
index 0000000000..5e6bdd0677
--- /dev/null
+++ b/shared/n-acd/src/n-acd-bpf-fallback.c
@@ -0,0 +1,29 @@
+/*
+ * A noop implementation of eBPF filter for IPv4 Address Conflict Detection
+ *
+ * These are a collection of dummy functions that have no effect, but allow
+ * n-acd to compile without eBPF support.
+ *
+ * See n-acd-bpf.c for documentation.
+ */
+
+#include
+#include "n-acd-private.h"
+
+int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) { /* stub: creates nothing, max_entries is unused */
+ *mapfdp = -1; /* -1: no map descriptor is handed out */
+ return 0; /* unconditional success */
+}
+
+int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) { /* stub: both arguments are ignored */
+ return 0; /* unconditional success, nothing is recorded */
+}
+
+int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) { /* stub: both arguments are ignored */
+ return 0; /* unconditional success, nothing is removed */
+}
+
+int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) { /* stub: mapfd and macp are unused */
+ *progfdp = -1; /* -1: no program descriptor is handed out */
+ return 0; /* unconditional success */
+}
diff --git a/shared/n-acd/src/n-acd-bpf.c b/shared/n-acd/src/n-acd-bpf.c
new file mode 100644
index 0000000000..771a28eeb2
--- /dev/null
+++ b/shared/n-acd/src/n-acd-bpf.c
@@ -0,0 +1,316 @@
+/*
+ * eBPF filter for IPv4 Address Conflict Detection
+ *
+ * An eBPF map and an eBPF program are provided. The map contains all the
+ * addresses address conflict detection is performed on, and the program
+ * filters out all packets except exactly the packets relevant to the ACD
+ * protocol on the addresses currently in the map.
+ *
+ * Note that userspace still has to filter the incoming packets, as filters
+ * are applied when packets are queued on the socket, not when userspace
+ * receives them. It is therefore possible to receive packets about addresses
+ * that have already been removed.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd-private.h"
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM, \
+ })
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0, \
+ })
+
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = BPF_PSEUDO_MAP_FD, \
+ .off = 0, \
+ .imm = (__u32) (MAP_FD), \
+ }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (MAP_FD)) >> 32, \
+ })
+
+#define BPF_ALU_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+#define BPF_ALU_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM, \
+ })
+
+#define BPF_MOV_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+#define BPF_MOV_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM, \
+ })
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0, \
+ })
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0, \
+ })
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM, \
+ })
+
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = FUNC, \
+ })
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) {
+ return (int)syscall(__NR_bpf, cmd, attr, size);
+}
+
+int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
+        union bpf_attr attr;
+        int mapfd;
+
+        /* Create a hash map with 32-bit keys; on success its fd is stored in @mapfdp. */
+        memset(&attr, 0, sizeof(attr));
+        attr = (union bpf_attr){
+                .map_type = BPF_MAP_TYPE_HASH,
+                .key_size = sizeof(uint32_t),
+                .value_size = sizeof(uint8_t), /* values are never used, but must be set */
+                .max_entries = max_entries,
+        };
+        mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+        if (mapfd < 0)
+                return -n_acd_errno(); /* always negative, so @mapfdp is never left unset on "success" */
+
+        *mapfdp = mapfd;
+        return 0;
+}
+
+int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
+        union bpf_attr attr;
+        uint32_t addr = be32toh(addrp->s_addr);
+        uint8_t _dummy = 0;
+        int r;
+
+        /* Insert @addrp (keyed in host byte order); BPF_NOEXIST refuses to overwrite an existing key. */
+        memset(&attr, 0, sizeof(attr));
+        attr = (union bpf_attr){
+                .map_fd = mapfd,
+                .key = (uint64_t)(unsigned long)&addr,
+                .value = (uint64_t)(unsigned long)&_dummy,
+                .flags = BPF_NOEXIST,
+        };
+        r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+        if (r < 0)
+                return -n_acd_errno(); /* guaranteed negative, unlike a bare -errno */
+
+        return 0;
+}
+
+int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
+        uint32_t addr = be32toh(addrp->s_addr);
+        union bpf_attr attr;
+        int r;
+
+        /* Delete the entry for @addrp, using the same host-byte-order key as n_acd_bpf_map_add(). */
+        memset(&attr, 0, sizeof(attr));
+        attr = (union bpf_attr){
+                .map_fd = mapfd,
+                .key = (uint64_t)(unsigned long)&addr,
+        };
+        r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+        if (r < 0)
+                return -n_acd_errno(); /* guaranteed negative, unlike a bare -errno */
+
+        return 0;
+}
+
+int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
+ const union {
+ uint8_t u8[6];
+ uint16_t u16[3];
+ uint32_t u32[1];
+ } mac = {
+ .u8 = {
+ macp->ether_addr_octet[0],
+ macp->ether_addr_octet[1],
+ macp->ether_addr_octet[2],
+ macp->ether_addr_octet[3],
+ macp->ether_addr_octet[4],
+ macp->ether_addr_octet[5],
+ },
+ };
+ struct bpf_insn prog[] = {
+ /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */
+ BPF_MOV_REG(6, 1), /* r6 = r1 */
+
+ /* drop the packet if it is too short */
+ BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)), /* r0 = skb->len */
+ BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2), /* if (r0 >= sizeof(ether_arp)) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* drop the packet if the header is not as expected */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)), /* r0 = header type */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2), /* if (r0 == ethernet) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)), /* r0 = protocol */
+ BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2), /* if (r0 == IP) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)), /* r0 = hw addr length */
+ BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2), /* if (r0 == sizeof(ether_addr)) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)), /* r0 = protocol addr length */
+ BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2), /* if (r0 == sizeof(in_addr)) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* drop packets from our own mac address */
+ BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)), /* r0 = first four bytes of packet mac address */
+ BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4), /* if (r0 != first four bytes of our mac address) skip 4 */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4), /* r0 = last two bytes of packet mac address */
+ BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2), /* if (r0 != last two bytes of our mac address) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /*
+ * We listen for two kinds of packets:
+ * Conflicts)
+ * These are requests or replies with the sender address not set to INADDR_ANY. The
+ * conflicted address is the sender address, remember this in r7.
+ * Probes)
+ * These are requests with the sender address set to INADDR_ANY. The probed address
+ * is the target address, remember this in r7.
+ * Any other packets are dropped.
+ */
+ BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)), /* r0 = sender ip address */
+ BPF_JMP_IMM(BPF_JEQ, 0, 0, 7), /* if (r0 == 0) skip 7 */
+ BPF_MOV_REG(7, 0), /* r7 = r0 */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3), /* if (r0 == request) skip 3 */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2), /* if (r0 == reply) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+ BPF_JMP_IMM(BPF_JA, 0, 0, 6), /* skip 6 */
+ BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)), /* r0 = target ip address */
+ BPF_MOV_REG(7, 0), /* r7 = r0 */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2), /* if (r0 == request) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* check if the probe or conflict is for an address we are monitoring */
+ BPF_STX_MEM(BPF_W, 10, 7, -4), /* *(uint32_t*)fp - 4 = r7 */
+ BPF_MOV_REG(2, 10), /* r2 = fp */
+ BPF_ALU_IMM(BPF_ADD, 2, -4), /* r2 -= 4 */
+ BPF_LD_MAP_FD(1, mapfd), /* r1 = mapfd */
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), /* r0 = map_lookup_elem(r1, r2) */
+ BPF_JMP_IMM(BPF_JNE, 0, 0, 2), /* if (r0 != NULL) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* return exactly the packet length */
+ BPF_MOV_IMM(0, sizeof(struct ether_arp)), /* r0 = sizeof(struct ether_arp) */
+ BPF_EXIT_INSN(), /* return */
+ };
+ union bpf_attr attr;
+ int progfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr = (union bpf_attr){
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .insns = (uint64_t)(unsigned long)prog,
+ .insn_cnt = sizeof(prog) / sizeof(*prog),
+ .license = (uint64_t)(unsigned long)"ASL",
+ };
+
+ progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (progfd < 0)
+ return -errno;
+
+ *progfdp = progfd;
+ return 0;
+}
diff --git a/shared/n-acd/src/n-acd-private.h b/shared/n-acd/src/n-acd-private.h
new file mode 100644
index 0000000000..3f20791234
--- /dev/null
+++ b/shared/n-acd/src/n-acd-private.h
@@ -0,0 +1,172 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "util/timer.h"
+#include "n-acd.h"
+
+typedef struct NAcdEventNode NAcdEventNode;
+
+#define _cleanup_(_x) __attribute__((__cleanup__(_x)))
+#define _public_ __attribute__((__visibility__("default")))
+
+/* This augments the error-codes with internal ones that are never exposed. */
+enum {
+ _N_ACD_INTERNAL = _N_ACD_E_N,
+
+ N_ACD_E_DROPPED,
+};
+
+enum {
+ N_ACD_PROBE_STATE_PROBING,
+ N_ACD_PROBE_STATE_CONFIGURING,
+ N_ACD_PROBE_STATE_ANNOUNCING,
+ N_ACD_PROBE_STATE_FAILED,
+};
+
+struct NAcdConfig {
+ int ifindex;
+ unsigned int transport;
+ uint8_t mac[ETH_ALEN];
+ size_t n_mac;
+};
+
+#define N_ACD_CONFIG_NULL(_x) { \
+ .transport = _N_ACD_TRANSPORT_N, \
+ }
+
+struct NAcdProbeConfig {
+ struct in_addr ip;
+ uint64_t timeout_msecs;
+};
+
+#define N_ACD_PROBE_CONFIG_NULL(_x) { \
+ .timeout_msecs = N_ACD_TIMEOUT_RFC5227, \
+ }
+
+struct NAcdEventNode {
+ CList acd_link;
+ CList probe_link;
+ NAcdEvent event;
+ uint8_t sender[ETH_ALEN];
+ bool is_public : 1;
+};
+
+#define N_ACD_EVENT_NODE_NULL(_x) { \
+ .acd_link = C_LIST_INIT((_x).acd_link), \
+ .probe_link = C_LIST_INIT((_x).probe_link), \
+ }
+
+struct NAcd {
+ unsigned long n_refs;
+ unsigned int seed;
+ int fd_epoll;
+ int fd_socket;
+ CRBTree ip_tree;
+ CList event_list;
+ Timer timer;
+
+ /* BPF map */
+ int fd_bpf_map;
+ size_t n_bpf_map;
+ size_t max_bpf_map;
+
+ /* configuration */
+ int ifindex;
+ uint8_t mac[ETH_ALEN];
+
+ /* flags */
+ bool preempted : 1;
+};
+
+#define N_ACD_NULL(_x) { \
+ .n_refs = 1, \
+ .fd_epoll = -1, \
+ .fd_socket = -1, \
+ .ip_tree = C_RBTREE_INIT, \
+ .event_list = C_LIST_INIT((_x).event_list), \
+ .timer = TIMER_NULL((_x).timer), \
+ .fd_bpf_map = -1, \
+ }
+
+struct NAcdProbe {
+ NAcd *acd;
+ CRBNode ip_node;
+ CList event_list;
+ Timeout timeout;
+
+ /* configuration */
+ struct in_addr ip;
+ uint64_t timeout_multiplier;
+ void *userdata;
+
+ /* state */
+ unsigned int state;
+ unsigned int n_iteration;
+ unsigned int defend;
+ uint64_t last_defend;
+};
+
+#define N_ACD_PROBE_NULL(_x) { \
+ .ip_node = C_RBNODE_INIT((_x).ip_node), \
+ .event_list = C_LIST_INIT((_x).event_list), \
+ .timeout = TIMEOUT_INIT((_x).timeout), \
+ .state = N_ACD_PROBE_STATE_PROBING, \
+ .defend = N_ACD_DEFEND_NEVER, \
+ }
+
+/* events */
+
+int n_acd_event_node_new(NAcdEventNode **nodep);
+NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node);
+
+/* contexts */
+
+void n_acd_remember(NAcd *acd, uint64_t now, bool success);
+int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event);
+int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa);
+int n_acd_ensure_bpf_map_space(NAcd *acd);
+
+/* probes */
+
+int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config);
+int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event);
+int n_acd_probe_handle_timeout(NAcdProbe *probe);
+int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict);
+
+/* eBPF */
+
+int n_acd_bpf_map_create(int *mapfdp, size_t max_elements);
+int n_acd_bpf_map_add(int mapfd, struct in_addr *addr);
+int n_acd_bpf_map_remove(int mapfd, struct in_addr *addr);
+
+int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *mac);
+
+/* inline helpers */
+
+static inline int n_acd_errno(void) {
+ /*
+ * Compilers continuously warn about uninitialized variables since they
+ * cannot deduce that `return -errno;` will always be negative. This
+ * small wrapper makes sure compilers figure that out. Use it as
+ * replacement for `errno` read access. Yes, it generates worse code,
+ * but only marginally and only affects slow-paths.
+ */
+ return abs(errno) ? : EIO;
+}
+
+static inline void n_acd_event_node_freep(NAcdEventNode **node) {
+ if (*node)
+ n_acd_event_node_free(*node);
+}
+
+static inline void n_acd_closep(int *fdp) {
+ if (*fdp >= 0)
+ close(*fdp);
+}
diff --git a/shared/n-acd/src/n-acd-probe.c b/shared/n-acd/src/n-acd-probe.c
new file mode 100644
index 0000000000..8c233b56a7
--- /dev/null
+++ b/shared/n-acd/src/n-acd-probe.c
@@ -0,0 +1,636 @@
+/*
+ * IPv4 Address Conflict Detection
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd.h"
+#include "n-acd-private.h"
+
+/*
+ * These parameters and timing intervals are specified in RFC-5227. The original
+ * values are:
+ *
+ * PROBE_NUM 3
+ * PROBE_WAIT 1s
+ * PROBE_MIN 1s
+ * PROBE_MAX 3s
+ * ANNOUNCE_NUM 3
+ * ANNOUNCE_WAIT 2s
+ * ANNOUNCE_INTERVAL 2s
+ * MAX_CONFLICTS 10
+ * RATE_LIMIT_INTERVAL 60s
+ * DEFEND_INTERVAL 10s
+ *
+ * If we assume a best-case and worst-case scenario for non-conflicted runs, we
+ * end up with a runtime between 4s and 9s to finish the probe. Then it still
+ * takes a fixed 4s to finish the announcements.
+ *
+ * RFC 5227 section 1.1:
+ * [...] (Note that the values listed here are fixed constants; they are
+ * not intended to be modifiable by implementers, operators, or end users.
+ * These constants are given symbolic names here to facilitate the writing
+ * of future standards that may want to reference this document with
+ * different values for these named constants; however, at the present time
+ * no such future standards exist.) [...]
+ *
+ * Unfortunately, no-one ever stepped up to write a "future standard" to revise
+ * the timings. A 9s timeout for successful link setups is not acceptable today.
+ * Hence, we will just go forward and ignore the proposed values. On both
+ * wired and wireless local links round-trip latencies of below 3ms are common.
+ * We require the caller to set a timeout multiplier, where 1 corresponds to a
+ * total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier
+ * of about 100 should be a reasonable default. To comply with the RFC select a
+ * multiplier of 9000.
+ */
+#define N_ACD_RFC_PROBE_NUM (3)
+#define N_ACD_RFC_PROBE_WAIT_NSEC (UINT64_C(111111)) /* 1/9 ms */
+#define N_ACD_RFC_PROBE_MIN_NSEC (UINT64_C(111111)) /* 1/9 ms */
+#define N_ACD_RFC_PROBE_MAX_NSEC (UINT64_C(333333)) /* 3/9 ms */
+#define N_ACD_RFC_ANNOUNCE_NUM (3)
+#define N_ACD_RFC_ANNOUNCE_WAIT_NSEC (UINT64_C(222222)) /* 2/9 ms */
+#define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC (UINT64_C(222222)) /* 2/9 ms */
+#define N_ACD_RFC_MAX_CONFLICTS (10)
+#define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC (UINT64_C(60000000000)) /* 60s */
+#define N_ACD_RFC_DEFEND_INTERVAL_NSEC (UINT64_C(10000000000)) /* 10s */
+
+/**
+ * n_acd_probe_config_new() - allocate a default-initialized probe configuration
+ */
+_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) {
+ _cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *config = NULL;
+
+ config = malloc(sizeof(*config));
+ if (!config)
+ return -ENOMEM;
+ /* Defaults come from N_ACD_PROBE_CONFIG_NULL: the RFC 5227 timeout, every other field zero. */
+ *config = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*config);
+ /* Success: hand the object out via @configp; NULL-ing @config disarms the _cleanup_ handler (presumably NULL-safe). */
+ *configp = config;
+ config = NULL;
+ return 0;
+}
+
+/**
+ * n_acd_probe_config_free() - release a probe configuration (NULL is accepted)
+ */
+_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) {
+        /*
+         * free(3) ignores NULL, so no explicit guard is required. NULL is
+         * returned unconditionally, whether or not @config was set.
+         */
+        free(config);
+        return NULL;
+}
+
+/**
+ * n_acd_probe_config_set_ip() - set the IPv4 address to probe; n_acd_probe_new() rejects a zero address
+ */
+_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) {
+ config->ip = ip;
+}
+
+/**
+ * n_acd_probe_config_set_timeout() - set the probe timeout in milliseconds; 0 skips the probing phase
+ */
+_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) {
+ config->timeout_msecs = msecs;
+}
+
+static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, uint64_t n_jitter) {
+        uint64_t n_time;
+
+        timer_now(&probe->acd->timer, &n_time);
+        n_time += n_timeout;
+
+        /*
+         * ACD specifies jitter values to reduce packet storms on the local
+         * link. @n_jitter is the maximum relative jitter in nanoseconds; a
+         * pseudo-random value below it is added on top of @n_timeout. It is
+         * 64-bit wide because callers pass `timeout_multiplier * *_NSEC`
+         * products, which no longer fit 32 bits for timeouts of roughly 20s.
+         */
+        if (n_jitter) {
+                uint64_t random;
+
+                random = ((uint64_t)rand_r(&probe->acd->seed) << 32) | (uint64_t)rand_r(&probe->acd->seed);
+                n_time += random % n_jitter;
+        }
+        timeout_schedule(&probe->timeout, &probe->acd->timer, n_time);
+}
+
+static void n_acd_probe_unschedule(NAcdProbe *probe) {
+ timeout_unschedule(&probe->timeout);
+}
+
+static bool n_acd_probe_is_unique(NAcdProbe *probe) {
+ NAcdProbe *sibling;
+ /* Reports whether @probe is linked and no other linked probe shares its IP. */
+ if (!c_rbnode_is_linked(&probe->ip_node))
+ return false;
+ /* The tree is ordered by IP, so a duplicate can only be an in-order neighbour. */
+ sibling = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node);
+ if (sibling && sibling->ip.s_addr == probe->ip.s_addr)
+ return false;
+
+ sibling = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node);
+ if (sibling && sibling->ip.s_addr == probe->ip.s_addr)
+ return false;
+
+ return true;
+}
+
+static int n_acd_probe_link(NAcdProbe *probe) {
+ int r;
+
+ /*
+ * Make sure the kernel bpf map has space for at least one more
+ * entry.
+ */
+ r = n_acd_ensure_bpf_map_space(probe->acd);
+ if (r)
+ return r;
+
+ /*
+ * Link entry into context, indexed by its IP. Note that we allow
+ * duplicates just fine. It is up to you to decide whether to avoid
+ * duplicates, if you don't want them. Duplicates on the same context
+ * do not conflict with each other, though.
+ */
+ {
+ CRBNode **slot, *parent;
+ NAcdProbe *other;
+
+ slot = &probe->acd->ip_tree.root;
+ parent = NULL;
+ while (*slot) {
+ other = c_rbnode_entry(*slot, NAcdProbe, ip_node);
+ parent = *slot;
+ if (probe->ip.s_addr < other->ip.s_addr)
+ slot = &(*slot)->left;
+ else
+ slot = &(*slot)->right;
+ }
+
+ c_rbtree_add(&probe->acd->ip_tree, parent, slot, &probe->ip_node);
+ }
+
+ /*
+ * Add the ip address to the map, if it is not already there.
+ */
+ if (n_acd_probe_is_unique(probe)) {
+ r = n_acd_bpf_map_add(probe->acd->fd_bpf_map, &probe->ip);
+ if (r) {
+ /*
+ * Make sure the IP address is linked in userspace iff
+ * it is linked in the kernel.
+ */
+ c_rbnode_unlink(&probe->ip_node);
+ return r;
+ }
+ ++probe->acd->n_bpf_map;
+ }
+
+ return 0;
+}
+
+static void n_acd_probe_unlink(NAcdProbe *probe) {
+ int r;
+
+ /*
+ * If this is the only probe for a given IP, remove the IP from the
+ * kernel BPF map.
+ */
+ if (n_acd_probe_is_unique(probe)) {
+ r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip);
+ assert(r >= 0);
+ --probe->acd->n_bpf_map;
+ }
+ c_rbnode_unlink(&probe->ip_node);
+}
+
+int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) {
+ _cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL;
+ int r;
+
+ if (!config->ip.s_addr)
+ return N_ACD_E_INVALID_ARGUMENT;
+
+ probe = malloc(sizeof(*probe));
+ if (!probe)
+ return -ENOMEM;
+
+ *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe);
+ probe->acd = n_acd_ref(acd);
+ probe->ip = config->ip;
+
+ /*
+ * We use the provided timeout-length as multiplier for all our
+ * timeouts. The provided timeout defines the maximum length of an
+ * entire probe-interval until the first announcement. Given the
+ * spec-provided parameters, this ends up as:
+ *
+ * PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT
+ * = 1s + 3s + 3s + 2s
+ * = 9s
+ *
+ * Hence, the default value for this timeout is 9000ms, which just
+ * ends up matching the spec-provided values.
+ *
+ * What we now semantically do is divide this timeout by 1ns/1000000.
+ * This first turns it into nanoseconds, then strips the unit by
+ * turning it into a multiplier. However, rather than performing the
+ * division here, we multiply all our timeouts by 1000000 statically
+ * at compile time. Therefore, we can use the user-provided timeout as
+ * unmodified multiplier. No conversion necessary.
+ */
+ probe->timeout_multiplier = config->timeout_msecs;
+
+ r = n_acd_probe_link(probe);
+ if (r)
+ return r;
+
+ /*
+ * Now that everything is set up, we have to send the first probe. This
+ * is done after ~PROBE_WAIT seconds, hence we schedule our timer.
+ * In case no timeout-multiplier is set, we pretend we already sent all
+ * probes successfully and schedule the timer so we proceed with the
+ * announcements. We must schedule a fake timer there, since we are not
+ * allowed to advance the state machine outside of n_acd_dispatch().
+ */
+ if (probe->timeout_multiplier) {
+ probe->n_iteration = 0;
+ n_acd_probe_schedule(probe,
+ 0,
+ probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC);
+ } else {
+ probe->n_iteration = N_ACD_RFC_PROBE_NUM;
+ n_acd_probe_schedule(probe, 0, 0);
+ }
+
+ *probep = probe;
+ probe = NULL;
+ return 0;
+}
+
+/**
+ * XXX
+ */
+_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) {
+ NAcdEventNode *node, *t_node;
+
+ if (!probe)
+ return NULL;
+
+ c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link)
+ n_acd_event_node_free(node);
+
+ n_acd_probe_unschedule(probe);
+ n_acd_probe_unlink(probe);
+ probe->acd = n_acd_unref(probe->acd);
+ free(probe);
+
+ return NULL;
+}
+
+int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) {
+ _cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL;
+ int r;
+
+ r = n_acd_raise(probe->acd, &node, event);
+ if (r)
+ return r;
+
+ switch (event) {
+ case N_ACD_EVENT_READY:
+ node->event.ready.probe = probe;
+ break;
+ case N_ACD_EVENT_USED:
+ node->event.used.probe = probe;
+ break;
+ case N_ACD_EVENT_DEFENDED:
+ node->event.defended.probe = probe;
+ break;
+ case N_ACD_EVENT_CONFLICT:
+ node->event.conflict.probe = probe;
+ break;
+ default:
+ assert(0);
+ return -EIO;
+ }
+
+ c_list_link_tail(&probe->event_list, &node->probe_link);
+
+ if (nodep)
+ *nodep = node;
+ node = NULL;
+ return 0;
+}
+
+int n_acd_probe_handle_timeout(NAcdProbe *probe) {
+ int r;
+
+ switch (probe->state) {
+ case N_ACD_PROBE_STATE_PROBING:
+ /*
+ * We are still PROBING. We send 3 probes with a random timeout
+ * scheduled between each. If, after a fixed timeout, we did
+ * not receive any conflict we consider the probing successful.
+ */
+ if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) {
+ /*
+ * We have not sent all 3 probes, yet. A timer fired,
+ * so we are ready to send the next probe. If this is
+ * the third probe, schedule a timer for ANNOUNCE_WAIT
+ * to give other peers a chance to answer. If this is
+ * not the third probe, wait between PROBE_MIN and
+ * PROBE_MAX for the next probe.
+ */
+
+ r = n_acd_send(probe->acd, &probe->ip, NULL);
+ if (r) {
+ if (r != -N_ACD_E_DROPPED)
+ return r;
+
+ /*
+ * Packet was dropped, and we know about it. It
+ * never reached the network. Reasons are
+ * manifold, and n_acd_send() raises events if
+ * necessary.
+ * From a probe-perspective, we simply pretend
+ * we never sent the probe and schedule a
+ * timeout for the next probe, effectively
+ * doubling a single probe-interval.
+ */
+ } else {
+ /* Successfully sent, so advance counter. */
+ ++probe->n_iteration;
+ }
+
+ if (probe->n_iteration < N_ACD_RFC_PROBE_NUM)
+ n_acd_probe_schedule(probe,
+ probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC,
+ probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC));
+ else
+ n_acd_probe_schedule(probe,
+ probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC,
+ 0);
+ } else {
+ /*
+ * All 3 probes succeeded and we waited enough to
+ * consider this address usable by now. Do not announce
+ * the address, yet. We must first give the caller a
+ * chance to configure the address (so they can answer
+ * ARP requests), before announcing it.
+ */
+ r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY);
+ if (r)
+ return r;
+
+ probe->state = N_ACD_PROBE_STATE_CONFIGURING;
+ }
+
+ break;
+
+ case N_ACD_PROBE_STATE_ANNOUNCING:
+ /*
+ * We are ANNOUNCING, meaning the caller configured the address
+ * on the interface and is actively using it. We send 3
+ * announcements out, in a short interval, and then just
+ * perform passive conflict detection.
+ * Note that once all 3 announcements are sent, we no longer
+ * schedule a timer, so this part should not trigger, anymore.
+ */
+
+ r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
+ if (r) {
+ if (r != -N_ACD_E_DROPPED)
+ return r;
+
+ /*
+ * See above in STATE_PROBING for details. We know the
+ * packet was never sent, so we simply try again after
+ * extending the timer.
+ */
+ } else {
+ /* Successfully sent, so advance counter. */
+ ++probe->n_iteration;
+ }
+
+ if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
+ /*
+ * Announcements are always scheduled according to the
+ * time-intervals specified in the spec. We always use
+ * the RFC5227-mandated multiplier.
+ * If you reconsider this, note that timeout_multiplier
+ * might be 0 here.
+ */
+ n_acd_probe_schedule(probe,
+ N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC,
+ 0);
+ }
+
+ break;
+
+ case N_ACD_PROBE_STATE_CONFIGURING:
+ case N_ACD_PROBE_STATE_FAILED:
+ default:
+ /*
+ * There are no timeouts in these states. If we trigger one,
+ * something is fishy.
+ */
+ assert(0);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) {
+ NAcdEventNode *node;
+ uint64_t now;
+ int r;
+
+ timer_now(&probe->acd->timer, &now);
+
+ switch (probe->state) {
+ case N_ACD_PROBE_STATE_PROBING:
+ /*
+ * Regardless whether this is a hard or soft conflict, we must
+ * treat this as a probe failure. That is, notify the caller of
+ * the conflict and wait for further instructions. We do not
+ * react to this, until the caller tells us what to do, but we
+ * do stop sending further probes.
+ */
+ r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED);
+ if (r)
+ return r;
+
+ node->event.used.sender = node->sender;
+ node->event.used.n_sender = ETH_ALEN;
+ memcpy(node->sender, packet->arp_sha, ETH_ALEN);
+
+ n_acd_probe_unschedule(probe);
+ n_acd_probe_unlink(probe);
+ probe->state = N_ACD_PROBE_STATE_FAILED;
+
+ break;
+
+ case N_ACD_PROBE_STATE_CONFIGURING:
+ /*
+ * We are waiting for the caller to configure the interface and
+ * start ANNOUNCING. In this state, we cannot defend the
+ * address as that would indicate that it is ready to be used,
+ * and we cannot signal CONFLICT or USED as the caller may
+ * already have started to use the address (and may have
+ * configured the engine to always defend it, which means they
+ * should be able to rely on never losing it after READY).
+ * Simply drop the event, and rely on the anticipated ANNOUNCE
+ * to trigger it again.
+ */
+
+ break;
+
+ case N_ACD_PROBE_STATE_ANNOUNCING: {
+ /*
+ * We were already instructed to announce the address, which
+ * means the address is configured and in use. Hence, the
+ * caller is responsible to serve regular ARP queries. Meaning,
+ * we can ignore any soft conflicts (other peers doing ACD).
+ *
+ * But if we see a hard-conflict, we either defend the address
+ * according to the caller's instructions, or we report the
+ * conflict and bail out.
+ */
+ bool conflict = false, rate_limited = false;
+
+ if (!hard_conflict)
+ break;
+
+ rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC;
+
+ switch (probe->defend) {
+ case N_ACD_DEFEND_NEVER:
+ conflict = true;
+ break;
+ case N_ACD_DEFEND_ONCE:
+ if (rate_limited) {
+ conflict = true;
+ break;
+ }
+
+ /* fallthrough */
+ case N_ACD_DEFEND_ALWAYS:
+ if (!rate_limited) {
+ r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
+ if (r) {
+ if (r != -N_ACD_E_DROPPED)
+ return r;
+
+ if (probe->defend == N_ACD_DEFEND_ONCE) {
+ conflict = true;
+ break;
+ }
+ }
+
+ if (r != -N_ACD_E_DROPPED)
+ probe->last_defend = now;
+ }
+
+ r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED);
+ if (r)
+ return r;
+
+ node->event.defended.sender = node->sender;
+ node->event.defended.n_sender = ETH_ALEN;
+ memcpy(node->sender, packet->arp_sha, ETH_ALEN);
+
+ break;
+ }
+
+ if (conflict) {
+ r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT);
+ if (r)
+ return r;
+
+ node->event.conflict.sender = node->sender;
+ node->event.conflict.n_sender = ETH_ALEN;
+ memcpy(node->sender, packet->arp_sha, ETH_ALEN);
+
+ n_acd_probe_unschedule(probe);
+ n_acd_probe_unlink(probe);
+ probe->state = N_ACD_PROBE_STATE_FAILED;
+ }
+
+ break;
+ }
+
+ case N_ACD_PROBE_STATE_FAILED:
+ default:
+ /*
+ * We are not listening for packets in these states. If we receive one,
+ * something is fishy.
+ */
+ assert(0);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * n_acd_probe_set_userdata() - attach an opaque caller-provided pointer to the probe
+ */
+_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) {
+ probe->userdata = userdata;
+}
+
+/**
+ * n_acd_probe_get_userdata() - store the pointer set via n_acd_probe_set_userdata() in @userdatap
+ */
+_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) {
+ *userdatap = probe->userdata;
+}
+
+/**
+ * n_acd_probe_announce() - announce the configured IP address
+ * @probe: probe object
+ * @defend: defence policy
+ *
+ * Announce the IP address on the local link, and start defending it according
+ * to the given policy, which must be one of N_ACD_DEFEND_ONCE,
+ * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS.
+ *
+ * This must be called in response to an N_ACD_EVENT_READY event, and only
+ * after the given address has been configured on the given network interface.
+ *
+ * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defence policy
+ * is invalid, negative error code on failure.
+ */
+_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) {
+ if (defend >= _N_ACD_DEFEND_N)
+ return N_ACD_E_INVALID_ARGUMENT;
+
+ probe->state = N_ACD_PROBE_STATE_ANNOUNCING;
+ probe->defend = defend;
+ probe->n_iteration = 0;
+
+ /*
+ * We must schedule a fake-timeout, since we are not allowed to
+ * advance the state-machine outside of n_acd_dispatch().
+ */
+ n_acd_probe_schedule(probe, 0, 0);
+
+ return 0;
+}
diff --git a/shared/n-acd/src/n-acd.c b/shared/n-acd/src/n-acd.c
index 9164f95895..def56a2152 100644
--- a/shared/n-acd/src/n-acd.c
+++ b/shared/n-acd/src/n-acd.c
@@ -1,188 +1,38 @@
/*
* IPv4 Address Conflict Detection
- *
- * This implements the main n-acd API. It is built around an epoll-fd to
- * encapsulate a timerfd+socket. The n-acd context has quite straightforward
- * lifetime rules. The parameters must be set when the engine is started, and
- * they can only be changed by stopping and restartding the engine. The engine
- * is started on demand and stopped when no longer needed.
- * During the entire lifetime the context can be dispatched. That is, the
- * dispatcher does not have to be aware of the context state. After each call
- * to dispatch(), the caller must pop all pending events until -EAGAIN is
- * returned.
- *
- * If a conflict is detected, the ACD engine reports to the caller and stops
- * the engine. The caller can now modify parameters and restart the engine, if
- * required.
*/
#include
#include
+#include
#include
#include
#include
+#include
#include
-#include
-#include
#include
-#include
#include
#include
-#include
-#include
#include
#include
#include
#include
#include
-#include
#include
#include
#include "n-acd.h"
-
-#define _public_ __attribute__((__visibility__("default")))
-
-/*
- * These parameters and timing intervals specified in RFC-5227. The original
- * values are:
- *
- * PROBE_NUM 3
- * PROBE_WAIT 1s
- * PROBE_MIN 1s
- * PROBE_MAX 3s
- * ANNOUNCE_NUM 3
- * ANNOUNCE_WAIT 2s
- * ANNOUNCE_INTERVAL 2s
- * MAX_CONFLICTS 10
- * RATE_LIMIT_INTERVAL 60s
- * DEFEND_INTERVAL 10s
- *
- * If we assume a best-case and worst-case scenario for non-conflicted runs, we
- * end up with a runtime between 4s and 9s to finish the probe. Then it still
- * takes a fixed 4s to finish the announcements.
- *
- * RFC 5227 section 1.1:
- * [...] (Note that the values listed here are fixed constants; they are
- * not intended to be modifiable by implementers, operators, or end users.
- * These constants are given symbolic names here to facilitate the writing
- * of future standards that may want to reference this document with
- * different values for these named constants; however, at the present time
- * no such future standards exist.) [...]
- *
- * Unfortunately, no-one ever stepped up to write a "future standard" to revise
- * the timings. A 9s timeout for successful link setups is not acceptable today.
- * Hence, we will just go forward and ignore the proposed values. On both
- * wired and wireless local links round-trip latencies of below 3ms are common,
- * while latencies above 10ms are rarely seen. We require the caller to set a
- * timeout multiplier, where 1 corresponds to a total probe time of 0.5 ms and
- * 1.0 ms. On modern networks a multiplier of about 100 should be a reasonable
- * default. To comply with the RFC select a multiplier of 9000.
- */
-#define N_ACD_RFC_PROBE_NUM (3)
-#define N_ACD_RFC_PROBE_WAIT_USEC (UINT64_C(111)) /* 111us */
-#define N_ACD_RFC_PROBE_MIN_USEC (UINT64_C(111)) /* 111us */
-#define N_ACD_RFC_PROBE_MAX_USEC (UINT64_C(333)) /* 333us */
-#define N_ACD_RFC_ANNOUNCE_NUM (3)
-#define N_ACD_RFC_ANNOUNCE_WAIT_USEC (UINT64_C(222)) /* 222us */
-#define N_ACD_RFC_ANNOUNCE_INTERVAL_USEC (UINT64_C(222)) /* 222us */
-#define N_ACD_RFC_MAX_CONFLICTS (10)
-#define N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC (UINT64_C(60000000)) /* 60s */
-#define N_ACD_RFC_DEFEND_INTERVAL_USEC (UINT64_C(10000000)) /* 10s */
-
-/*
- * Fake ENETDOWN error-code. We use this as replacement for known EFOOBAR error
- * codes. It is explicitly chosen to be outside the known error-code range.
- * Whenever we are deep down in a call-stack and notice a ENETDOWN error, we
- * return this instead. It is caught by the top-level dispatcher and then
- * properly handled.
- * This avoids gracefully handling ENETDOWN in call-stacks, but then continuing
- * with some work in the callers without noticing the soft failure.
- */
-#define N_ACD_E_DOWN (INT_MAX)
-
-#define TIME_INFINITY ((uint64_t) -1)
+#include "n-acd-private.h"
enum {
N_ACD_EPOLL_TIMER,
N_ACD_EPOLL_SOCKET,
};
-enum {
- N_ACD_STATE_INIT,
- N_ACD_STATE_PROBING,
- N_ACD_STATE_CONFIGURING,
- N_ACD_STATE_ANNOUNCING,
-};
-
-typedef struct NAcdEventNode {
- NAcdEvent event;
- uint8_t sender[ETH_ALEN];
- CList link;
-} NAcdEventNode;
-
-struct NAcd {
- /* context */
- unsigned int seed;
- int fd_epoll;
- int fd_timer;
-
- /* configuration */
- NAcdConfig config;
- uint8_t mac[ETH_ALEN];
- uint64_t timeout_multiplier;
-
- /* runtime */
- int fd_socket;
- unsigned int state;
- unsigned int n_iteration;
- unsigned int n_conflicts;
- unsigned int defend;
- uint64_t last_defend;
- uint64_t last_conflict;
-
- /* pending events */
- CList events;
- NAcdEventNode *current;
-};
-
-static int n_acd_errno(void) {
- /*
- * Compilers continuously warn about uninitialized variables since they
- * cannot deduce that `return -errno;` will always be negative. This
- * small wrapper makes sure compilers figure that out. Use it as
- * replacement for `errno` read access. Yes, it generates worse code,
- * but only marginally and only affects slow-paths.
- */
- return abs(errno) ? : EIO;
-}
-
-static int n_acd_event_node_new(NAcdEventNode **nodep, unsigned int event) {
- NAcdEventNode *node;
-
- node = calloc(1, sizeof(*node));
- if (!node)
- return -ENOMEM;
-
- node->event.event = event;
- node->link = (CList)C_LIST_INIT(node->link);
-
- *nodep = node;
-
- return 0;
-}
-
-static NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) {
- if (!node)
- return NULL;
-
- c_list_unlink(&node->link);
- free(node);
-
- return NULL;
-}
-
static int n_acd_get_random(unsigned int *random) {
- uint8_t hash_seed[] = { 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a, 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1 };
+ uint8_t hash_seed[] = {
+ 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a,
+ 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1,
+ };
CSipHash hash = C_SIPHASH_NULL;
struct timespec ts;
const uint8_t *p;
@@ -203,7 +53,7 @@ static int n_acd_get_random(unsigned int *random) {
if (p)
c_siphash_append(&hash, p, 16);
- r = clock_gettime(CLOCK_BOOTTIME, &ts);
+ r = clock_gettime(CLOCK_MONOTONIC, &ts);
if (r < 0)
return -n_acd_errno();
@@ -214,12 +64,243 @@ static int n_acd_get_random(unsigned int *random) {
return 0;
}
-static void n_acd_reset(NAcd *acd) {
- acd->state = N_ACD_STATE_INIT;
- acd->defend = N_ACD_DEFEND_NEVER;
- acd->n_iteration = 0;
- acd->last_defend = 0;
- timerfd_settime(acd->fd_timer, 0, &(struct itimerspec){}, NULL);
+/*
+ * Open a non-blocking, close-on-exec AF_PACKET socket for ARP, attach @fd_bpf_prog if it is >= 0, and bind it to config->ifindex.
+ * Returns 0 with the socket stored in @fdp, or a negative error code; the socket is closed again on every failure path.
+ */
+static int n_acd_socket_new(int *fdp, int fd_bpf_prog, NAcdConfig *config) {
+        const struct sockaddr_ll address = {
+                .sll_family = AF_PACKET,
+                .sll_protocol = htobe16(ETH_P_ARP),
+                .sll_ifindex = config->ifindex,
+                .sll_halen = ETH_ALEN,
+                .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        };
+        int r, s;
+
+        s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+        if (s < 0)
+                return -n_acd_errno(); /* nothing to release yet */
+
+        if (fd_bpf_prog >= 0 &&
+            setsockopt(s, SOL_SOCKET, SO_ATTACH_BPF, &fd_bpf_prog, sizeof(fd_bpf_prog)) < 0) {
+                r = -n_acd_errno();
+                goto error; /* must not return directly here, or the socket leaks */
+        }
+
+        r = bind(s, (struct sockaddr *)&address, sizeof(address));
+        if (r < 0) {
+                r = -n_acd_errno();
+                goto error;
+        }
+
+        *fdp = s;
+        return 0;
+
+error:
+        close(s);
+        return r;
+}
+
+/**
+ * n_acd_config_new() - allocate a configuration object reset to N_ACD_CONFIG_NULL and return it in @configp; returns 0 on success, -ENOMEM if the allocation fails
+ */
+_public_ int n_acd_config_new(NAcdConfig **configp) {
+ _cleanup_(n_acd_config_freep) NAcdConfig *config = NULL;
+
+ config = malloc(sizeof(*config));
+ if (!config)
+ return -ENOMEM;
+
+ *config = (NAcdConfig)N_ACD_CONFIG_NULL(*config);
+
+ *configp = config;
+ config = NULL; /* handed to the caller; presumably keeps the _cleanup_ handler from freeing it */
+ return 0;
+}
+
+/**
+ * n_acd_config_free() - release a configuration object; a NULL @config is accepted and ignored. Always returns NULL.
+ */
+_public_ NAcdConfig *n_acd_config_free(NAcdConfig *config) {
+ if (!config)
+ return NULL;
+
+ free(config);
+
+ return NULL;
+}
+
+/**
+ * n_acd_config_set_ifindex() - store @ifindex in @config; the value is not validated here (n_acd_new() rejects values <= 0)
+ */
+_public_ void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex) {
+ config->ifindex = ifindex;
+}
+
+/**
+ * n_acd_config_set_transport() - store @transport in @config; the value is not validated here (n_acd_new() only accepts N_ACD_TRANSPORT_ETHERNET)
+ */
+_public_ void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport) {
+ config->transport = transport;
+}
+
+/**
+ * n_acd_config_set_mac() - record @n_mac exactly as given and copy at most ETH_ALEN bytes of @mac into @config
+ */
+_public_ void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac) {
+ config->n_mac = n_mac; /* kept unclamped; n_acd_new() rejects any length other than ETH_ALEN */
+ memcpy(config->mac, mac, n_mac > ETH_ALEN ? ETH_ALEN : n_mac);
+}
+
+int n_acd_event_node_new(NAcdEventNode **nodep) {
+ NAcdEventNode *node;
+ /* Allocate one event node, reset it to N_ACD_EVENT_NODE_NULL and return it in @nodep; 0 on success, -ENOMEM on allocation failure. */
+ node = malloc(sizeof(*node));
+ if (!node)
+ return -ENOMEM;
+
+ *node = (NAcdEventNode)N_ACD_EVENT_NODE_NULL(*node);
+
+ *nodep = node;
+ return 0;
+}
+
+NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) {
+ if (!node)
+ return NULL;
+ /* Unlink both list hooks (probe_link and acd_link) before releasing the memory; always returns NULL. */
+ c_list_unlink(&node->probe_link);
+ c_list_unlink(&node->acd_link);
+ free(node);
+
+ return NULL;
+}
+
+int n_acd_ensure_bpf_map_space(NAcd *acd) {
+ NAcdProbe *probe;
+ _cleanup_(n_acd_closep) int fd_map = -1, fd_prog = -1;
+ size_t max_map;
+ int r;
+ /* Nothing to do while n_bpf_map < max_bpf_map; otherwise build a map of twice the capacity and switch the socket filter over to it. */
+ if (acd->n_bpf_map < acd->max_bpf_map)
+ return 0;
+
+ max_map = 2 * acd->max_bpf_map;
+
+ r = n_acd_bpf_map_create(&fd_map, max_map);
+ if (r)
+ return r;
+
+ c_rbtree_for_each_entry(probe, &acd->ip_tree, ip_node) { /* re-add the address of every probe in ip_tree to the new map */
+ r = n_acd_bpf_map_add(fd_map, &probe->ip);
+ if (r)
+ return r;
+ }
+
+ r = n_acd_bpf_compile(&fd_prog, fd_map, (struct ether_addr*) acd->mac);
+ if (r)
+ return r;
+
+ if (fd_prog >= 0) { /* a negative fd_prog after a successful compile means there is no program to attach */
+ r = setsockopt(acd->fd_socket, SOL_SOCKET, SO_ATTACH_BPF, &fd_prog, sizeof(fd_prog));
+ if (r)
+ return -n_acd_errno();
+ }
+ /* Switch-over succeeded: close the previous map fd and keep the new one. */
+ if (acd->fd_bpf_map >= 0)
+ close(acd->fd_bpf_map);
+ acd->fd_bpf_map = fd_map;
+ fd_map = -1; /* now owned by @acd; presumably stops the _cleanup_ handler from closing it */
+ acd->max_bpf_map = max_map;
+ return 0;
+}
+
+/**
+ * n_acd_new() - create a new ACD context
+ * @acdp: output argument for context
+ * @config: configuration parameters
+ *
+ * Create a new ACD context and return it in @acdp.
+ *
+ * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT if @config is rejected, or a negative error code on failure.
+ */
+_public_ int n_acd_new(NAcd **acdp, NAcdConfig *config) {
+ _cleanup_(n_acd_unrefp) NAcd *acd = NULL;
+ _cleanup_(n_acd_closep) int fd_bpf_prog = -1;
+ int r;
+
+ if (config->ifindex <= 0 ||
+ config->transport != N_ACD_TRANSPORT_ETHERNET ||
+ config->n_mac != ETH_ALEN ||
+ !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN)) /* the broadcast address is not accepted as our MAC */
+ return N_ACD_E_INVALID_ARGUMENT;
+
+ acd = malloc(sizeof(*acd));
+ if (!acd)
+ return -ENOMEM;
+
+ *acd = (NAcd)N_ACD_NULL(*acd);
+ acd->ifindex = config->ifindex;
+ memcpy(acd->mac, config->mac, ETH_ALEN);
+
+ r = n_acd_get_random(&acd->seed);
+ if (r)
+ return r;
+
+ acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC);
+ if (acd->fd_epoll < 0)
+ return -n_acd_errno();
+
+ r = timer_init(&acd->timer);
+ if (r < 0)
+ return r;
+
+ acd->max_bpf_map = 8; /* initial capacity; n_acd_ensure_bpf_map_space() doubles it once the map is full */
+
+ r = n_acd_bpf_map_create(&acd->fd_bpf_map, acd->max_bpf_map);
+ if (r)
+ return r;
+
+ r = n_acd_bpf_compile(&fd_bpf_prog, acd->fd_bpf_map, (struct ether_addr*) acd->mac);
+ if (r)
+ return r;
+
+ r = n_acd_socket_new(&acd->fd_socket, fd_bpf_prog, config);
+ if (r)
+ return r;
+
+ r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->timer.fd,
+ &(struct epoll_event){
+ .events = EPOLLIN,
+ .data.u32 = N_ACD_EPOLL_TIMER,
+ });
+ if (r < 0)
+ return -n_acd_errno();
+
+ r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_socket,
+ &(struct epoll_event){
+ .events = EPOLLIN,
+ .data.u32 = N_ACD_EPOLL_SOCKET,
+ });
+ if (r < 0)
+ return -n_acd_errno();
+
+ *acdp = acd;
+ acd = NULL; /* handed to the caller; presumably keeps the _cleanup_ handler from dropping the reference */
+ return 0;
+}
+
+static void n_acd_free(NAcd *acd) {
+ NAcdEventNode *node, *t_node;
+
+ if (!acd)
+ return;
+
+ c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link)
+ n_acd_event_node_free(node);
+
+ assert(c_rbtree_is_empty(&acd->ip_tree));
if (acd->fd_socket >= 0) {
assert(acd->fd_epoll >= 0);
@@ -227,101 +308,16 @@ static void n_acd_reset(NAcd *acd) {
close(acd->fd_socket);
acd->fd_socket = -1;
}
-}
-/**
- * n_acd_new() - create a new ACD context
- * @acdp: output argument for context
- *
- * Create a new ACD context and return it in @acdp.
- *
- * Return: 0 on success, or a negative error code on failure.
- */
-_public_ int n_acd_new(NAcd **acdp) {
- NAcd *acd;
- int r;
-
- acd = calloc(1, sizeof(*acd));
- if (!acd)
- return -ENOMEM;
-
- acd->fd_epoll = -1;
- acd->fd_timer = -1;
- acd->fd_socket = -1;
- acd->state = N_ACD_STATE_INIT;
- acd->defend = N_ACD_DEFEND_NEVER;
- acd->events = (CList)C_LIST_INIT(acd->events);
- acd->last_conflict = TIME_INFINITY;
-
- r = n_acd_get_random(&acd->seed);
- if (r < 0)
- return r;
-
- acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC);
- if (acd->fd_epoll < 0) {
- r = -n_acd_errno();
- goto error;
+ if (acd->fd_bpf_map >= 0) {
+ close(acd->fd_bpf_map);
+ acd->fd_bpf_map = -1;
}
- acd->fd_timer = timerfd_create(CLOCK_BOOTTIME, TFD_CLOEXEC | TFD_NONBLOCK);
- if (acd->fd_timer < 0 && errno == EINVAL) {
- /*
- * Fall back to CLOCK_MONOTONIC when CLOCK_BOOTTIME is
- * not available (kernel < 3.15).
- */
- acd->fd_timer = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
- }
- if (acd->fd_timer < 0) {
- r = -n_acd_errno();
- goto error;
- }
-
- r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_timer,
- &(struct epoll_event){
- .events = EPOLLIN,
- .data.u32 = N_ACD_EPOLL_TIMER,
- });
- if (r < 0) {
- r = -n_acd_errno();
- goto error;
- }
-
- *acdp = acd;
- return 0;
-
-error:
- n_acd_free(acd);
- return r;
-}
-
-/**
- * n_acd_free() - free an ACD context
- *
- * Frees all resources held by the context. This may be called at any time,
- * but doing so invalidates all data owned by the context.
- *
- * Return: NULL.
- */
-_public_ void n_acd_free(NAcd *acd) {
- NAcdEventNode *node;
-
- if (!acd)
- return;
-
- n_acd_reset(acd);
-
- acd->current = n_acd_event_node_free(acd->current);
-
- while ((node = c_list_first_entry(&acd->events, NAcdEventNode, link)))
- n_acd_event_node_free(node);
-
- assert(acd->fd_socket < 0);
-
- if (acd->fd_timer >= 0) {
+ if (acd->timer.fd >= 0) {
assert(acd->fd_epoll >= 0);
- epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_timer, NULL);
- close(acd->fd_timer);
- acd->fd_timer = -1;
+ epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->timer.fd, NULL);
+ timer_deinit(&acd->timer);
}
if (acd->fd_epoll >= 0) {
@@ -332,271 +328,164 @@ _public_ void n_acd_free(NAcd *acd) {
free(acd);
}
+/**
+ * n_acd_ref() - increment the reference count of @acd (a NULL @acd is accepted and left alone) and return @acd
+ */
+_public_ NAcd *n_acd_ref(NAcd *acd) {
+ if (acd)
+ ++acd->n_refs;
+ return acd;
+}
+
+/**
+ * n_acd_unref() - drop one reference to @acd (a NULL @acd is accepted) and free the context once the count reaches zero; always returns NULL
+ */
+_public_ NAcd *n_acd_unref(NAcd *acd) {
+ if (acd && !--acd->n_refs)
+ n_acd_free(acd);
+ return NULL;
+}
+
+int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event) {
+ NAcdEventNode *node;
+ int r;
+ /* Allocate an event node of type @event and append it to acd->event_list; returns 0 or the error from n_acd_event_node_new(). */
+ r = n_acd_event_node_new(&node);
+ if (r)
+ return r;
+
+ node->event.event = event;
+ c_list_link_tail(&acd->event_list, &node->acd_link);
+
+ if (nodep) /* @nodep is optional; callers that do not need the node pass NULL */
+ *nodep = node;
+ return 0;
+}
+
+int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa) {
+ struct sockaddr_ll address = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htobe16(ETH_P_ARP),
+ .sll_ifindex = acd->ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ };
+ struct ether_arp arp = {
+ .ea_hdr = {
+ .ar_hrd = htobe16(ARPHRD_ETHER),
+ .ar_pro = htobe16(ETHERTYPE_IP),
+ .ar_hln = sizeof(acd->mac),
+ .ar_pln = sizeof(uint32_t),
+ .ar_op = htobe16(ARPOP_REQUEST),
+ },
+ };
+ ssize_t l;
+ int r;
+ /* Broadcast an ARP request for @tpa with our MAC as sender hardware address. Returns 0, N_ACD_E_DROPPED, or an error code. */
+ memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac));
+ memcpy(arp.arp_tpa, &tpa->s_addr, sizeof(uint32_t));
+
+ if (spa) /* with a NULL @spa the sender protocol address stays all-zero from the initializer */
+ memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr));
+
+ l = sendto(acd->fd_socket,
+ &arp,
+ sizeof(arp),
+ MSG_NOSIGNAL,
+ (struct sockaddr *)&address,
+ sizeof(address));
+ if (l < 0) {
+ if (errno == EAGAIN || errno == ENOBUFS) {
+ /*
+ * We never maintain outgoing queues. We rely on the
+ * network device to do that for us. In case the queues
+ * are full, or the kernel refuses to queue the packet
+ * for other reasons, we must tell our caller that the
+ * packet was dropped.
+ */
+ return N_ACD_E_DROPPED;
+ } else if (errno == ENETDOWN || errno == ENXIO) {
+ /*
+ * These errors happen if the network device went down
+ * or was actually removed. We always propagate this as
+ * event, so the user can react accordingly (similarly
+ * to the recvmmsg(2) handler). In case the user does
+ * not immediately react, we also tell our caller that
+ * the packet was dropped, so we don't erroneously
+ * treat this as success.
+ */
+
+ r = n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
+ if (r)
+ return r;
+
+ return N_ACD_E_DROPPED;
+ }
+
+ /*
+ * Random network error. We treat this as fatal and propagate
+ * the error, so it is noticed and can be investigated.
+ */
+ return -n_acd_errno();
+ } else if (l != (ssize_t)sizeof(arp)) {
+ /*
+ * Ugh, the kernel modified the packet. This is unexpected. We
+ * consider the packet lost.
+ */
+ return N_ACD_E_DROPPED;
+ }
+
+ return 0;
+}
+
/**
* n_acd_get_fd() - get pollable file descriptor
* @acd: ACD context
* @fdp: output argument for file descriptor
*
- * Returns a file descriptor in @fdp. This filedescriptor can be polled by
+ * Returns a file descriptor in @fdp. This file descriptor can be polled by
* the caller to indicate when the ACD context can be dispatched.
*/
_public_ void n_acd_get_fd(NAcd *acd, int *fdp) {
*fdp = acd->fd_epoll;
}
-static int n_acd_push_event(NAcd *acd, unsigned int event, uint16_t *operation, uint8_t (*sender)[6], uint8_t (*target)[4]) {
- NAcdEventNode *node;
- int r;
-
- r = n_acd_event_node_new(&node, event);
- if (r < 0)
- return r;
-
- switch (event) {
- case N_ACD_EVENT_USED:
- node->event.used.operation = be16toh(*operation);
- memcpy(node->sender, sender, sizeof(node->sender));
- node->event.used.sender = node->sender;
- node->event.used.n_sender = sizeof(node->sender);
- memcpy(&node->event.used.target, target, sizeof(node->event.used.target));
- break;
- case N_ACD_EVENT_CONFLICT:
- node->event.conflict.operation = be16toh(*operation);
- memcpy(node->sender, sender, sizeof(node->sender));
- node->event.used.sender = node->sender;
- node->event.used.n_sender = sizeof(node->sender);
- memcpy(&node->event.conflict.target, target, sizeof(node->event.conflict.target));
- break;
- case N_ACD_EVENT_DEFENDED:
- node->event.defended.operation = be16toh(*operation);
- memcpy(node->sender, sender, sizeof(node->sender));
- node->event.used.sender = node->sender;
- node->event.used.n_sender = sizeof(node->sender);
- memcpy(&node->event.defended.target, target, sizeof(node->event.defended.target));
- break;
- case N_ACD_EVENT_READY:
- case N_ACD_EVENT_DOWN:
- break;
- default:
- assert(0);
- }
-
- c_list_link_tail(&acd->events, &node->link);
-
- return 0;
-}
-
-static int n_acd_now(uint64_t *nowp) {
- struct timespec ts;
- int r;
-
- r = clock_gettime(CLOCK_BOOTTIME, &ts);
- if (r < 0)
- return -n_acd_errno();
-
- *nowp = ts.tv_sec * UINT64_C(1000000) + ts.tv_nsec / UINT64_C(1000);
- return 0;
-}
-
-static int n_acd_schedule(NAcd *acd, uint64_t u_timeout, unsigned int u_jitter) {
- uint64_t u_next = u_timeout;
- int r;
-
- /*
- * ACD specifies jitter values to reduce packet storms on the local
- * link. This call accepts the maximum relative jitter value in
- * microseconds as @u_jitter. We then use rand_r(3p) to get a
- * pseudo-random jitter on top of the real timeout given as @u_timeout.
- * Note that rand_r() is fine for this. Before you try to improve the
- * RNG, you better spend some time securing ARP.
- */
- if (u_jitter)
- u_next += rand_r(&acd->seed) % u_jitter;
-
- /*
- * Setting .it_value to 0 in timerfd_settime() disarms the timer. Avoid
- * this and always schedule at least 1us. Otherwise, we'd have to
- * recursively call into the time-out handler, which we really want to
- * avoid. No reason to optimize performance here.
- */
- if (!u_next)
- u_next = 1;
-
- r = timerfd_settime(acd->fd_timer, 0,
- &(struct itimerspec){ .it_value = {
- .tv_sec = u_next / UINT64_C(1000000),
- .tv_nsec = u_next % UINT64_C(1000000) * UINT64_C(1000),
- } }, NULL);
- if (r < 0)
- return -n_acd_errno();
-
- return 0;
-}
-
-static int n_acd_send(NAcd *acd, const struct in_addr *spa) {
- struct sockaddr_ll address = {
- .sll_family = AF_PACKET,
- .sll_protocol = htobe16(ETH_P_ARP),
- .sll_ifindex = acd->config.ifindex,
- .sll_halen = ETH_ALEN,
- .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
- };
- struct ether_arp arp = {
- .ea_hdr.ar_hrd = htobe16(ARPHRD_ETHER),
- .ea_hdr.ar_pro = htobe16(ETHERTYPE_IP),
- .ea_hdr.ar_hln = sizeof(acd->mac),
- .ea_hdr.ar_pln = sizeof(uint32_t),
- .ea_hdr.ar_op = htobe16(ARPOP_REQUEST),
- };
- ssize_t l;
-
- memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac));
- memcpy(arp.arp_tpa, &acd->config.ip.s_addr, sizeof(uint32_t));
-
- if (spa)
- memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr));
-
- l = sendto(acd->fd_socket, &arp, sizeof(arp), MSG_NOSIGNAL, (struct sockaddr *)&address, sizeof(address));
- if (l == (ssize_t)sizeof(arp)) {
- /* Packet was properly sent. */
- return 0;
- } else if (l >= 0) {
- /*
- * Ugh. The packet was truncated. This should not happen, but
- * lets just pretend the packet was dropped.
- */
- return 0;
- } else if (errno == EAGAIN || errno == ENOBUFS) {
- /*
- * In case the output buffer is full, the packet is silently
- * dropped. This is just as if the physical layer happened to
- * drop the packet. We are not on a reliable medium, so no
- * reason to pretend we are.
- */
- return 0;
- } else if (errno == ENETDOWN || errno == ENXIO) {
- /*
- * We get ENETDOWN if the network-device goes down or is
- * removed. ENXIO might happen on async send-operations if the
- * network-device was unplugged and thus the kernel is no
- * longer aware of it.
- * In any case, we do not allow proceeding with this socket. We
- * stop the engine and notify the user gracefully.
- */
- return -N_ACD_E_DOWN;
- }
-
- return -n_acd_errno();
-}
-
-static void n_acd_remember_conflict(NAcd *acd, uint64_t now) {
- if (++acd->n_conflicts >= N_ACD_RFC_MAX_CONFLICTS) {
- acd->n_conflicts = N_ACD_RFC_MAX_CONFLICTS;
- acd->last_conflict = now;
- }
-}
-
static int n_acd_handle_timeout(NAcd *acd) {
+ NAcdProbe *probe;
+ uint64_t now;
int r;
- switch (acd->state) {
- case N_ACD_STATE_PROBING:
- /*
- * We are still PROBING. We send 3 probes with a random timeout
- * scheduled between each. If, after a fixed timeout, we did
- * not receive any conflict we consider the probing successful.
- */
- if (acd->n_iteration >= N_ACD_RFC_PROBE_NUM) {
- /*
- * All 3 probes succeeded and we waited enough to
- * consider this address usable by now. Do not announce
- * the address, yet. We must first give the caller a
- * chance to configure the address (so they can answer
- * ARP requests), before announcing it. But our
- * callbacks are not necessarily synchronous (we want
- * to allow IPC there), so just notify the caller and
- * wait for further instructions, thus effectively
- * increasing the probe-wait.
- */
- r = n_acd_push_event(acd, N_ACD_EVENT_READY, NULL, NULL, NULL);
- if (r)
- return r;
+ /*
+ * Read the current time once, and handle all timeouts that triggered
+ * before the current time. Rereading the current time in each loop
+ * might risk creating a live-lock, and the fact that we read the
+ * time after reading the timer guarantees that the timeout which
+ * woke us up is handled.
+ *
+ * When there are no more timeouts to handle at the given time, we
+ * rearm the timer to potentially wake us up again in the future.
+ */
+ timer_now(&acd->timer, &now);
- acd->state = N_ACD_STATE_CONFIGURING;
- } else {
- /*
- * We have not sent all 3 probes, yet. A timer fired,
- * so we are ready to send the next probe. If this is
- * the third probe, schedule a timer for ANNOUNCE_WAIT
- * to give other peers a chance to answer. If this is
- * not the third probe, wait between PROBE_MIN and
- * PROBE_MAX for the next probe.
- */
+ for (;;) {
+ Timeout *timeout;
- r = n_acd_send(acd, NULL);
- /*
- * During probe we must respect the total timeout and so
- * we ignore errors caused by a down interface.
- */
- if (r < 0 && r != -N_ACD_E_DOWN)
- return r;
-
- if (++acd->n_iteration >= N_ACD_RFC_PROBE_NUM)
- r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_USEC, 0);
- else
- r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_PROBE_MIN_USEC,
- acd->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_USEC - N_ACD_RFC_PROBE_MIN_USEC));
- if (r < 0)
- return r;
- }
-
- break;
-
- case N_ACD_STATE_ANNOUNCING:
- /*
- * We are ANNOUNCING, meaning the caller configured the address
- * on the interface and is actively using it. We send 3
- * announcements out, in a short interval, and then just
- * perform passive conflict detection.
- * Note that once all 3 announcements are sent, we no longer
- * schedule a timer, so this part should not trigger, anymore.
- */
-
- r = n_acd_send(acd, &acd->config.ip);
+ r = timer_pop_timeout(&acd->timer, now, &timeout);
if (r < 0) {
- if (r != -N_ACD_E_DOWN)
- return r;
+ return r;
+ } else if (!timeout) {
/*
- * We want to send all the 3 announcements even if the
- * interface goes temporarily down. Therefore, if send()
- * fails, don't increment the iteration and try again.
+ * There are no more timeouts pending before @now. Rearm
+ * the timer to fire again at the next timeout.
*/
- } else
- acd->n_iteration++;
-
- if (acd->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
- /*
- * Announcements are always scheduled according to the
- * time-intervals specified in the spec. We always use
- * the RFC5227-mandated multiplier.
- * If you reconsider this, note that timeout_multiplier
- * might be 0 here.
- */
- r = n_acd_schedule(acd, N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_USEC, 0);
- if (r < 0)
- return r;
+ timer_rearm(&acd->timer);
+ break;
}
- break;
-
- case N_ACD_STATE_INIT:
- case N_ACD_STATE_CONFIGURING:
- default:
- /*
- * There are no timeouts in these states. If we trigger one,
- * something is fishy. Let the caller deal with this.
- */
- return -EIO;
+ probe = (void *)timeout - offsetof(NAcdProbe, timeout);
+ r = n_acd_probe_handle_timeout(probe);
+ if (r)
+ return r;
}
return 0;
@@ -604,136 +493,94 @@ static int n_acd_handle_timeout(NAcd *acd) {
static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) {
bool hard_conflict;
- uint64_t now;
+ NAcdProbe *probe;
+ uint32_t addr;
+ CRBNode *node;
int r;
/*
- * Via BPF we discard any non-conflict packets. There are only 2 types
- * that can pass: A conflict on the Sender Protocol Address, or a
- * conflict on the Target Protocol Address.
+ * We are interested in 2 kinds of ARP messages:
*
- * The former we call a hard-conflict. It implies that the sender uses
- * the address already. We must always catch this and in some way react
- * to it. Any kind, REQUEST or REPLY must be caught (though it is
- * unlikely that we ever catch REPLIES since they tend to be unicasts).
+ * 1) Someone who is *NOT* us sends *ANY* ARP message with our IP
+ * address as sender. This is never good, because it implies an
+ * address conflict.
+ * We call this a hard-conflict.
*
- * However, in case the Target Protocol Address matches, we just know
- * that somebody is looking for the address. Hence, we must also check
- * that the packet is an ARP-Probe (Sender Protocol Address is 0). If
- * it is, it means someone else does ACD on our address. We call this a
- * soft conflict.
+ * 2) Someone who is *NOT* us sends an ARP REQUEST without any sender
+ * IP, but our IP as target. This implies someone else performs an
+ * ARP Probe with our address. This also implies a conflict, but
+ * one that can be resolved by responding to the probe.
+ * We call this a soft-conflict.
+ *
+ * We are never interested in any other ARP message. The kernel already
+ * deals with everything else, hence, we can silently ignore those.
+ *
+ * Now, we simply check whether a sender-address is set. This allows us
+ * to distinguish both cases. We then check further conditions, so we
+ * can bail out early if neither is the case.
+ *
+ * Lastly, we perform a lookup in our probe-set to check whether the
+ * address actually matches, so we can let these probes dispatch the
+ * message. Note that we allow duplicate probes, so we need to dispatch
+ * each matching probe, not just one.
*/
- if (!memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa)) &&
- !memcmp(packet->arp_tpa, &acd->config.ip.s_addr, sizeof(packet->arp_tpa)) &&
- packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) {
- hard_conflict = false;
- } else if (!memcmp(packet->arp_spa, &acd->config.ip.s_addr, sizeof(packet->arp_spa))) {
+
+ if (memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa))) {
+ memcpy(&addr, packet->arp_spa, sizeof(addr));
hard_conflict = true;
+ } else if (packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) {
+ memcpy(&addr, packet->arp_tpa, sizeof(addr));
+ hard_conflict = false;
} else {
/*
- * Ignore anything that is specific enough to match the BPF
- * filter, but is none of the conflicts described above.
- */
- return 0;
- }
-
- r = n_acd_now(&now);
- if (r < 0)
- return r;
-
- switch (acd->state) {
- case N_ACD_STATE_PROBING:
- /*
- * Regardless whether this is a hard or soft conflict, we must
- * treat this as a probe failure. That is, notify the caller of
- * the conflict and wait for further instructions. We do not
- * react to this, until the caller tells us what to do. But we
- * immediately stop the engine, since there is no point in
- * continuing the probing.
- */
- n_acd_remember_conflict(acd, now);
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_USED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
-
- break;
-
- case N_ACD_STATE_CONFIGURING:
- /*
- * We are waiting for the caller to configure the interface and
- * start ANNOUNCING. In this state, we cannot defend the address
- * as that would indicate that it is ready to be used, and we
- * cannot signal CONFLICT or USED as the caller may already have
- * started to use the address (and may have configured the engine
- * to always defend it, which means they should be able to rely on
- * never losing it after READY). Simply drop the event, and rely
- * on the anticipated ANNOUNCE to trigger it again.
- */
-
- break;
-
- case N_ACD_STATE_ANNOUNCING:
- /*
- * We were already instructed to announce the address, which
- * means the address is configured and in use. Hence, the
- * caller is responsible to serve regular ARP queries. Meaning,
- * we can ignore any soft conflicts (other peers doing ACD).
- *
- * But if we see a hard-conflict, we either defend the address
- * according to the caller's instructions, or we report the
- * conflict and bail out.
- */
-
- if (!hard_conflict)
- break;
-
- if (acd->defend == N_ACD_DEFEND_NEVER) {
- n_acd_remember_conflict(acd, now);
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_CONFLICT, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- } else {
- if (now > acd->last_defend + N_ACD_RFC_DEFEND_INTERVAL_USEC) {
- r = n_acd_send(acd, &acd->config.ip);
- if (r < 0)
- return r;
-
- acd->last_defend = now;
- r = n_acd_push_event(acd, N_ACD_EVENT_DEFENDED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- } else if (acd->defend == N_ACD_DEFEND_ONCE) {
- n_acd_remember_conflict(acd, now);
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_CONFLICT, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- } else {
- r = n_acd_push_event(acd, N_ACD_EVENT_DEFENDED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- }
- }
-
- break;
-
- case N_ACD_STATE_INIT:
- default:
- /*
- * The socket should not be dispatched in those states, since
- * it is neither allocated nor added to epoll. Fail hard if we
- * trigger this somehow.
+ * The BPF filter will not let through any other packet.
*/
return -EIO;
}
+ /* Find top-most node that matches @addr. */
+ node = acd->ip_tree.root;
+ while (node) {
+ probe = c_rbnode_entry(node, NAcdProbe, ip_node);
+ if (addr < probe->ip.s_addr)
+ node = node->left;
+ else if (addr > probe->ip.s_addr)
+ node = node->right;
+ else
+ break;
+ }
+
+ /*
+ * If the address is unknown, we drop the packet. This might happen if
+ * the kernel queued the packet and passed the BPF filter, but we
+ * modified the set before dequeuing the message.
+ */
+ if (!node)
+ return 0;
+
+ /* Forward to left-most child that still matches @addr. */
+ while (node->left && addr == c_rbnode_entry(node->left,
+ NAcdProbe,
+ ip_node)->ip.s_addr)
+ node = node->left;
+
+ /* Iterate all matching entries in-order. */
+ do {
+ probe = c_rbnode_entry(node, NAcdProbe, ip_node);
+
+ r = n_acd_probe_handle_packet(probe, packet, hard_conflict);
+ if (r)
+ return r;
+
+ node = c_rbnode_next(node);
+ } while (node && addr == c_rbnode_entry(node,
+ NAcdProbe,
+ ip_node)->ip.s_addr);
+
return 0;
}
static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) {
- uint64_t v;
int r;
if (event->events & (EPOLLHUP | EPOLLERR)) {
@@ -746,97 +593,113 @@ static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) {
}
if (event->events & EPOLLIN) {
- for (unsigned int i = 0; i < 128; ++i) {
- r = read(acd->fd_timer, &v, sizeof(v));
- if (r == sizeof(v)) {
- /*
- * We successfully read a timer-value. Handle it and
- * return. We do NOT fall-through to EPOLLHUP handling,
- * as we always must drain buffers first.
- */
- return n_acd_handle_timeout(acd);
- } else if (r >= 0) {
- /*
- * Kernel guarantees 8-byte reads; fail hard if it
- * suddenly starts doing weird shit. No clue what to do
- * with those values, anyway.
- */
- return -EIO;
- } else if (errno == EAGAIN) {
- /*
- * No more pending events.
- */
- return 0;
- } else {
- /*
- * Something failed. We use CLOCK_BOOTTIME, so
- * ECANCELED cannot happen. Hence, there is no error
- * that we could gracefully handle. Fail hard and let
- * the caller deal with it.
- */
- return -n_acd_errno();
- }
- }
+ r = timer_read(&acd->timer);
+ if (r <= 0)
+ return r;
- return N_ACD_E_PREEMPTED;
+ assert(r == TIMER_E_TRIGGERED);
+
+ /*
+ * A timer triggered, handle all pending timeouts at a given
+ * point in time. There can only be a finite number of pending
+ * timeouts, any new ones will be in the future, so not handled
+ * now, but guaranteed to wake us up again when they do trigger.
+ */
+ r = n_acd_handle_timeout(acd);
+ if (r)
+ return r;
}
return 0;
}
-static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
- struct ether_arp packet;
- ssize_t l;
+static bool n_acd_packet_is_valid(NAcd *acd, void *packet, size_t n_packet) {
+ struct ether_arp *arp;
- for (unsigned int i = 0; i < 128; ++i) {
- /*
- * Regardless whether EPOLLIN is set in @event->events, we always
- * invoke recv(2). This is a safety-net for sockets, which always fetch
- * queued errors on all syscalls. That means, if anything failed on the
- * socket, we will be notified via recv(2). This simplifies the code
- * and avoid magic EPOLLIN/ERR/HUP juggling.
- *
- * Note that we must use recv(2) over read(2), since the latter cannot
- * deal with empty packets properly.
- *
- * We explicitly skip passing MSG_TRUNC here. We *WANT*
- * overlong packets to be retrieved and truncated. Ethernet
- * frames might not have byte-granular lengths. Real hardware
- * does add trailing padding/garbage, so we must discard this
- * here.
- */
- l = recv(acd->fd_socket, &packet, sizeof(packet), 0);
- if (l == (ssize_t)sizeof(packet)) {
+ /*
+ * The eBPF filter will ensure that this function always returns true, however,
+ * this allows the eBPF filter to be an optional optimization which is necessary
+ * on older kernels.
+ *
+ * See comments in n-acd-bpf.c for details.
+ */
+
+ if (n_packet != sizeof(*arp))
+ return false;
+
+ arp = packet;
+
+ if (arp->arp_hrd != htobe16(ARPHRD_ETHER))
+ return false;
+
+ if (arp->arp_pro != htobe16(ETHERTYPE_IP))
+ return false;
+
+ if (arp->arp_hln != sizeof(struct ether_addr))
+ return false;
+
+ if (arp->arp_pln != sizeof(struct in_addr))
+ return false;
+
+ if (!memcmp(arp->arp_sha, acd->mac, sizeof(struct ether_addr)))
+ return false;
+
+ if (memcmp(arp->arp_spa, &((struct in_addr) { INADDR_ANY }), sizeof(struct in_addr))) {
+ if (arp->arp_op != htobe16(ARPOP_REQUEST) && arp->arp_op != htobe16(ARPOP_REPLY))
+ return false;
+ } else if (arp->arp_op != htobe16(ARPOP_REQUEST)) {
+ return false;
+ }
+
+ return true;
+}
+
+static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
+ const size_t n_batch = 8;
+ struct mmsghdr msgs[n_batch];
+ struct iovec iovecs[n_batch];
+ struct ether_arp data[n_batch];
+ size_t i;
+ int r, n;
+
+ for (i = 0; i < n_batch; ++i) {
+ iovecs[i].iov_base = data + i;
+ iovecs[i].iov_len = sizeof(data[i]);
+ msgs[i].msg_hdr = (struct msghdr){
+ .msg_iov = iovecs + i,
+ .msg_iovlen = 1,
+ };
+ }
+
+ /*
+ * We always directly call into recvmmsg(2), regardless which EPOLL*
+ * event is signalled. On sockets, the recv(2)-family of syscalls does
+ * a suitable job of handling all possible scenarios and telling us
+ * about it. Hence, let's take the easy route and always ask the kernel
+ * about the current state.
+ */
+ n = recvmmsg(acd->fd_socket, msgs, n_batch, 0, NULL);
+ if (n < 0) {
+ if (errno == ENETDOWN) {
/*
- * We read a full ARP packet. We never fall-through to EPOLLHUP
- * handling, as we always must drain buffers first.
+ * We get ENETDOWN if the network-device goes down or
+ * is removed. This error is temporary and only queued
+ * once. Subsequent reads will simply return EAGAIN
+ * until the device is up again and has data queued.
+ * Usually, the caller should tear down all probes when
+ * an interface goes down, but we leave it up to the
+ * caller to decide what to do. We propagate the code
+ * and continue.
*/
- return n_acd_handle_packet(acd, &packet);
- } else if (l >= 0) {
- /*
- * The BPF filter discards short packets, so error out
- * if something slips through for any reason. Don't silently
- * ignore it, since we explicitly want to know if something
- * went fishy.
- */
- return -EIO;
- } else if (errno == ENETDOWN || errno == ENXIO) {
- /*
- * The network device went down or was removed. Ignore
- * such errors and let the pending probe time out.
- * Subsequent reads will simply return EAGAIN until the
- * device is up again and has data queued.
- */
- return 0;
+ return n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
} else if (errno == EAGAIN) {
/*
- * We cannot read data from the socket (we got EAGAIN). As a safety net
- * check for EPOLLHUP/ERR. Those cannot be disabled with epoll, so we
- * must make sure to not busy-loop by ignoring them. Note that we know
- * recv(2) on sockets to return an error if either of these epoll-flags
- * is set. Hence, if we did not handle it above, we have no other way
- * but treating those flags as fatal errors and returning them to the
- * caller.
+ * There is no more data queued and we did not get
+ * preempted. Everything is good to go.
+ * As a safety-net against busy-looping, we do check
+ * for HUP/ERR. Neither should be set, since they imply
+ * error-dequeue behavior on all socket calls. Let's
+ * fail hard if we trigger it, so we can investigate.
*/
if (event->events & (EPOLLHUP | EPOLLERR))
return -EIO;
@@ -844,35 +707,63 @@ static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
return 0;
} else {
/*
- * Cannot dispatch the packet. This might be due to OOM, HUP,
- * or something else. We cannot handle it gracefully so forward
- * to the caller.
+ * Something went wrong. Propagate the error-code, so
+ * this can be investigated.
*/
return -n_acd_errno();
}
+ } else if (n >= (ssize_t)n_batch) {
+ /*
+ * If all buffers were filled with data, we cannot be sure that
+ * there is nothing left to read. But to avoid starvation, we
+ * cannot loop on this condition. Instead, we mark the context
+ * as preempted so the caller can call us again.
+ * Note that in level-triggered event-loops this condition can
+ * be neglected, but in edge-triggered event-loops it is
+ * crucial to forward this information.
+ *
+ * On the other hand, there are several conditions where the
+ * kernel might return fewer batches than requested, but was
+ * still preempted. However, all of those cases require the
+ * preemption to have triggered a wakeup *after* we entered
+ * recvmmsg(). Hence, even if we did not recognize the
+ * preemption, an edge must have triggered and as such we will
+ * handle the event on the next turn.
+ */
+ acd->preempted = true;
}
- return N_ACD_E_PREEMPTED;
+ for (i = 0; (ssize_t)i < n; ++i) {
+ if (!n_acd_packet_is_valid(acd, data + i, msgs[i].msg_len))
+ continue;
+ /*
+ * Handle the packet. Bail out if something went wrong. Note
+ * that these must be fatal errors, since we discard all other
+ * packets that follow.
+ */
+ r = n_acd_handle_packet(acd, data + i);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
/**
- * n_acd_dispatch() - dispatch ACD context
- * @acd: ACD context
- *
- * Return: 0 on successful dispatch of all pending events, N_ACD_E_PREEMPT in
- * case there are more still more events to be dispatched, or a
- * negative error code on failure.
+ * XXX
*/
_public_ int n_acd_dispatch(NAcd *acd) {
struct epoll_event events[2];
int n, i, r = 0;
- bool preempted = false;
n = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0);
if (n < 0) {
+ /* Linux never returns EINTR if `timeout == 0'. */
return -n_acd_errno();
}
+ acd->preempted = false;
+
for (i = 0; i < n; ++i) {
switch (events[i].data.u32) {
case N_ACD_EPOLL_TIMER:
@@ -882,35 +773,16 @@ _public_ int n_acd_dispatch(NAcd *acd) {
r = n_acd_dispatch_socket(acd, events + i);
break;
default:
+ assert(0);
r = 0;
break;
}
- if (r == N_ACD_E_PREEMPTED)
- preempted = true;
- else if (r != 0)
- break;
- }
-
- if (r == -N_ACD_E_DOWN) {
- /*
- * N_ACD_E_DOWN is synthesized whenever we notice
- * ENETDOWN-related errors on the network interface. This
- * allows bailing out of deep call-paths and then handling the
- * error gracefully here.
- */
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_DOWN, NULL, NULL, NULL);
if (r)
return r;
-
- return 0;
}
- if (preempted)
- return N_ACD_E_PREEMPTED;
- else
- return r;
+ return acd->preempted ? N_ACD_E_PREEMPTED : 0;
}
/**
@@ -920,343 +792,75 @@ _public_ int n_acd_dispatch(NAcd *acd) {
*
* Returns a pointer to the next pending event. The event is still owend by
* the context, and is only valid until the next call to n_acd_pop_event()
- * or until the context is freed.
+ * or until the owning object is freed (either the ACD context or the indicated
+ * probe object).
+ *
+ * An event either originates on the ACD context, or one of the configured
+ * probes. If the event-type has a 'probe' pointer, it originated on the
+ * indicated probe (which is *never* NULL), otherwise it originated on the
+ * context.
+ *
+ * Users must call this function repeatedly until either an error is returned,
+ * or the event-pointer is NULL. Wakeups on the epoll-fd are only guaranteed
+ * for each batch of events. Hence, it is the caller's responsibility to drain
+ * the event-queue somehow after each call to n_acd_dispatch(). Note that
+ * events can only be added by n_acd_dispatch(), hence, you cannot live-lock
+ * when draining the event queue.
*
* The possible events are:
- * * N_ACD_EVENT_READY: The configured IP address was probed successfully
+ * * N_ACD_EVENT_READY: A configured IP address was probed successfully
* and is ready to be used. Once configured on the
* interface, the caller must call n_acd_announce()
* to announce and start defending the address.
- * No further events may be received before
- * n_acd_announce() has been called.
* * N_ACD_EVENT_USED: Someone is already using the IP address being
- * probed. The engine was stopped, and the caller
- * may restart it to try again.
- * * N_ACD_EVENT_DEFENDED: A conflict was detected for the announced IP
+ * probed. The probe is put into stopped state and
+ * should be freed by the caller.
+ * * N_ACD_EVENT_DEFENDED: A conflict was detected for an announced IP
* address, and the engine attempted to defend it.
* This is purely informational, and no action is
* required by the caller.
- * * N_ACD_EVENT_CONFLICT: A conflict was detected for the announced IP
- * address, and the engine was not able to defend
+ * * N_ACD_EVENT_CONFLICT: A conflict was detected for an announced IP
+ * address, and the probe was not able to defend
* it (according to the configured policy). The
- * engine has stoppde, the caller must stop using
- * the address immediately, and may restart the
- * engine to retry.
- * * N_ACD_EVENT_DOWN: A network error was detected. The engine was
- * stopped and it is the responsibility of the
- * caller to restart it once the network may be
- * functional again.
+ * probe halted, the caller must stop using
+ * the address immediately, and should free the probe.
+ * * N_ACD_EVENT_DOWN: The specified network interface was put down. The
+ * user is recommended to free *ALL* probes and
+ * recreate them as soon as the interface is up again.
+ * Note that this event is purely informational. The
+ * probes will continue running, but all packets will
+ * be blackholed, and no network packets are received,
+ * until the network is back up again. Hence, from an
+ * operational perspective, the legitimacy of the ACD
+ * probes is lost and the user better re-probes all
+ * addresses.
*
- * Returns: 0 on success, N_ACD_E_STOPPED if there are no more events and
- * the engine has been stopped, N_ACD_E_DONE if there are no more
- * events, but the engine is still running, or a negative error
- * code on failure.
+ * Returns: 0 on success, negative error code on failure. The popped event is
+ * returned in @eventp. If no event is pending, NULL is placed in
+ * @eventp and 0 is returned. If an error is returned, @eventp is left
+ * untouched.
*/
_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) {
- acd->current = n_acd_event_node_free(acd->current);
+ NAcdEventNode *node, *t_node;
- if (c_list_is_empty(&acd->events)) {
- if (acd->state == N_ACD_STATE_INIT)
- return N_ACD_E_STOPPED;
- else
- return N_ACD_E_DONE;
- }
-
- acd->current = c_list_first_entry(&acd->events, NAcdEventNode, link);
- c_list_unlink(&acd->current->link);
-
- if (eventp)
- *eventp = &acd->current->event;
-
- return 0;
-}
-
-static int n_acd_bind_socket(NAcd *acd, int s) {
- /*
- * Due to strict aliasing, we cannot get uint32_t/uint16_t pointers to
- * acd->config.mac, so provide a union accessor.
- */
- const union {
- uint8_t u8[6];
- uint16_t u16[3];
- uint32_t u32[1];
- } mac = {
- .u8 = {
- acd->mac[0],
- acd->mac[1],
- acd->mac[2],
- acd->mac[3],
- acd->mac[4],
- acd->mac[5],
- },
- };
- struct sock_filter filter[] = {
- /*
- * Basic ARP header validation. Make sure the packet-length,
- * wire type, protocol type, and address lengths are correct.
- */
- BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */
- BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(struct ether_arp), 1, 0), /* #packet >= #arp-packet ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hrd)), /* A <- header */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPHRD_ETHER, 1, 0), /* header == ethernet ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pro)), /* A <- protocol */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 1, 0), /* protocol == IP ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hln)), /* A <- hardware address length */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct ether_addr), 1, 0), /* length == sizeof(ether_addr)? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pln)), /* A <- protocol address length */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct in_addr), 1, 0), /* length == sizeof(in_addr) ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_op)), /* A <- operation */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), /* protocol == request ? */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 1, 0), /* protocol == reply ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
-
- /*
- * Sender hardware address must be different from ours. Note
- * that BPF runs in big-endian mode, but assumes immediates are
- * given in native-endian. This might look weird on 6-byte mac
- * addresses, but is needed to revert the BPF magic.
- */
- BPF_STMT(BPF_LD + BPF_IMM, be32toh(mac.u32[0])), /* A <- 4 bytes of client's MAC */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_sha)), /* A <- 4 bytes of SHA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 6), /* A == 0 ? */
- BPF_STMT(BPF_LD + BPF_IMM, be16toh(mac.u16[2])), /* A <- remainder of client's MAC */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, arp_sha) + 4), /* A <- remainder of SHA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
-
- /*
- * Sender protocol address or target protocol address must be
- * equal to the one we care about. Again, immediates must be
- * given in native-endian.
- */
- BPF_STMT(BPF_LD + BPF_IMM, be32toh(acd->config.ip.s_addr)), /* A <- clients IP */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_spa)), /* A <- SPA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
- BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
- BPF_STMT(BPF_LD + BPF_IMM, be32toh(acd->config.ip.s_addr)), /* A <- clients IP */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_tpa)), /* A <- TPA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
- BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- };
- const struct sock_fprog fprog = {
- .len = sizeof(filter) / sizeof(*filter),
- .filter = filter,
- };
- const struct sockaddr_ll address = {
- .sll_family = AF_PACKET,
- .sll_protocol = htobe16(ETH_P_ARP),
- .sll_ifindex = acd->config.ifindex,
- .sll_halen = ETH_ALEN,
- .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
- };
- int r;
-
- /*
- * Install a packet filter that matches on the ARP header and
- * addresses, to reduce the number of wake-ups to a minimum.
- */
- r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
- if (r < 0)
- return -n_acd_errno();
-
- /*
- * Bind the packet-socket to ETH_P_ARP and the specified network
- * interface.
- */
- r = bind(s, (struct sockaddr *)&address, sizeof(address));
- if (r < 0)
- return -n_acd_errno();
-
- return 0;
-}
-
-static int n_acd_setup_socket(NAcd *acd) {
- int r, s;
-
- s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
- if (s < 0)
- return -n_acd_errno();
-
- r = n_acd_bind_socket(acd, s);
- if (r < 0)
- goto error;
-
- r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, s,
- &(struct epoll_event){
- .events = EPOLLIN,
- .data.u32 = N_ACD_EPOLL_SOCKET,
- });
- if (r < 0) {
- r = -n_acd_errno();
- goto error;
- }
-
- acd->fd_socket = s;
- return 0;
-
-error:
- close(s);
- return r;
-}
-
-/**
- * n_acd_start() - start the ACD engine
- * @acd: ACD context
- * @config: description of interface and desired IP address
- *
- * Start probing the given address on the given interface.
- *
- * The engine must not already be running, and there must not be
- * any pending events.
- *
- * Returns: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the configuration
- * was invalid, N_ACD_E_BUSY if the engine is running or there are
- * pending events, or a negative error code on failure.
- */
-_public_ int n_acd_start(NAcd *acd, NAcdConfig *config) {
- uint64_t now, delay;
- int r;
-
- if (config->ifindex <= 0 ||
- config->transport != N_ACD_TRANSPORT_ETHERNET ||
- config->n_mac != ETH_ALEN ||
- !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN) ||
- !config->ip.s_addr)
- return N_ACD_E_INVALID_ARGUMENT;
-
- if (acd->state != N_ACD_STATE_INIT || !c_list_is_empty(&acd->events))
- return N_ACD_E_BUSY;
-
- acd->config = *config;
- memcpy(acd->mac, config->mac, config->n_mac);
- acd->config.mac = acd->mac;
- acd->timeout_multiplier = config->timeout_msec;
-
- r = n_acd_setup_socket(acd);
- if (r < 0)
- goto error;
-
- if (acd->timeout_multiplier) {
- delay = 0;
- acd->n_iteration = 0;
-
- if (acd->last_conflict != TIME_INFINITY) {
- r = n_acd_now(&now);
- if (r < 0)
- goto error;
-
- if (now < acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC)
- delay = acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC - now;
+ c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) {
+ if (node->is_public) {
+ n_acd_event_node_free(node);
+ continue;
}
- r = n_acd_schedule(acd, delay, acd->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_USEC);
- if (r < 0)
- goto error;
- } else {
- /*
- * A zero timeout means we drop the probing alltogether, and behave as if
- * the last probe succeeded immediately.
- */
- acd->n_iteration = N_ACD_RFC_PROBE_NUM;
-
- r = n_acd_schedule(acd, 0, 0);
- if (r < 0)
- goto error;
+ node->is_public = true;
+ *eventp = &node->event;
+ return 0;
}
- acd->state = N_ACD_STATE_PROBING;
- acd->defend = N_ACD_DEFEND_NEVER;
- acd->last_defend = 0;
- return 0;
-
-error:
- n_acd_reset(acd);
- return r;
-}
-
-/**
- * n_acd_stop() - stop the ACD engine
- * @acd: ACD context
- *
- * Stop the engine. No new events may be triggered, but pending events are not
- * flushed. Before calling n_acd_start() again all pending events must be popped.
- *
- * Return: 0 on success, negative error code on failure.
- */
-_public_ int n_acd_stop(NAcd *acd) {
- n_acd_reset(acd);
+ *eventp = NULL;
return 0;
}
/**
- * n_acd_announce() - announce the configured IP address
- * @acd: ACD context
- * @defend: defence policy
- *
- * Announce the IP address on the local link, and start defending it according
- * to the given policy, which mut be one of N_ACD_DEFEND_ONCE,
- * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS.
- *
- * This must be called after the engine in response to an N_ACD_EVENT_READY
- * event, and only after the given address has been configured on the given
- * interface.
- *
- * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defence policy
- * is invalid, N_ACD_E_BUSY if this is not in response to a
- * N_ACD_EVENT_READY event, or a negative error code on failure.
+ * XXX
*/
-_public_ int n_acd_announce(NAcd *acd, unsigned int defend) {
- uint64_t now;
- int r;
-
- if (defend >= _N_ACD_DEFEND_N)
- return N_ACD_E_INVALID_ARGUMENT;
- if (acd->state != N_ACD_STATE_CONFIGURING)
- return N_ACD_E_BUSY;
-
- /*
- * Sending announcements means we finished probing and use the address
- * now. We therefore reset the conflict counter in case we adhered to
- * the rate-limit. Since probing is properly delayed, a well-behaving
- * client will always reset the conflict counter here. However, if you
- * force-use an address regardless of conflicts, then this will not
- * trigger and the conflict counter stays untouched.
- */
- if (acd->last_conflict != TIME_INFINITY) {
- r = n_acd_now(&now);
- if (r < 0)
- return r;
-
- if (now >= acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC)
- acd->n_conflicts = 0;
- }
-
- /*
- * Instead of sending the first announcement here, we schedule an idle
- * timer. This avoids possibly recursing into the user callback. We
- * should never trigger callbacks from arbitrary stacks, but always
- * restrict them to the dispatcher.
- */
- r = n_acd_schedule(acd, 0, 0);
- if (r < 0)
- return r;
-
- acd->state = N_ACD_STATE_ANNOUNCING;
- acd->defend = defend;
- acd->n_iteration = 0;
- return 0;
+_public_ int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config) {
+ return n_acd_probe_new(probep, acd, config);
}
diff --git a/shared/n-acd/src/n-acd.h b/shared/n-acd/src/n-acd.h
index 75646243d8..74b0aacb59 100644
--- a/shared/n-acd/src/n-acd.h
+++ b/shared/n-acd/src/n-acd.h
@@ -15,44 +15,23 @@ extern "C" {
#include
#include
+typedef struct NAcd NAcd;
+typedef struct NAcdConfig NAcdConfig;
+typedef struct NAcdEvent NAcdEvent;
+typedef struct NAcdProbe NAcdProbe;
+typedef struct NAcdProbeConfig NAcdProbeConfig;
+
#define N_ACD_TIMEOUT_RFC5227 (UINT64_C(9000))
enum {
_N_ACD_E_SUCCESS,
- N_ACD_E_DONE,
- N_ACD_E_STOPPED,
N_ACD_E_PREEMPTED,
-
N_ACD_E_INVALID_ARGUMENT,
- N_ACD_E_BUSY,
+
+ _N_ACD_E_N,
};
-typedef struct NAcd NAcd;
-
-typedef struct NAcdConfig {
- int ifindex;
- unsigned int transport;
- const uint8_t *mac;
- size_t n_mac;
- struct in_addr ip;
- uint64_t timeout_msec;
-} NAcdConfig;
-
-typedef struct NAcdEvent {
- unsigned int event;
- union {
- struct {
- } ready, down;
- struct {
- uint16_t operation;
- uint8_t *sender;
- size_t n_sender;
- struct in_addr target;
- } used, defended, conflict;
- };
-} NAcdEvent;
-
enum {
N_ACD_TRANSPORT_ETHERNET,
_N_ACD_TRANSPORT_N,
@@ -74,21 +53,94 @@ enum {
_N_ACD_DEFEND_N,
};
-int n_acd_new(NAcd **acdp);
-void n_acd_free(NAcd *acd);
+struct NAcdEvent {
+ unsigned int event;
+ union {
+ struct {
+ NAcdProbe *probe;
+ } ready;
+ struct {
+ } down;
+ struct {
+ NAcdProbe *probe;
+ uint8_t *sender;
+ size_t n_sender;
+ } used, defended, conflict;
+ };
+};
+
+/* configs */
+
+int n_acd_config_new(NAcdConfig **configp);
+NAcdConfig *n_acd_config_free(NAcdConfig *config);
+
+void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex);
+void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport);
+void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac);
+
+int n_acd_probe_config_new(NAcdProbeConfig **configp);
+NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config);
+
+void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip);
+void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs);
+
+/* contexts */
+
+int n_acd_new(NAcd **acdp, NAcdConfig *config);
+NAcd *n_acd_ref(NAcd *acd);
+NAcd *n_acd_unref(NAcd *acd);
void n_acd_get_fd(NAcd *acd, int *fdp);
-
int n_acd_dispatch(NAcd *acd);
int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp);
-int n_acd_announce(NAcd *acd, unsigned int defend);
-int n_acd_start(NAcd *acd, NAcdConfig *config);
-int n_acd_stop(NAcd *acd);
+int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config);
-static inline void n_acd_freep(NAcd **acd) {
+/* probes */
+
+NAcdProbe *n_acd_probe_free(NAcdProbe *probe);
+
+void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata);
+void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap);
+
+int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend);
+
+/* inline helpers */
+
+static inline void n_acd_config_freep(NAcdConfig **config) {
+ if (*config)
+ n_acd_config_free(*config);
+}
+
+static inline void n_acd_config_freev(NAcdConfig *config) {
+ n_acd_config_free(config);
+}
+
+static inline void n_acd_probe_config_freep(NAcdProbeConfig **config) {
+ if (*config)
+ n_acd_probe_config_free(*config);
+}
+
+static inline void n_acd_probe_config_freev(NAcdProbeConfig *config) {
+ n_acd_probe_config_free(config);
+}
+
+static inline void n_acd_unrefp(NAcd **acd) {
if (*acd)
- n_acd_free(*acd);
+ n_acd_unref(*acd);
+}
+
+static inline void n_acd_unrefv(NAcd *acd) {
+ n_acd_unref(acd);
+}
+
+static inline void n_acd_probe_freep(NAcdProbe **probe) {
+ if (*probe)
+ n_acd_probe_free(*probe);
+}
+
+static inline void n_acd_probe_freev(NAcdProbe *probe) {
+ n_acd_probe_free(probe);
}
#ifdef __cplusplus
diff --git a/shared/n-acd/src/test-api.c b/shared/n-acd/src/test-api.c
index 697181abaa..e16de48b73 100644
--- a/shared/n-acd/src/test-api.c
+++ b/shared/n-acd/src/test-api.c
@@ -7,67 +7,90 @@
#include
#include "test.h"
-static void test_api_constants(void) {
- assert(N_ACD_DEFEND_NEVER != _N_ACD_DEFEND_N);
- assert(N_ACD_DEFEND_ONCE != _N_ACD_DEFEND_N);
- assert(N_ACD_DEFEND_ALWAYS != _N_ACD_DEFEND_N);
+static void test_api(void) {
+ NAcdConfig *config = NULL;
+ NAcd *acd = NULL;
+ int r;
+
+ assert(N_ACD_E_PREEMPTED);
+ assert(N_ACD_E_INVALID_ARGUMENT);
+
+ assert(N_ACD_TRANSPORT_ETHERNET != _N_ACD_TRANSPORT_N);
assert(N_ACD_EVENT_READY != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_USED != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_DEFENDED != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_CONFLICT != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_DOWN != _N_ACD_EVENT_N);
-}
-static void test_api_management(void) {
- NAcd *acd = NULL;
- int r;
+ assert(N_ACD_DEFEND_NEVER != _N_ACD_DEFEND_N);
+ assert(N_ACD_DEFEND_ONCE != _N_ACD_DEFEND_N);
+ assert(N_ACD_DEFEND_ALWAYS != _N_ACD_DEFEND_N);
- /* new/free/freep */
+ n_acd_config_freep(&config);
- n_acd_freep(&acd);
-
- r = n_acd_new(&acd);
+ r = n_acd_config_new(&config);
assert(!r);
- n_acd_free(acd);
-}
+ n_acd_config_set_ifindex(config, 1);
+ n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
+ n_acd_config_set_mac(config, (uint8_t[6]){ }, 6);
-static void test_api_runtime(void) {
- NAcdConfig config = {
- .ifindex = 1,
- .transport = N_ACD_TRANSPORT_ETHERNET,
- .mac = (uint8_t[]){ 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54 },
- .n_mac = ETH_ALEN,
- .ip = { htobe32((127 << 24) | (1 << 0)) },
- .timeout_msec = 100,
- };
- NAcd *acd;
- int r;
+ {
+ NAcdEvent *event;
+ int fd;
- /* get_fd/dispatch/pop_event/start/stop/announce */
+ n_acd_unrefp(&acd);
+ n_acd_ref(NULL);
- r = n_acd_new(&acd);
- assert(!r);
+ r = n_acd_new(&acd, config);
+ assert(!r);
- n_acd_get_fd(acd, &r);
- assert(r >= 0);
- r = n_acd_dispatch(acd);
- assert(!r);
- r = n_acd_pop_event(acd, NULL);
- assert(r == N_ACD_E_STOPPED);
- r = n_acd_start(acd, &config);
- assert(!r);
- r = n_acd_start(acd, &config);
- assert(r == N_ACD_E_BUSY);
- r = n_acd_pop_event(acd, NULL);
- assert(r == N_ACD_E_DONE);
- r = n_acd_stop(acd);
- assert(!r);
- r = n_acd_announce(acd, N_ACD_DEFEND_NEVER);
- assert(r == N_ACD_E_BUSY);
+ n_acd_get_fd(acd, &fd);
+ n_acd_dispatch(acd);
+ n_acd_pop_event(acd, &event);
- n_acd_free(acd);
+ {
+ NAcdProbeConfig *c = NULL;
+
+ n_acd_probe_config_freep(&c);
+
+ r = n_acd_probe_config_new(&c);
+ assert(!r);
+
+ n_acd_probe_config_set_ip(c, (struct in_addr){ 1 });
+ n_acd_probe_config_set_timeout(c, N_ACD_TIMEOUT_RFC5227);
+
+ {
+ NAcdProbe *probe = NULL;
+ void *userdata;
+
+ r = n_acd_probe(acd, &probe, c);
+ assert(!r);
+
+ n_acd_probe_get_userdata(probe, &userdata);
+ assert(userdata == NULL);
+ n_acd_probe_set_userdata(probe, acd);
+ n_acd_probe_get_userdata(probe, &userdata);
+ assert(userdata == acd);
+
+ r = n_acd_probe_announce(probe, N_ACD_DEFEND_ONCE);
+ assert(!r);
+
+ n_acd_probe_free(probe);
+ n_acd_probe_freev(NULL);
+ }
+
+ n_acd_probe_config_free(c);
+ n_acd_probe_config_freev(NULL);
+ }
+
+ n_acd_unref(acd);
+ n_acd_unrefv(NULL);
+ }
+
+ n_acd_config_free(config);
+ n_acd_config_freev(NULL);
}
int main(int argc, char **argv) {
@@ -77,8 +100,6 @@ int main(int argc, char **argv) {
if (r)
return r;
- test_api_constants();
- test_api_management();
- test_api_runtime();
+ test_api();
return 0;
}
diff --git a/shared/n-acd/src/test-basic.c b/shared/n-acd/src/test-basic.c
deleted file mode 100644
index fa85cb0549..0000000000
--- a/shared/n-acd/src/test-basic.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Basic Tests
- */
-
-#include
-#include
-#include
-#include
-#include "n-acd.h"
-
-int main(int argc, char **argv) {
- return 0;
-}
diff --git a/shared/n-acd/src/test-bpf.c b/shared/n-acd/src/test-bpf.c
new file mode 100644
index 0000000000..aa8b20ec30
--- /dev/null
+++ b/shared/n-acd/src/test-bpf.c
@@ -0,0 +1,228 @@
+/*
+ * eBPF socket filter tests
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd.h"
+#include "n-acd-private.h"
+#include "test.h"
+
+#define ETHER_ARP_PACKET_INIT(_op, _mac, _sip, _tip) { /* brace initializer for a struct ether_arp; _mac, _sip and _tip are pointers and are evaluated several times */ \
+ .ea_hdr = { \
+ .ar_hrd = htobe16(ARPHRD_ETHER), \
+ .ar_pro = htobe16(ETHERTYPE_IP), \
+ .ar_hln = 6, \
+ .ar_pln = 4, \
+ .ar_op = htobe16(_op), \
+ }, \
+ .arp_sha[0] = (_mac)->ether_addr_octet[0], /* sender hardware address, copied octet by octet */ \
+ .arp_sha[1] = (_mac)->ether_addr_octet[1], \
+ .arp_sha[2] = (_mac)->ether_addr_octet[2], \
+ .arp_sha[3] = (_mac)->ether_addr_octet[3], \
+ .arp_sha[4] = (_mac)->ether_addr_octet[4], \
+ .arp_sha[5] = (_mac)->ether_addr_octet[5], \
+ .arp_spa[0] = (be32toh((_sip)->s_addr) >> 24) & 0xff, /* sender IP, most significant byte of the big-endian value first */ \
+ .arp_spa[1] = (be32toh((_sip)->s_addr) >> 16) & 0xff, \
+ .arp_spa[2] = (be32toh((_sip)->s_addr) >> 8) & 0xff, \
+ .arp_spa[3] = be32toh((_sip)->s_addr) & 0xff, \
+ .arp_tpa[0] = (be32toh((_tip)->s_addr) >> 24) & 0xff, /* target IP, same byte order as the sender IP */ \
+ .arp_tpa[1] = (be32toh((_tip)->s_addr) >> 16) & 0xff, \
+ .arp_tpa[2] = (be32toh((_tip)->s_addr) >> 8) & 0xff, \
+ .arp_tpa[3] = be32toh((_tip)->s_addr) & 0xff, \
+ } /* arp_tha is not named here, so it is zero-initialized */
+
+static void test_map(void) { /* Exercise create/add/remove on the BPF address map, including duplicate adds and removals of missing keys. */
+ int r, mapfd = -1;
+ struct in_addr addr = { 1 };
+
+ r = n_acd_bpf_map_create(&mapfd, 8); /* second argument is presumably the map capacity - TODO confirm against n_acd_bpf_map_create() */
+ assert(r >= 0);
+ assert(mapfd >= 0);
+
+ r = n_acd_bpf_map_remove(mapfd, &addr); /* removing from an empty map must report -ENOENT */
+ assert(r == -ENOENT);
+
+ r = n_acd_bpf_map_add(mapfd, &addr);
+ assert(r >= 0);
+
+ r = n_acd_bpf_map_add(mapfd, &addr); /* adding the same address twice must report -EEXIST */
+ assert(r == -EEXIST);
+
+ r = n_acd_bpf_map_remove(mapfd, &addr);
+ assert(r >= 0);
+
+ r = n_acd_bpf_map_remove(mapfd, &addr); /* the entry is gone again after the first removal */
+ assert(r == -ENOENT);
+
+ close(mapfd);
+}
+
+static void verify_success(struct ether_arp *packet, int out_fd, int in_fd) { /* Send @packet on @out_fd and assert that a full-sized packet arrives on @in_fd. */
+ uint8_t buf[sizeof(struct ether_arp)];
+ int r;
+
+ r = send(out_fd, packet, sizeof(struct ether_arp), 0);
+ assert(r == sizeof(struct ether_arp));
+
+ r = recv(in_fd, buf, sizeof(buf), 0); /* only the received length is checked, not the payload */
+ assert(r == sizeof(struct ether_arp));
+}
+
+static void verify_failure(struct ether_arp *packet, int out_fd, int in_fd) { /* Send @packet on @out_fd and assert that nothing can be received on @in_fd. */
+ uint8_t buf[sizeof(struct ether_arp)];
+ int r;
+
+ r = send(out_fd, packet, sizeof(struct ether_arp), 0); /* the send itself must still succeed */
+ assert(r == sizeof(struct ether_arp));
+
+ r = recv(in_fd, buf, sizeof(buf), 0); /* the caller in this file passes a SOCK_NONBLOCK socket, so an empty queue yields EAGAIN */
+ assert(r < 0);
+ assert(errno == EAGAIN);
+}
+
+static void test_filter(void) { /* Attach the compiled filter to one end of a socketpair and check which ARP packets pass. */
+ uint8_t buf[sizeof(struct ether_arp) + 1]; /* one spare byte so the "long" case can send an oversized packet */
+ struct ether_addr mac1 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 } }; /* MAC handed to n_acd_bpf_compile(); used as sender in the "own mac" case */
+ struct ether_addr mac2 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x07 } }; /* sender MAC for all other cases */
+ struct in_addr ip0 = { 0 };
+ struct in_addr ip1 = { 1 }; /* the only address that is added to the map */
+ struct in_addr ip2 = { 2 };
+ struct ether_arp *packet = (struct ether_arp *)buf;
+ int r, mapfd = -1, progfd = -1, pair[2];
+
+ r = n_acd_bpf_map_create(&mapfd, 1);
+ assert(r >= 0);
+
+ r = n_acd_bpf_compile(&progfd, mapfd, &mac1);
+ assert(r >= 0);
+ assert(progfd >= 0);
+
+ r = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, pair);
+ assert(r >= 0);
+
+ r = setsockopt(pair[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, /* filter is attached to pair[1], the receiving end in every case below */
+ sizeof(progfd));
+ assert(r >= 0);
+
+ r = n_acd_bpf_map_add(mapfd, &ip1);
+ assert(r >= 0);
+
+ /* valid */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ verify_success(packet, pair[0], pair[1]);
+
+ /* valid: reply instead of request */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip1, &ip2);
+ verify_success(packet, pair[0], pair[1]);
+
+ /* valid: to us instead of from us */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip1);
+ verify_success(packet, pair[0], pair[1]);
+
+ /* invalid header type */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ packet->arp_hrd += 1;
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* invalid protocol */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ packet->arp_pro += 1;
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* invalid hw addr length */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ packet->arp_hln += 1;
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* invalid protocol addr length */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ packet->arp_pln += 1;
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* invalid operation */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_NAK, &mac2, &ip1, &ip2);
+ packet->arp_hln += 1; /* NOTE(review): this also corrupts the hw addr length, so the case may pass without the operation being checked - looks like a copy-paste, confirm */
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* own mac */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac1, &ip1, &ip2);
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* not to, nor from us, with source */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip2, &ip2);
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* not to, nor from us, without source */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip2);
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* to us instead of from us, but reply */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip0, &ip1);
+ verify_failure(packet, pair[0], pair[1]);
+
+ /* long */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ r = send(pair[0], buf, sizeof(struct ether_arp) + 1, 0); /* the extra trailing byte of buf is never written by this function */
+ assert(r == sizeof(struct ether_arp) + 1);
+
+ r = recv(pair[1], buf, sizeof(buf), 0); /* buf could hold the extra byte, yet only sizeof(struct ether_arp) bytes are expected */
+ assert(r == sizeof(struct ether_arp));
+
+ /* short */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ r = send(pair[0], buf, sizeof(struct ether_arp) - 1, 0);
+ assert(r == sizeof(struct ether_arp) - 1);
+
+ r = recv(pair[1], buf, sizeof(buf), 0); /* a packet one byte too short must not be delivered */
+ assert(r < 0);
+ assert(errno == EAGAIN);
+
+ /*
+ * Send one packet before and one packet after modifying the map,
+ * verify that the modification applies at the time of send(), not recv().
+ */
+ *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
+ r = send(pair[0], buf, sizeof(struct ether_arp), 0);
+ assert(r == sizeof(struct ether_arp));
+
+ r = n_acd_bpf_map_remove(mapfd, &ip1);
+ assert(r >= 0);
+
+ r = send(pair[0], buf, sizeof(struct ether_arp), 0);
+ assert(r == sizeof(struct ether_arp));
+
+ r = recv(pair[1], buf, sizeof(buf), 0); /* the packet sent before the removal is still queued */
+ assert(r == sizeof(struct ether_arp));
+
+ r = recv(pair[1], buf, sizeof(buf), 0); /* the packet sent after the removal was dropped */
+ assert(r < 0);
+ assert(errno == EAGAIN);
+
+ close(pair[0]);
+ close(pair[1]);
+ close(progfd);
+ close(mapfd);
+}
+
+int main(int argc, char **argv) { /* Entry point of the eBPF test: run test_setup(), then the map and filter tests. */
+ int r;
+
+ r = test_setup();
+ if (r) /* a non-zero setup result becomes the exit code and no test is run */
+ return r;
+
+ test_map();
+ test_filter();
+
+ return 0;
+}
diff --git a/shared/n-acd/src/test-loopback.c b/shared/n-acd/src/test-loopback.c
index 98195c93a5..5c01d65b68 100644
--- a/shared/n-acd/src/test-loopback.c
+++ b/shared/n-acd/src/test-loopback.c
@@ -9,44 +9,62 @@
#include "test.h"
static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) {
- NAcdConfig config = {
- .ifindex = ifindex,
- .transport = N_ACD_TRANSPORT_ETHERNET,
- .mac = mac,
- .n_mac = n_mac,
- .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
- .timeout_msec = 100,
- };
- struct pollfd pfds;
+ NAcdConfig *config;
NAcd *acd;
+ struct pollfd pfds;
int r, fd;
- r = n_acd_new(&acd);
+ r = n_acd_config_new(&config);
assert(!r);
- n_acd_get_fd(acd, &fd);
- r = n_acd_start(acd, &config);
+ n_acd_config_set_ifindex(config, ifindex);
+ n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
+ n_acd_config_set_mac(config, mac, n_mac);
+
+ r = n_acd_new(&acd, config);
assert(!r);
- for (;;) {
- NAcdEvent *event;
- pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
- r = poll(&pfds, 1, -1);
- assert(r >= 0);
+ n_acd_config_free(config);
- r = n_acd_dispatch(acd);
+ {
+ NAcdProbeConfig *probe_config;
+ NAcdProbe *probe;
+ struct in_addr ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) };
+
+ r = n_acd_probe_config_new(&probe_config);
assert(!r);
- r = n_acd_pop_event(acd, &event);
- if (!r) {
- assert(event->event == N_ACD_EVENT_READY);
- break;
- } else {
- assert(r == N_ACD_E_DONE);
+ n_acd_probe_config_set_ip(probe_config, ip);
+ n_acd_probe_config_set_timeout(probe_config, 100);
+
+ r = n_acd_probe(acd, &probe, probe_config);
+ assert(!r);
+
+ n_acd_probe_config_free(probe_config);
+
+ n_acd_get_fd(acd, &fd);
+
+ for (;;) {
+ NAcdEvent *event;
+ pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
+ r = poll(&pfds, 1, -1);
+ assert(r >= 0);
+
+ r = n_acd_dispatch(acd);
+ assert(!r);
+
+ r = n_acd_pop_event(acd, &event);
+ assert(!r);
+ if (event) {
+ assert(event->event == N_ACD_EVENT_READY);
+ break;
+ }
}
+
+ n_acd_probe_free(probe);
}
- n_acd_free(acd);
+ n_acd_unref(acd);
}
int main(int argc, char **argv) {
@@ -57,9 +75,7 @@ int main(int argc, char **argv) {
if (r)
return r;
- r = system("ip link set lo up");
- assert(r == 0);
- test_if_query("lo", &ifindex, &mac);
+ test_loopback_up(&ifindex, &mac);
test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
return 0;
diff --git a/shared/n-acd/src/test-veth.c b/shared/n-acd/src/test-veth.c
new file mode 100644
index 0000000000..64724f5e9f
--- /dev/null
+++ b/shared/n-acd/src/test-veth.c
@@ -0,0 +1,238 @@
+/*
+ * Test on a veth link
+ *
+ * This essentially mimics a real network with two peers.
+ *
+ * Run one ACD context on each end of the tunnel. On one end probe for N
+ * addresses; on the other end pre-configure N/3 of the same addresses and probe
+ * for another N/3 of the addresses.
+ *
+ * Verify that in the case of simultaneous probes of the same address at most one
+ * succeeds, in the case of probing for a configured address it always fails, and
+ * probing for a non-existent address always succeeds.
+ *
+ * Make sure to keep N fairly high as the protocol is probabilistic, and we also
+ * want to verify that resizing the internal maps works correctly.
+ */
+
+#include
+#include "test.h"
+
+#define TEST_ACD_N_PROBES (9)
+
+typedef enum { /* per-probe outcome, stored in the probe's userdata pointer by test_veth() */
+ TEST_ACD_STATE_UNKNOWN, /* value 0: no READY/USED event recorded yet */
+ TEST_ACD_STATE_USED, /* an N_ACD_EVENT_USED was received for the probe */
+ TEST_ACD_STATE_READY, /* an N_ACD_EVENT_READY was received for the probe */
+} TestAcdState;
+
+static void test_veth(int ifindex1, uint8_t *mac1, size_t n_mac1, /* Run one ACD context per interface, start TEST_ACD_N_PROBES probes and verify each outcome. */
+ int ifindex2, uint8_t *mac2, size_t n_mac2) {
+ NAcdConfig *config;
+ NAcd *acd1, *acd2;
+ NAcdProbe *probes1[TEST_ACD_N_PROBES];
+ NAcdProbe *probes2[TEST_ACD_N_PROBES]; /* only the slots with i % 3 == 2 are ever filled */
+ unsigned long state1, state2;
+ size_t n_running = 0; /* number of probes that have not delivered their READY/USED event yet */
+ int r;
+
+ r = n_acd_config_new(&config);
+ assert(!r);
+
+ n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
+ /* the same config object is reused for both contexts, with ifindex and mac overwritten in between */
+ n_acd_config_set_ifindex(config, ifindex1);
+ n_acd_config_set_mac(config, mac1, n_mac1);
+ r = n_acd_new(&acd1, config);
+ assert(!r);
+
+ n_acd_config_set_ifindex(config, ifindex2);
+ n_acd_config_set_mac(config, mac2, n_mac2);
+ r = n_acd_new(&acd2, config);
+ assert(!r);
+
+ n_acd_config_free(config);
+
+ {
+ NAcdProbeConfig *probe_config;
+
+ r = n_acd_probe_config_new(&probe_config);
+ assert(!r);
+ n_acd_probe_config_set_timeout(probe_config, 64);
+
+ assert(TEST_ACD_N_PROBES <= 10 << 24); /* NOTE(review): presumably meant to keep i within the low 24 bits of 10.x.x.x, which would need 1 << 24 - confirm */
+
+ for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
+ struct in_addr ip = { htobe32((10 << 24) | i) }; /* address 10.0.0.0 plus i */
+
+ n_acd_probe_config_set_ip(probe_config, ip);
+
+ switch (i % 3) {
+ case 0:
+ /*
+ * Probe on one side, and leave the address
+ * unset on the other. The probe must succeed.
+ */
+
+ break;
+ case 1:
+ /*
+ * Preconfigure the address on one side, and
+ * probe on the other. The probe must fail.
+ */
+ test_add_child_ip(&ip);
+
+ break;
+ case 2:
+ /*
+ * Probe both sides for the same address, at
+ * most one may succeed.
+ */
+ r = n_acd_probe(acd2, &probes2[i], probe_config);
+ assert(!r);
+
+ ++n_running;
+
+ break;
+ }
+ /* every address is probed on acd1, regardless of the case above */
+ r = n_acd_probe(acd1, &probes1[i], probe_config);
+ assert(!r);
+
+ ++n_running;
+ }
+
+ n_acd_probe_config_free(probe_config);
+
+ while (n_running > 0) { /* dispatch both contexts until every probe has reported one event */
+ NAcdEvent *event;
+ struct pollfd pfds[2] = {
+ { .events = POLLIN },
+ { .events = POLLIN },
+ };
+
+ n_acd_get_fd(acd1, &pfds[0].fd);
+ n_acd_get_fd(acd2, &pfds[1].fd);
+
+ r = poll(pfds, 2, -1);
+ assert(r >= 0);
+
+ if (pfds[0].revents & POLLIN) {
+ r = n_acd_dispatch(acd1);
+ assert(!r || r == N_ACD_E_PREEMPTED);
+
+ for (;;) { /* drain all queued events of acd1 */
+ r = n_acd_pop_event(acd1, &event);
+ assert(!r);
+ if (event) {
+ switch (event->event) {
+ case N_ACD_EVENT_READY:
+ n_acd_probe_get_userdata(event->ready.probe, (void**)&state1); /* NOTE(review): writes a void * through an unsigned long object; relies on both having the same size - confirm */
+ assert(state1 == TEST_ACD_STATE_UNKNOWN); /* each probe may report only once */
+ state1 = TEST_ACD_STATE_READY;
+ n_acd_probe_set_userdata(event->ready.probe, (void*)state1);
+
+ break;
+ case N_ACD_EVENT_USED:
+ n_acd_probe_get_userdata(event->used.probe, (void**)&state1);
+ assert(state1 == TEST_ACD_STATE_UNKNOWN);
+ state1 = TEST_ACD_STATE_USED;
+ n_acd_probe_set_userdata(event->used.probe, (void*)state1);
+
+ break;
+ default:
+ assert(0); /* any other event type fails the test */
+ }
+
+ --n_running;
+ } else {
+ break;
+ }
+ }
+ }
+
+ if (pfds[1].revents & POLLIN) { /* same handling as above, for acd2 */
+ r = n_acd_dispatch(acd2);
+ assert(!r || r == N_ACD_E_PREEMPTED);
+
+ for (;;) {
+ r = n_acd_pop_event(acd2, &event);
+ assert(!r);
+ if (event) {
+ switch (event->event) {
+ case N_ACD_EVENT_READY:
+ n_acd_probe_get_userdata(event->ready.probe, (void**)&state2);
+ assert(state2 == TEST_ACD_STATE_UNKNOWN);
+ state2 = TEST_ACD_STATE_READY;
+ n_acd_probe_set_userdata(event->ready.probe, (void*)state2);
+
+ break;
+ case N_ACD_EVENT_USED:
+ n_acd_probe_get_userdata(event->used.probe, (void**)&state2);
+ assert(state2 == TEST_ACD_STATE_UNKNOWN);
+ state2 = TEST_ACD_STATE_USED;
+ n_acd_probe_set_userdata(event->used.probe, (void*)state2);
+
+ break;
+ default:
+ assert(0);
+ }
+
+ --n_running;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) { /* check the recorded outcome of every probe and release it */
+ struct in_addr ip = { htobe32((10 << 24) | i) };
+
+ switch (i % 3) {
+ case 0:
+ n_acd_probe_get_userdata(probes1[i], (void **)&state1);
+ assert(state1 == TEST_ACD_STATE_READY);
+
+ break;
+ case 1:
+ test_del_child_ip(&ip); /* undo the test_add_child_ip() done for this address */
+
+ n_acd_probe_get_userdata(probes1[i], (void **)&state1);
+ assert(state1 == TEST_ACD_STATE_USED);
+
+ break;
+ case 2:
+ n_acd_probe_get_userdata(probes1[i], (void **)&state1);
+ n_acd_probe_get_userdata(probes2[i], (void **)&state2);
+ assert(state1 != TEST_ACD_STATE_UNKNOWN);
+ assert(state2 != TEST_ACD_STATE_UNKNOWN);
+ assert(state1 == TEST_ACD_STATE_USED || state2 == TEST_ACD_STATE_USED); /* at least one side must have lost */
+ n_acd_probe_free(probes2[i]);
+
+ break;
+ }
+ n_acd_probe_free(probes1[i]);
+ }
+ }
+
+ n_acd_unref(acd2);
+ n_acd_unref(acd1);
+}
+
+int main(int argc, char **argv) { /* Entry point of the veth test: set up, create the veth pair, run test_veth() repeatedly. */
+ struct ether_addr mac1, mac2;
+ int r, ifindex1, ifindex2;
+
+ r = test_setup();
+ if (r) /* a non-zero setup result becomes the exit code and no test is run */
+ return r;
+
+ test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);
+ for (unsigned int i = 0; i < 8; ++i) { /* eight rounds on the same veth pair */
+ test_veth(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet),
+ ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
+ }
+
+ return 0;
+}
diff --git a/shared/n-acd/src/test.h b/shared/n-acd/src/test.h
index 92315858ba..f2cb801aab 100644
--- a/shared/n-acd/src/test.h
+++ b/shared/n-acd/src/test.h
@@ -11,7 +11,9 @@
#include
#include
#include
+#include
#include
+#include
#include
#include
#include
@@ -22,6 +24,32 @@
#include
#include "n-acd.h"
+static inline void test_add_child_ip(const struct in_addr *addr) { /* Add @addr with a /8 prefix to the hard-coded device "veth1" by running the ip tool. */
+ char *p;
+ int r;
+
+ r = asprintf(&p, "ip addr add dev veth1 %s/8", inet_ntoa(*addr)); /* p is heap-allocated by asprintf() and released below */
+ assert(r >= 0);
+
+ r = system(p);
+ assert(r >= 0); /* NOTE(review): only catches system() itself failing; a non-zero exit status of ip still passes - confirm this is intended */
+
+ free(p);
+}
+
+static inline void test_del_child_ip(const struct in_addr *addr) { /* Remove @addr with a /8 prefix from the hard-coded device "veth1" by running the ip tool. */
+ char *p;
+ int r;
+
+ r = asprintf(&p, "ip addr del dev veth1 %s/8", inet_ntoa(*addr)); /* p is heap-allocated by asprintf() and released below */
+ assert(r >= 0);
+
+ r = system(p);
+ assert(r >= 0); /* NOTE(review): only catches system() itself failing; a non-zero exit status of ip still passes - confirm this is intended */
+
+ free(p);
+}
+
static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) {
struct ifreq ifr = {};
size_t l;
@@ -39,7 +67,7 @@ static inline void test_if_query(const char *name, int *indexp, struct ether_add
s = socket(AF_INET, SOCK_DGRAM, 0);
assert(s >= 0);
- strncpy(ifr.ifr_name, name, l);
+ strncpy(ifr.ifr_name, name, l + 1);
r = ioctl(s, SIOCGIFHWADDR, &ifr);
assert(r >= 0);
@@ -84,6 +112,15 @@ static inline void test_veth_new(int *parent_indexp,
test_if_query("veth1", child_indexp, child_macp);
}
+static inline void test_loopback_up(int *indexp, struct ether_addr *macp) { /* Bring "lo" up and pass @indexp and @macp on to test_if_query(). */
+ int r;
+
+ r = system("ip link set lo up");
+ assert(r == 0); /* the command must run and exit with status 0 */
+
+ test_if_query("lo", indexp, macp);
+}
+
static inline int test_setup(void) {
int r;
diff --git a/shared/n-acd/src/util/test-timer.c b/shared/n-acd/src/util/test-timer.c
new file mode 100644
index 0000000000..9cc3109b60
--- /dev/null
+++ b/shared/n-acd/src/util/test-timer.c
@@ -0,0 +1,176 @@
+/*
+ * Tests for timer utility library
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include "timer.h"
+
+#define N_TIMEOUTS (10000)
+
+static void test_api(void) { /* Basic schedule/pop/unschedule round-trip with two timeouts. */
+ Timer timer = TIMER_NULL(timer);
+ Timeout t1 = TIMEOUT_INIT(t1), t2 = TIMEOUT_INIT(t2), *t;
+ int r;
+
+ r = timer_init(&timer);
+ assert(!r);
+
+ timeout_schedule(&t1, &timer, 1);
+ timeout_schedule(&t2, &timer, 2);
+
+ r = timer_pop_timeout(&timer, 10, &t); /* both are due by 10; the earlier one (t1) must come first */
+ assert(!r);
+ assert(t == &t1);
+
+ timeout_unschedule(&t2);
+
+ r = timer_pop_timeout(&timer, 10, &t); /* t2 was unscheduled, so nothing is left to pop */
+ assert(!r);
+ assert(!t);
+
+ timer_deinit(&timer);
+}
+
+static void test_pop(void) { /* Schedule N_TIMEOUTS timeouts at random times in 1..128 and verify they pop in time order. */
+ Timer timer = TIMER_NULL(timer);
+ Timeout timeouts[N_TIMEOUTS] = {};
+ uint64_t times[N_TIMEOUTS] = {}; /* times[k] is the time timeouts[k] was scheduled for */
+ size_t n_timeouts = 0; /* number of timeouts popped so far */
+ bool armed; /* true while the timerfd is expected to fire before the next pop round */
+ Timeout *t;
+ int r;
+
+ r = timer_init(&timer);
+ assert(!r);
+
+ for(size_t i = 0; i < N_TIMEOUTS; ++i) {
+ timeouts[i] = (Timeout)TIMEOUT_INIT(timeouts[i]);
+ times[i] = rand() % 128 + 1; /* never 0: timeout_schedule() asserts a non-zero time */
+ timeout_schedule(&timeouts[i], &timer, times[i]);
+ }
+
+ armed = true;
+
+ for(size_t i = 0; i <= 128; ++i) { /* i is the simulated current time passed to timer_pop_timeout() */
+ if (armed) {
+ struct pollfd pfd = {
+ .fd = timer.fd,
+ .events = POLLIN,
+ };
+ uint64_t count;
+
+ r = poll(&pfd, 1, -1);
+ assert(r == 1);
+
+ r = read(timer.fd, &count, sizeof(count)); /* consume the expiration so the fd is no longer readable */
+ assert(r == sizeof(count));
+ assert(count == 1);
+ armed = false;
+ }
+
+ for (;;) {
+ uint64_t current_time;
+
+ r = timer_pop_timeout(&timer, i, &t);
+ assert(!r);
+ if (!t) { /* nothing else due at time i: re-program the timerfd and advance */
+ timer_rearm(&timer);
+ break;
+ }
+
+ current_time = times[t - timeouts]; /* index of t within the timeouts array */
+ assert(current_time == i); /* earlier timeouts were already popped in earlier rounds */
+ ++n_timeouts;
+ armed = true;
+ }
+ }
+
+ assert(n_timeouts == N_TIMEOUTS);
+
+ r = timer_pop_timeout(&timer, (uint64_t)-1, &t); /* even with the largest possible deadline nothing is left */
+ assert(!r);
+ assert(!t);
+
+ timer_deinit(&timer);
+}
+
+void test_arm(void) { /* Sanity-check raw timerfd behaviour: two timerfds are armed, disarmed and fired independently of each other. */
+ struct itimerspec spec = {
+ .it_value = {
+ .tv_sec = 1000,
+ },
+ };
+ int fd1, fd2, r;
+
+ fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ assert(fd1 >= 0);
+
+ fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ assert(fd2 >= 0); /* check the descriptor that was just created, not fd1 again */
+
+ r = timerfd_settime(fd1, 0, &spec, NULL); /* flags 0: relative timeout of 1000 seconds */
+ assert(r >= 0);
+
+ r = timerfd_settime(fd2, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = timerfd_gettime(fd1, &spec); /* both timers must report remaining time */
+ assert(r >= 0);
+ assert(spec.it_value.tv_sec);
+
+ r = timerfd_gettime(fd2, &spec);
+ assert(r >= 0);
+ assert(spec.it_value.tv_sec);
+
+ spec = (struct itimerspec){};
+
+ r = timerfd_settime(fd1, 0, &spec, NULL); /* an all-zero it_value disarms fd1 */
+ assert(r >= 0);
+
+ r = timerfd_gettime(fd1, &spec);
+ assert(r >= 0);
+ assert(!spec.it_value.tv_sec);
+ assert(!spec.it_value.tv_nsec);
+
+ r = timerfd_gettime(fd2, &spec); /* fd2 is unaffected by disarming fd1 */
+ assert(r >= 0);
+ assert(spec.it_value.tv_sec);
+
+ spec = (struct itimerspec){ .it_value = { .tv_nsec = 1, }, };
+
+ r = timerfd_settime(fd1, 0, &spec, NULL); /* 1ns relative timeout: fires almost immediately */
+ assert(r >= 0);
+
+ r = poll(&(struct pollfd) { .fd = fd1, .events = POLLIN }, 1, -1);
+ assert(r == 1);
+
+ r = timerfd_settime(fd2, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
+ assert(r == 1);
+
+ spec = (struct itimerspec){};
+
+ r = timerfd_settime(fd1, 0, &spec, NULL); /* disarm fd1 again */
+ assert(r >= 0);
+
+ r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1); /* fd2 was never read, so its expiration is still pending */
+ assert(r == 1);
+
+ close(fd2);
+ close(fd1);
+}
+
+int main(int argc, char **argv) { /* Entry point of the timer tests; the arguments are unused. */
+ test_arm(); /* raw timerfd checks run first, before the Timer wrapper is exercised */
+ test_api();
+ test_pop();
+ return 0;
+}
diff --git a/shared/n-acd/src/util/timer.c b/shared/n-acd/src/util/timer.c
new file mode 100644
index 0000000000..c995ba400f
--- /dev/null
+++ b/shared/n-acd/src/util/timer.c
@@ -0,0 +1,189 @@
+/*
+ * Timer Utility Library
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "timer.h"
+
+int timer_init(Timer *timer) { /* Create the timerfd and reset @timer; returns 0 on success or a negative errno. */
+ clockid_t clock = CLOCK_BOOTTIME;
+ int r;
+
+ r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
+ if (r < 0 && errno == EINVAL) { /* fall back to CLOCK_MONOTONIC if CLOCK_BOOTTIME is rejected */
+ clock = CLOCK_MONOTONIC;
+ r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
+ }
+ if (r < 0)
+ return -errno;
+
+ *timer = (Timer)TIMER_NULL(*timer); /* overwrites every field, so previous contents of @timer are discarded */
+ timer->fd = r;
+ timer->clock = clock; /* remembered so timer_now() reads the same clock the timerfd uses */
+
+ return 0;
+}
+
+void timer_deinit(Timer *timer) { /* Close the timerfd; all timeouts must have been removed from the tree beforehand. */
+ assert(c_rbtree_is_empty(&timer->tree));
+
+ if (timer->fd >= 0) { /* fd is -1 for a TIMER_NULL timer or after a previous deinit, so this is safe to repeat */
+ close(timer->fd);
+ timer->fd = -1;
+ }
+}
+
+void timer_now(Timer *timer, uint64_t *nowp) { /* Store the current time of the timer's clock in *nowp, in nanoseconds. */
+ struct timespec ts;
+ int r;
+
+ r = clock_gettime(timer->clock, &ts);
+ assert(r >= 0);
+
+ *nowp = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; /* seconds scaled to nanoseconds plus the sub-second part */
+}
+
+void timer_rearm(Timer *timer) { /* Program the timerfd for the earliest timeout in the tree, or disarm it if the tree is empty. */
+ uint64_t time;
+ Timeout *timeout;
+ int r;
+
+ /*
+ * A timeout value of 0 clears the timer, we should only set that if
+ * no timeout exists in the tree.
+ */
+
+ timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
+ assert(!timeout || timeout->timeout);
+
+ time = timeout ? timeout->timeout : 0;
+
+ if (time != timer->scheduled_timeout) { /* skip the syscall when the programmed value would not change */
+ r = timerfd_settime(timer->fd,
+ TFD_TIMER_ABSTIME,
+ &(struct itimerspec){
+ .it_value = {
+ .tv_sec = time / UINT64_C(1000000000), /* @time is in nanoseconds, split into sec/nsec */
+ .tv_nsec = time % UINT64_C(1000000000),
+ },
+ },
+ NULL);
+ assert(r >= 0);
+
+ timer->scheduled_timeout = time;
+ }
+}
+
+int timer_read(Timer *timer) { /* Read the timerfd once: 0 if nothing is pending, TIMER_E_TRIGGERED if it fired, negative errno on error. */
+ uint64_t v;
+ int r;
+
+ r = read(timer->fd, &v, sizeof(v));
+ if (r < 0) {
+ if (errno == EAGAIN) {
+ /*
+ * No more pending events.
+ */
+ return 0;
+ } else {
+ /*
+ * Something failed. We use CLOCK_BOOTTIME/MONOTONIC,
+ * so ECANCELED cannot happen. Hence, there is no
+ * error that we could gracefully handle. Fail hard
+ * and let the caller deal with it.
+ */
+ return -errno;
+ }
+ } else if (r != sizeof(v) || v == 0) {
+ /*
+ * Kernel guarantees 8-byte reads, and only to return
+ * data if at least one timer triggered; fail hard if
+ * it suddenly starts behaving differently.
+ */
+ return -EIO;
+ }
+
+ return TIMER_E_TRIGGERED; /* the expiration count in v is discarded */
+}
+
+
+int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp) { /* Unlink and return the earliest timeout due at or before @until; always returns 0. */
+ Timeout *timeout;
+
+ /*
+ * If the first timeout is scheduled before @until, then unlink
+ * it and return it. Otherwise, return NULL.
+ */
+ timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
+ if (timeout && timeout->timeout <= until) { /* the comparison is inclusive: a timeout exactly at @until is popped */
+ c_rbnode_unlink(&timeout->node);
+ timeout->timeout = 0; /* timeout->timer is left set, and the timerfd is not rearmed here */
+ *timeoutp = timeout;
+ } else {
+ *timeoutp = NULL;
+ }
+
+ return 0;
+}
+
+void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time) { /* (Re)schedule @timeout on @timer at the non-zero time @time. */
+
+ assert(time);
+
+ /*
+ * In case @timeout was already scheduled, remove it from the
+ * tree. If we are moving it to a new timer, rearm the old one.
+ */
+ if (timeout->timer) {
+ c_rbnode_unlink(&timeout->node);
+ if (timeout->timer != timer)
+ timer_rearm(timeout->timer);
+ }
+ timeout->timer = timer;
+ timeout->timeout = time;
+
+ /*
+ * Now insert it back into the tree in the correct new position.
+ * We allow duplicates in the tree, so this insertion is open-coded.
+ */
+ {
+ Timeout *other;
+ CRBNode **slot, *parent;
+
+ slot = &timer->tree.root;
+ parent = NULL;
+ while (*slot) {
+ other = c_rbnode_entry(*slot, Timeout, node);
+ parent = *slot;
+ if (timeout->timeout < other->timeout)
+ slot = &(*slot)->left;
+ else /* equal times descend to the right, i.e. after existing entries with the same time */
+ slot = &(*slot)->right;
+ }
+
+ c_rbtree_add(&timer->tree, parent, slot, &timeout->node);
+ }
+
+ /*
+ * Rearm the timer as we updated the timeout tree.
+ */
+ timer_rearm(timer);
+}
+
+void timeout_unschedule(Timeout *timeout) { /* Remove @timeout from its timer, if any, and rearm that timer. */
+ Timer *timer = timeout->timer;
+
+ if (!timer) /* no timer recorded: nothing to do */
+ return;
+
+ c_rbnode_unlink(&timeout->node);
+ timeout->timeout = 0;
+ timeout->timer = NULL;
+
+ timer_rearm(timer); /* the earliest timeout may have changed */
+}
diff --git a/shared/n-acd/src/util/timer.h b/shared/n-acd/src/util/timer.h
new file mode 100644
index 0000000000..2acc99e379
--- /dev/null
+++ b/shared/n-acd/src/util/timer.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+typedef struct Timer Timer;
+typedef struct Timeout Timeout;
+
+enum { /* non-negative result codes of the timer library */
+ _TIMER_E_SUCCESS, /* value 0 */
+
+ TIMER_E_TRIGGERED, /* returned by timer_read() when an expiration was read from the timerfd */
+
+ _TIMER_E_N, /* number of codes defined above */
+};
+
+struct Timer { /* one timerfd plus the tree of timeouts scheduled on it */
+ int fd; /* timerfd descriptor, -1 when not initialized */
+ clockid_t clock; /* clock id chosen in timer_init() */
+ CRBTree tree; /* scheduled timeouts, ordered by Timeout.timeout */
+ uint64_t scheduled_timeout; /* value last programmed into the timerfd by timer_rearm(), 0 if disarmed */
+};
+
+#define TIMER_NULL(_x) { /* static initializer for a Timer without a timerfd; _x is not used in the expansion */ \
+ .fd = -1, \
+ .tree = C_RBTREE_INIT, \
+ }
+
+struct Timeout { /* a single deadline that can be scheduled on a Timer */
+ Timer *timer; /* timer this timeout was last scheduled on, NULL after timeout_unschedule() */
+ CRBNode node; /* link in Timer.tree */
+ uint64_t timeout; /* scheduled time, reset to 0 when popped or unscheduled */
+};
+
+#define TIMEOUT_INIT(_x) { /* static initializer for the Timeout object _x; timer and timeout start out zero */ \
+ .node = C_RBNODE_INIT((_x).node), \
+ }
+
+int timer_init(Timer *timer);
+void timer_deinit(Timer *timer);
+
+void timer_now(Timer *timer, uint64_t *nowp);
+
+int timer_pop_timeout(Timer *timer, uint64_t now, Timeout **timerp);
+void timer_rearm(Timer *timer);
+int timer_read(Timer *timer);
+
+void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time);
+void timeout_unschedule(Timeout *timeout);
+
diff --git a/shared/n-acd/subprojects/c-list b/shared/n-acd/subprojects/c-list
index 72c59181d6..dda36d30c7 160000
--- a/shared/n-acd/subprojects/c-list
+++ b/shared/n-acd/subprojects/c-list
@@ -1 +1 @@
-Subproject commit 72c59181d677a3f50b201d51f190b1bff02d4279
+Subproject commit dda36d30c7d655b4d61358519168fa7ce0e9dae9
diff --git a/shared/n-acd/subprojects/c-rbtree b/shared/n-acd/subprojects/c-rbtree
new file mode 160000
index 0000000000..bf627e0c32
--- /dev/null
+++ b/shared/n-acd/subprojects/c-rbtree
@@ -0,0 +1 @@
+Subproject commit bf627e0c32241915108f66ad9738444e4d045b45
diff --git a/shared/n-acd/subprojects/c-siphash b/shared/n-acd/subprojects/c-siphash
index e01ab640dc..b24d2e2048 160000
--- a/shared/n-acd/subprojects/c-siphash
+++ b/shared/n-acd/subprojects/c-siphash
@@ -1 +1 @@
-Subproject commit e01ab640dcf72dfa6928c94a261bf78cd943d9c3
+Subproject commit b24d2e20489b08bb350d67b82f6fb354d6951a1c