Add osmo_io with initial poll backend

* make backend configurable for later
* segmentation callback for chunked streams
* logging target for osmo_io
* support partial writes

Change-Id: I50d73cf550d6ce8154bf827bf47408131cf5b0a0
Related: SYS#5094, OS#5751
diff --git a/src/core/Makefile.am b/src/core/Makefile.am
index 50c39d1..80ee458 100644
--- a/src/core/Makefile.am
+++ b/src/core/Makefile.am
@@ -50,6 +50,8 @@
 	msgb.c \
 	netdev.c \
 	netns.c \
+	osmo_io.c \
+	osmo_io_poll.c \
 	panic.c \
 	prbs.c \
 	prim.c \
@@ -107,6 +109,7 @@
 	conv_acc_sse_impl.h \
 	conv_acc_neon_impl.h \
 	crcXXgen.c.tpl \
+	osmo_io_internal.h \
 	stat_item_internal.h \
 	libosmocore.map \
 	$(NULL)
diff --git a/src/core/libosmocore.map b/src/core/libosmocore.map
index 095a4c0..7bca331 100644
--- a/src/core/libosmocore.map
+++ b/src/core/libosmocore.map
@@ -252,6 +252,30 @@
 osmo_init_logging2;
 osmo_int_to_float_str_buf;
 osmo_int_to_float_str_c;
+osmo_io_backend_names;
+osmo_iofd_close;
+osmo_iofd_free;
+osmo_iofd_get_data;
+osmo_iofd_get_fd;
+osmo_iofd_get_name;
+osmo_iofd_get_priv_nr;
+osmo_iofd_init;
+osmo_iofd_ops;
+osmo_iofd_read_disable;
+osmo_iofd_read_enable;
+osmo_iofd_register;
+osmo_iofd_sendto_msgb;
+osmo_iofd_set_alloc_info;
+osmo_iofd_set_data;
+osmo_iofd_set_priv_nr;
+osmo_iofd_setup;
+osmo_iofd_txqueue_clear;
+osmo_iofd_txqueue_len;
+osmo_iofd_unregister;
+osmo_iofd_uring_init;
+osmo_iofd_write_disable;
+osmo_iofd_write_enable;
+osmo_iofd_write_msgb;
 osmo_ip_str_type;
 osmo_isdnhdlc_decode;
 osmo_isdnhdlc_encode;
diff --git a/src/core/logging.c b/src/core/logging.c
index 3095f0d..c6774f5 100644
--- a/src/core/logging.c
+++ b/src/core/logging.c
@@ -313,6 +313,12 @@
 		.enabled = 1, .loglevel = LOGL_NOTICE,
 		.color = "\033[38;5;11m",
 	},
+	[INT2IDX(DLIO)] = {
+		.name = "DLIO",
+		.description = "libosmocore IO Subsystem",
+		.enabled = 1, .loglevel = LOGL_NOTICE,
+		.color = "\033[38;5;67m",
+	},
 };
 
 void assert_loginfo(const char *src)
diff --git a/src/core/osmo_io.c b/src/core/osmo_io.c
new file mode 100644
index 0000000..cfb6d68
--- /dev/null
+++ b/src/core/osmo_io.c
@@ -0,0 +1,602 @@
+/*! \file osmo_io.c
+ * New osmocom async I/O API.
+ *
+ * (C) 2022 by Harald Welte <laforge@osmocom.org>
+ * (C) 2022-2023 by sysmocom - s.f.m.c. GmbH <info@sysmocom.de>
+ * Author: Daniel Willmann <dwillmann@sysmocom.de>
+ *
+ * All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#include "../config.h"
+#if defined(__linux__)
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <talloc.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include <osmocom/core/osmo_io.h>
+#include <osmocom/core/linuxlist.h>
+#include <osmocom/core/logging.h>
+#include <osmocom/core/msgb.h>
+#include <osmocom/core/socket.h>
+#include <osmocom/core/talloc.h>
+#include <osmocom/core/utils.h>
+
+#include "osmo_io_internal.h"
+
+/*! This environment variable can be set to manually set the backend used in osmo_io */
+#define OSMO_IO_BACKEND_ENV "LIBOSMO_IO_BACKEND"
+
+const struct value_string osmo_io_backend_names[] = {	/* backend enum value -> printable name */
+	{ OSMO_IO_BACKEND_POLL, "poll" },
+	{ 0, NULL }	/* NULL string: presumably the usual value_string list terminator */
+};
+
+static enum osmo_io_backend g_io_backend;
+
+/* Used by some tests, can't be static */
+struct iofd_backend_ops osmo_iofd_ops;
+
+/*! Initialize osmo_io for the current thread.
+ *  Nothing needs to be set up for the poll backend; any other backend value trips OSMO_ASSERT. */
+void osmo_iofd_init(void)
+{
+	const bool is_poll = (g_io_backend == OSMO_IO_BACKEND_POLL);
+
+	/* The poll backend is the only one handled here */
+	if (is_poll)
+		return;
+	OSMO_ASSERT(0);
+}
+
+/* Library constructor: pick the backend (environment variable, else the built-in default) so that the
+ * main thread always has pre-initialized osmo_io. Priority 103: run after on_dso_load_select */
+static __attribute__((constructor(103))) void on_dso_load_osmo_io(void)
+{
+	const char *requested = getenv(OSMO_IO_BACKEND_ENV);
+
+	if (!requested)
+		requested = OSMO_IO_BACKEND_DEFAULT;
+
+	if (strcmp("POLL", requested) != 0) {
+		fprintf(stderr, "Invalid osmo_io backend requested: \"%s\"\nCheck the environment variable %s\n", requested, OSMO_IO_BACKEND_ENV);
+		exit(1);
+	}
+
+	g_io_backend = OSMO_IO_BACKEND_POLL;
+	osmo_iofd_ops = iofd_poll_ops;
+	osmo_iofd_init();
+}
+
+/*! Allocate a msghdr for the given iofd
+ *  \param[in] iofd the osmo_io file structure, also used as talloc parent of the msghdr
+ *  \param[in] action the action this msg(hdr) is for (read, write, ..)
+ *  \param[in] msg the msg buffer to use. A new one is allocated if NULL
+ *  \returns the newly allocated msghdr or NULL in case of error */
+struct iofd_msghdr *iofd_msghdr_alloc(struct osmo_io_fd *iofd, enum iofd_msg_action action, struct msgb *msg)
+{
+	struct iofd_msghdr *msghdr = talloc_zero(iofd, struct iofd_msghdr);
+	struct msgb *buf = msg;
+
+	if (!msghdr)
+		return NULL;
+	if (!buf)
+		buf = iofd_msgb_alloc(iofd);
+	if (!buf) {
+		talloc_free(msghdr);
+		return NULL;
+	}
+
+	msghdr->action = action;
+	msghdr->iofd = iofd;
+	msghdr->msg = buf;
+	return msghdr;
+}
+
+/*! Free the msghdr
+ *  \param[in] msghdr the msghdr to free
+ */
+void iofd_msghdr_free(struct iofd_msghdr *msghdr)
+{
+	/* msghdr->msg is never owned by msghdr, it will either be freed in the send path
+	 * or passed on to the read callback which takes ownership. */
+	talloc_free(msghdr);
+}
+
+/*! Allocate a msgb with the size, headroom and talloc context configured in \a iofd (asserts both fit in 16 bit) */
+struct msgb *iofd_msgb_alloc(struct osmo_io_fd *iofd)
+{
+	/* Keep the full width: a uint16_t copy would silently truncate an oversized headroom */
+	unsigned int headroom = iofd->msgb_alloc.headroom;
+	OSMO_ASSERT(headroom < 0xffff && iofd->msgb_alloc.size < 0xffff - headroom);
+	return msgb_alloc_headroom_c(iofd->msgb_alloc.ctx,
+				     iofd->msgb_alloc.size + headroom, headroom, iofd->name);
+}
+
+/*! Take the pending msgb out of the iofd.
+ *  \returns the pending msgb (no longer referenced by the iofd) or NULL if there is none */
+struct msgb *iofd_msgb_pending(struct osmo_io_fd *iofd)
+{
+	struct msgb *taken = iofd->pending;
+
+	/* Detach it so the same buffer cannot be handed out twice */
+	iofd->pending = NULL;
+	return taken;
+}
+
+/*! Return the pending msgb or, if there is none, a newly allocated one.
+ *  \returns the pending msgb, otherwise the result of iofd_msgb_alloc() */
+struct msgb *iofd_msgb_pending_or_alloc(struct osmo_io_fd *iofd)
+{
+	struct msgb *pending = iofd_msgb_pending(iofd);
+
+	/* Prefer the buffer that still holds partial data from an earlier read */
+	if (pending)
+		return pending;
+	return iofd_msgb_alloc(iofd);
+}
+
+/*! Enqueue a message to be sent
+ *
+ *  Appends the message to the back of the queue provided there is enough space.
+ *  \param[in] iofd the file descriptor
+ *  \param[in] msghdr the message to enqueue
+ *  \returns 0 if the message was enqueued successfully,
+ *    -ENOSPC if the queue already contains the maximum number of messages
+ */
+int iofd_txqueue_enqueue(struct osmo_io_fd *iofd, struct iofd_msghdr *msghdr)
+{
+	const bool queue_full = iofd->tx_queue.current_length >= iofd->tx_queue.max_length;
+
+	if (queue_full)
+		return -ENOSPC;
+	llist_add_tail(&msghdr->list, &iofd->tx_queue.msg_queue);
+	iofd->tx_queue.current_length++;
+	/* First message in the queue: ask the backend for write events if the user wants to write */
+	if (iofd->tx_queue.current_length == 1 && iofd->write_enabled)
+		osmo_iofd_ops.write_enable(iofd);
+	return 0;
+}
+
+/*! Put a message back at the front of the queue (used to re-enqueue a msgb after a partial send).
+ *  Always enqueues the message, even if the maximum number of messages is reached.
+ *  \param[in] iofd the file descriptor
+ *  \param[in] msghdr the message to enqueue */
+void iofd_txqueue_enqueue_front(struct osmo_io_fd *iofd, struct iofd_msghdr *msghdr)
+{
+	llist_add(&msghdr->list, &iofd->tx_queue.msg_queue);
+	iofd->tx_queue.current_length++;
+	/* iofd_txqueue_dequeue() disabled writing when it emptied the queue: turn it back on */
+	if (iofd->write_enabled && iofd->tx_queue.current_length == 1)
+		osmo_iofd_ops.write_enable(iofd);
+}
+
+/*! Dequeue a message from the front
+ *  Asks the backend to stop write notifications once the queue has run empty.
+ *  \param[in] iofd the file descriptor
+ *  \returns the msghdr from the front of the queue or NULL if the queue is empty
+ */
+struct iofd_msghdr *iofd_txqueue_dequeue(struct osmo_io_fd *iofd)
+{
+	struct llist_head *lh;
+	struct iofd_msghdr *msghdr;
+
+	if (!iofd->tx_queue.current_length)
+		return NULL;
+
+	lh = iofd->tx_queue.msg_queue.next;
+	OSMO_ASSERT(lh);
+	msghdr = llist_entry(lh, struct iofd_msghdr, list);
+	llist_del(lh);
+	iofd->tx_queue.current_length--;
+
+	if (iofd->tx_queue.current_length == 0)
+		osmo_iofd_ops.write_disable(iofd);
+	return msghdr;
+}
+
+/*! Split the first complete message off \a msg with the help of the segmentation callback.
+ *  On *_HANDLE_ONE or *_HANDLE_MORE \a msg holds exactly one complete message; for *_HANDLE_MORE
+ *  the left-over bytes are returned in a new msgb via \a pending_out. On *_DEFER nothing can be
+ *  delivered yet and \a msg itself is returned via \a pending_out, to be extended by later reads. */
+static enum iofd_seg_act iofd_handle_segmentation(struct osmo_io_fd *iofd, struct msgb *msg, struct msgb **pending_out)
+{
+	int len, pending_len, msg_len = msgb_length(msg);
+	struct msgb *msg_pending;
+
+	if (!iofd->io_ops.segmentation_cb) {
+		*pending_out = NULL;
+		return IOFD_SEG_ACT_HANDLE_ONE;
+	}
+
+	len = iofd->io_ops.segmentation_cb(msg, msg_len);
+	/* No segmentation needed, return */
+	if (len == msg_len) {
+		*pending_out = NULL;
+		return IOFD_SEG_ACT_HANDLE_ONE;
+	}
+	/* Message still incomplete (len > msg_len). A non-positive length is treated the same way:
+	 * splitting there would read outside of msg or never make progress (NOTE: callback contract to be confirmed) */
+	if (len <= 0 || len > msg_len) {
+		*pending_out = msg;
+		return IOFD_SEG_ACT_DEFER;
+	}
+
+	/* Copy the pending data over; msgb_put() verifies the tailroom before anything is written */
+	pending_len = msg_len - len;
+	msg_pending = iofd_msgb_alloc(iofd);
+	OSMO_ASSERT(msg_pending);
+	memcpy(msgb_put(msg_pending, pending_len), msgb_data(msg) + len, pending_len);
+	*pending_out = msg_pending;
+
+	/* Trim the original msgb to size */
+	msgb_trim(msg, len);
+	return IOFD_SEG_ACT_HANDLE_MORE;
+}
+
+/*! Restore message boundaries on read() and pass each complete message, with its own length, to the read callback */
+void iofd_handle_segmented_read(struct osmo_io_fd *iofd, struct msgb *msg, int rc)
+{
+	int res;
+	struct msgb *pending = NULL;
+	if (rc <= 0) {
+		iofd->io_ops.read_cb(iofd, rc, msg);
+		return;
+	}
+
+	do {
+		res = iofd_handle_segmentation(iofd, msg, &pending);
+		if (res != IOFD_SEG_ACT_DEFER)
+			iofd->io_ops.read_cb(iofd, msgb_length(msg), msg);
+		msg = pending;
+	} while (res == IOFD_SEG_ACT_HANDLE_MORE && !iofd->closed);	/* read_cb may close the iofd */
+	if (iofd->closed) {
+		msgb_free(pending);	/* left-over data has no consumer anymore */
+		return;
+	}
+	OSMO_ASSERT(iofd->pending == NULL);
+	iofd->pending = pending;
+}
+
+/* Public functions */
+
+/*! Send a message through a connected socket
+ *
+ *  The message is appended to the internal transmit queue.
+ *  On success (0) ownership of the msgb passes to osmo_io, which will
+ *  internally call msgb_free() after the write request completes.
+ *  In case of an error the msgb stays with the caller, who needs to free it.
+ *  \param[in] iofd file descriptor to write to
+ *  \param[in] msg message buffer to write
+ *  \returns 0 in case of success; a negative value in case of error
+ */
+int osmo_iofd_write_msgb(struct osmo_io_fd *iofd, struct msgb *msg)
+{
+	int rc;
+	struct iofd_msghdr *msghdr = iofd_msghdr_alloc(iofd, IOFD_ACT_WRITE, msg);
+
+	if (!msghdr)
+		return -ENOMEM;
+
+	/* Describe the msgb payload as a one-element I/O vector */
+	msghdr->hdr.msg_iov = &msghdr->iov[0];
+	msghdr->hdr.msg_iovlen = 1;
+	msghdr->iov[0].iov_base = msgb_data(msghdr->msg);
+	msghdr->iov[0].iov_len = msgb_length(msghdr->msg);
+	msghdr->flags = 0;
+
+	rc = iofd_txqueue_enqueue(iofd, msghdr);
+	if (rc >= 0)
+		return 0;
+	iofd_msghdr_free(msghdr);
+	LOGPIO(iofd, LOGL_ERROR, "enqueueing message failed (%d). Rejecting msgb\n", rc);
+	return rc;
+}
+
+/*! Send a message through an unconnected socket
+ *
+ *  The message is appended to the internal transmit queue.
+ *  On success (0) ownership of the msgb passes to osmo_io, which will
+ *  internally call msgb_free() after the write request completes.
+ *  In case of an error the msgb stays with the caller, who needs to free it.
+ *  \param[in] iofd file descriptor to write to (must be in OSMO_IO_FD_MODE_RECVFROM_SENDTO)
+ *  \param[in] msg message buffer to send
+ *  \param[in] sendto_flags Flags to pass to the send call
+ *  \param[in] dest destination address to send the message to; may be NULL
+ *  \returns 0 in case of success; a negative value in case of error
+ */
+int osmo_iofd_sendto_msgb(struct osmo_io_fd *iofd, struct msgb *msg, int sendto_flags, const struct osmo_sockaddr *dest)
+{
+	struct iofd_msghdr *msghdr;
+	int rc;
+
+	OSMO_ASSERT(iofd->mode == OSMO_IO_FD_MODE_RECVFROM_SENDTO);
+
+	msghdr = iofd_msghdr_alloc(iofd, IOFD_ACT_SENDTO, msg);
+	if (!msghdr)
+		return -ENOMEM;
+
+	msghdr->flags = sendto_flags;
+	msghdr->hdr.msg_iov = &msghdr->iov[0];
+	msghdr->hdr.msg_iovlen = 1;
+	msghdr->iov[0].iov_base = msgb_data(msghdr->msg);
+	msghdr->iov[0].iov_len = msgb_length(msghdr->msg);
+	/* Keep a private copy of the destination; without one msg_name stays unset */
+	if (dest) {
+		msghdr->osa = *dest;
+		msghdr->hdr.msg_name = &msghdr->osa.u.sa;
+		msghdr->hdr.msg_namelen = osmo_sockaddr_size(&msghdr->osa);
+	}
+
+	rc = iofd_txqueue_enqueue(iofd, msghdr);
+	if (rc >= 0)
+		return 0;
+	iofd_msghdr_free(msghdr);
+	LOGPIO(iofd, LOGL_ERROR, "enqueueing message failed (%d). Rejecting msgb\n", rc);
+	return rc;
+}
+
+/*! Enable reading from this iofd
+ *  Records the request in the iofd and forwards it to the active backend.
+ *  \param[in] iofd the file descriptor
+ */
+void osmo_iofd_read_enable(struct osmo_io_fd *iofd)
+{
+	iofd->read_enabled = true;
+	osmo_iofd_ops.read_enable(iofd);
+}
+
+/*! Disable reading from this iofd
+ *  Records the request in the iofd and forwards it to the active backend.
+ *  \param[in] iofd the file descriptor
+ */
+void osmo_iofd_read_disable(struct osmo_io_fd *iofd)
+{
+	iofd->read_enabled = false;
+	osmo_iofd_ops.read_disable(iofd);
+}
+
+/*! Enable writing to this iofd
+ *  Records the request; the backend is only notified if the tx queue currently holds messages.
+ *  \param[in] iofd the file descriptor
+ */
+void osmo_iofd_write_enable(struct osmo_io_fd *iofd)
+{
+	iofd->write_enabled = true;
+	if (iofd->tx_queue.current_length > 0)
+		osmo_iofd_ops.write_enable(iofd);
+}
+
+/*! Disable writing to this iofd
+ *  Records the request in the iofd and forwards it to the active backend.
+ *  \param[in] iofd the file descriptor
+ */
+void osmo_iofd_write_disable(struct osmo_io_fd *iofd)
+{
+	iofd->write_enabled = false;
+	osmo_iofd_ops.write_disable(iofd);
+}
+
+/*! Allocate and setup a new iofd
+ *  \param[in] ctx the parent context from which to allocate (also used for msgbs allocated on read)
+ *  \param[in] fd the underlying system file descriptor
+ *  \param[in] name the name of the iofd; a copy is stored, may be NULL
+ *  \param[in] mode the mode of the iofd, whether it should use read()/write(), sendto()/recvfrom()
+ *  \param[in] ioops structure with read/write/send/recv callbacks; copied if not NULL
+ *  \param[in] data user data pointer accessible by the ioops callbacks
+ *  \returns The newly allocated osmo_io_fd struct or NULL on (allocation) failure
+ */
+struct osmo_io_fd *osmo_iofd_setup(const void *ctx, int fd, const char *name, enum osmo_io_fd_mode mode,
+		  const struct osmo_io_ops *ioops, void *data)
+{
+	struct osmo_io_fd *iofd = talloc_zero(ctx, struct osmo_io_fd);
+	if (!iofd)
+		return NULL;
+
+	iofd->fd = fd;
+	iofd->mode = mode;
+	iofd->data = data;
+	iofd->pending = NULL;
+	if (ioops)
+		iofd->io_ops = *ioops;
+
+	/* A name that could not be copied is an allocation failure like any other */
+	iofd->name = talloc_strdup(iofd, name);
+	if (name && !iofd->name) {
+		talloc_free(iofd);
+		return NULL;
+	}
+
+	iofd->msgb_alloc.ctx = ctx;
+	iofd->msgb_alloc.size = OSMO_IO_DEFAULT_MSGB_SIZE;
+	iofd->msgb_alloc.headroom = OSMO_IO_DEFAULT_MSGB_HEADROOM;
+	iofd->tx_queue.max_length = 32;
+	INIT_LLIST_HEAD(&iofd->tx_queue.msg_queue);
+	return iofd;
+}
+
+/*! Register the fd with the underlying backend
+ *
+ *  \param[in] iofd the iofd file descriptor
+ *  \param[in] fd the system fd number that will be registered. If negative, the one already set is used.
+ *  \returns zero on success, a negative value on error
+*/
+int osmo_iofd_register(struct osmo_io_fd *iofd, int fd)
+{
+	/* A negative fd keeps whatever descriptor the iofd already carries */
+	if (fd >= 0)
+		iofd->fd = fd;
+	iofd->closed = false;
+
+	if (!osmo_iofd_ops.register_fd)
+		return 0;
+	return osmo_iofd_ops.register_fd(iofd);
+}
+
+/*! Unregister the fd from the underlying backend
+ *
+ *  \param[in] iofd the file descriptor
+ *  \returns zero on success, a negative value on error
+ */
+int osmo_iofd_unregister(struct osmo_io_fd *iofd)
+{
+	int (*unregister_fn)(struct osmo_io_fd *iofd) = osmo_iofd_ops.unregister_fd;
+
+	/* A backend without an unregister hook has nothing to undo */
+	return unregister_fn ? unregister_fn(iofd) : 0;
+}
+
+/*! Get the number of messages in the tx queue
+ *  \param[in] iofd the file descriptor
+ *  \returns the current length of the transmit queue
+ */
+unsigned int osmo_iofd_txqueue_len(struct osmo_io_fd *iofd)
+{
+	return iofd->tx_queue.current_length;
+}
+
+/*! Clear the transmit queue of the iofd
+ *
+ *  This function frees all messages currently pending in the transmit queue
+ *  \param[in] iofd the file descriptor
+ */
+void osmo_iofd_txqueue_clear(struct osmo_io_fd *iofd)
+{
+	struct iofd_msghdr *msghdr;
+	for (msghdr = iofd_txqueue_dequeue(iofd); msghdr; msghdr = iofd_txqueue_dequeue(iofd)) {
+		msgb_free(msghdr->msg);
+		iofd_msghdr_free(msghdr);
+	}
+}
+
+/*! Free the iofd
+ *
+ *  Safe to call from within the read/write callbacks: freeing is then deferred until it is safe.
+ *  The iofd is closed first in either case.
+ *  \param[in] iofd the file descriptor (NULL is accepted and ignored)
+ */
+void osmo_iofd_free(struct osmo_io_fd *iofd)
+{
+	if (!iofd)
+		return;
+
+	osmo_iofd_close(iofd);
+
+	if (iofd->in_callback) {
+		/* Prevent our parent context from freeing us prematurely; the free happens later */
+		talloc_steal(NULL, iofd);
+		iofd->to_free = true;
+		return;
+	}
+	talloc_free(iofd);
+}
+
+/*! Close the iofd
+ *
+ *  This function closes the underlying fd and clears any messages in the tx queue
+ *  The iofd is not freed and can be assigned a new file descriptor with osmo_iofd_register()
+ *  \param[in] iofd the file descriptor
+ *  \returns 0 on success, a negative value otherwise
+ */
+int osmo_iofd_close(struct osmo_io_fd *iofd)
+{
+	int rc;
+
+	/* Closing an already closed iofd is a no-op */
+	if (iofd->closed)
+		return 0;
+
+	iofd->closed = true;
+
+	/* Drop everything still queued for sending as well as partially read data */
+	osmo_iofd_txqueue_clear(iofd);
+	msgb_free(iofd->pending);
+	iofd->pending = NULL;
+
+	/* Hand over to the backend, if it provides a close hook */
+	rc = osmo_iofd_ops.close ? osmo_iofd_ops.close(iofd) : 0;
+	iofd->fd = -1;
+	return rc;
+}
+
+/*! Set the size and headroom of the msgb allocated when receiving messages
+ *  \param[in] iofd the file descriptor
+ *  \param[in] size the size of the msgb when receiving data
+ *  \param[in] headroom the headroom of the msgb when receiving data */
+void osmo_iofd_set_alloc_info(struct osmo_io_fd *iofd, unsigned int size, unsigned int headroom)
+{
+	iofd->msgb_alloc.headroom = headroom;
+	iofd->msgb_alloc.size = size;
+}
+
+/*! Get the associated user-data from an iofd
+ *  \param[in] iofd the file descriptor
+ *  \returns the data previously set with \ref osmo_iofd_setup() or \ref osmo_iofd_set_data()
+ */
+void *osmo_iofd_get_data(const struct osmo_io_fd *iofd)
+{
+	return iofd->data;
+}
+
+/*! Set the associated user-data of an iofd
+ *  \param[in] iofd the file descriptor
+ *  \param[in] data the data to set
+ */
+void osmo_iofd_set_data(struct osmo_io_fd *iofd, void *data)
+{
+	iofd->data = data;
+}
+
+/*! Get the private number from an iofd
+ *  \param[in] iofd the file descriptor
+ *  \returns the private number stored in the iofd, see \ref osmo_iofd_set_priv_nr()
+ */
+unsigned int osmo_iofd_get_priv_nr(const struct osmo_io_fd *iofd)
+{
+	return iofd->priv_nr;
+}
+
+/*! Set the private number of an iofd
+ *  \param[in] iofd the file descriptor
+ *  \param[in] priv_nr the private number to set
+ */
+void osmo_iofd_set_priv_nr(struct osmo_io_fd *iofd, unsigned int priv_nr)
+{
+	iofd->priv_nr = priv_nr;
+}
+
+/*! Get the underlying file descriptor from an iofd
+ *  \param[in] iofd the file descriptor
+ *  \returns the underlying file descriptor number (set to -1 by osmo_iofd_close()) */
+int osmo_iofd_get_fd(const struct osmo_io_fd *iofd)
+{
+	return iofd->fd;
+}
+
+/*! Get the name of the file descriptor
+ *  \param[in] iofd the file descriptor
+ *  \returns the iofd's own copy of the name given in \ref osmo_iofd_setup() */
+const char *osmo_iofd_get_name(const struct osmo_io_fd *iofd)
+{
+	return iofd->name;
+}
+
+#endif /* defined(__linux__) */
diff --git a/src/core/osmo_io_internal.h b/src/core/osmo_io_internal.h
new file mode 100644
index 0000000..7fc4b6b
--- /dev/null
+++ b/src/core/osmo_io_internal.h
@@ -0,0 +1,132 @@
+/*! \file osmo_io_internal.h */
+
+#pragma once
+
+#include <unistd.h>
+#include <stdbool.h>
+
+#include <osmocom/core/osmo_io.h>
+#include <osmocom/core/linuxlist.h>
+#include <osmocom/core/msgb.h>
+#include <osmocom/core/select.h>
+#include <osmocom/core/socket.h>
+
+#include "../config.h"
+
+#define OSMO_IO_DEFAULT_MSGB_SIZE 1024
+#define OSMO_IO_DEFAULT_MSGB_HEADROOM 128
+
+extern const struct iofd_backend_ops iofd_poll_ops;
+#define OSMO_IO_BACKEND_DEFAULT "POLL"
+
+struct iofd_backend_ops {	/* hooks an I/O backend provides to osmo_io.c */
+	int (*register_fd)(struct osmo_io_fd *iofd);	/*!< may be NULL, see osmo_iofd_register() */
+	int (*unregister_fd)(struct osmo_io_fd *iofd);	/*!< may be NULL, see osmo_iofd_unregister() */
+	int (*close)(struct osmo_io_fd *iofd);		/*!< may be NULL, see osmo_iofd_close() */
+	void (*write_enable)(struct osmo_io_fd *iofd);	/*!< called without NULL check */
+	void (*write_disable)(struct osmo_io_fd *iofd);	/*!< called without NULL check */
+	void (*read_enable)(struct osmo_io_fd *iofd);	/*!< called without NULL check */
+	void (*read_disable)(struct osmo_io_fd *iofd);	/*!< called without NULL check */
+};
+
+struct osmo_io_fd {
+	/*! linked list for internal management */
+	struct llist_head list;
+	/*! actual operating-system level file descriptor */
+	int fd;
+	/*! type of read/write mode to use */
+	enum osmo_io_fd_mode mode;
+
+	/*! flags to guard closing/freeing of iofd */
+	bool closed;		/*!< set by osmo_iofd_close(), cleared by osmo_iofd_register() */
+	bool in_callback;	/*!< true while the poll backend runs its event handler */
+	bool to_free;		/*!< osmo_iofd_free() was called during a callback, free afterwards */
+
+	bool write_enabled;	/*!< last request made via osmo_iofd_write_enable()/_disable() */
+	bool read_enabled;	/*!< last request made via osmo_iofd_read_enable()/_disable() */
+
+	/*! human-readable name to associate with fd */
+	const char *name;
+
+	/*! send/recv (msg) callback functions */
+	struct osmo_io_ops io_ops;
+	/*! Pending msgb to keep partial data during segmentation */
+	struct msgb *pending;
+
+	/*! data pointer passed through to call-back function */
+	void *data;
+	/*! private number, extending \a data */
+	unsigned int priv_nr;
+
+	struct {
+		/*! talloc context from which to allocate msgb when reading */
+		const void *ctx;
+		/*! size of msgb to allocate (excluding headroom) */
+		unsigned int size;
+		/*! headroom to allocate when allocating msgb's */
+		unsigned int headroom;
+	} msgb_alloc;
+
+	struct {
+		/*! maximum length of write queue */
+		unsigned int max_length;
+		/*! current length of write queue */
+		unsigned int current_length;
+		/*! actual linked list implementing the transmit queue */
+		struct llist_head msg_queue;
+	} tx_queue;
+
+	union {	/* backend specific state */
+		struct {
+			struct osmo_fd ofd;	/*!< set up and registered by iofd_poll_register() */
+		} poll;
+		struct {
+			bool read_enabled;
+			bool read_pending;
+			bool write_pending;
+			bool write_enabled;
+			/* TODO: index into array of registered fd's? */
+		} uring;	/* not referenced by the code visible here; presumably for a later io_uring backend */
+	} u;
+};
+
+enum iofd_msg_action {	/* what an iofd_msghdr is used for */
+	IOFD_ACT_READ,
+	IOFD_ACT_WRITE,		/*!< set by osmo_iofd_write_msgb() */
+	IOFD_ACT_RECVFROM,
+	IOFD_ACT_SENDTO,	/*!< set by osmo_iofd_sendto_msgb() */
+	// TODO: SCTP_*
+};
+
+
+/* serialized version of 'struct msghdr' employed by sendmsg/recvmsg */
+struct iofd_msghdr {
+	struct llist_head list;		/*!< entry in tx_queue.msg_queue of the iofd */
+	enum iofd_msg_action action;
+	struct msghdr hdr;		/*!< passed to sendmsg()/recvmsg() */
+	struct osmo_sockaddr osa;	/*!< storage that hdr.msg_name points to */
+	struct iovec iov[1];		/*!< storage that hdr.msg_iov points to */
+	int flags;			/*!< flags argument for sendmsg() */
+
+	struct msgb *msg;		/*!< payload; not owned by the msghdr */
+	struct osmo_io_fd *iofd;
+};
+
+enum iofd_seg_act {	/* result of iofd_handle_segmentation() */
+	IOFD_SEG_ACT_HANDLE_ONE,	/*!< msg holds exactly one message, nothing is left over */
+	IOFD_SEG_ACT_HANDLE_MORE,	/*!< msg holds one message, left-over data was split off */
+	IOFD_SEG_ACT_DEFER,		/*!< message incomplete, keep it until more data arrives */
+};
+
+struct iofd_msghdr *iofd_msghdr_alloc(struct osmo_io_fd *iofd, enum iofd_msg_action action, struct msgb *msg);
+void iofd_msghdr_free(struct iofd_msghdr *msghdr);
+
+struct msgb *iofd_msgb_alloc(struct osmo_io_fd *iofd);
+struct msgb *iofd_msgb_pending(struct osmo_io_fd *iofd);
+struct msgb *iofd_msgb_pending_or_alloc(struct osmo_io_fd *iofd);
+
+void iofd_handle_segmented_read(struct osmo_io_fd *iofd, struct msgb *msg, int rc);
+
+int iofd_txqueue_enqueue(struct osmo_io_fd *iofd, struct iofd_msghdr *msghdr);
+void iofd_txqueue_enqueue_front(struct osmo_io_fd *iofd, struct iofd_msghdr *msghdr);
+struct iofd_msghdr *iofd_txqueue_dequeue(struct osmo_io_fd *iofd);
diff --git a/src/core/osmo_io_poll.c b/src/core/osmo_io_poll.c
new file mode 100644
index 0000000..95aa84a
--- /dev/null
+++ b/src/core/osmo_io_poll.c
@@ -0,0 +1,185 @@
+/*! \file osmo_io_poll.c
+ * New osmocom async I/O API.
+ *
+ * (C) 2022 by Harald Welte <laforge@osmocom.org>
+ * (C) 2022-2023 by sysmocom - s.f.m.c. GmbH <info@sysmocom.de>
+ * Author: Daniel Willmann <dwillmann@sysmocom.de>
+ *
+ * All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#include "../config.h"
+#if defined(__linux__)
+
+#include <errno.h>
+#include <stdio.h>
+#include <talloc.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include <osmocom/core/osmo_io.h>
+#include <osmocom/core/linuxlist.h>
+#include <osmocom/core/logging.h>
+#include <osmocom/core/msgb.h>
+#include <osmocom/core/select.h>
+#include <osmocom/core/socket.h>
+#include <osmocom/core/talloc.h>
+#include <osmocom/core/utils.h>
+
+#include "osmo_io_internal.h"
+
+/* osmo_fd callback body: on OSMO_FD_READ receive into the pending (or a new) msgb and pass it to the
+ * mode specific read callback; on OSMO_FD_WRITE send the message at the front of the tx queue. */
+static void iofd_poll_ofd_cb_recvmsg_sendmsg(struct osmo_fd *ofd, unsigned int what)
+{
+	struct osmo_io_fd *iofd = ofd->data;
+	struct msgb *msg;
+	int rc, flags = 0;
+
+	if (what & OSMO_FD_READ) {
+		struct iofd_msghdr hdr = {0};	/* no stale msg_control/msg_flags for recvmsg() */
+		msg = iofd_msgb_pending_or_alloc(iofd);
+		if (!msg) {
+			LOGP(DLIO, LOGL_ERROR, "iofd(%s): Could not get msgb for reading\n", iofd->name);
+			OSMO_ASSERT(0);
+		}
+		/* Receive behind any data kept from a previous read instead of overwriting it */
+		hdr.msg = msg;
+		hdr.iov[0].iov_base = msg->tail;
+		hdr.iov[0].iov_len = msgb_tailroom(msg);
+		hdr.hdr.msg_iov = &hdr.iov[0];
+		hdr.hdr.msg_iovlen = 1;
+		hdr.hdr.msg_name = &hdr.osa.u.sa;
+		hdr.hdr.msg_namelen = sizeof(hdr.osa);	/* size of the address buffer */
+
+		rc = recvmsg(ofd->fd, &hdr.hdr, flags);
+		if (rc > 0)
+			msgb_put(msg, rc);
+		switch (iofd->mode) {
+		case OSMO_IO_FD_MODE_READ_WRITE:
+			iofd_handle_segmented_read(iofd, msg, rc);
+			break;
+		case OSMO_IO_FD_MODE_RECVFROM_SENDTO:
+			iofd->io_ops.recvfrom_cb(iofd, rc, msg, &hdr.osa);
+			break;
+		case OSMO_IO_FD_MODE_SCTP_RECVMSG_SENDMSG:
+			/* TODO Implement */
+			OSMO_ASSERT(false);
+			break;
+		}
+	}
+
+	if (iofd->closed || !(what & OSMO_FD_WRITE))
+		return;
+
+	struct iofd_msghdr *msghdr = iofd_txqueue_dequeue(iofd);
+	if (!msghdr)
+		return;
+	msg = msghdr->msg;
+	rc = sendmsg(ofd->fd, &msghdr->hdr, msghdr->flags);
+	if (rc > 0 && rc < msgb_length(msg)) {
+		/* Partial write: queue only the unsent remainder again, not the whole message */
+		msgb_pull(msg, rc);
+		msghdr->iov[0].iov_base = msgb_data(msg);
+		msghdr->iov[0].iov_len = msgb_length(msg);
+		iofd_txqueue_enqueue_front(iofd, msghdr);
+		return;
+	}
+
+	switch (iofd->mode) {
+	case OSMO_IO_FD_MODE_READ_WRITE:
+		iofd->io_ops.write_cb(iofd, rc, msg);
+		break;
+	case OSMO_IO_FD_MODE_RECVFROM_SENDTO:
+		iofd->io_ops.sendto_cb(iofd, rc, msg, &msghdr->osa);
+		break;
+	case OSMO_IO_FD_MODE_SCTP_RECVMSG_SENDMSG:
+		OSMO_ASSERT(false);
+		break;
+	}
+	iofd_msghdr_free(msghdr);
+	msgb_free(msg);
+}
+
+/* osmo_fd callback: run the event handler with in_callback set, so that an osmo_iofd_free()
+ * issued from a user callback is deferred until the handler has returned. Always returns 0. */
+static int iofd_poll_ofd_cb_dispatch(struct osmo_fd *ofd, unsigned int what)
+{
+	struct osmo_io_fd *iofd = ofd->data;
+
+	iofd->in_callback = true;
+	iofd_poll_ofd_cb_recvmsg_sendmsg(ofd, what);
+	iofd->in_callback = false;
+
+	/* Carry out a free that was requested while the callbacks were running */
+	if (iofd->to_free)
+		talloc_free(iofd);
+	return 0;
+}
+
+int iofd_poll_register(struct osmo_io_fd *iofd)
+{
+	struct osmo_fd *ofd = &iofd->u.poll.ofd;
+	osmo_fd_setup(ofd, iofd->fd, 0, &iofd_poll_ofd_cb_dispatch, iofd, 0); /* the callback finds iofd in ofd->data */
+	return osmo_fd_register(ofd);
+}
+
+int iofd_poll_unregister(struct osmo_io_fd *iofd)
+{
+	struct osmo_fd *ofd = &iofd->u.poll.ofd;
+	osmo_fd_unregister(ofd);
+	/* no outcome of osmo_fd_unregister() is propagated, success is always reported */
+	return 0;
+}
+
+int iofd_poll_close(struct osmo_io_fd *iofd)
+{
+	osmo_fd_close(&iofd->u.poll.ofd);
+	/* the result of osmo_fd_close() is not evaluated, success is always reported */
+	return 0;
+}
+
+void iofd_poll_read_enable(struct osmo_io_fd *iofd)
+{
+	osmo_fd_read_enable(&iofd->u.poll.ofd);	/* acts on the osmo_fd embedded in the iofd */
+}
+
+void iofd_poll_read_disable(struct osmo_io_fd *iofd)
+{
+	osmo_fd_read_disable(&iofd->u.poll.ofd);	/* acts on the osmo_fd embedded in the iofd */
+}
+
+void iofd_poll_write_enable(struct osmo_io_fd *iofd)
+{
+	osmo_fd_write_enable(&iofd->u.poll.ofd);	/* acts on the osmo_fd embedded in the iofd */
+}
+
+void iofd_poll_write_disable(struct osmo_io_fd *iofd)
+{
+	osmo_fd_write_disable(&iofd->u.poll.ofd);	/* acts on the osmo_fd embedded in the iofd */
+}
+
+const struct iofd_backend_ops iofd_poll_ops = {	/* copied into osmo_iofd_ops when the POLL backend is selected */
+	.register_fd = iofd_poll_register,
+	.unregister_fd = iofd_poll_unregister,
+	.close = iofd_poll_close,
+	.write_enable = iofd_poll_write_enable,
+	.write_disable = iofd_poll_write_disable,
+	.read_enable = iofd_poll_read_enable,
+	.read_disable = iofd_poll_read_disable,
+};
+
+#endif /* defined(__linux__) */