net: Introduce recvmmsg socket syscall
Meaning receive multiple messages, reducing the number of syscalls and
net stack entry/exit operations.
Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.
This takes into account comments made by:
. Paul Moore: sock_recvmsg is called only for the first datagram,
sock_recvmsg_nosec is used for the rest.
. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
works in the same fashion as the ppoll one.
If the underlying protocol returns a datagram with MSG_OOB set, this
will make recvmmsg return right away with as many datagrams (+ the OOB
one) it has received so far.
. Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
datagrams and then recvmsg returns an error, recvmmsg will return
the successfully received datagrams, store the error and return it
in the next call.
This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/net.h b/include/linux/net.h
index 529a093..b42bb60 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,7 @@
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
#define SYS_ACCEPT4 18 /* sys_accept4(2) */
+#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
typedef enum {
SS_FREE = 0, /* not allocated */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 3273a0c..59966f1 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -65,6 +65,12 @@
unsigned msg_flags;
};
+/* For recvmmsg/sendmmsg */
+struct mmsghdr {
+ struct msghdr msg_hdr;
+ unsigned msg_len;
+};
+
/*
* POSIX 1003.1g - ancillary data object information
* Ancillary data consits of a sequence of pairs of
@@ -312,6 +318,10 @@
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+struct timespec;
+
+extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
+ unsigned int flags, struct timespec *timeout);
#endif
#endif /* not kernel and not glibc */
#endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..714f063 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -25,6 +25,7 @@
struct list_head;
struct msgbuf;
struct msghdr;
+struct mmsghdr;
struct msqid_ds;
struct new_utsname;
struct nfsctl_arg;
@@ -677,6 +678,9 @@
asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
struct sockaddr __user *, int __user *);
asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
+asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
+ unsigned int vlen, unsigned flags,
+ struct timespec __user *timeout);
asmlinkage long sys_socket(int, int, int);
asmlinkage long sys_socketpair(int, int, int, int __user *);
asmlinkage long sys_socketcall(int call, unsigned long __user *args);