From a079b220505ae34997a14fc910af746b126ee11f Mon Sep 17 00:00:00 2001 From: Thomas Forgione Date: Wed, 16 Oct 2019 15:19:49 +0200 Subject: [PATCH] I have no idea what i'm doing --- src/bib.bib | 108 ++++++++++++++++++++++++++ src/foreword/3d-model.tex | 2 +- src/state-of-the-art/3d-streaming.tex | 105 +++++++++++++++++-------- src/state-of-the-art/intro.tex | 4 + src/state-of-the-art/main.tex | 3 + src/state-of-the-art/video.tex | 44 +++++------ 6 files changed, 209 insertions(+), 57 deletions(-) create mode 100644 src/state-of-the-art/intro.tex diff --git a/src/bib.bib b/src/bib.bib index 8b297d3..88ab5ce 100644 --- a/src/bib.bib +++ b/src/bib.bib @@ -550,3 +550,111 @@ pages={61--66}, year={2015} } + +@inproceedings{batched-multi-triangulation, + title={Batched multi triangulation}, + author={Cignoni, Paolo and Ganovelli, Fabio and Gobbetti, Enrico and Marton, Fabio and Ponchio, Federico and Scopigno, Roberto}, + booktitle={VIS 05. IEEE Visualization, 2005.}, + pages={207--214}, + year={2005}, + organization={IEEE} +} + +@article{3dhop, + title={3DHOP: 3D heritage online presenter}, + author={Potenziani, Marco and Callieri, Marco and Dellepiane, Matteo and Corsini, Massimiliano and Ponchio, Federico and Scopigno, Roberto}, + journal={Computers \& Graphics}, + volume={52}, + pages={129--141}, + year={2015}, + publisher={Elsevier} +} + +@inproceedings{progressive-compression-textured-meshes, + title={Cost-driven framework for progressive compression of textured meshes}, + author={Portaneri, C{\'e}dric and Alliez, Pierre and Hemmer, Michael and Birklein, Lukas and Schoemer, Elmar}, + booktitle={Proceedings of the 10th ACM Multimedia Systems Conference}, + pages={175--188}, + year={2019}, + organization={ACM} +} + + +@article{zampoglou, + title={Adaptive streaming of complex Web 3D scenes based on the MPEG-DASH standard}, + author={Zampoglou, Markos and Kapetanakis, Kostas and Stamoulias, Andreas and Malamos, Athanasios G and Panagiotakis, Spyros}, + 
journal={Multimedia Tools and Applications}, + volume={77}, + number={1}, + pages={125--148}, + year={2018}, + publisher={Springer} +} + +@article{batex3, + title={Batex3: Bit allocation for progressive transmission of textured 3-d models}, + author={Tian, Dihong and AlRegib, Ghassan}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={18}, + number={1}, + pages={23--35}, + year={2008}, + publisher={IEEE} +} + +@article{visual-quality-assessment, + title={Subjective and objective visual quality assessment of textured 3D meshes}, + author={Guo, Jinjiang and Vidal, Vincent and Cheng, Irene and Basu, Anup and Baskurt, Atilla and Lavou{\'e}, Guillaume}, + journal={ACM Transactions on Applied Perception (TAP)}, + volume={14}, + number={2}, + pages={11}, + year={2017}, + publisher={ACM} +} + +@inproceedings{mesh-texture-multiplexing, + author = {Yang, Sheng and Lee, Chao-Hua and Kuo, C.-C. Jay}, + title = {Optimized Mesh and Texture Multiplexing for Progressive Textured Model Transmission}, + booktitle = {Proceedings of the 12th Annual ACM International Conference on Multimedia}, + series = {MULTIMEDIA '04}, + month = {Oct}, + year = {2004}, + isbn = {1-58113-893-8}, + location = {New York, NY, USA}, + pages = {676--683}, + numpages = {8}, + url = {http://doi.acm.org/10.1145/1027527.1027683}, + doi = {10.1145/1027527.1027683}, + acmid = {1027683}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {mesh-texture multiplexing, progressive transmission, rate-distortion surface} +} + +@inproceedings{x3dom-scalable, + address = {Los Angeles, California}, + author = {Behr, J. and Jung, Y. and Keil, J. and Drevensek, T. and Zoellner, M. and Eschler, P.
and Fellner, D.}, + booktitle = {Proceedings of the 15th International Conference on Web 3D Technology - Web3D '10}, + location = {Los Angeles, California}, + doi = {10.1145/1836049.1836077}, + isbn = {9781450302098}, + pages = {185--194}, + publisher = {ACM}, + title = {{A scalable architecture for the HTML5/X3D integration model X3DOM}}, + url = {http://portal.acm.org/citation.cfm?doid=1836049.1836077}, + month = {Jul}, + year = {2010} +} + + +@article{pop-buffer, + title={The pop buffer: Rapid progressive clustering by geometry quantization}, + author={Limper, Max and Jung, Yvonne and Behr, Johannes and Alexa, Marc}, + journal={Computer Graphics Forum}, + volume={32}, + number={7}, + pages={197--206}, + year={2013}, + publisher={Wiley Online Library} +} diff --git a/src/foreword/3d-model.tex b/src/foreword/3d-model.tex index c33d45a..73329cf 100644 --- a/src/foreword/3d-model.tex +++ b/src/foreword/3d-model.tex @@ -3,7 +3,7 @@ The previous chapter voluntarily remained vague about what \emph{3D data} actual This chapter presents in detail the 3D data we consider and how it is renderer. We also give insights about interaction and streaming by comparing the 3D case to the video one. -\section{What is a 3D model?} +\section{What is a 3D model?\label{f:3d}} \subsection{3D data} Most classical 3D models are set of mesh and textures, that can potentially be arranged in a scene graph. diff --git a/src/state-of-the-art/3d-streaming.tex b/src/state-of-the-art/3d-streaming.tex index 6455ecb..7c77c48 100644 --- a/src/state-of-the-art/3d-streaming.tex +++ b/src/state-of-the-art/3d-streaming.tex @@ -66,55 +66,92 @@ When the model is light enough, it is encoded as is, and the operations needed t Thus, a client can start by downloading the low resolution model, display it to the user, and keep downloading and displaying details as time goes by. This process reduces the time a user has to wait before seeing something, and increases the quality of experience.
+Following this, many approaches use multi triangulation, which creates mesh fragments at different levels of resolution and encodes the dependencies between fragments in a directed acyclic graph. +\citep{batched-multi-triangulation} proposes a GPU-optimized version of multi triangulation that pushes its performance to real time. +It is notably used in 3DHOP (3D Heritage Online Presenter, \citep{3dhop}), a framework to easily build web interfaces to present 3D models to users in the context of cultural heritage. + +Some other approaches use voxels in order to progressively stream 3D models. +It is the case of~\citep{pop-buffer}, which proposes the pop buffer, a progressive compression method that allows efficient decoding, which is particularly useful for mobile devices. + More recently, to answer the need for a standard format for 3D data, the Khronos group has proposed a generic format called glTF (GL Transmission Format,~\citep{gltf}) to handle all types of 3D content representations: point clouds, meshes, animated model, etc. glTF is based on a JSON file, which encodes the structure of a scene of 3D objects. It can contain a scene tree with cameras, meshes, buffers, materials, textures, animations an skinning information. Although relevant for compression, transmission and in particular streaming, this standard does not yet consider view-dependent streaming which is required for large scene remote visualisation. -3D Tiles \citep{3d-tiles} is a specification for visualizing massive 3D geospatial data developed by Cesium and built on glTF\@. +% Zampoglou + +\citep{zampoglou} are the first to propose DASH to stream 3D content. +In their work, the authors describe a system that allows users to access 3D content at multiple resolutions. +They organize the content, following DASH terminology, into periods, adaptation sets, and representations. +Their first adaptation set codes the tree structure of the scene graph.
+Each further adaptation set contains both geometry and texture information and is available at different resolutions defined in a corresponding representation. +To avoid requests that would take too long and thus introduce latency, the representations are split into segments. +The authors discuss the optimal number of polygons that should be stored in a single segment. +On the one hand, using segments containing very few faces will induce many HTTP requests from the client, and will lead to poor streaming efficiency. +On the other hand, if segments contain too many faces, the time to load the segment will be long and the system loses adaptability. +This approach works well for several objects, but does not handle view-dependent streaming, which is desirable in the use case of large NVEs\@. + +\subsection{Viewpoint dependency} + +3D Tiles \citep{3d-tiles} is a specification for visualizing massive 3D geospatial data developed by Cesium and built on top of glTF\@. Their main goal is to display 3D objects on top of regular maps, and the data they use is quite different from ours: while they have nice and regular polygons with all the semantic they need, we only work on a polygon soup with textures. Their use case is also different from ours, while their interface allows a user to have a top vision of a city, we want our users to move inside a city. -\copied{} -\subsection{Prefetching in NVE} -The general prefetching problem can be described as follows: what are the data most likely to be accessed by the user in the near future, and in what order do we download the data? - -The simplest answer to the first question assumes that the user would likely access content close to the current position, thus would retrieve the 3D content within a given radius of the user (also known as the \textit{area of interest}, or AoI). +Another way to implement viewpoint dependency is to access the content near the user's camera. 
This approach, implemented in Second Life and several other NVEs (e.g.,~\citep{peer-texture-streaming}), only depends on the location of the avatar, not on its viewing direction. It exploits spatial locality and works well for any continuous movement of the user, including turning. Once the set of objects that are likely to be accessed by the user is determined, the next question is in what order should these objects be retrieved. A simple approach is to retrieve the objects based on distance: the spatial distance from the user's virtual location and rotational distance from the user's view. -Other approaches consider the movement of the user and attempt to predict where the user will move to in the future. -\citep{motion-prediction} and~\citep{walkthrough-ve} predict the direction of movement from the user's mouse input pattern. -The predicted mouse movement direction is then mapped to the navigation path in the NVE\@. -Objects that fall in the predicted path are then prefetched. -CyberWalk~\citep{cyberwalk} uses an exponentially weighted moving average of past movement vectors, adjusted with the residual of prediction, to predict the next location of the user. +\subsection{Geometry and textures} -\citep{prefetching-walkthrough-latency} cluster the navigation paths of users and use them to predict the future navigation paths. -Objects that fall within the predicted navigation path are prefetched. -All these approaches work well for a navigation path that is continuous --- once the user clicks on a bookmark and jumps to a new location, the path is no longer continuous and the prediction becomes wrong. +As discussed in Section~\ref{f:3d}, meshes consist of two main types of data: geometry and textures. +When addressing 3D streaming, one must find a compromise between geometry and textures, and a system needs to solve this compromise.
-Moving beyond ordering objects to prefetch based on distance only,~\citep{caching-prefetching-dve} propose to predict the user's interest in an object as well. -Objects within AoI are then retrieved in decreasing order of predicted interest value to the user. +Balancing the streaming of geometry and texture data is considered by~\citep{batex3},~\citep{visual-quality-assessment}, and~\citep{mesh-texture-multiplexing}. +All three works consider a single, manifold textured mesh model with progressive meshes. +Their approach is to combine the distortion caused by having lower-resolution meshes and textures into a single view-independent metric. -% \cite{learning-user-access-patterns} investigates how to render large-scale 3-D scenes on a thin client. -% Efficient scene prefetching to provide timely data with a limited cache is one of the most critical issues for remote 3-D data scheduling in networked virtual environment applications. -% Existing prefetching schemes predict the future positions of each individual user based on user traces. -% In this paper, we investigate scene content sequences accessed by various users instead of user viewpoint traces and propose a user access pattern-based 3-D scene prefetching scheme. -% We make a relationship graph-based clustering to partition history user access sequences into several clusters and choose representative sequences from among these clusters as user access patterns. -% Then, these user access patterns are prioritized by their popularity and users' personal preference. -% Based on these access patterns, the proposed prefetching scheme predicts the scene contents that will most likely be visited in the future and delivers them to the client in advance. - -\citep{remote-rendering-streaming} investigate remote image-based rendering (IBR) as the most suitable solution for rendering complex 3D scenes on mobile devices, where the server renders the 3D scene and streams the rendered images to the client.
-However, sending a large number of images is inefficient due to the possible limitations of wireless connections. -They propose a prefetching scheme at the server side that predicts client movements and hence prefetches the corresponding images. - -Prefetching techniques easing 3D data streaming and real-time rendering for remote walkthroughs are considered in~\citep{prefetching-remote-walkthroughs}. -Culling methods, that don't possess frame to frame coherence, can successfully be combined with remote scene databases, if the prefetching algorithm is adapted accordingly. -We present a quantitative transmission policy, that takes the limited bandwidth of the network and the limited memory available at the client computer into account. - -Also in the context remote visualization,~\citep{cache-remote-visualization} study caching and prefetching and optimize configurations of remote visualization architectures. -They aim at minimizing the fetch time in a remote visualization system and defend a practical infrastructure software to adaptively optimize the caching architecture of such systems under varying conditions (e.g.\ when network ressources vary). +\citep{progressive-compression-textured-meshes} also deals with the geometry / texture compromise. +This work designs a cost driven framework for 3D data compression, both in terms of geometry and textures. +This framework generates an atlas for textures that enables efficient compression and multiresolution scheme. +% \copied{} +% \subsection{Prefetching in NVE} +% The general prefetching problem can be described as follows: what are the data most likely to be accessed by the user in the near future, and in what order do we download the data? +% + +% +% Other approaches consider the movement of the user and attempt to predict where the user will move to in the future. +% \citep{motion-prediction} and~\citep{walkthrough-ve} predict the direction of movement from the user's mouse input pattern. 
+% The predicted mouse movement direction is then mapped to the navigation path in the NVE\@. +% Objects that fall in the predicted path are then prefetched. +% CyberWalk~\citep{cyberwalk} uses an exponentially weighted moving average of past movement vectors, adjusted with the residual of prediction, to predict the next location of the user. +% +% \citep{prefetching-walkthrough-latency} cluster the navigation paths of users and use them to predict the future navigation paths. +% Objects that fall within the predicted navigation path are prefetched. +% All these approaches work well for a navigation path that is continuous --- once the user clicks on a bookmark and jumps to a new location, the path is no longer continuous and the prediction becomes wrong. +% +% Moving beyond ordering objects to prefetch based on distance only,~\citep{caching-prefetching-dve} propose to predict the user's interest in an object as well. +% Objects within AoI are then retrieved in decreasing order of predicted interest value to the user. +% +% % \cite{learning-user-access-patterns} investigates how to render large-scale 3-D scenes on a thin client. +% % Efficient scene prefetching to provide timely data with a limited cache is one of the most critical issues for remote 3-D data scheduling in networked virtual environment applications. +% % Existing prefetching schemes predict the future positions of each individual user based on user traces. +% % In this paper, we investigate scene content sequences accessed by various users instead of user viewpoint traces and propose a user access pattern-based 3-D scene prefetching scheme. +% % We make a relationship graph-based clustering to partition history user access sequences into several clusters and choose representative sequences from among these clusters as user access patterns. +% % Then, these user access patterns are prioritized by their popularity and users' personal preference. 
+% % Based on these access patterns, the proposed prefetching scheme predicts the scene contents that will most likely be visited in the future and delivers them to the client in advance. +% +% \citep{remote-rendering-streaming} investigate remote image-based rendering (IBR) as the most suitable solution for rendering complex 3D scenes on mobile devices, where the server renders the 3D scene and streams the rendered images to the client. +% However, sending a large number of images is inefficient due to the possible limitations of wireless connections. +% They propose a prefetching scheme at the server side that predicts client movements and hence prefetches the corresponding images. +% +% Prefetching techniques easing 3D data streaming and real-time rendering for remote walkthroughs are considered in~\citep{prefetching-remote-walkthroughs}. +% Culling methods, that don't possess frame to frame coherence, can successfully be combined with remote scene databases, if the prefetching algorithm is adapted accordingly. +% We present a quantitative transmission policy, that takes the limited bandwidth of the network and the limited memory available at the client computer into account. +% +% Also in the context remote visualization,~\citep{cache-remote-visualization} study caching and prefetching and optimize configurations of remote visualization architectures. +% They aim at minimizing the fetch time in a remote visualization system and defend a practical infrastructure software to adaptively optimize the caching architecture of such systems under varying conditions (e.g.\ when network ressources vary). +% diff --git a/src/state-of-the-art/intro.tex b/src/state-of-the-art/intro.tex new file mode 100644 index 0000000..df7808a --- /dev/null +++ b/src/state-of-the-art/intro.tex @@ -0,0 +1,4 @@ +\fresh{} +In this chapter, we present the related work on topics similar to ours. 
+As discussed in the previous chapter, video and 3D share many similarities, which is why this chapter starts with a review of video streaming. +We then present 3D streaming, and we end with 3D navigation. diff --git a/src/state-of-the-art/main.tex b/src/state-of-the-art/main.tex index 65aa3e3..7123013 100644 --- a/src/state-of-the-art/main.tex +++ b/src/state-of-the-art/main.tex @@ -1,5 +1,8 @@ \chapter{Related work\label{sote}} +\input{state-of-the-art/intro} +\resetstyle{} + \input{state-of-the-art/video} \resetstyle{} diff --git a/src/state-of-the-art/video.tex b/src/state-of-the-art/video.tex index 963b59f..cfca40e 100644 --- a/src/state-of-the-art/video.tex +++ b/src/state-of-the-art/video.tex @@ -87,25 +87,25 @@ An example of such a property is given in Listing~\ref{sota:srd-xml}. Essentially, this feature is a way of achieving view-dependent streaming, since the client only displays a part of the video and can avoid downloading content that will not be displayed. This is especially interesting in the context of 3D streaming since we have this same pattern of a user viewing only a part of a content. -\subsection{Prefetching in video streaming} -\copied{} - -We briefly survey other research on prefetching that focuses on non-continuous interaction in other types of media. - -In the context of navigating in a video, a recent work in~\citep{video-bookmarks} prefetches video chunks located after bookmarks along the video timeline. -Their work, however, focuses on changing the user behavior to improve the prefetching hit rate, by depicting the state of the prefetched buffer to the user. -Carlier et al.\ also consider prefetching in the context of zoomable videos in an earlier work~\citep{zoomable-video}, and showed that predicting which region of videos the user will zoom into or pan to by analyzing interaction traces from users is difficult.
- -Prefetching for navigation through a sequence of short online videos is considered in~\citep{user-generated-videos}. -Each switch from the current video to the next can be treated as a non-continuous interaction. -The authors proposed recommendation-aware prefetching --- to prefetch the prefix of videos from the search result list and related video list, as these videos are likely to be of interest to the user and other users from the same community. - -\citep{video-navigation-mpd} consider the problem of prefetching in the context of a hypervideo; non-continuous interaction happens when users click on a hyperlink in the video. -They propose a formal framework that captures the click probability, the bandwidth, and the bit rate of videos as a markov decision problem, and derive an optimal prefetching policy. - -\citep{joserlin} propose Joserlin, a generic framework for prefetching that applies to any non-continuous media, but focuses on peer-to-peer streaming applications. -They do not predict which item to prefetch, but rather focus on how to schedule the prefetch request and response. - -There is a huge body of work on prefetching web objects in the context of the world wide web. -Interested readers can refer to numerous surveys written on this topic, e.g.~\citep{survey-caching-prefetching}. - +% \subsection{Prefetching in video streaming} +% \copied{} +% +% We briefly survey other research on prefetching that focuses on non-continuous interaction in other types of media. +% +% In the context of navigating in a video, a recent work in~\citep{video-bookmarks} prefetches video chunks located after bookmarks along the video timeline. +% Their work, however, focuses on changing the user behavior to improve the prefetching hit rate, by depicting the state of the prefetched buffer to the user. 
+% Carlier et al.\ also consider prefetching in the context of zoomable videos in an earlier work~\citep{zoomable-video}, and showed that predicting which region of videos the user will zoom into or pan to by analyzing interaction traces from users is difficult. +% +% Prefetching for navigation through a sequence of short online videos is considered in~\citep{user-generated-videos}. +% Each switch from the current video to the next can be treated as a non-continuous interaction. +% The authors proposed recommendation-aware prefetching --- to prefetch the prefix of videos from the search result list and related video list, as these videos are likely to be of interest to the user and other users from the same community. +% +% \citep{video-navigation-mpd} consider the problem of prefetching in the context of a hypervideo; non-continuous interaction happens when users click on a hyperlink in the video. +% They propose a formal framework that captures the click probability, the bandwidth, and the bit rate of videos as a markov decision problem, and derive an optimal prefetching policy. +% +% \citep{joserlin} propose Joserlin, a generic framework for prefetching that applies to any non-continuous media, but focuses on peer-to-peer streaming applications. +% They do not predict which item to prefetch, but rather focus on how to schedule the prefetch request and response. +% +% There is a huge body of work on prefetching web objects in the context of the world wide web. +% Interested readers can refer to numerous surveys written on this topic, e.g.~\citep{survey-caching-prefetching}. +%