Stream Chat Unreal SDK
Loading...
Searching...
No Matches
HtmlParser.h
1// Copyright 2022 Stream.IO, Inc. All Rights Reserved.
2
3#pragma once
4
5#include "CoreMinimal.h"
6
7namespace HtmlTag
8{
9const FName Anchor{TEXT("a")};
10const FName Strong{TEXT("strong")};
11const FName Emphasis{TEXT("em")};
12const FName InlineCode{TEXT("code")};
13const FName DeletedText{TEXT("del")};
14const FName Heading1{TEXT("h1")};
15const FName Heading2{TEXT("h2")};
16const FName Heading3{TEXT("h3")};
17const FName Heading4{TEXT("h4")};
18const FName Heading5{TEXT("h5")};
19const FName Heading6{TEXT("h6")};
20const FName UnorderedList{TEXT("ul")};
21const FName OrderedList{TEXT("ol")};
22const FName ListItem{TEXT("li")};
23const FName Paragraph{TEXT("p")};
24const FName LineBreak{TEXT("br")};
25const FName HorizontalRule{TEXT("hr")};
26} // namespace HtmlTag
27
28class FHtmlScanner
29{
30public:
31 explicit FHtmlScanner(const FString& InSource);
32
33 enum class ETokenType : uint8
34 {
35 AngleOpen,
36 AngleClose,
37 Slash,
38 Equal,
39 String,
40 Identifier,
41 Content,
42 Eof,
43 Error
44 };
45
46 struct FToken
47 {
49 FStringView Lexeme;
50 };
51
52 FToken ScanToken();
53 const FString& GetOutput() const;
54 int32 PrevStart = 0;
55 int32 Start = 0;
56 int32 Current = 0;
57
58private:
59 TCHAR Advance();
60 void SkipWhitespace();
61
62 bool IsAtEnd() const;
63 TCHAR Peek() const;
64
65 FToken MakeToken(ETokenType Type) const;
66
67 FToken String();
68 FToken Identifier();
69 FToken Content();
70
71 FString Source;
72 bool bInTag = false;
73};
74
84{
85public:
86 struct FElement
87 {
88 FStringView Name;
89 TMap<FStringView, FStringView> Attributes;
90 int32 OpeningTagStart;
91 };
92
93 using FCallbackFn = TFunction<void(const FHtmlParser& Parser)>;
94
95 // Initialize with source string. Doesn't take ownership of string, so caller must ensure it stays in memory.
96 // Callback is called on each content chunk as it is found, along with the stack of surrounding element names
97 explicit FHtmlParser(const FString& Source, FCallbackFn InCallback);
98
99 // Parse the source string. Returns success.
100 bool Parse();
101
102 // The range of the content in the current run
103 // <strong>Hello<em> world</em</strong>
104 // ^^^^^
105 FTextRange GetContentRange() const;
106 // The range from the start of the current element, to the end of the current content
107 // <strong>Hello<em> world</em</strong>
108 // ^^^^^^^^^^^^^
109 FTextRange GetOriginalRange() const;
110 FStringView GetContent() const;
111 const FString& GetOutput() const;
112
113 uint32 Line = 0;
114 int32 ParagraphStartIndex = 0;
115 TArray<FElement> ElementStack;
116
117private:
118 void Advance();
119 bool AdvanceMatching(FHtmlScanner::ETokenType TokenType);
120
121 bool Element();
122 bool Attribute();
123 bool Content();
124 void CloseElement();
125 void Newline(uint32 Index);
126 FCallbackFn Callback;
127 FHtmlScanner::FToken Current;
128 FHtmlScanner Scanner;
129};
Parses a subset of XHTML. Does NOT support:
Definition: HtmlParser.h:84
ETokenType
The type of token.
Definition: Token.h:15
@ Type
Type of the message.
@ Equal
Matches values that are equal to a specified value.