Handle period at end of URL

Fix URL parsing when a period appears at the end of a URL by disallowing the period as a URL's final character. Some characters are not allowed to be the last character of a URL; presently, the period is the only such character. A character is considered the last character of the URL if it is followed by whitespace (as determined by is_whitespace()) or if it is the last character in the string. Closes: https://github.com/damus-io/damus/issues/1638 LNURL1DP68GURN8GHJ7EM9W3SKCCNE9E3K7MF0D3H82UNVWQHKWUN9V4HXGCTHDC6RZVGR8SW3G Signed-off-by: kernelkind <kernelkind@gmail.com> Reviewed-by: William Casarin <jb55@jb55.com> Signed-off-by: William Casarin <jb55@jb55.com>
2024-09-29 16:30:44 +00:00 · 2023-12-21 14:40:06 -05:00 · 2023-12-21 14:40:06 -05:00 · e547e26d99
commit e547e26d99
parent f6044a9eea
3 changed files with 105 additions and 4 deletions
--- a/damus-c/cursor.h
+++ b/damus-c/cursor.h
@ -489,6 +489,32 @@ static inline int is_whitespace(int c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
 }

+
+// Reports whether the character after curChar ends the current token.
+// endChar points one past the last valid character, so reaching it counts
+// as "whitespace" (end of input). Returns 0 if curChar + 1 is already
+// beyond endChar.
+static inline int next_char_is_whitespace(unsigned char *curChar, unsigned char *endChar) {
+    unsigned char *following = curChar + 1;
+
+    // curChar is the very last character of the buffer
+    if (following == endChar)
+        return 1;
+
+    // nothing valid to look at
+    if (following > endChar)
+        return 0;
+
+    return is_whitespace(*following);
+}
+
+// Returns non-zero when c must not be the last character of a URL.
+// Only '.' is rejected at present, so trailing sentence punctuation such as
+// "see http://example.com." is not swallowed into the link.
+// Declared `static inline` like the other helpers in this header so that
+// translation units which include it without calling this function do not
+// get an unused-function warning.
+static inline int char_disallowed_at_end_url(char c){
+    return c == '.';
+}
+
+// Decides whether curChar terminates the URL being scanned.
+// Returns 1 when curChar is whitespace, or when it is immediately followed by
+// whitespace / end of input and is a character that may not end a URL.
+// Returns 0 otherwise.
+static inline int is_final_url_char(unsigned char *curChar, unsigned char *endChar){
+    // whitespace always ends the URL
+    if (is_whitespace(*curChar))
+        return 1;
+
+    // something other than whitespace follows, so this char sits mid-URL
+    if (!next_char_is_whitespace(curChar, endChar))
+        return 0;
+
+    // last char before whitespace / end of input: only stop here when this
+    // char is not permitted to close a URL
+    return char_disallowed_at_end_url(*curChar);
+}
+
 static inline int is_underscore(int c) {
    return c == '_';
 }
@ -670,6 +696,23 @@ static inline int consume_until_whitespace(struct cursor *cur, int or_end) {
    return or_end;
 }

+// Advances the cursor until is_final_url_char() reports the end of a URL.
+// The terminating character itself (whitespace, or a trailing character that
+// may not end a URL) is left unconsumed.
+//
+// Returns 1 if at least one character was consumed before the terminator,
+// 0 if the terminator was the first character seen, and or_end if the cursor
+// reached cur->end without meeting a terminator.
+static inline int consume_until_end_url(struct cursor *cur, int or_end) {
+    int consumedAtLeastOne = 0;
+
+    while (cur->p < cur->end) {
+        if (is_final_url_char(cur->p, cur->end))
+            return consumedAtLeastOne;
+
+        cur->p++;
+        consumedAtLeastOne = 1;
+    }
+
+    return or_end;
+}
+
 static inline int consume_until_non_alphanumeric(struct cursor *cur, int or_end) {
    char c;
    int consumedAtLeastOne = 0;
--- a/damus-c/damus.c
+++ b/damus-c/damus.c
@ -117,7 +117,7 @@ static int consume_url_fragment(struct cursor *cur)

    cur->p++;

-    return consume_until_whitespace(cur, 1);
+    return consume_until_end_url(cur, 1);
 }

 static int consume_url_path(struct cursor *cur)
@ -134,7 +134,7 @@ static int consume_url_path(struct cursor *cur)
    while (cur->p < cur->end) {
        c = *cur->p;

-        if (c == '?' || c == '#' || is_whitespace(c)) {
+        if (c == '?' || c == '#' || is_final_url_char(cur->p, cur->end)) {
            return 1;
        }

@ -152,7 +152,7 @@ static int consume_url_host(struct cursor *cur)
 	while (cur->p < cur->end) {
 		c = *cur->p;
 		// TODO: handle IDNs
-        if (is_alphanumeric(c) || c == '.' || c == '-')
+        if ((is_alphanumeric(c) || c == '.' || c == '-') && !is_final_url_char(cur->p, cur->end))
 		{
 			count++;
 			cur->p++;
--- a/damusTests/UrlTests.swift
+++ b/damusTests/UrlTests.swift
@ -100,5 +100,63 @@ final class UrlTests: XCTestCase {
        XCTAssertEqual(blocks[1].asURL, testURL)
        XCTAssertEqual(blocks[2].asText, " this is not a hashtag!")
    }
-
+    
+    /// A bare host followed by a period at the very end of the input: the period is not part of the URL.
+    func testParseURL_OneURLEndPeriodSimple_RemovesPeriod() {
+        let note = "http://example.com."
+        testParseURL(inputURLString: note, expectedURLs: "http://example.com")
+    }
+    
+    /// A period in the middle of the path is not a trailing period, so the URL must come back unchanged
+    /// (despite the test name, nothing is removed here).
+    func testParseURL_OneURL_RemovesPeriod() {
+        let note = "http://example.com/.test"
+        testParseURL(inputURLString: note, expectedURLs: "http://example.com/.test")
+    }
+    
+    /// A bare host followed by a period and then a space: the period is not part of the URL.
+    func testParseURL_OneURLEndPeriodAndSpaceSimple_RemovesPeriod() {
+        let note = "http://example.com. "
+        testParseURL(inputURLString: note, expectedURLs: "http://example.com")
+    }
+    
+    /// A URL with a path followed by a period at the end of the input: the period is not part of the URL.
+    func testParseURL_OneURLEndPeriodComplex_RemovesPeriod() {
+        let note = "http://example.com/test."
+        testParseURL(inputURLString: note, expectedURLs: "http://example.com/test")
+    }
+    
+    /// Two bare-host URLs, each followed by a period: both periods are dropped.
+    func testParseURL_TwoURLEndPeriodSimple_RemovesPeriods() {
+        let note = "http://example.com. http://example.com."
+        testParseURL(
+            inputURLString: note,
+            expectedURLs: "http://example.com", "http://example.com"
+        )
+    }
+    
+    /// Three bare-host URLs, each followed by a period: all three periods are dropped.
+    func testParseURL_ThreeURLEndPeriodSimple_RemovesPeriods() {
+        let note = "http://example.com. http://example.com. http://example.com."
+        testParseURL(
+            inputURLString: note,
+            expectedURLs: "http://example.com", "http://example.com", "http://example.com"
+        )
+    }
+    
+    /// A URL with a path, then a bare-host URL, each followed by a period: both periods are dropped.
+    func testParseURL_TwoURLEndPeriodFirstComplexSecondSimple_RemovesPeriods() {
+        let note = "http://example.com/test. http://example.com."
+        testParseURL(
+            inputURLString: note,
+            expectedURLs: "http://example.com/test", "http://example.com"
+        )
+    }
+    
+    /// A bare-host URL, then a URL with a path, each followed by a period: both periods are dropped.
+    func testParseURL_TwoURLEndPeriodFirstSimpleSecondComplex_RemovesPeriods() {
+        let note = "http://example.com. http://example.com/test."
+        testParseURL(
+            inputURLString: note,
+            expectedURLs: "http://example.com", "http://example.com/test"
+        )
+    }
+    
+    /// Two URLs with paths, each followed by a period: both periods are dropped.
+    func testParseURL_TwoURLEndPeriodFirstComplexSecondComplex_RemovesPeriods() {
+        let note = "http://example.com/test. http://example.com/test."
+        testParseURL(
+            inputURLString: note,
+            expectedURLs: "http://example.com/test", "http://example.com/test"
+        )
+    }
+    
+    /// A URL ending in a query string followed by a period: the period is not part of the query.
+    func testParseURL_OneURLEndPeriodSerachQuery_RemovesPeriod() {
+        let note = "https://www.example.com/search?q=test+query."
+        testParseURL(
+            inputURLString: note,
+            expectedURLs: "https://www.example.com/search?q=test+query"
+        )
+    }
+}
+
+/// Parses `inputURLString` as note content and asserts that the URL blocks found
+/// match `expectedURLs`, in order.
+///
+/// - Parameters:
+///   - inputURLString: Raw note text handed to `parse_note_content`.
+///   - expectedURLs: The URL strings the parser is expected to extract, in order of appearance.
+func testParseURL(inputURLString: String, expectedURLs: String...) {
+    let parsedURLs: [Block] = parse_note_content(content: .content(inputURLString, nil)).blocks.filter {
+        $0.isURL
+    }
+
+    // Stop on a count mismatch: comparing element-by-element would be meaningless, and
+    // indexing `expectedURLs` by the parsed count would trap when more URLs were parsed
+    // than expected.
+    guard expectedURLs.count == parsedURLs.count else {
+        XCTFail("Expected \(expectedURLs.count) URL(s) but parsed \(parsedURLs.count) from \"\(inputURLString)\"")
+        return
+    }
+
+    for (parsed, expectedString) in zip(parsedURLs, expectedURLs) {
+        guard let expectedURL = URL(string: expectedString) else {
+            XCTFail("Expected URL string is not a valid URL: \(expectedString)")
+            return
+        }
+
+        XCTAssertEqual(parsed.asURL, expectedURL)
+    }
 }