From 2782f24690894ca011069e0b0f20949629f9a9a9 Mon Sep 17 00:00:00 2001 From: Sam Samskies Date: Mon, 27 Feb 2023 09:21:38 -1000 Subject: [PATCH] fix URL parsing edge cases (#360) This should fix the following: - URLs in parentheses - URLs at the end of a sentence --- packages/app/src/Const.ts | 7 ------ packages/app/src/Element/Text.tsx | 6 ++--- packages/app/src/Util.test.ts | 40 +++++++++++++++++++++++++++++++ packages/app/src/Util.ts | 7 ++++++ 4 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 packages/app/src/Util.test.ts diff --git a/packages/app/src/Const.ts b/packages/app/src/Const.ts index 9745227f..e6b3ea2d 100644 --- a/packages/app/src/Const.ts +++ b/packages/app/src/Const.ts @@ -84,13 +84,6 @@ export const EmailRegex = // eslint-disable-next-line no-useless-escape /^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/; -/** - * Generic URL regex - */ -export const UrlRegex = - // eslint-disable-next-line no-useless-escape - /((?:http|ftp|https):\/\/(?:[\w+?\.\w+])+(?:[a-zA-Z0-9\~\!\@\#\$\%\^\&\*\(\)_\-\=\+\\\/\?\.\:\;\'\,]*)?)/i; - /** * Extract file extensions regex */ diff --git a/packages/app/src/Element/Text.tsx b/packages/app/src/Element/Text.tsx index 1f41ab2f..ae9c87ae 100644 --- a/packages/app/src/Element/Text.tsx +++ b/packages/app/src/Element/Text.tsx @@ -4,8 +4,8 @@ import { Link } from "react-router-dom"; import ReactMarkdown from "react-markdown"; import { visit, SKIP } from "unist-util-visit"; -import { UrlRegex, MentionRegex, InvoiceRegex, HashtagRegex } from "Const"; -import { eventLink, hexToBech32, unwrap } from "Util"; +import { MentionRegex, InvoiceRegex, HashtagRegex } from "Const"; +import { eventLink, hexToBech32, splitByUrl, unwrap } from "Util"; import Invoice from "Element/Invoice"; import Hashtag from "Element/Hashtag"; @@ -36,7 +36,7 @@ export default function Text({ content, tags, creator, users }: TextProps) { return fragments .map(f => { if (typeof f === "string") { - return f.split(UrlRegex).map(a => { + return splitByUrl(f).map(a => { if (a.startsWith("http")) { return ; } diff --git a/packages/app/src/Util.test.ts b/packages/app/src/Util.test.ts new file mode 100644 index 00000000..878f76ed --- /dev/null +++ b/packages/app/src/Util.test.ts @@ -0,0 +1,40 @@ +import { splitByUrl } from "./Util"; + +describe("splitByUrl", () => { + it("should split a string by URLs", () => { + const inputStr = + "@npub1q6mcr8t not https://example.com- sure what your stack is, https://example.com but I made a https://example.com! simple example (https://example.com) of how https://example.com/yo-yo https://example.example.com to do this https://example.com, https://example.com?q=asdf for Next.js apps hosted on Vercel https://example.com. Scarcity in money provides the incentive to create abundance in other things as there is a mechanism to reliably store value. https://i.imgur.com/rkqhjeq.png Every form of money that could be inflated by way of force or technological advancement has been."; + const expectedOutput = [ + "@npub1q6mcr8t not ", + "https://example.com-", + " sure what your stack is, ", + "https://example.com", + " but I made a ", + "https://example.com", + "! simple example (", + "https://example.com", + ") of how ", + "https://example.com/yo-yo", + " ", + "https://example.example.com", + " to do this ", + "https://example.com", + ", ", + "https://example.com?q=asdf", + " for Next.js apps hosted on Vercel ", + "https://example.com", + ". Scarcity in money provides the incentive to create abundance in other things as there is a mechanism to reliably store value. ", + "https://i.imgur.com/rkqhjeq.png", + " Every form of money that could be inflated by way of force or technological advancement has been.", + ]; + + expect(splitByUrl(inputStr)).toEqual(expectedOutput); + }); + + it("should return an array with a single string if no URLs are found", () => { + const inputStr = "This is a regular string with no URLs"; + const expectedOutput = ["This is a regular string with no URLs"]; + + expect(splitByUrl(inputStr)).toEqual(expectedOutput); + }); +}); diff --git a/packages/app/src/Util.ts b/packages/app/src/Util.ts index 49e236bb..4ee046ad 100644 --- a/packages/app/src/Util.ts +++ b/packages/app/src/Util.ts @@ -222,3 +222,10 @@ export function tagFilterOfTextRepost(note: TaggedRawEvent, id?: u256): (tag: st export function groupByPubkey(acc: Record, user: MetadataCache) { return { ...acc, [user.pubkey]: user }; } + +export function splitByUrl(str: string) { + const urlRegex = + /((?:http|ftp|https):\/\/(?:[\w+?.\w+])+(?:[a-zA-Z0-9~!@#$%^&*()_\-=+\\/?.:;',]*)?(?:[-A-Za-z0-9+&@#/%=~_|]))/i; + + return str.split(urlRegex); +}