fix URL parsing edge cases (#360)
This should fix URL detection in the following cases: (1) URLs wrapped in parentheses, and (2) URLs at the end of a sentence.
This commit is contained in:
parent
f934dcd092
commit
2782f24690
@ -84,13 +84,6 @@ export const EmailRegex =
|
|||||||
// eslint-disable-next-line no-useless-escape
|
// eslint-disable-next-line no-useless-escape
|
||||||
/^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/;
|
/^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/;
|
||||||
|
|
||||||
/**
 * Generic URL regex.
 *
 * Matches a URL that starts with one of the schemes `http`, `ftp` or `https`
 * followed by `://`, at least one host character, and then any run of the
 * URL/punctuation characters listed in the final character class. The `i`
 * flag makes the match case-insensitive.
 *
 * The whole URL is wrapped in a single capturing group, so passing this
 * pattern to `String.prototype.split` keeps each matched URL in the
 * resulting array instead of discarding it.
 *
 * NOTE: the trailing character class includes punctuation such as `.`, `,`,
 * `!` and `)`, so punctuation that directly follows a URL is consumed as
 * part of the match.
 */
|
|
||||||
export const UrlRegex =
|
|
||||||
// eslint-disable-next-line no-useless-escape
|
|
||||||
/((?:http|ftp|https):\/\/(?:[\w+?\.\w+])+(?:[a-zA-Z0-9\~\!\@\#\$\%\^\&\*\(\)_\-\=\+\\\/\?\.\:\;\'\,]*)?)/i;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract file extensions regex
|
* Extract file extensions regex
|
||||||
*/
|
*/
|
||||||
|
@ -4,8 +4,8 @@ import { Link } from "react-router-dom";
|
|||||||
import ReactMarkdown from "react-markdown";
|
import ReactMarkdown from "react-markdown";
|
||||||
import { visit, SKIP } from "unist-util-visit";
|
import { visit, SKIP } from "unist-util-visit";
|
||||||
|
|
||||||
import { UrlRegex, MentionRegex, InvoiceRegex, HashtagRegex } from "Const";
|
import { MentionRegex, InvoiceRegex, HashtagRegex } from "Const";
|
||||||
import { eventLink, hexToBech32, unwrap } from "Util";
|
import { eventLink, hexToBech32, splitByUrl, unwrap } from "Util";
|
||||||
import Invoice from "Element/Invoice";
|
import Invoice from "Element/Invoice";
|
||||||
import Hashtag from "Element/Hashtag";
|
import Hashtag from "Element/Hashtag";
|
||||||
|
|
||||||
@ -36,7 +36,7 @@ export default function Text({ content, tags, creator, users }: TextProps) {
|
|||||||
return fragments
|
return fragments
|
||||||
.map(f => {
|
.map(f => {
|
||||||
if (typeof f === "string") {
|
if (typeof f === "string") {
|
||||||
return f.split(UrlRegex).map(a => {
|
return splitByUrl(f).map(a => {
|
||||||
if (a.startsWith("http")) {
|
if (a.startsWith("http")) {
|
||||||
return <HyperText key={a} link={a} creator={creator} />;
|
return <HyperText key={a} link={a} creator={creator} />;
|
||||||
}
|
}
|
||||||
|
40
packages/app/src/Util.test.ts
Normal file
40
packages/app/src/Util.test.ts
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import { splitByUrl } from "./Util";
|
||||||
|
|
||||||
|
describe("splitByUrl", () => {
  // Table of scenarios: [test title, text handed to splitByUrl, expected segments].
  const scenarios: Array<[string, string, string[]]> = [
    [
      "should split a string by URLs",
      "@npub1q6mcr8t not https://example.com- sure what your stack is, https://example.com but I made a https://example.com! simple example (https://example.com) of how https://example.com/yo-yo https://example.example.com to do this https://example.com, https://example.com?q=asdf for Next.js apps hosted on Vercel https://example.com. Scarcity in money provides the incentive to create abundance in other things as there is a mechanism to reliably store value. https://i.imgur.com/rkqhjeq.png Every form of money that could be inflated by way of force or technological advancement has been.",
      [
        "@npub1q6mcr8t not ",
        "https://example.com-",
        " sure what your stack is, ",
        "https://example.com",
        " but I made a ",
        "https://example.com",
        "! simple example (",
        "https://example.com",
        ") of how ",
        "https://example.com/yo-yo",
        " ",
        "https://example.example.com",
        " to do this ",
        "https://example.com",
        ", ",
        "https://example.com?q=asdf",
        " for Next.js apps hosted on Vercel ",
        "https://example.com",
        ". Scarcity in money provides the incentive to create abundance in other things as there is a mechanism to reliably store value. ",
        "https://i.imgur.com/rkqhjeq.png",
        " Every form of money that could be inflated by way of force or technological advancement has been.",
      ],
    ],
    [
      "should return an array with a single string if no URLs are found",
      "This is a regular string with no URLs",
      ["This is a regular string with no URLs"],
    ],
  ];

  // Register one test per scenario, in declaration order.
  for (const [title, text, segments] of scenarios) {
    it(title, () => {
      expect(splitByUrl(text)).toEqual(segments);
    });
  }
});
|
@ -222,3 +222,10 @@ export function tagFilterOfTextRepost(note: TaggedRawEvent, id?: u256): (tag: st
|
|||||||
export function groupByPubkey(acc: Record<HexKey, MetadataCache>, user: MetadataCache) {
|
export function groupByPubkey(acc: Record<HexKey, MetadataCache>, user: MetadataCache) {
|
||||||
return { ...acc, [user.pubkey]: user };
|
return { ...acc, [user.pubkey]: user };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Split a piece of text into alternating plain-text and URL segments.
 *
 * A URL is recognised when it starts with `http://`, `https://` or `ftp://`
 * (case-insensitive), is followed by at least two further characters, and
 * ends on a character that is plausible as the last character of a URL
 * (letters, digits and `-+&@#/%=~_|`). Sentence punctuation directly after a
 * URL such as `.`, `,`, `!` or a closing `)` is therefore left in the
 * following text segment rather than swallowed into the link. As a
 * consequence a URL that genuinely ends in one of those characters is
 * returned without it.
 *
 * The pattern has a single capturing group around the whole URL, so
 * `String.prototype.split` keeps every matched URL in the output. When the
 * input starts or ends with a URL, the corresponding neighbouring text
 * segment is an empty string.
 *
 * @param str - Text that may contain URLs.
 * @returns The text segments and URLs in their original order; a
 *   single-element array containing `str` when no URL is found.
 */
export function splitByUrl(str: string): string[] {
  // Shape: scheme "://" <one host char> <any URL chars>* <one valid final char>.
  // The first host character is matched exactly once (not with `+`): every
  // host character is also accepted by the greedy middle class, so repeating
  // it would add no matches and only create quadratic backtracking on input
  // like "http://......" that has no valid final character.
  const urlRegex =
    /((?:https?|ftp):\/\/[\w+?.][a-zA-Z0-9~!@#$%^&*()_\-=+\\/?.:;',]*[-A-Za-z0-9+&@#/%=~_|])/i;

  return str.split(urlRegex);
}
|
||||||
|
Loading…
Reference in New Issue
Block a user