Words in bold
parent
cd65a4ec59
commit
26c892386e
10
crawler.py
10
crawler.py
|
@ -363,13 +363,15 @@ def recursion(nod, article, number, driver, dircrea, bk=False):
|
||||||
else:
|
else:
|
||||||
article += "["+nod.text+"]"+"("+linksite + ")"
|
article += "["+nod.text+"]"+"("+linksite + ")"
|
||||||
elif tag_name=="b" or tag_name=="strong":
|
elif tag_name=="b" or tag_name=="strong":
|
||||||
|
try:
|
||||||
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
|
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
|
||||||
for pnode in p_childNodes:
|
for pnode in p_childNodes:
|
||||||
article, number = recursion(pnode, article, number, driver, dircrea, True)
|
article, number = recursion(pnode, article, number, driver, dircrea, True)
|
||||||
# txt = nod.text
|
except:
|
||||||
# while len(txt) > 0 and txt[-1] == " ":
|
txt = nod.text
|
||||||
# txt = txt[:-1]
|
while len(txt) > 0 and txt[-1] == " ":
|
||||||
# article += " **" + txt + "** "
|
txt = txt[:-1]
|
||||||
|
article += " **" + txt + "** "
|
||||||
elif tag_name=="em":
|
elif tag_name=="em":
|
||||||
if bk:
|
if bk:
|
||||||
article += "**"
|
article += "**"
|
||||||
|
|
Loading…
Reference in New Issue