1
0
ins/main.py
Haisto e22b3d5072
Some checks failed
Haisto / push (push) Failing after 15m0s
ins!
2024-06-24 21:44:07 +08:00

312 lines
11 KiB
Python

import pandas as pd
import os
import re
import datetime
import time
import pytz
import requests
import ssl
import urllib.parse
import OpenSSL
from dateutil import parser
def get_host_info(url):
    """Return the network-location part (host[:port]) of *url*."""
    return urllib.parse.urlparse(url).netloc
def get_certificate_expiration_date(host):
    """Fetch the TLS certificate of ``host:443`` and describe its expiry.

    Returns a string like ``"2025-01-01(剩120天到期)"`` (expiry date plus
    remaining days), or ``''`` when the certificate is expired or absent.
    Network/SSL errors propagate; the caller wraps this in try/except.
    """
    result = ''
    hostname = host
    port = 443
    cert = ssl.get_server_certificate((hostname, port)).encode()
    if cert:
        cert_obj = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert)
        # notAfter is an ASN.1 time in UTC; dateutil yields an aware datetime.
        expire_dt = parser.parse(cert_obj.get_notAfter().decode("UTF-8"))
        if cert_obj.has_expired():
            result = ''
        else:
            # BUG FIX: the original stripped the timezone and subtracted a
            # *local* datetime.now(), skewing remaining_days by the local
            # UTC offset. Compare aware UTC datetimes instead.
            if expire_dt.tzinfo is None:
                expire_dt = expire_dt.replace(tzinfo=datetime.timezone.utc)
            now_utc = datetime.datetime.now(datetime.timezone.utc)
            remaining_days = (expire_dt - now_utc).days
            yymmdd_expiration_date = expire_dt.strftime('%Y-%m-%d')
            result = str(yymmdd_expiration_date) + "(剩" + str(remaining_days) + "天到期)"
    else:
        result = ''
    return result
def get_all_tag(website_info_data):
    """Group the website rows by their semicolon-separated ``Tag`` column.

    Returns ``{"all_tag": [...], "all_tag_info_data": [[row, ...], ...]}``
    where the two lists are index-aligned: ``all_tag_info_data[i]`` holds
    every row carrying tag ``all_tag[i]``, in row order.
    """
    all_tag = []
    all_tag_info_data = []
    # First pass: collect distinct tags in order of first appearance.
    for row_index, row in website_info_data.iterrows():
        cleaned_tags = [t.strip() for t in row["Tag"].split(";") if t.strip() != ""]
        for tag in cleaned_tags:
            if tag not in all_tag:
                all_tag.append(tag)
                all_tag_info_data.append([])
        print("pure_tag_list", cleaned_tags)
        print(
            "tag==>>",
            row_index,
            row["Tag"],
            "pure_tag_list==>>",
            cleaned_tags,
        )
    # Second pass: file each row under every tag it carries.
    for row_index, row in website_info_data.iterrows():
        for raw_tag in row["Tag"].split(";"):
            tag = raw_tag.strip()
            if tag != "":
                all_tag_info_data[all_tag.index(tag)].append(row)
    print("all_tag", all_tag, "all_tag_info_data", all_tag_info_data)
    return {"all_tag": all_tag, "all_tag_info_data": all_tag_info_data}
def short_url(url):
    """Compact *url* for display: drop scheme/www/trailing slash, then
    ellipsize anything longer than 30 characters."""
    for prefix in ("http://", "https://", "www."):
        if url.startswith(prefix):
            url = url[len(prefix):]
    if url.endswith("/"):
        url = url[:-1]
    return url[:30] + "..." if len(url) > 30 else url
def replaceTemplate(template, reInfo, data):
    """Replace the placeholder matched by regex *reInfo* in *template* with *data*.

    Mirrors the original behaviour of replacing ALL occurrences of the text
    of the first regex match (via ``str.replace``).
    BUG FIX: when the pattern does not match, the original crashed with
    ``IndexError`` on ``findall(...)[0]``; now the template is returned
    unchanged so a README template missing one placeholder degrades gracefully.
    """
    match = re.search(reInfo, template)
    if match is None:
        return template
    return template.replace(match.group(0), data)
def create_tag_table_html(tag_name, tag_info_data):
    """Render one tag section as an HTML table flanked by back-to-TOC links.

    *tag_info_data* is a list of row-like mappings with at least the keys
    ``Name``, ``Url`` and ``Description``.
    """
    print("==create_tag_table_html", tag_name)
    back_link = "<a href='#目录'>🔙目录</a>"
    parts = [back_link, "\n", "<table>"]
    # Header row with the decorative column captions.
    parts.append(
        "<tr>"
        "<td width='400'>"
        "<span>(づ。◕‿‿◕。)づ</span><br/><span>Name</span>"
        "</td>"
        "<td>"
        "<span> (●゚ω゚●)</span><br/><span>Description</span>"
        "</td>"
        "</tr>"
    )
    for info_data in tag_info_data:
        print(
            "==>>",
            {
                "Name": info_data["Name"],
                "Url": info_data["Url"],
                "Description": info_data["Description"],
            },
        )
        parts.append(
            "<tr>"
            "<td>" + info_data["Name"] + "</td>"
            "<td>" + info_data["Description"] + "</td>"
            "</tr>"
        )
    parts.extend(["</table>", "\n", back_link, "\n"])
    return "".join(parts)
def main():
    """Regenerate README.md from website_info.csv.

    Pipeline: read the CSV, probe each site (availability, latency, SSL
    expiry) and decorate its ``Name`` cell with status HTML, render the
    overview table and one table per tag, then splice everything into the
    ``EditREADME.md`` template and write ``README.md``.
    """
    print("当前服务器时间", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    # Read the website list.
    website_info_data = pd.read_csv("./website_info.csv")
    # Reverse the rows so the most recently appended entries come first.
    website_info_data = website_info_data.reindex(index=website_info_data.index[::-1])
    print(website_info_data)
    # Probe every site and decorate its Name cell with status markup.
    for website_info_index, website_info_row in website_info_data.iterrows():
        print("=start=>>", website_info_index, website_info_row["Url"])
        expiration_date = ''
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
            # Check whether the site responds at all (5s timeout).
            website_info_row_url_result = requests.get(
                website_info_row["Url"], timeout=5, headers=headers
            )
            try:
                tmp_host = get_host_info(website_info_row["Url"])
                expiration_date_result = get_certificate_expiration_date(tmp_host)
                if len(expiration_date_result) > 0:
                    expiration_date = expiration_date_result
            except Exception:
                # Best effort: a failed SSL lookup must not fail the row.
                expiration_date = ''
            print("!!expiration_date", expiration_date)
            total_ms = str(
                int(website_info_row_url_result.elapsed.total_seconds() * 1000)
            )
            # Green dot + latency for 1xx-4xx responses, red dot otherwise.
            # NOTE(review): the original comment claimed "green for 2xx",
            # but the code accepts 1xx-4xx — behaviour kept as-is.
            if website_info_row_url_result.status_code:
                website_info_row["Name"] = (
                    "<span style='font-weight: 600'>"
                    + website_info_row["Name"]
                    + "</span>"
                    + "<span>"
                    + (
                        " 🟢 " + total_ms + "ms"
                        if str(website_info_row_url_result.status_code)[:1] in ("1", "2", "3", "4")
                        else " 🔴"
                    )
                    + "</span><br/>"
                )
        # Unreachable site: mark it red.
        except Exception as e:
            print("error==", e)
            website_info_row["Name"] = (
                "<span style='font-weight: 600'>"
                + website_info_row["Name"]
                + " 🔴"
                + "</span><br/>"
            )
        finally:
            website_info_row["Name"] = (
                "<span>"
                + website_info_row["Name"]
                + "</span>"
                + "<a href='"
                + website_info_row["Url"]
                + "'>"
                + (short_url(website_info_row["Url"]))
                + "</a>"
                + (
                    "<br/><span>SSL证书到期时间:" + expiration_date + "</span>"
                    if (expiration_date and ("🟢" in website_info_row["Name"])) else ""
                )
            )
            # BUG FIX: iterrows() yields per-row *copies*, so assigning to
            # website_info_row never reached the DataFrame and the status
            # decoration was silently lost when the table was rendered
            # below. Write the decorated name back explicitly.
            website_info_data.at[website_info_index, "Name"] = website_info_row["Name"]
        print("finish", website_info_row["Url"], website_info_row["Name"])
    # Assemble the full overview table.
    website_info_html = "<table>"
    website_info_html = (
        website_info_html
        + "<tr>"
        + "<td width='400'>"
        + "<span>(づ。◕‿‿◕。)づ</span><br/><span>Name</span>"
        + "</td>"
        + "<td>"
        + "<span> (●゚ω゚●)</span><br/><span>Description</span>"
        + "</td>"
        + "<td width='300'>"
        + "<span> ︿( ̄︶ ̄)︿</span><br/><span>Tag</span>"
        + "</td>"
        + "</tr>"
    )
    for website_info_index, website_info_row in website_info_data.iterrows():
        website_info_html = (
            website_info_html
            + "<tr>"
            + "<td>"
            + website_info_row["Name"]
            + "</td>"
            + "<td>"
            + website_info_row["Description"]
            + "</td>"
            + "<td>"
            + website_info_row["Tag"]
            + "</td>"
            + "</tr>"
        )
    website_info_html = website_info_html + "</table>"
    # Fill the placeholders of the EditREADME.md template.
    readme_md = ""
    with open(os.path.join(os.getcwd(), "EditREADME.md"), "r") as load_f:
        readme_md = load_f.read()
    mail_re = r"--insStart----insEnd--"
    in_datetime = datetime.datetime.fromtimestamp(
        int(time.time()), pytz.timezone("Asia/Shanghai")
    ).strftime("%Y-%m-%d %H:%M:%S")
    all_info_content = (
        "\n\n"
        + "## 开源灵感库已收录"
        + str(len(website_info_data))
        + "束灵感INS!"
        + "(~ ̄▽ ̄)~更新时间("
        + in_datetime
        + ")\n\n"
        + website_info_html
        + "\n\n"
    )
    new_read_me = replaceTemplate(readme_md, mail_re, all_info_content)
    print("new_read_me", new_read_me)
    # Generate the per-tag sections.
    tag_re = r"--tagStart----tagEnd--"
    all_tag_result = get_all_tag(website_info_data)
    all_tag = all_tag_result["all_tag"]
    all_tag_info_data = all_tag_result["all_tag_info_data"]
    print("==all_tag_info_data==", all_tag_info_data)
    print("==all_tag==", all_tag)
    all_tag_content = ""
    for tag_content in all_tag:
        tag_html = create_tag_table_html(
            tag_content, all_tag_info_data[all_tag.index(tag_content)]
        )
        tag_whole_content = "## " + tag_content + "\n\n" + tag_html + "\n\n"
        all_tag_content = all_tag_content + tag_whole_content
    new_read_me = replaceTemplate(new_read_me, tag_re, all_tag_content)
    # Build the index anchors linking to each tag section.
    tag_index_info = ""
    for tag_index, tag_content in enumerate(all_tag):
        anchor = (
            "<a href='#"
            + tag_content
            + "'>"
            + tag_content + "(" + str(len(all_tag_info_data[all_tag.index(tag_content)])) + ")"
            + "</a>"
        )
        # Comma-separate every anchor except the last one.
        if tag_index != (len(all_tag) - 1):
            anchor = anchor + ", "
        tag_index_info = tag_index_info + anchor
    tag_index_re = r"--tagIndexInfoStart----tagIndexInfoEnd--"
    new_read_me = replaceTemplate(new_read_me, tag_index_re, tag_index_info)
    # Write the generated content to README.md.
    with open(os.path.join(os.getcwd(), "README.md"), "w") as load_f:
        load_f.write(new_read_me)


main()