add convert pdf py

9ef2d24b · weiwei · 849567b1 · 9ef2d24b · 9ef2d24b
--- a/convertodt.py
+++ b/convertodt.py
+#python3
+#encoding=utf-8
+import sys
+import requests, json
+import json
+
+import os
+
+# For reference, the CI file passes arguments in this form. The original note
+# showed the convertpdf.py invocation; presumably this script is invoked with
+# the same arguments -- confirm against the CI file.
+# python3 convertpdf.py ${CODECODE_PROTOCOL}${CODECODE_DOMAIN}:3000 ci_job_token ${CI_JOB_TOKEN} ${CI_PROJECT_ID}
+
+# Imported here so this script section stays self-contained: external tools
+# are launched with argument lists instead of shell command strings.
+import subprocess
+
+# Seconds to wait for each API response, so a stalled server cannot hang the
+# job indefinitely.
+REQUEST_TIMEOUT = 30
+
+# Host address and port, used to build the API URLs.
+host_with_port = sys.argv[1]
+
+# Name of the query parameter that carries the token (e.g. ci_job_token).
+token_type_str = sys.argv[2]
+
+# The token value itself.
+token = sys.argv[3]
+
+# The project id.
+projectid = sys.argv[4]
+
+# Query the host-info API. `params=` lets requests URL-encode the values
+# instead of relying on raw string concatenation.
+r_host = requests.get(
+    host_with_port + "/api/v4/gethostinfo",
+    params={token_type_str: token},
+    timeout=REQUEST_TIMEOUT,
+)
+# The response body is already parsed JSON; no dumps/loads round-trip needed.
+hostinfo = r_host.json()
+# NOTE(review): host_with_port_info is not used anywhere in this script. The
+# request is kept so existing behaviour (including failing early on a bad
+# host/token) does not change -- confirm whether it can be removed.
+if not hostinfo['host_port']:
+    host_with_port_info = hostinfo['host_domain']
+else:
+    # Formatting (rather than "+") also works if the port arrives as a number.
+    host_with_port_info = "{}:{}".format(hostinfo['host_domain'], hostinfo['host_port'])
+
+# Query the API that lists the files submitted by the student.
+r_studentfiles = requests.get(
+    host_with_port + "/api/v4/studentfileinfo",
+    params={token_type_str: token, "projectid": projectid},
+    timeout=REQUEST_TIMEOUT,
+)
+studentfilesinfo = r_studentfiles.json()
+
+# List of submitted file paths.
+filenames_arr = studentfilesinfo['filesinfo']
+
+# Walk through all files.
+# Per the original author's note, the API returns files in descending creation
+# order, so the script exits right after processing the first (most recent)
+# .docx document.
+for item in filenames_arr:
+    filename, ext_name = os.path.splitext(item)
+    # Only .docx files are converted to .odt and analysed.
+    if ext_name != ".docx":
+        continue
+
+    # If the file is missing on disk, report it and move on to the next entry.
+    # (The original passed this message to os.system, which tried to execute
+    # it as a shell command instead of printing it.)
+    if not os.path.exists(item):
+        print("没有找到文件：" + item)
+        continue
+
+    # Convert the document to .odt. soffice writes into the current working
+    # directory unless --outdir is given, so point it at the source file's
+    # directory to make the result land at `filename + ".odt"`.
+    out_dir = os.path.dirname(item) or "."
+    subprocess.run(
+        ["soffice", "--headless", "--convert-to", "odt", "--outdir", out_dir, item]
+    )
+    # Path of the generated .odt file.
+    file_odt = filename + ".odt"
+    # Collect the report statistics. Arguments are passed as a list so file
+    # names with spaces, quotes or shell metacharacters are handled safely.
+    subprocess.run(
+        [
+            "python3",
+            "storepy/statistics.py",
+            file_odt,
+            host_with_port,
+            projectid,
+            token_type_str,
+            token,
+        ]
+    )
+
+    # Done: only the latest .docx is processed.
+    sys.exit(0)
--- a/convertpdf.py
+++ b/convertpdf.py
+#python3
+#encoding=utf-8
+import sys
+import requests, json
+import json
+
+import os
+
+# For reference, these are the arguments passed in from the CI file:
+# python3 convertpdf.py ${CODECODE_PROTOCOL}${CODECODE_DOMAIN}:3000 ci_job_token ${CI_JOB_TOKEN} ${CI_PROJECT_ID}
+
+# Imported here so this script section stays self-contained: the converter is
+# launched with an argument list instead of a shell command string.
+import subprocess
+
+# Seconds to wait for each API response, so a stalled server cannot hang the
+# job indefinitely.
+REQUEST_TIMEOUT = 30
+
+# Host address and port, used to build the API URLs.
+host_with_port = sys.argv[1]
+
+# Name of the query parameter that carries the token (e.g. ci_job_token).
+token_type_str = sys.argv[2]
+
+# The token value itself.
+token = sys.argv[3]
+
+# The project id.
+projectid = sys.argv[4]
+
+# Query the host-info API. `params=` lets requests URL-encode the values
+# instead of relying on raw string concatenation.
+r_host = requests.get(
+    host_with_port + "/api/v4/gethostinfo",
+    params={token_type_str: token},
+    timeout=REQUEST_TIMEOUT,
+)
+# The response body is already parsed JSON; no dumps/loads round-trip needed.
+hostinfo = r_host.json()
+# NOTE(review): host_with_port_info is not used anywhere in this script. The
+# request is kept so existing behaviour (including failing early on a bad
+# host/token) does not change -- confirm whether it can be removed.
+if not hostinfo['host_port']:
+    host_with_port_info = hostinfo['host_domain']
+else:
+    # Formatting (rather than "+") also works if the port arrives as a number.
+    host_with_port_info = "{}:{}".format(hostinfo['host_domain'], hostinfo['host_port'])
+
+# Query the API that lists the files submitted by the student.
+r_studentfiles = requests.get(
+    host_with_port + "/api/v4/studentfileinfo",
+    params={token_type_str: token, "projectid": projectid},
+    timeout=REQUEST_TIMEOUT,
+)
+studentfilesinfo = r_studentfiles.json()
+
+# List of submitted file paths.
+filenames_arr = studentfilesinfo['filesinfo']
+
+# Walk through all files.
+for item in filenames_arr:
+    # Only .docx and .pptx files are converted to PDF.
+    filename, ext_name = os.path.splitext(item)
+    if ext_name not in (".docx", ".pptx"):
+        continue
+
+    # If the file is missing on disk, report it and move on to the next entry.
+    # (The original passed this message to os.system, which tried to execute
+    # it as a shell command instead of printing it.)
+    if not os.path.exists(item):
+        print("没有找到文件：" + item)
+        continue
+
+    # Convert the file to PDF, writing the result into ./public. Arguments are
+    # passed as a list so file names with spaces, quotes or shell
+    # metacharacters are handled safely.
+    convert_pdf = [
+        "soffice",
+        "--headless",
+        "--convert-to",
+        "pdf",
+        item,
+        "--outdir",
+        "./public",
+    ]
+    subprocess.run(convert_pdf)
\ No newline at end of file