最近研究PDF智能解析,试了下WPS的接口,效果一般。
文档地址:https://solution.wps.cn/docs/convert/pdf-to-docs.html
将pdf转json
<?php
// Submits a PDF conversion task to the WPS convert API and prints the result.
//
// Configuration — replace the placeholders with your real values.
$app_id = "XXX"; // application ID
$app_key = "XXX"; // application key (secret; must never be echoed)
$api_host = "https://solution.wps.cn";
// Target file type (path parameter); set to "docx" to convert to Word instead.
$office_type = "json";
// Request body parameters.
$body_data = [
    'url' => 'http://XXXX/XXX.pdf',
    'page_num_begin' => 1,
    'page_num_end' => 3,
    'text_unify' => true,
    'sheet_option' => 0,
    'export_type' => 'html',
];
$body_json = json_encode($body_data);
if ($body_json === false) {
    // Without this guard a failed encode would sign and send an empty body.
    exit("JSON 编码失败: " . json_last_error_msg() . "<hr>");
}
$date_gmt = gmdate('D, d M Y H:i:s T'); // with gmdate(), T is GMT/UTC
$content_type = "application/json";
// For this POST request the Content-Md5 value is the MD5 of the exact body sent.
$content_md5 = md5($body_json);
// Signature: sha1(app_key . Content-Md5 . Content-Type . Date).
$sign_string = $app_key . $content_md5 . $content_type . $date_gmt;
$signature = sha1($sign_string);
$authorization = "WPS-2:{$app_id}:{$signature}";
$url = $api_host . "/api/developer/v1/office/pdf/convert/to/" . $office_type;
// Build the header list; the signed values must match these headers exactly.
$headers = [
    "Date: {$date_gmt}",
    "Content-Md5: {$content_md5}",
    "Content-Type: {$content_type}",
    "Authorization: {$authorization}",
    "Content-Length: " . strlen($body_json), // explicit byte length, kept for compatibility
];
// Initialise cURL.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the response instead of printing it
curl_setopt($ch, CURLOPT_POST, true); // POST request
curl_setopt($ch, CURLOPT_POSTFIELDS, $body_json); // raw JSON body
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// Bound the wait so an unresponsive server cannot hang the script forever.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
// Execute the request; read status and error before closing the handle.
$response = curl_exec($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$curl_error = curl_error($ch);
curl_close($ch);
// Everything below is written into an HTML page, so remote/variable text is escaped.
$html_flags = ENT_QUOTES | ENT_SUBSTITUTE;
if ($response === false) {
    echo "cURL 错误: " . htmlspecialchars($curl_error, $html_flags, 'UTF-8') . "<hr>";
} else {
    echo "HTTP 状态码: " . $http_code . "<hr>";
    echo "响应内容:<hr>";
    // Pretty-print the response when it is valid JSON, otherwise show it raw.
    $decoded_response = json_decode($response, true);
    if (json_last_error() === JSON_ERROR_NONE) {
        echo "<pre>" . htmlspecialchars(print_r($decoded_response, true), $html_flags, 'UTF-8') . "</pre>";
    } else {
        echo htmlspecialchars($response, $html_flags, 'UTF-8') . "<hr>";
    }
}
// Extra debugging information.
echo "<hr>--- 调试信息 ---<hr>";
echo "Date Header: {$date_gmt}<hr>";
echo "Content-Md5 Header: {$content_md5}<hr>";
echo "Content-Type Header: {$content_type}<hr>";
echo "Body (JSON): " . htmlspecialchars($body_json, $html_flags, 'UTF-8') . "<hr>";
// The sign string starts with the secret key, so mask that part before printing.
$masked_sign_string = str_repeat('*', strlen($app_key)) . $content_md5 . $content_type . $date_gmt;
echo "Sign String: {$masked_sign_string}<hr>";
echo "Signature (SHA1): {$signature}<hr>";
echo "Authorization Header: {$authorization}<hr>";
?>
下载json文件
<?php
// Fetches the result of a previously submitted conversion task and prints it.
//
// Configuration — replace the placeholders with your real values.
$app_id = "XXX"; // application ID
$app_key = "XXX"; // application key (secret; must never be echoed)
$api_host = "https://solution.wps.cn";
$office_type = "json";
// task_id returned by the conversion request.
$task_id = "1dc424d29a114627947b73c8732ee6b5";
$date_gmt = gmdate('D, d M Y H:i:s T'); // with gmdate(), T is GMT/UTC
$content_type = "application/json";
// Encode the path segments so an unexpected character cannot alter the URI.
$uri_path = "/api/developer/v1/tasks/convert/to/" . rawurlencode($office_type) . "/" . rawurlencode($task_id);
// This GET request has no body; the Content-Md5 value is the MD5 of the URI path.
$content_md5 = md5($uri_path);
// Signature: sha1(app_key . Content-Md5 . Content-Type . Date).
$sign_string = $app_key . $content_md5 . $content_type . $date_gmt;
$signature = sha1($sign_string);
$authorization = "WPS-2:{$app_id}:{$signature}";
// Full request URL.
$url = $api_host . $uri_path;
// Build the header list; the signed values must match these headers exactly.
$headers = [
    "Date: {$date_gmt}",
    "Content-Md5: {$content_md5}",
    "Content-Type: {$content_type}",
    "Authorization: {$authorization}",
];
// Initialise cURL.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the response instead of printing it
curl_setopt($ch, CURLOPT_HTTPGET, true); // explicit GET request
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// Bound the wait so an unresponsive server cannot hang the script forever.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
// Execute the request; read status and error before closing the handle.
$response = curl_exec($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$curl_error = curl_error($ch);
curl_close($ch);
// Everything below is written into an HTML page, so remote/variable text is escaped.
$html_flags = ENT_QUOTES | ENT_SUBSTITUTE;
if ($response === false) {
    echo "cURL 错误: " . htmlspecialchars($curl_error, $html_flags, 'UTF-8') . "<hr>";
} else {
    echo "HTTP 状态码: " . $http_code . "<hr>";
    echo "响应内容:<hr>";
    // Pretty-print the response when it is valid JSON, otherwise show it raw.
    $decoded_response = json_decode($response, true);
    if (json_last_error() === JSON_ERROR_NONE) {
        echo "<pre>" . htmlspecialchars(print_r($decoded_response, true), $html_flags, 'UTF-8') . "</pre>";
    } else {
        echo htmlspecialchars($response, $html_flags, 'UTF-8') . "<hr>";
    }
}
// Extra debugging information; <hr> separators keep the lines apart in HTML output.
echo "<hr>--- 调试信息 ---<hr>";
echo "URI Path (用于 MD5): " . htmlspecialchars($uri_path, $html_flags, 'UTF-8') . "<hr>";
echo "Date Header: {$date_gmt}<hr>";
echo "Content-Md5 Header (URI MD5): {$content_md5}<hr>";
// The sign string starts with the secret key, so mask that part before printing.
$masked_sign_string = str_repeat('*', strlen($app_key)) . $content_md5 . $content_type . $date_gmt;
echo "Sign String: {$masked_sign_string}<hr>";
echo "Authorization Header: {$authorization}<hr>";
?>
API收费标准
价格好像也不便宜
评论 (0)