Khanh Hoang - Kenn
Kenn is a user experience designer and front end developer who enjoys creating beautiful and usable web and mobile experiences.
This has been an issue that has bothered me for a while. I finally found a solution that worked and doesn’t kill your server in the process.
This is what I’m running for this test:
OS: CentOS release 5.2 (Final)
PHP: PHP 5.2.8
Openoffice 1.2.3
Firstly I installed several programs using yum. You will need to use DAG’s repo:
rpm -Uhv http://apt.sw.be/redhat/el5/en/x86_64/rpmforge/RPMS//rpmforge-release-0.3.6-1.el5.rf.x86_64.rpm
yum install unoconv openoffice.org-headless openoffice.org-writer
unoconv is a handy tool that can run as a daemon and talk to the OpenOffice binary via the command line.
In order to run the commands via apache you need to change the apache home directory and make it writable.
mkdir /home/apache
chown apache:apache /home/apache
usermod -d /home/apache apache
chmod 755 /home/apache
Now the apache user can create the hidden .openoffice.org2.0 directory.
With the setup out of the way, we need to start the OpenOffice daemon.
I did this as root but you could start this as apache.
unoconv --listener &
This basically creates the following daemon:
soffice.bin -nologo -nodefault -accept=socket,host=localhost,port=2002;urp;StarOffice.ComponentContext
You can now send requests to port 2002 using unoconv
/usr/bin/unoconv --server localhost --port 2002 --stdout -f pdf input.doc
This will output the PDF file to the stdout.
Here is a CakePHP component that I wrote to talk to unoconv. Please note this is very alpha and has only had a small amount of testing, but it works. If you want to use it, you must create these directories in your Cake install:
TMP . 'filegenerator/' (the base directory of TMP_FOLDER)
ROOT . '/uploads/generatedpdfs/'
ROOT . '/uploads/docfiles/'
It can be used via a form upload
// Hand the uploaded file description from the form to the component.
$this->Filegenerator = new FilegeneratorComponent($this->params["form"]['uploaddocfile']);

// `new` always yields an object (a constructor's return value is discarded),
// so testing the component itself can never detect a rejected upload.
// The convert methods return null when the upload was rejected, so the
// result of the first conversion is used as the success check instead.

// returns the text version of the doc, or null if the upload was rejected
$text = $this->Filegenerator->convertDocToTxt();

if ($text !== null) {
    // returns the html version of the doc
    $html = $this->Filegenerator->convertDocToHtml();

    // returns the file name of the generated pdf, saved in the $doc_id sub-folder
    $pdf = $this->Filegenerator->convertDocToPdf($doc_id);
}
The component called filegenerator.php
<?php
/**
 * Converts an uploaded Word document to PDF, plain text or HTML by sending it
 * to an unoconv listener (localhost:2002) through the unoconv command line tool.
 *
 * The conversion methods terminate the script with die() when a conversion
 * step fails; they return null when the constructor rejected the upload.
 *
 * @author jamiescott.net
 */
class FilegeneratorComponent extends Object {

    // upload MIME types that may be converted, mapped to their file extension
    private $allowable_files = array('application/msword' => 'doc');

    // true once the constructor validated the upload and created the working directory
    private $pass = false;

    // the upload description handed to the constructor
    private $fileinfo;

    // working directory of THIS instance (with trailing slash); kept per instance
    // because a constant can only ever hold the folder of the first instance
    private $tmp_folder;

    /**
     * Validates the upload and prepares a private working directory.
     *
     * @param array $fileinfo
     * Expected :
     * (
     *     [name] => test.doc
     *     [type] => application/msword
     *     [tmp_name] => /Applications/MAMP/tmp/php/php09PYNO
     *     [error] => 0
     *     [size] => 79360
     * )
     *
     * @return boolean the validation result; note that `new` discards this
     *                 value, so use isReady() after instantiation
     */
    function __construct($fileinfo) {
        // folder to process all the files etc
        $this->tmp_folder = TMP . 'filegenerator/' . $this->generatefoldername() . '/';

        // The constants are still published for backward compatibility, but are
        // only defined once so a second instance does not raise notices.
        $constants = array(
            // working folder of the first instance created in this request
            'TMP_FOLDER' => $this->tmp_folder,
            // where unoconv is installed
            'UNOCONV_PATH' => '/usr/bin/unoconv',
            // where to store pdf files
            'PDFSTORE' => ROOT . '/uploads/generatedpdfs/',
            // where to store doc files
            'DOCSTORE' => ROOT . '/uploads/docfiles/',
            // apache home dir
            'APACHEHOME' => '/home/apache',
        );
        foreach ($constants as $name => $value) {
            if (!defined($name)) {
                define($name, $value);
            }
        }

        // set some shell environment vars
        putenv('HOME=' . APACHEHOME);
        putenv('PWD=' . APACHEHOME);

        // the file info must be complete, the tmp file must be there, the file
        // type must be allowed and the working directory must be creatable
        $this->pass = is_array($fileinfo)
            && isset($fileinfo['tmp_name'], $fileinfo['type'])
            && is_string($fileinfo['tmp_name'])
            && is_string($fileinfo['type'])
            && file_exists($fileinfo['tmp_name'])
            && array_key_exists($fileinfo['type'], $this->allowable_files)
            && $this->createtmp();

        if ($this->pass) {
            $this->fileinfo = $fileinfo;
        }

        return $this->pass;
    }

    /**
     * Tells whether the constructor accepted the upload.
     *
     * @return boolean true when the convert methods can be used
     */
    function isReady() {
        return $this->pass;
    }

    /**
     * Takes the file set in the constructor and turns it into a pdf.
     * Stores it in PDFSTORE (inside $foldername when given) under a random
     * name and returns that file name.
     *
     * @param string|boolean $foldername sub-folder of PDFSTORE, false for PDFSTORE itself
     * @return string|null name of the generated pdf, null if the upload was rejected
     */
    function convertDocToPdf($foldername = false) {
        if (!$this->pass) {
            return null;
        }

        // generate a random name
        $output_pdf_name = $this->generatefoldername() . '.pdf';

        $pdf = $this->_runUnoconv('pdf');

        // a real pdf announces itself on its first line
        if (stripos($this->_firstLine($pdf), '%PDF') === false) {
            die('Error Generating the PDF file');
        }

        // PDFSTORE already ends with a slash; false casts to an empty string
        $target_dir = PDFSTORE;
        $sub_folder = rtrim((string) $foldername, '/');
        if ($sub_folder !== '') {
            $target_dir .= $sub_folder . '/';
        }

        if (!file_exists($target_dir)) {
            mkdir($target_dir);
        }

        if (!$this->_createandsave($pdf, $target_dir, $output_pdf_name)) {
            die('Error Saving The PDF');
        }

        return $output_pdf_name;
    }

    /**
     * Return a text version of the doc.
     *
     * @return string|null the text, null if the upload was rejected
     */
    function convertDocToTxt() {
        if (!$this->pass) {
            return null;
        }

        $txt = $this->_runUnoconv('txt');

        // guess that if there are fewer characters than this it is probably an error
        if (strlen($txt) < 10) {
            die('Error Generating the TXT');
        }

        return $txt;
    }

    /**
     * Convert the doc to html and return the html.
     *
     * @return string|null the html, null if the upload was rejected
     */
    function convertDocToHtml() {
        if (!$this->pass) {
            return null;
        }

        $html = $this->_runUnoconv('html');

        // the first line of the output is expected to mention HTML
        if (stripos($this->_firstLine($html), 'HTML') === false) {
            die('Error Generating the HTML');
        }

        return $html;
    }

    /**
     * Copies the upload into the working directory and converts it with unoconv.
     *
     * @param string $format target format passed to unoconv's -f option
     * @return string whatever unoconv wrote to stdout ('' when it wrote nothing)
     */
    private function _runUnoconv($format) {
        $input = $this->tmp_folder . 'input.doc';

        // move it to the tmp folder for processing
        if (!copy($this->fileinfo['tmp_name'], $input)) {
            die('Error copying the doc file');
        }

        // arguments are escaped so a path with spaces or shell characters
        // cannot break (or extend) the command
        $run = UNOCONV_PATH
            . ' --server localhost --port 2002 --stdout -f ' . escapeshellarg($format)
            . ' ' . escapeshellarg($input);

        // shell_exec() yields null when there is no output
        return (string) shell_exec($run);
    }

    /**
     * Returns the text before the first line break, or everything when there is none.
     *
     * @param string $data
     * @return string
     */
    private function _firstLine($data) {
        $end_of_line = strpos($data, "\n");

        return $end_of_line === false ? $data : substr($data, 0, $end_of_line);
    }

    /**
     * Create file and store data.
     *
     * @param string $data content to write
     * @param string $location directory to write into (with trailing slash)
     * @param string $file file name inside $location
     * @return boolean true when the data was written
     */
    function _createandsave($data, $location, $file) {
        if (!is_writable($location)) {
            trigger_error("The file $location$file is not writable");
            return false;
        }

        // 'w' truncates an existing file and creates a missing one
        $handle = fopen($location . $file, 'w');
        if (!$handle) {
            trigger_error("Cannot open file ($location$file)");
            return false;
        }

        $written = fwrite($handle, $data);

        // close before reporting so the handle is released on failure too
        fclose($handle);

        if ($written === false) {
            trigger_error("Cannot write to file ($location$file)");
            return false;
        }

        return true;
    }

    /**
     * Removes the working directory of this instance, if it was created.
     */
    function __destruct() {
        if (is_string($this->tmp_folder) && strlen($this->tmp_folder) > 4 && file_exists($this->tmp_folder)) {
            $this->removetmp();
        }
    }

    /**
     * Create the tmp directory to hold and process the files.
     *
     * @return boolean true when the directory was created
     */
    function createtmp() {
        if (!is_writable(TMP)) {
            return false;
        }

        // recursive, so a missing TMP . 'filegenerator/' parent is created as well
        return mkdir($this->tmp_folder, 0777, true);
    }

    /**
     * Delete the tmp dir.
     *
     * @return boolean true when the directory was removed
     */
    function removetmp() {
        // the length check keeps an empty or root-like path from ever being wiped
        if (strlen($this->tmp_folder) > 3 && file_exists($this->tmp_folder)) {
            return $this->recursive_remove_directory($this->tmp_folder);
        }

        return false;
    }

    /**
     * Return a random 32 character hex string for folder and file names.
     *
     * @return string
     */
    function generatefoldername() {
        // uniqid with a random prefix and extra entropy, so two calls within
        // the same microsecond do not produce the same name
        return md5(uniqid(mt_rand(), true));
    }

    /**
     * Recursively delete a directory, or only empty it.
     *
     * @param string $directory directory to remove
     * @param boolean $empty true to keep $directory itself and only delete its content
     * @return boolean false when $directory is not a readable directory or could not be removed
     */
    function recursive_remove_directory($directory, $empty = FALSE) {
        // if the path has a slash at the end we remove it here
        if (substr($directory, -1) == '/') {
            $directory = substr($directory, 0, -1);
        }

        // the path must be an existing, readable directory
        if (!file_exists($directory) || !is_dir($directory) || !is_readable($directory)) {
            return false;
        }

        $handle = opendir($directory);
        if ($handle === false) {
            return false;
        }

        while (false !== ($item = readdir($handle))) {
            // skip the current and the parent directory
            if ($item === '.' || $item === '..') {
                continue;
            }

            $path = $directory . '/' . $item;

            if (is_dir($path) && !is_link($path)) {
                // descend into real sub-directories only
                $this->recursive_remove_directory($path);
            } else {
                // files, and links (which are removed, never followed)
                unlink($path);
            }
        }

        closedir($handle);

        // unless only emptying was asked for, delete the now empty directory
        if (!$empty && !rmdir($directory)) {
            return false;
        }

        return true;
    }
}
?>
Bình luận (0)
Add Comment